
    :jzQ                     (   U d Z ddlZddlZddlmZmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#  ed      Z$da%ed   e&d<    ejN                         Z( G d d      Z)y)zH
Main regex engine interface for pattern analysis and query generation.
    N)ListOptional)ALLOWED_OPERATORSPOPULAR_LANGUAGESSIZE_RANGES)
get_logger)extract_github_query_pattern)handle_exceptions   )RefineEngineConfig)QueryGenerator)EnumerationOptimizer)RegexParser)CharClassSegmentFixedSegmentGroupSegmentOptionalSegment)SplittabilityAnalyzer)IEnumerationOptimizerIQueryGeneratorIRegexParserISplittabilityAnalyzerrefineRefineEngine	_instancec                   z   e Zd ZdZ	 	 	 	 	 d!dee   dee   dee   dee   dee	   f
dZ
ed"dee   d	d fd
       Zed#d       Zded	ee   fdZded	efdZdeded	ee   fdZdeded	ee   fdZded	ee   fdZ edddd      ded	efd       Zd$deded	eeef   fdZd%deded	efdZdedee   d	dfd Zy)&r   z,Main interface for regex pattern processing.Nconfigparser	optimizer	generatorsplittabilityc                    |
t               }|| _        |xs t        |j                        | _        |xs t        |j                        | _        |xs t        |j                        | _
        |xsM t        |j                  |j                  |j                  |j                  |j                   |j"                        | _        y )N)enable_recursion_limitenable_value_thresholdenable_resource_limitmax_recursion_depthmin_enumeration_valuemax_resource_cost)r   r   r   max_quantifier_lengthr   r   max_queriesr   r   	max_depthr    r   r#   r$   r%   r&   r'   r(   r!   )selfr   r   r   r    r!   s         4/root/.openclaw/workspace/harvester/refine/engine.py__init__zRefineEngine.__init__&   s     >')F IF,H,H I"N&:6;M;M&N"FnV5E5E&F* 
.C#)#@#@#)#@#@"(">"> & : :"(">">$66/
    returnc                     t         8t        5  t          | |      a t        j                  d       ddd       t         S |t        j	                  d       t         S # 1 sw Y   t         S xY w)z#Get thread-safe singleton instance.Nz'RefineEngine singleton instance createdz:RefineEngine already initialized, config parameter ignored)r   _lockloggerdebugwarning)clsr   s     r-   get_instancezRefineEngine.get_instance@   sg      L$ #FILL!JKL  NNWXL s   $AA*c                 h    t         5  dat        j                  d       ddd       y# 1 sw Y   yxY w)z.Reset singleton instance (mainly for testing).Nz%RefineEngine singleton instance reset)r2   r   r3   r4   )r6   s    r-   reset_instancezRefineEngine.reset_instanceM   s2      	BILL@A	B 	B 	Bs   (1queryc                    |sy|j                  d      r|j                  d      r|dd S t        j                  d|      r|S t	        t        j
                  d|            }|rt        |d       }|j                  d      S t	        t        j
                  d	|            }|rt        |d
       }|j                  d      S y)a  
        Extract regex pattern from GitHub search format, handling escaped slashes.

        Args:
            query: Query string in format "/pattern/" or "/pattern/ other terms" or "terms AND /pattern/"

        Returns:
            str: Extracted pattern or None if not found
        N/r   z[\[\]{}+*?\\]z/([^/]*[\[\]{}+*?\\][^/]*)/c                 6    t        | j                  d            S Nr   lengroupms    r-   <lambda>z5RefineEngine._extract_regex_pattern.<locals>.<lambda>p       #aggaj/ r/   )keyz	/([^/]+)/c                 6    t        | j                  d            S r?   r@   rC   s    r-   rE   z5RefineEngine._extract_regex_pattern.<locals>.<lambda>w   rF   r/   )
startswithendswithresearchlistfinditermaxrB   )r,   r:   patternslongests       r-   _extract_regex_patternz#RefineEngine._extract_regex_patternU   s      C U^^C%82; 99%u-L $BEJK((ABG==## L%89((ABG==##r/   c           
      V   |sy| j                  |      }|sy	 | j                  j                  |      }fd |      }t        |      dkD  }t        j                  d| d| dt        |       d       |S # t        $ r%}t        j                  d| d	|        Y d
}~yd
}~ww xY w)z2Check if query contains enumerable regex patterns.Fc                     g }| D ]\  }t        |t              r|j                  |       %t        |t        t        f      s<|j                   |j                               ^ |S N)
isinstancer   appendr   r   extendcontent)segs	variablessegfind_variable_segmentss      r-   r]   z8RefineEngine.has_pattern.<locals>.find_variable_segments   sa    	 NC!#'78!((-#C,)HI!(()?)LM	N
 ! r/   r   zPattern check: 'z' -> z (z variables)zPattern check failed for '': N)rR   r   parserA   r3   r4   	Exceptionr5   )r,   r:   patternsegmentsvariable_segmentsresulter]   s          @r-   has_patternzRefineEngine.has_pattern|   s     --e4	{{((1H! !7x @*+a/FLL+G9E&CHYDZC[[fghM 	NN7yA3GH	s   AA: :	B(B##B(
partitionsc                    |dk  s|st         j                  d| d|        g S | j                  ||      }|st         j                  d       g S t        |      |k\  r&t         j                  dt        |       d|        |S t	               }|D ]F  }| j                  |      }|st         j                  d       ,|D ]  }|s|j                  |        H t        |      }t         j                  dt        |       d	| d
|        |S )z-Generate refined queries from a query string.r   zInvalid partitions=z or query: z+No queries generated from divide with regexzAlready have enough queries: z >= z.No queries generated from divide with language
Generated z queries from query: z, partitions: )r3   r4   _dividerA   set_divide_with_languageaddrM   )	r,   r:   rg   queries
candidatesitemresultsrd   
conditionss	            r-   generate_querieszRefineEngine.generate_queries   s   ?%LL.zl+eWMNI ,,uj1LLFGIw<:%LL8Wd:,WXNU
  	+D006GMN! +NN6*+	+ *%
z#j/!22Gwn]g\hijr/   c                    |dk  s|sg S | j                  |      }|s|gS 	 | j                  j                  |      }| j                  j	                  ||      \  }}|st
        j                  d|        |gS | j                  j                  ||      \  }}|r7t
        j                  d| d       | j                  j                  |||      }	nBt
        j                  d| d|j                   d       | j                  j                  ||      }	g }
|	D ]b  }|j                  d      r,|j                  d      r|j                  d| dd| d      }n|j                  ||      }|
j                  |       d t
        j                  d	t!        |
       d
| d|        |
S # t"        $ r(}t
        j%                  d| d|        |gcY d}~S d}~ww xY w)at  
        Split a broad regular expression query into multiple regular expressions that match smaller ranges

        Args:
            query: Query string in format "/pattern/" or "/pattern/ other terms" or "terms AND /pattern/"
            partitions: Number of partitions to divide the query into

        Returns:
            List[str]: List of refined queries
        r   z!Pattern cannot be split further: zFound suitable strategy for z, partitions, using minimum enumeration depthzNo strategy found for z2 partitions, using strategy with maximum queries ()r<   ri   z queries from pattern: 'z', requested partitions: zQuery generation failed for 'r^   N)rR   r   r_   r!   	can_splitr3   infor   "evaluate_strategies_for_partitionsr    generatern   rI   rJ   replacerW   rA   r`   r5   )r,   r:   rg   ra   rb   enabledreasonstrategyfoundrefined_patternsrn   rp   textre   s                 r-   rj   zRefineEngine._divide   s    ?%I --e47N/	{{((1H #00::7HMOGV?xHI w #nnOOPXZdeOHe ::,Fvwx#'>>#:#:8Xz#Z  ,ZL 9<<D<L<L;MQP $(>>#:#:8X#N  G( %##C(U^^C-@ ==1WIQ1TF!ED !==$7Dt$% KKS\N*B7)Kdeodpq N 	NN:7)3qcJK7N	s%   AF$ 8D+F$ $	G-G
GGc                    |r|j                         nd}|st        j                  d       g S t               }t	        j
                  d|t        j                        s,t        D ]  }|j                  | d|         t        |      S t	        j
                  d|t        j                        s,t        D ]  }|j                  | d|         t        |      S t        j                  d       |j                  |       t        |      S )	zAGenerate refined queries with adaptive refinement language level. z)No query provided for language refinementz language:[a-zA-Z0-9#]+ )flagsz
 language:z size:[a-zA-Z0-9#=<>.]+ z size:zQCannot refine with language or sie refinement due to existing refinement criteria)stripr3   r4   rk   rK   matchIr   rm   r   rM   )r,   r:   basern   langsizes         r-   rl   z"RefineEngine._divide_with_language  s     %u{{}2LLDEI%xx3TF) 7tfJtf567 G} 5t244H# 3tfF4&123 G} LLlmKKG}r/   FzAnalysis failed)	parseableerrorr5   )default_result	log_levelra   c           
      N   | j                   j                  |      }| j                  j                  |      }fdt	        |       |t
               |t               |t               |t              t	        |j                        |j                  |j                  dd	}|S )z0Analyze pattern and return detailed information.c                     d}| D ]A  }t        ||      r|dz  }t        |t        t        f      s,| |j                  |      z  }C |S )Nr   r   )rV   r   r   rY   )rZ   seg_typecountr\   count_segments_recursives       r-   r   z>RefineEngine.analyze_pattern.<locals>.count_segments_recursive!  sX    E Mc8,QJElO%DE5ckk8LLE	M
 Lr/   T)	rb   fixed_segmentsrc   optional_segmentsgroup_segmentsenumeration_segmentsenumeration_valueestimated_queriesr   )r   r_   r   optimizerA   r   r   r   r   rb   valuern   )r,   ra   rb   r}   analysisr   s        @r-   analyze_patternzRefineEngine.analyze_pattern  s     ;;$$W->>**84	 H6xN!9(DT!U!9(O!T6xN$'(9(9$:!)!)!1!1

 r/   recursion_depthc                    t        |      }|sy	 | j                  j                  |      }| j                  j	                  |||      S # t
        $ r5}t        j                  d| d|        ddt        |       fcY d}~S d}~ww xY w)z<Check if a query can be split safely without infinite loops.)FzNo regex pattern found in queryz Splittability check failed for 'r^   FzAnalysis failed: N)	r	   r   r_   r!   rv   r`   r3   r5   str)r,   r:   r   ra   rb   re   s         r-   can_split_safelyzRefineEngine.can_split_safely8  s     /u5;	7{{((1H%%//?SS 	7NN=gYc!MN-c!fX666	7s   7A 	B*B;BB	separatorc                 ,   |sy|t         vrd}d| d}|j                  |      }g }|D ]S  }|j                         }|s||k(  r|j                  d      r#|j	                  d      r|j                  |       Qt        j                  d|      r|j                  |       y|j                  d      xr |j	                  d      }|r|dd }	 | j                  j                  |      }	g }
| j                  |	|
       g }|
D ]  }|r"|j                  d	d      j                  d
d      }||k(  st        |      dk  r;|j                  d      r|j	                  d      st        j                  d|      sd| d}|j                  |        |j                  |       w	 | j                  j                  |      }	g }
| j                  |	|
       g }|
D ]e  }||k(  st        |      dk  r|j                  d      r|j	                  d      st        j                  d|      sd| d}|j                  |       g |r|j                  |       n-t        j                  d|      sd| d}|j                  |       V |syt        |      dk(  r|d   S |j#                  |      S # t        $ r&}t        j!                  d| d|        Y d}~d}~ww xY w# t        $ r1 t        j                  d|      sd| d}|j                  |       Y w xY w)a`  
        Clean regex query by extracting fixed strings from regex patterns.

        Args:
            query: Input query string containing regex patterns
            separator: Separator to use ("AND", "OR", "NOT", "AND NOT"), defaults to "AND"

        Returns:
            str: Cleaned query with fixed strings extracted from regex patterns
        r   AND "z^[a-zA-Z]+:.*\S.*$r<   r   r=   \/\\\   z^[a-zA-Z]+:[\"\'"].*[\"\'"]$zFailed to parse regex pattern 'r^   Nr   )r   splitr   rI   rJ   rW   rK   r   r   r_   _extract_fixed_stringsrz   rA   rX   r`   r3   r5   join)r,   r:   r   	delimiterpartsrq   partis_regexra   rb   fixed	processedr   re   s                 r-   clean_regexzRefineEngine.clean_regexF  s.     --I 	{!$	 I& `	)D::<D y  s#c(:t$ xx-t4t$ s+Bc0BHq*##{{009H E//%@ !#I % /##'<<s#;#C#CFD#QD  9,D	A$ !% 4s9KUWU]U];TV &'tfA;D!((.#/( NN9- )#{{006HE//%@ "I % /9,D	A$ !% 4s9KUWU]U];TV &'tfA;D!((./ !y1  "xx(=tD%&tfA;Dt,u`	)F \Q1:>>'**] ! NN%DWISQRPS#TUD ! )88$94@!"4&{NN4(	)s,   CJ'!CK'	K0KK6LLrb   r   c                 6   g }d}|D ]V  }t        |t              rI|j                  s!|j                  dk(  rdndk(  rddv r|sC|j                  |       d}W|z  }]t        |t              r|r|j                  |       d}t        |j                        dk(  rat        |j                  d   t              rD|j                  d   j                  d	v rt        fd
dD              s|j                         | j                  |j                  |       t        |t              r|r|j                  |       d}@|sD|j                  |       d}Y |r|j                  |       |j                  |       |r|j                  |       yy)z
        Recursively extract fixed strings from segments.

        Args:
            segments: List of segments to process
            fixed: List to append found fixed strings to
        r   r   r<   r   r   ).*+?{}^$|(ru   []r   r   r   c              3   &   K   | ]  }|v  
 y wrU    ).0charrY   s     r-   	<genexpr>z6RefineEngine._extract_fixed_strings.<locals>.<genexpr>  s     1\d$'/1\s   z
[]{}*+?()\N)
rV   r   rY   rW   r   rA   anyr   r   rX   )r,   rb   r   mergedcurrentr\   rY   s         @r-   r   z#RefineEngine._extract_fixed_strings  sy     1	!C#|,;;!kkG%'"% F*"& "cc""MM'2&(G  7*C.MM'* G s{{#q(ZA-U!kk!n44Gg~c1\m1\.\  g. //VDC1MM'* G MM'* Gc1	!h MM'" 	VMM'" r/   )NNNNNrU   )r0   N)r   )r   )__name__
__module____qualname____doc__r   r   r   r   r   r   r.   classmethodr7   r9   r   rR   boolrf   intr   rs   rj   rl   r
   dictr   tupler   r   r   r   r/   r-   r   r   #   s   6 04)-59/3:>
+,
 &
 12	

 O,
   67
4 
(+="> 
. 
 
 B B%C %HSM %N  B c  s  tCy  DCS Cc Cd3i CJ3 49 , EDU&Vbkls t  m:7c 7C 7dTWiHX 7B+ B+ B+ B+HG#t G#DI G#$ G#r/   )*r   rK   	threadingtypingr   r   constant.searchr   r   r   tools.loggerr   tools.patternsr	   tools.utilsr
   r   r   r    r   r   r   r   r   segmentr   r   r   r   r!   r   typesr   r   r   r   r3   r   __annotations__Lockr2   r   r   r/   r-   <module>r      sx    
  ! M M # 7 ) & % +  R R 0  
H	&*	8N# *	n# n#r/   