
    :jjd                         d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZmZ ddlmZmZmZmZ ddlmZmZ  e	d	      Z G d
 de      Zy)z4
Enumeration strategy optimizer for regex patterns.
    N)DictListTuple)
get_logger   )CharClassSegmentEnumerationStrategyFixedSegmentGroupSegmentOptionalSegmentSegment)AggressiveStrategyBalancedStrategyConservativeStrategyGreedyStrategy)IEnumerationOptimizerIOptimizationStrategyrefinec                      e Zd ZdZd)dedefdZdeddfdZdeddfd	Z	d
e
e   defdZd
e
e   dedeeef   fdZd
e
e   de
e   fdZde
e   de
e   dedefdZde
e   dedefdZd
e
e   de
e
e      fdZde
e
e      de
e
e      fdZde
e   deeef   defdZdedefdZdedefdZde
e   defdZd
e
e   defdZdede
e   defd Zd!e
e   de
e   defd"Zdedefd#Z dedefd$Z!d%edefd&Z"d'ede#eef   fd(Z$y)*EnumerationOptimizerzLOptimize enumeration strategy for regex patterns using pluggable strategies.Nmax_queriesstrategyc                     || _         |xs t        |      | _        t        |      t        |      t	        |      t        |      d| _        y )N)greedybalancedconservative
aggressive)r   r   r   r   r   r   
strategies)selfr   r   s      7/root/.openclaw/workspace/harvester/refine/optimizer.py__init__zEnumerationOptimizer.__init__$   sG    & A$4[$A %[1(50=,[9	
    strategy_namereturnc                     || j                   v r.| j                   |   | _        t        j                  d| d       yt        j	                  d| d       y)z"Set optimization strategy by name.zSwitched to z optimization strategyzUnknown strategy: z, keeping current strategyN)r   r   loggerinfowarning)r   r#   s     r    set_strategyz!EnumerationOptimizer.set_strategy0   sK    DOO+ OOM:DMKK,}o5KLMNN/>XYZr"   c                 j    || _         t        j                  d|j                  j                          y)z!Set custom optimization strategy.z"Set custom optimization strategy: N)r   r&   r'   	__class____name__)r   r   s     r    set_custom_strategyz(EnumerationOptimizer.set_custom_strategy8   s*     89K9K9T9T8UVWr"   segmentsc                    |st        g |dd      S 	 | j                  |      }d}d}|D ]1  }| j                  |      }|j                  |kD  s$|}|j                  }3 ||j                  dk(  rfd |      }|r|D ]  }| j	                  ||      |_         t        |d d      }	|	d   }
| j                  |
      }|dkD  rt        |
j                        |z  nd}t        |
g||
j                  |      }t        j                  d	|j                  d
d|j                          |xs t        g |dd      S # t        $ r0}t        j                  d|        t        g |dd      cY d}~S d}~ww xY w)z"Find optimal enumeration strategy.        r   Nc                     g }| D ]\  }t        |t              r|j                  |       %t        |t        t        f      s<|j                   |j                               ^ |S N
isinstancer   appendr   r   extendcontentsegsvarsseg	find_varss      r    r<   z0EnumerationOptimizer.optimize.<locals>.find_varsS   s[    D# @%c+;< KK,'lO-LM KK	#++(>?	@
  Kr"   c                     | j                   S r2   )valuess    r    <lambda>z/EnumerationOptimizer.optimize.<locals>.<lambda>c   s
    QWW r"   T)keyreverser   z-Fallback strategy selected: 1 segment, value=.3f
, queries=zOptimization failed: )r	   _expand_variants_optimize_variantr>   _calculate_valuesorted _calculate_segment_optimal_depthlencharsetr&   r'   queries	Exceptionr(   )r   r.   variantsbest_strategy
best_valuevariantr   r:   segmentsorted_segsbest_segmentoptimal_depthrM   er<   s                 @r    optimizezEnumerationOptimizer.optimize=   s   &r8S!<<1	=,,X6H MJ $ 011':>>J.$,M!)J	0 $(;(;s(B  !*#' Q(,(=(=gx(PQ #)3Dd"SK#.q>L %)$I$I,$WMLY\]L]c,"6"67=HcdG$7R^RdRdfm$nMKKGH[H[\_G``jkx  lA  lA  kB  C !M$7Hc1$MM 	=NN21#67&r8S!<<	=s$   :D; C,D; ;	E4%E/)E4/E4
partitionsc                    |r|dk  rt        g |dd      dfS 	 | j                  |      }g }g }|D ]L  }| j                  |      }|j                  |       |D ]#  }|j                  |k\  s|j                  |       % N |rS|D 	cg c]  }	t        |	j                        dk(  s|	 }
}	|
r| j                  |
|      }n| j                  ||      }|dfS |rK|D 	cg c]  }	t        |	j                        dk(  s|	 }
}	|
rt        |
d       }nt        |d       }|dfS t        g |dd      dfS c c}	w c c}	w # t        $ r2}t        j                  d	|        t        g |dd      dfcY d
}~S d
}~ww xY w)z
        Evaluate strategies to find one that generates >= partitions queries.

        Returns:
            tuple: (best_strategy, found_suitable_strategy)
        r   r0   r   FTc                     | j                   S r2   rM   r?   s    r    rA   zIEnumerationOptimizer.evaluate_strategies_for_partitions.<locals>.<lambda>   s
    QYY r"   rB   c                     | j                   S r2   r\   r?   s    r    rA   zIEnumerationOptimizer.evaluate_strategies_for_partitions.<locals>.<lambda>   s
     r"   zStrategy evaluation failed: N)r	   rF   _generate_all_strategiesr6   rM   r5   rK   r.   _select_strategy_with_min_depthmaxrN   r&   r(   )r   r.   rY   rO   suitable
all_stratsrR   r   r   r@   singlebestrW   s                r    "evaluate_strategies_for_partitionsz7EnumerationOptimizer.evaluate_strategies_for_partitionsu   s    :?&r8S!<eCC)	D,,X6HHJ $ 2!::7C
!!*- !+ 2H'':5 122 %-FQZZA1E!FF??
SD??*UDTz! %/H3qzz?a3G!HHv+>?Dz/BCDU{"&r8S!<eCC# G I  	DNN9!=>&r8S!<eCC	DsZ   AD; *D; D1#D1',D; D; D68D6<$D; !D; 1
D; ;	E6'E1+E61E6c           	        	 	fd	 	|      }|st        g |dd      gS |D ]  }| j                  ||      |_         g }t        dt	        dt        |      dz               D ]k  }t        j                  ||      D ]P  }t        dd      D ]?  }| j                  t        |      ||      }|j                  dkD  s/|j                  |       A R m |S )z@Generate all possible enumeration strategies for given segments.c                     g }| D ]\  }t        |t              r|j                  |       %t        |t        t        f      s<|j                   |j                               ^ |S r2   r3   r8   s      r    r<   z@EnumerationOptimizer._generate_all_strategies.<locals>.find_vars   Y    D 8c#34KK$lO%DEKK	#++ 67	8
 Kr"   r0   r         r   )r	   rH   r>   rangeminrK   	itertoolscombinations_create_strategy_with_depthlistrM   r5   )
r   r.   r:   rS   r   
combo_sizecombodepthr   r<   s
            @r    r_   z-EnumerationOptimizer._generate_all_strategies   s    	 "'Hc1=>>  	EG 11'8DGM	E 
  3q#d)a-#89 	4J"//jA 4"1a[ 4E#??UXW\]H''!+"))(344	4 r"   segments_to_enumall_segmentsrt   c                    d}d}|D ]q  }t        |j                        }|dkD  rAt        ||j                        }|j                  dk(  r|}	n||z  }	||	z  }||j                  z  }|| j
                  dz  kD  sq n t        ||||      S )z0Create enumeration strategy with specific depth.r   r0   r   d   )rK   rL   rm   
max_lengthr>   r   r	   )
r   ru   rv   rt   total_queriestotal_valuerS   charset_sizeeffective_depthsegment_queriess
             r    rp   z0EnumerationOptimizer._create_strategy_with_depth   s     ' 	Gw/La #&eW-?-?"@ %%*&2O '3O&CO0w}}, t//#55)	, ##3\;P]^^r"   r   c                 0   dt         dt        fddt         dt        dt        fddt         dt        fddt         dt        ffd}|D cg c]  }|j                  k\  s| }}|rt        ||      S |r|d	   S t        g g d
d      S c c}w )zPSelect strategy with minimum enumeration depth that meets partition requirement.r   r$   c                    | j                   syd}| j                   D ]  }t        |j                        }|dkD  s| j                  dkD  s.| j                  dt        | j                         z  z  }|dkD  sYt	        j
                  |      t	        j
                  |      z  }||z  } | j                   r|t        | j                         z  S dS )Nr0   r         ?)r.   rK   rL   rM   mathlog)r   total_depthrS   r|   segment_contributionrt   s         r    calculate_effective_depthzWEnumerationOptimizer._select_strategy_with_min_depth.<locals>.calculate_effective_depth   s    $$K#,, -"7??3!#(8(81(<+3+;+;c(J[J[F\@\+](+a/ $)= >,AW W#u,- <D;L;L;X%6%6!77URUUr"   rt   c                 &   | j                   rt        | j                         dk7  ry| j                   d   }| j                  }|j                  }d}t	        |dz
  dd      D ]F  }t        ||   t              r|t        ||   j                        z  }2t        ||   t              rF n d}|j                  |z
  }|dk  rMt	        |dz   t        |            D ]2  }t        ||   t              r|t        ||   j                        z  }2 n ||z   |z   S )zRCalculate the length of fixed context that can be formed with enumeration segment.r   r   )
r.   rK   originalpositionrl   r4   r
   r7   r   
min_length)	r   rt   rS   r.   posbeforeiafter	remainings	            r    calc_context_lengthzQEnumerationOptimizer._select_strategy_with_min_depth.<locals>.calc_context_length  s   $$H,=,=(>!(C''*G((H""C F37B+ hqk<8c(1+"5"566F_= E**U2IA~sQwH6 A!(1+|<Xa[%8%8!99	 E>E))r"   c                 T    | j                   syt        d | j                   D              S )Nr   c              3   4   K   | ]  }|j                     y wr2   )r   ).0r;   s     r    	<genexpr>zdEnumerationOptimizer._select_strategy_with_min_depth.<locals>.calc_segment_length.<locals>.<genexpr>*  s     C#s~~Cs   )r.   sum)r   s    r    calc_segment_lengthzQEnumerationOptimizer._select_strategy_with_min_depth.<locals>.calc_segment_length'  s%    $$C1B1BCCCr"   c                 L    |       } |       }| j                   z
  } 
| t        dt        t        j                  |                        }d}| j
                  r!| j
                  D ]  }|t        |dd      z  } d}| j
                  r| j
                  d   j                  }t        |dz
  dd      D ]b  }	t        | j                  |	   t              r$t        | j                  |	   j                        } n!t        | j                  |	   t              rb n | || | ||fS )Nr   r0   r>   r   r   )rM   ra   intr   ceilr.   getattrr   rl   r4   r   r
   rK   r7   r   )r   rt   lengthexcesscontextsegment_valuerS   r   r   r   r   r   r   rY   s             r    
calc_scorezHEnumerationOptimizer._select_strategy_with_min_depth.<locals>.calc_score-  s-   -h7E(2F%%
2F)(C3tyy?O;P4QRG  M  '00 DG!WWgs%CCMD F  ''*33sQwB/ A!("3"3A"6E!$X%6%6q%9%A%A!B#H$5$5a$8/J  #NFWHvgufMMr"   r]   r   r0   r   )r	   floatr   tuplerM   rm   )	r   r   rY   r   r@   rb   r   r   r   s	     `   @@@r    r`   z4EnumerationOptimizer._select_strategy_with_min_depth   s    	V0C 	V 	V"	**= 	*c 	*c 	*B	D*= 	D# 	D	N!4 	N 	N 	N8  *E!QYY*-DAEExZ00 *z!}S0CBCQR0SS	 Fs   B)Bc           	         g g}|D ]  }g }|D ]  }t        |t              r>|j                  |j                                |j                  ||j                  z          Qt        |t
              r%|j                         }|j                  ||z          |j                  ||gz           |}t        |      dkD  s| j                  |      }t        j                  dt        |       dt        |       d        |S )z4Expand all possible variants from optional segments.  z
Optimized z variants to z most valuable ones)r4   r   r5   copyr7   r   flattenrK   _optimize_variantsr&   r'   )r   r.   rO   rS   new_variantsrR   	flatteneds          r    rF   z%EnumerationOptimizer._expand_variantsO  s    4 	mGL# =g7 ''7 '''//(AB6 ' 1I '')(;< !''7)(;<= $H 8}t#228<j\):(;=XWjkl-	m0 r"   rO   c           	      2    t        |      dk  r|S t        dt        dt        |      dz              }i dt        t           dt
        f fd}t        j                  |||      }t        j                  dt        |       d	t        |       d
       |S )z6Optimize variants by selecting the most valuable ones.r   rx      rR   r$   c                 (    j                  |       S r2   )_score_variant_cached)rR   segment_score_cacher   s    r    score_variantz>EnumerationOptimizer._optimize_variants.<locals>.score_varianty  s    --g7JKKr"   r]   z	Selected z highest-value variants from z using optimized selection)
rK   rm   ra   r   r   r   heapqnlargestr&   r'   )r   rO   target_sizer   
top_scoredr   s   `    @r    r   z'EnumerationOptimizer._optimize_variantsm  s    x=D O $CX!); <= 13	L4= 	LU 	L ^^K}M
iJ00McRZm_\vwxr"   rR   cachec                     d}|D ]8  }| j                  |      }||v r||   }n| j                  |      }|||<   ||z  }: |S )zICalculate variant score with segment-level caching for better performancer0   )_get_segment_cache_key_score_segment)r   rR   r   total_scorerS   	cache_keysegment_scores          r    r   z*EnumerationOptimizer._score_variant_cached  sd     
	)G33G<IE! %i 0 $ 3 3G <#0i =(K
	) r"   rS   c                 .   t        |t        t        t        f      r$t	        |      j
                   d|j                   S t        |t              r$t	        |      j
                   d|j                   S t	        |      j
                   dt        |       S )z&Generate cache key for segment scoring:)
r4   r
   r   r   typer,   r7   r   r>   strr   rS   s     r    r   z+EnumerationOptimizer._get_segment_cache_key  s    glOLM7m,,-Qw.?@@!127m,,-Qw}}o>>7m,,-Qs7|n==r"   c                     t        |t              rt        |j                        dz  S t        |t              ryt        |t
              ryt        |t              ryy)z&Calculate score for individual segment       @r   g      ?      ?)r4   r
   rK   r7   r   r   r   r   s     r    r   z#EnumerationOptimizer._score_segment  sU     g|,w'#-- g/0 g/ g|, r"   c                 @   d}d}|D ],  }t        |t              r|t        |j                        z  }, n ||dz  z  }|D ][  }t        |t              s|j
                  dkD  r||j
                  z  }3t        |j                        }|dkD  sN|dk  sT|d|z  z  }] |S )zACalculate score for a variant based on its enumeration potential.r0   r   r   rx   r   )r4   r
   rK   r7   r   r>   rL   )r   rR   scorefixed_lengthrS   r|   s         r    _score_variantz#EnumerationOptimizer._score_variant  s      	G'<0GOO 44		 	!!  	4G'#34==1$W]]*E $'w#7L#a'LC,?|!33	4 r"   c                     fd |      }|st        g |dd      S |D ]  }| j                  ||      |_         | j                  ||      S )z(Optimize enumeration for single variant.c                     g }| D ]\  }t        |t              r|j                  |       %t        |t        t        f      s<|j                   |j                               ^ |S r2   r3   r8   s      r    r<   z9EnumerationOptimizer._optimize_variant.<locals>.find_vars  ri   r"   r0   r   )r	   rH   r>   _select_combination)r   r.   r:   rS   r<   s       @r    rG   z&EnumerationOptimizer._optimize_variant  sd    	 "&r8S!<<  	EG 11'8DGM	E ''h77r"   c                    	 |j                   }d}t        |j                  dz   t        |            D ]'  }t	        ||   t
              s|t        ||         z  }) |j                  }t        j                  t        d|dz               }t        j                  t        d|dz               dz  }|dkD  rt        j                  |      }	nd}	||z   t        d|	      z  }
| j                  |      }|
|z  }t        j                  d| d| d| d	|d
d|d
       |S # t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)z(Calculate enumeration value for segment.r   r   333333?r   g?zSegment value: prefix=z	, suffix=z, combinations=z, priority=z.2fz, value=rD   zValue calculation failed: Nr0   )prefix_lengthrl   r   rK   r4   r
   ro   r   r   ra   _calculate_priority_factorr&   debugrN   r(   )r   rS   rv   r   suffix_lengthr   ro   prefix_weightsuffix_weightcost_weight
base_valuepriority_factorfinal_valuerW   s                 r    rH   z%EnumerationOptimizer._calculate_value  sd   &	#11M M7++a/\1BC :l1o|<!Sa%99M:
 #//L !HHSMA,=%>?M HHSMA,=%>?#EM a"hh|4!'-73sK;PPJ #==gFO$6KLL(y P  ,~[8MXVabeUfh
  	NN7s;<	s   AD 	CD 	E%EEr:   c           
      b   |st        g |dd      S | j                  j                  |      }|st        g |dd      S | j                  j                  |      \  }}t        j                  d| j                  j                  j                   dt        |       d|dd|        t        ||||      S )z;Select best enumeration combination using current strategy.r0   r   zSelected strategy (z): z segments, value=rD   rE   )	r	   r   select_segmentsevaluate_combinationr&   r'   r+   r,   rK   )r   r:   rv   selected_segmentsrz   r{   s         r    r   z(EnumerationOptimizer._select_combination  s    &r<a@@ !MM99$? &r<a@@ &*]]%G%GHY%Z"{!$--"9"9"B"B!C3sK\G]F^ _ %Z@	

 ##4lKQ^__r"   c                 \   d}|j                         r|dz  }nj|j                         r0|j                  dk\  r|dz  }nE|j                  dk\  r|dz  }n0|dz  }n*|j                         r|j                  dk\  r|dz  }n|d	z  }|j                  dk\  r|d	z  }n|j                  dk\  r|d
z  }|j	                         r|dz  }n|dz  }t        |j                        }d|cxk  rdk  r
n n|dz  }|S d|cxk  rdk  r
n n|d	z  }|S d|cxk  rdk  r
n n|d
z  }|S |dk  r|dz  }|S |dz  }|S )z?Calculate priority multiplier based on segment characteristics.r   g      @   g      @   g      @g      @r   r   g333333?g?2   F      
   g?r   )	has_rangeis_specificr   has_minis_positive_classrK   rL   )r   rS   factorr|   s       r    r   z/EnumerationOptimizer._calculate_priority_factor%  sr    cMF  "!!R'###q(##__!!Q&## #cMF1$cMF $$&cMF cMF 7??+##cMF  <$"$cMF  <$"$cMF  BcMF  cMFr"   c                 8    | j                   j                  |      S )z;Calculate optimal enumeration depth using current strategy.)r   calculate_depthr   s     r    rJ   z5EnumerationOptimizer._calculate_segment_optimal_depthX  s    }},,W55r"   query_countc                 b    |dk  ry|| j                   k  xs || j                   dz  k  xr |dk  S )z7Check if a strategy with given query count is feasible.r   Fr   iP  )r   )r   r   s     r    _is_strategy_feasiblez*EnumerationOptimizer._is_strategy_feasible\  sF     ! d... 
4++b00I[E5I	
r"   rs   c                 J    | j                   j                  t        |            S )z:Evaluate a combination of segments using current strategy.)r   r   rq   )r   rs   s     r    _evaluate_combinationz*EnumerationOptimizer._evaluate_combinationh  s    }}11$u+>>r"   )i N)%r,   
__module____qualname____doc__r   r   r!   r   r)   r-   r   r   r	   rX   r   boolrf   r_   r   rp   r`   rF   r   r   r   r   r   r   r   rG   rH   r   r   rJ   r   r   r    r"   r    r   r   !   ss   V

C 

?T 

[# [$ [X,A Xd X
6=g 6=3F 6=p5DW5D365D	"D(	)5Dn"g "4H[C\ "H_ $%5 6_FJ7m_\__	_>^T23^TAD^T	^T@g 4W;N <4W+> 4WCV *T'] 4U
CS X] $>g ># >g % *d7m  68$w- 8<O 82((8 (W (Z_ (T`-=(> `dSZm ``s `*12B 1u 1f68H 6S 6

 

 

?5 ?U3:5F ?r"   r   )r   r   rn   r   typingr   r   r   tools.loggerr   rS   r   r	   r
   r   r   r   r   r   r   r   r   typesr   r   r&   r   r   r"   r    <module>r      sQ       $ $ #   @	H	I	?0 I	?r"   