
    :jcs                        U d Z ddlZddlZddlZddlZddlZddlZddlZddlZ	ddl
mZmZmZmZmZmZ ddlZddlmZ ddlmZ ddlmZmZmZ  ed      Z ej6                         adt8        _        d	adJd
edej6                  fdZ dejB                  jD                  de#fdZ$dejB                  jD                  defdZ%d
ee   ddfdZ&dKdedede'dedejP                  f
dZ)ddl*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z; ddlm<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZD ddlEmFZF ddlmGZG  G d  d!      ZHdaIeeH   eJd"<   d#eee<f   ddfd$ZKdeHfd%ZLdeeeee'f   f   fd&ZMdLd'ZNeF	 	 	 	 	 dMded(ee   d)ee   d*e#d+e'de'defd,       ZO	 dNded(ed-ed)ee   d*e#de#dee#ef   fd.ZPd/ed0ed1e#defd2ZQd3e+fd/ed4ed1e#d5e#dee   f
d6ZR	 	 dOd/ed0ed1e#d7eeee   egdf      deee   e#ef   f
d8ZSd3e+fd/ed4ed1e#d5e#deee   e#ef   f
d9ZT	 dPd/ed0ed1e#d:eUd5e#d7eeee   egdf      deee   e#ef   fd;ZV eGdd<      d/ed4ede#fd=       ZWdPd/ed0ed>ee   de#fd?ZXd>ed/ede#fd@ZY	 dPd/ed0ed1e#d:eUd5e#d7eeee   egdf      deee   ef   fdAZZ eGg d<      	 	 	 	 	 	 dQdBeded*e#dCedDedEedFee   dee   fdG       Z[ eGg d<      dFedHedee   fdI       Z\y)RzS
HTTP client utilities and GitHub-specific search functions for the search engine.
    N)AnyCallableDictListOptionalTuple)Service)
get_logger)encoding_urlisblanktrimsearchF proxyreturnc                 |    t        j                         }d|_        | r|j                  j	                  | | d       |S )NF)httphttps)requestsSession	trust_envproxiesupdate)r   sessions     4/root/.openclaw/workspace/harvester/search/client.py_new_sessionr      s7     GG>?N    errorc                 <    t        | dd       }||j                  S dS )Nresponser   )getattrstatus_code)r   r    s     r   http_error_statusr#   &   s&    uj$/H#+#78>Q>r   c                    t        | dd       }|t        |       S 	 |j                  j                  d      j	                  d      }t        |      dkD  r|d d }|xs |j                  xs t        |       S # t        $ r d}Y *w xY w)Nr    
i,  r   )r!   strtextremoveprefixremovesuffixlen	Exceptionreason)r   r    messages      r   http_error_messager.   +   s    uj$/H5z--,,T2??Ew<#dsmG 3hoo3U3  s   =A6 6BBc           	          t        |       } | s"dat               at        j                  d       yt        j                  j                  |       }|j                  j                         }|dvrt        d      |j                  st        d      	 |j                   t        |       a| at        j                  d| d	|j                   d
|j                  xs d        y# t        $ r}t        d|       |d}~ww xY w)zIConfigure the process-wide requests session used by search HTTP requests.r   zHTTP proxy disabledN>   r   r   socks5z0proxy scheme must be one of: http, https, socks5zproxy must include a hostzinvalid proxy port: zHTTP proxy enabled: z://:)r   _HTTP_PROXYr   _HTTP_SESSIONloggerinfourllibparseurlparseschemelower
ValueErrorhostnameport)r   parsedr9   es       r   	set_proxyr@   :   s     KE$)*\\""5)F]]  "F00KLL??455< !'MK
KK&vhc&//1B!FKKDUSUCVWX  </s34!;<s   C   	C=)C88C=methodurltimeoutkwargsc           	      l    t        j                  d| |t        d|      d|}|j                          |S )z5Send a request through the configured global session.   )rA   rB   rC    )r3   requestmaxraise_for_status)rA   rB   rC   rD   r    s        r   rH   rH   W   s6    $$_FSG__X^_HOr   )API_RESULTS_PER_PAGEWEB_RESULTS_PER_PAGE)CHAT_RETRY_INTERVALCOLLECT_RETRY_INTERVALDEFAULT_HEADERSDEFAULT_QUESTIONGITHUB_API_INTERVALGITHUB_API_RATE_LIMIT_BACKOFFGITHUB_API_TIMEOUTGITHUB_WEB_COUNT_DELAY_MAXNO_RETRY_ERROR_CODESSERVICE_TYPE_GITHUB_APISERVICE_TYPE_GITHUB_WEB)NetworkErrorValidationError)RateLimitConfig)IAuthProvider)get_user_agent)RateLimiter)managed_network)network_retry)handle_exceptionsc                       e Zd ZdZddee   dee   fdZdefdZ	dedee   fd	Z
d
edefdZd
ededdfdZd
edededdfdZ	 	 	 	 	 ddedee   dee   dedededefdZy)GitHubClientzGGitHub-specific HTTP client with rate limiting and dependency injectionNlimiterresource_providerc                      || _         || _        y)zInitialize GitHub client

        Args:
            limiter: Rate limiter for request throttling
            resource_provider: Resource provider for credentials and user agents
        N)rc   rd   )selfrc   rd   s      r   __init__zGitHubClient.__init__y   s     !2r   r   c                 b    | j                   r| j                   j                         S t               S )zzGet User-Agent string using dependency injection or fallback

        Returns:
            str: User-Agent string
        )rd   r\   )rf   s    r   _get_user_agentzGitHubClient._get_user_agent   s-     !!))88:: "##r   rB   c                 R    |sy|j                         }d|v rt        S d|v rt        S y)zDetect service type from URLNzapi.github.comz
github.com)r:   rV   rW   )rf   rB   	url_lowers      r   _servicezGitHubClient._service   s2    IIK	y(**Y&**r   servicec                    | j                   r|sy| j                   j                  |      ry| j                   j                  |      }|dkD  rz| j                   j                  |      }|r|j                  nd}t
        j                  d| d|dd|        t        j                  |       | j                   j                  |      S y)	z7Apply rate limiting, return True if request can proceedTr   unknownzRate limit hit for z
, waiting .2fzs, max: F)	rc   acquire	wait_time_get_bucketburstr4   r5   timesleep)rf   rm   waitbucket	max_values        r   _limitzGitHubClient._limit   s    ||7 <<( ||%%g.!8\\--g6F(.IIKK-gYjc
(S\R]^_JJt<<''00r   successc                 \    | j                   r |r| j                   j                  ||       yyy)z-Report request result for adaptive adjustmentN)rc   report_result)rf   rm   r{   s      r   _reportzGitHubClient._report   s&    <<GLL&&w8 $<r   statusr-   c                     |dk(  rL|t         k(  rBd|j                         v r/t        j                  d       t	        j
                  t               yyyy)zHandle GitHub-specific errors  z
rate limitz+GitHub API rate limit exceeded, backing offN)rV   r:   r4   r5   ru   rv   rR   )rf   rm   r   r-   s       r   _handle_errorzGitHubClient._handle_error   sF    S=W(??w}}.IJ

89 / @=r   headersparamsretriesintervalrC   c                     | j                  |      }|r*| j                  |      st        j                  d|        yt	        ||||||      }t        |      }	| j                  ||	       |S )z,Make rate-limited HTTP GET request to GitHubz"Rate limit acquisition failed for r   )rl   rz   r4   debughttp_getboolr~   )
rf   rB   r   r   r   r   rC   rm   resultr{   s
             r   getzGitHubClient.get   sm     --$ 4;;w/LL=gYGH #w7Kv, 	Wg&r   )NN)NN   r   
   )__name__
__module____qualname____doc__r   r]   r[   rg   r&   ri   rl   r   rz   r~   intr   r   floatr   rG   r   r   rb   rb   v   s    Q3 5 3QYZgQh 3
$ 
$C HSM c d (9s 9T 9d 9
:S :# : : : #'!% $ 	
    
r   rb   _github_clientlimitsc                 Z    t        |       }t        |      at        j	                  d       y)z*Initialize GitHub client with rate limiterz,GitHub client initialized with rate limitingN)r]   rb   r   r4   r5   )r   rc   s     r   init_github_clientr      s%     &!G!'*N
KK>?r   c                  .    t         s
t               S t         S )zGet GitHub client instance)r   rb   rG   r   r   get_github_clientr      s    ~r   c            
      v   t         rt         j                  rt         j                  j                         } i }| j                  j	                         D ]c  \  }}|j
                  |j                  |j                  |j                  |j                  |j                  |j                  |j                  d||<   e |S i S )zGet rate limiter statistics)ratert   tokensutilizationconsecutive_successconsecutive_failuresadaptiveoriginal_rate)r   rc   	get_statsservicesitemsr   rt   r   r   r   r   r   r   )statsr   rm   bucket_statss       r   get_github_statsr      s    .00&&002%*^^%9%9%; 
	!G\$))%++&--+77'3'G'G(4(I(I(11!-!;!;	F7O
	 Ir   c                  P   t         rt         j                  syt         j                  j                         } | j                  j	                         D ]T  \  }}t
        j                  d| d|j                  dd|j                  dd|j                   d|j                  d	
       V y)
z#Log current rate limiter statisticsNzRate limiter [z]: rate=rp   z/s, tokens=z.1f/z, utilization=z.1%)r   rc   r   r   r   r4   r5   r   r   rt   r   )r   rm   r   s      r   log_github_statsr      s    !7!7"",,.E!&!5!5!7 
WIXl.?.?-D E"))#.a0B0B/C D'33C8:	

r   r   r   r   r   c                    t        |       rt        dd      |xs t        j                         }t	        d|      }	 t        |       }|r@t        |t              r0t        j                  j                  |      }d|v rdnd}|| | z  }t        t        d|||      d	      5 }	|	j                  }
|	j                  }|d
k7  rt        d| d|        	 |
j!                  d      cddd       S # t"        $ rI 	 t%        j&                  |
      j!                  d      cY cddd       S # t(        $ r t        d      w xY ww xY w# 1 sw Y   yxY w# t*        j,                  j.                  $ r}t1        |      }t3        |      }|dk(  rt5        d| d      |dk(  rt7        d| d|        |dv rt        d| d      |dk\  rt5        d| d|       t        d| d|       d}~wt*        j,                  j8                  $ r}t;        d|       d}~wt*        j,                  j<                  $ r}t5        d|       d}~wt(        $ r<}dt?        |      jA                         v rt;        d|       t        d|       d}~ww xY w) a  HTTP GET request with configurable retry handling

    Args:
        url: URL to request
        headers: HTTP headers
        params: URL parameters
        retries: Number of retry attempts (default: 3, minimum: 1)
        interval: Initial delay between retries in seconds (default: 1.0, minimum: 0.1)
        timeout: Request timeout in seconds

    Returns:
        str: Response content

    Raises:
        ValidationError: For invalid input
        NetworkError: For network-related issues
        FileNotFoundError: For access resource not exists
        ConnectionError: For connection failures (will be retried)
        TimeoutError: For timeout errors (will be retried)

    Note:
        The @network_retry decorator automatically extracts retries and interval
        parameters to configure retry behavior dynamically. Retry logic uses
        exponential backoff with jitter for optimal performance.
    zURL cannot be emptyrB   )fieldrF   ?&GET)r   rC   http_connection   zHTTP z error for URL: zutf-8Nz!Failed to decode response contenti  zRate limit exceeded (HTTP )i  zFile not found (HTTP z), url: )  r   zAuthentication failed (HTTP i  zServer error (HTTP z): z error: zRequest timeout: zRequest error: rC   zUnexpected error: )!r   rY   rO   copyrI   r   
isinstancedictr6   r7   	urlencoder^   rH   contentr"   rX   decodeUnicodeDecodeErrorgzip
decompressr+   r   
exceptions	HTTPErrorr#   r.   ConnectionErrorFileNotFoundErrorTimeoutTimeoutErrorRequestExceptionr&   r:   )rB   r   r   r   r   rC   encoded_urldata	separatorr    r   r"   r?   coder,   s                  r   r   r     s   H s|35AA /--/G!WoG<9"3'j.<<))&1D"k1sIi[//KE;IK\
 	L&&G"..Kc!"U;-7Gu#MNNL~~g.	L 	L & LL??73::7CC	L 	L  ! L&'JKKLL	L 	L& (( ? ##A&3;!$>tfA"FGGS[#&;D6#$OPPZ!=dV1EFFS[!$7vS"IJJ tfHVH=>>&& 4.qc233// 5s344 9A&!21#677 !3A37889s   A&E $/EC.$	E .	E 8#D'E E	E 'D<<E  EEE E J
,A:G&& J
H J
4IJ
7JJ
modelc           	          ddt         dt        dt        ddf fd}t               t        |      c } st        j                  d       y	t        t              st        j                  d
       y	t              dk(  rdd<   |rt        |t              s&|st        j                  d       y	|ddt        dgd}t        j                  |      j                  d      }t        d|      }t        d|      }d\  }}	}
|
|k  r7	 t        d ||      5 }|j                  }|j                   }		 ddd       ||	fS ||	fS # 1 sw Y   nxY wn# t"        j$                  j&                  $ r}t)        |      }|dk7  r	 t+        |      }	|	j-                  d      r|	j/                  d      s-|j0                  |j0                  j2                  n
t        |      }	n# t4        $ r t        |      }	Y nw xY w |||	d       |t6        v rY d}~||	fS Y d}~n/d}~wt4        $ r   ||t9        j:                         d       Y nw xY w|
dz  }
t=        j>                  t@               |
|k  r_*)z'Make chat API request with retry logic.Fr   r-   r   r   Nc                 |    d d d|  d| }|rt         j                  |       y t         j                  |       y )Nz[chat] failed to request URL: z, headers: z, status code: z, message: )r4   r   r   )r   r-   r   r'   r   rB   s       r   outputzchat.<locals>.output}  sA    /uKyX\W]]hiphqrLLLLr   z[chat] url cannot be empty)  Nz[chat] headers must be a dictr   application/jsonzcontent-typez[chat] model cannot be emptyuser)roler   )r   streammessagesutf8rF   )r   Nr   POST)r   r   rC   r   {})r   r-   r   T)F)!r   r&   r   r   r4   r   r   r   r*   rP   jsondumpsencoderI   rH   r"   r'   r   r   r   r#   r.   
startswithendswithr    r,   r+   rU   	traceback
format_excru   rv   rM   )rB   r   r   r   r   rC   r   payloadr   r-   attemptr    r?   s   ``           r   chatr   x  s[   
S 3 t   cDKJC12gt$45	W	"4FD1LL78 "(5EFG
 jj ''/G!WoG!WoG)D'7
G
	J7GWU Ya++"--8 =4=9   "",, 	$Q'Ds{%03G #--c2':J:J3:O78zz7M!**"3"3SVWXSY  %!!fG% D'?++ = , 	Ji&:&:&<DI	J 	1

&'9 G
s[   E E1E E
E H=+H<AGHG.+H-G..H)H=<H=queryr   pagec                 $   |dk  st        |      st        |       ryd| d|  }ddt               d| d}t               }|j                  ||	      }t	        j
                  d
|t        j                        rt        j                  d       y|S )zMUse github web search instead of rest api due to it not support regex syntax.r   r   z#https://github.com/search?o=desc&p=z&type=code&q=z|text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9https://github.comuser_session=)AcceptReferer
User-AgentCookie)rB   r   u*   <title>Sign in to GitHub · GitHub</title>flagszO[GithubCrawl] Session has expired, please provide a valid session and try again)	r   r\   r   r   rer   Ir4   r   )r   r   r   rB   r   clientr   s          r   search_github_webr     s    qyGG$/v]5'
JC Q'$&!'+	G  FjjS'j2G	yy>rttTfgNr   rF   token	peer_pagec                 <   t        |      st        |       rg S t        t        |d      t              t        d|      }}d|  d| d| }dd| dd}t	               }|j                  ||t        t        	      }t        |      rg S 	 t        j                  |      j                  d
g       }t               }	|D ]F  }
|
rt        |
      t        k7  r|
j                  dd      }t        |      r6|	j                  |       H t        |	      S # t        $ r g cY S w xY w)zRate limit: 10RPM.rF   %https://api.github.com/search/code?q="&sort=indexed&order=desc&per_page=&page=application/vnd.github+jsonBearer 
2022-11-28r   AuthorizationzX-GitHub-Api-VersionrB   r   r   rC   r   html_urlr   r   minrI   rK   r   r   rQ   rS   r   loadssettyper   addlistr+   )r   r   r   r   rB   r   r   r   r   linksitemlinks               r   search_github_apir    s"   u~	#i+-ABC4LtI1%8Z[dZeeklpkq
rC/"5'* ,G  FjjS'<OYkjlGw	

7#''4% 	D4:-88J+Dt}IIdO	 E{ 	s   BD DDcallbackc                 L   |dk  st        |      st        |       rg ddfS t        | ||      }t        |      rg ddfS 	 d}t        j                  ||t        j                        }|rt        t        |            ng }t               }|D ]  }	|j                  d|	         t        |      }
|rt        |t              r|
r
	  ||
|       |dk(  rt        | ||      }nd}|
||fS # t        $ r g }
Y Ew xY w# t        $ r"}t        j                  d|        Y d}~Qd}~ww xY w)	z{
    Search GitHub web and return results, total count, and content.
    Returns: (results_list, total_count, content)
    r   r   &href="(/[^\s"]+/blob/(?:[^"]+)?)#L\d+"r   r   [search] callback failed: NrF   )r   r   r   findallr   r  r   r  r+   r   r   r4   r   estimate_web_total)r   r   r   r  r   regexgroupsurisr  uriresultsr?   totals                r   search_web_with_countr    s7    qyGG$1by  w5Gw1by9E7"$$7$*tCK  	2CII*3%01	2 u+
 Jx2w	;Wg&
 qy"5'7; E7""#    	;LL5aS9::	;s*   A0C' 	C8 'C54C58	D#DD#c                 |   t        |      st        |       rg ddfS t        t        |d      t              t        d|      }}d|  d| d| }dd| d	d
}t	               }|j                  ||t        t              }t        |      rg ddfS 	 t        j                  |      }|j                  dg       }	|j                  dd      }
t               }|	D ]F  }|rt        |      t        k7  r|j                  dd      }t        |      r6|j                  |       H t        |      |
|fS # t        $ r g ddfcY S w xY w)a  
    Search GitHub API and return results, total count, and raw content.

    Args:
        query: Search query string
        token: GitHub API token for authentication
        page: Page number to retrieve (default: 1)
        peer_page: Results per page (default: API_RESULTS_PER_PAGE)

    Returns:
        Tuple containing:
        - List[str]: List of GitHub URLs found
        - int: Total count of results available
        - str: Raw JSON response content
    r   r   rF   r   r   r   r   r   r   r   r   r   total_countr   r   )r   r   r   r   rB   r   r   r   r   r   r  r  r  r  s                 r   search_api_with_countr  *  sS   $ u~1by#i+-ABC4LtI1%8Z[dZeeklpkq
rC/"5'* ,G  FjjS'<OYkjlGw1byzz'""%* 	D4:-88J+Dt}IIdO	 E{E7** 1bys   BD* *D;:D;with_apic                 |    t         j                  j                  |       }|rt        ||||      S t	        ||||      S )z
    Unified search interface that returns results, total count, and content.
    Returns: (results_list, total_count, content)
    )r6   r7   
quote_plusr  r  )r   r   r   r  r   r  keywordss          r   search_with_countr  `  s=     ||&&u-H$XwiHH$XwhGGr   )default_result	log_levelc                     t        |      st        |       ryd|  d}dd| dd}t               }|j                  ||d	      }t        j                  |      }|j                  d
d      S )z,Get total number of results from GitHub API.r   r   z+&sort=indexed&order=desc&per_page=20&page=1r   r   r   r   rF   rB   r   r   r  )r   r   r   r   r   )r   r   rB   r   r   r   r   s          r   get_total_numr!  s  sw     u~1%8c
dC/"5'* ,G  FjjS'Aj>G::gD88M1%%r   r   c                     t        |      st               ry	 t        j                  j                         }	 |t         |d      }t        |      }|st        j                  d| d       t        S d v r't         fdd	D              r j                  d
d      }nt        j                  j                         }d| }t               ddd| ddd| d}t        j                  t!        j                          t"        z         t%               }|j'                  ||d      }|r|t)        j*                  |      }	|	j'                  dd      sU|	j'                  dd      }
|	j'                  dd      }t        j-                  d|
 d| d|        |
dkD  r|
S t/        |       S t/        |       S # t        $ r  }Y w xY w# t        $ r,}t        j1                  d| d| d       t        cY d}~S d}~ww xY w)z
    Get total count for web search using GitHub's blackbird_count API.
    Performs a single search and then queries the count API.
    r   NrF   r   r   r   z*[search] initial search failed for query: z, using conservative estimate%c              3   &   K   | ]  }|v  
 y wNrG   ).0cr   s     r   	<genexpr>z%estimate_web_total.<locals>.<genexpr>  s     XqU
Xs   )z%2Fz%5Bz%5Dz%7Bz%7D +z=https://github.com/search/blackbird_count?saved_searches=^&q=r   zgzip, deflate, brzhttps://github.com/search?q=z^&type=codeXMLHttpRequestr   )r   r   zAccept-Encodingr   zX-Requested-Withr   r   failedTcountmodero   z[search] got z results, mode: z	, query: z&[search] estimation failed for query: z	, error: )r   r6   r7   unquote_plusr+   r   r   r4   warningrL   anyreplacer  r\   ru   rv   randomrT   r   r   r   r   r5   extract_count_from_pager   )r   r   r   r-   encodedrB   r   r   r    r   r.  r/  r?   s   `            r   r  r    s   
 w75>,,++E21$?'eW1MGw-NNGyPmno'' %<CX4WXXmmC-Gll--e4G NgYW(*(25gYkJ 0%gY/
 	

6==?%??@"$::#w:C::h'D88Hd+!,xx	2mE72B4&	RYQZ[\ !&	uV/FwPU/VV 'w66_  b  $=gYiPQsRopq##$sA   F3 ;G 6D$G G 'G 3GG	G:!G5/G:5G:c                    t        |       rt        S 	 t        j                  j	                  |      }g d}|D ]t  }t        j                  || t
        j                        }|s+|j                  d      j                  dd      }t        |      }t        j                  d| d|        |c S  t        j                  d|        t        S # t        $ r(}t        j                  d|        t        cY d	}~S d	}~ww xY w)
z5Extract result count from GitHub search page content.)z"We\'ve found ([\d,]+) code resultsz([\d,]+) code resultszdata-total-count="([\d,]+)"z"total_count":(\d+)rF   ,r   z[search] extracted z results from page for query: z6[search] could not extract count from page for query: z,[search] failed to extract count from page: N)r   rL   r6   r7   r0  r   r   r   groupr3  r   r4   r5   r1  r+   r   )	r   r   r-   patternspatternmatchr'   r.  r?   s	            r   r5  r5    s    w##$,,++E2
   	GIIgw5E{{1~--c26D	1%8VW^V_`a	 	OPWyYZ## $CA3GH##$s+   AC #A
C .C 	C>C93C>9C>c                 j   t         j                  j                  t        |             }|sg dfS |rt	        ||||      }|dfS t        |||      }t        |      rg dfS 	 d}	t        j                  |	|t        j                        }
|
rt        t        |
            ng }t               }|D ]  }|j                  d|         t        |      }|rt        |t              r|r
	  |||       ||fS # t        $ r"}t         j#                  d|        Y d}~*d}~ww xY w# t        $ r g dfcY S w xY w)	zR
    Search code with unified interface.
    Returns: (results_list, content)
    r   )r   r   r   r   r#  r
  r   r   r  N)r6   r7   r  r   r  r   r   r   r  r   r  r   r  r   r   r+   r4   r   )r   r   r   r  r   r  keywordr  r   r  r  r  r  r  r?   s                  r   search_coder?    s:    ll%%d5k2G2v#'tW`a{gwTJGw2v9E7"$$7$*tCK  	2CII*3%01	2 u+ 
8X67?'*   ?9!=>>?  2vs=   "BD" '	C4 0D" 4	D=DD" DD" "D21D2key_patternaddress_patternendpoint_patternmodel_patternr'   c           	         t        |t              st        |t              rt        | t              sg S |r|}nt        ||t              }|sg S t	        |       } t        ||       }|sg S t	        |      }t        ||      }	|r|	sg S |	s|	j                  d       t	        |      }t        ||      }
|r|
sg S |
s|
j                  d       t	        |      }t        ||      }|r|sg S |s|j                  d       t               }t        j                  ||	|
|      D ]%  \  }}}}|j                  t        ||||             ' |S )aS  Extract API keys and related information from URLs or text content

    Args:
        key_pattern: Regex pattern to match API keys
        url: URL to fetch content from (if text not provided)
        retries: Number of retry attempts for HTTP requests
        address_pattern: Regex pattern to match service addresses
        endpoint_pattern: Regex pattern to match endpoints
        model_pattern: Regex pattern to match model names
        text: Text content to search (if provided, url is ignored)

    Returns:
        List[Service]: List of Service objects with extracted information
    )rB   r   r   )r'   r  r   )addressendpointkeyr   )r   r&   r   rN   r   extractappendr  	itertoolsproductr	   )r@  rB   r   rA  rB  rC  r'   r   keys	addresses	endpointsmodels
candidatesrG  rE  rF  r   s                    r   collectrQ    sW   0 sC D#)>zR]_bGc	sG>TU	 {#K{3D	 ?+OWO<Iy	 ,-W,<=I		 'M'7FV	bJ *3):):4IW])^ ]%Wh''H#UZ[\] r   r  c                    t        |       t        |      }}|r|sg S t               }t        j                  ||      }|D ]  }g }t	        |t
              r|j                  |       nWt	        |t        t        f      r|j                  t        |             n&t        j                  dt        |       d| d       ~|D ]!  }t        |      }	|	s|j                  |	       #  t        |      S )z.Extract strings from text using regex pattern.zUnknown type: z	, value: z. Please optimize your regex)r   r   r   r  r   r&   rI  tupler  extendr4   r   r  r  )
r'   r  r   r;  r   r  xwordswordrG  s
             r   rH  rH  a  s     Dz4;WG'	eEZZ)F aLLOE4=)LLa!LL>$q')A3>Z[\ 	Dt*C		#	 ;r   )r   )r   )r   N)NNr   g      ?r   )r   N   r   )rF   Nr&  )r   r   r   r   r   N)]r   r   rJ  r   r4  r   ru   r   urllib.parser6   typingr   r   r   r   r   r   r   core.modelsr	   tools.loggerr
   tools.utilsr   r   r   r4   r   r3   r   r2   r&   r   r   r   r   r#   r.   r@   r   ResponserH   constant.searchrK   rL   constant.systemrM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   core.exceptionsrX   rY   rZ   
core.typesr[   tools.coordinatorr\   tools.ratelimitr]   tools.resourcesr^   tools.retryr_   r`   rb   r   __annotations__r   r   r   r   r   r   r   r  r  r  r   r  r!  r  r5  r?  rQ  rH  rG   r   r   <module>rh     s       	    = =   # 3 3	H	   "  X%5%5 ?X00:: ?s ?
4h11;; 4 4YXc] Yt Y:C c E # (J[J[  G    : ' $ , ' + % )^ ^D *.& -@tC$89 @d @< $sDe$445 *
  #!f9	f9d^f9 TNf9 	f9
 f9 f9 	f9 f9T oqE	EE$'E6>tnEVYEhkE
38_EPS 3 c c , ;<Nb  S    C    gklogp  L ;?	.#.#.# .# xcC 0$ 678	.#
 49c3.#d )*<P333"%3693
49c33x <@HHH H 	H
 H xcC 0$ 678H 49c3H& !w7& &S &S & 8&$>$c >$C >$(3- >$SV >$B$S $ $ $N <@,,, , 	,
 , xcC 0$ 678, 49c>,^ "8 FF	F F 	F
 F F 3-F 
']F 9FR "8# c d3i  9r   