
    [i2                        d dl Z d dlZd dlZd dlZd dlZej
                  d    dkD  rd dlmZ d dlm	Z	m
Z
 d dlmZmZmZ nd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ 	 d dlmZ d	Zg dZdZdZdZdZdZdZ e j:                  d      Zes e j:                  d      ZesdZ ee j>                  jA                  ed            Z!	 e!jE                          dZ$	 e j>                  jK                  e j>                  jM                  e'      d          Z(	 e j>                  jA                  e(d      Z)d dl*Z* e*jV                  e)d      Z,	 e,j[                         D  cg c]  } | j]                          c} Z/e,ja                          [,	 d Z1d Z2d dZ3d Z4	 	 	 d!dZ5d Z6y# e$ r d dlmZ d
ZY Bw xY w# e#$ r Y w xY wc c} w # e,ja                          [,w xY w# e#$ ro e j>                  jA                  e(d      Z) e+e)      5 Z,e,j[                         D  cg c]  } | j]                          nc c} w c} Z/ddd       n# 1 sw Y   nxY wY w xY w# e#$ r e$gZ/Y w xY w)"    N   )LWPCookieJar)Requesturlopen)
quote_plusurlparseparse_qs)r   )r   r	   )BeautifulSoupTF)searchluckyget_random_user_agentget_tbszhttps://www.google.%(tld)s/zuhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)szrhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)szhttps://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)sz~https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&cr=%(country)s)hlqnumbtnGstarttbssafecrHOMEUSERHOME.z.google-cookiez2Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)zuser_agents.txt.gzrbzuser_agents.txtc                  4    t        j                  t              S )za
    Get a random user agent string.

    :rtype: str
    :return: Random user agent string.
    )randomchoiceuser_agents_list     N/mnt/data/GridWatch/venv/lib/python3.12/site-packages/googlesearch/__init__.pyr   r   {   s     ==)**r    c                 `    | j                  d      } |j                  d      }dt               z  S )z
    Helper function to format the tbs parameter.

    :param datetime.date from_date: Python date object.
    :param datetime.date to_date: Python date object.

    :rtype: str
    :return: Dates encoded in tbs format.
    z%m/%d/%Yz-cdr:1,cd_min:%(from_date)s,cd_max:%(to_date)s)strftimevars)	from_dateto_dates     r!   r   r      s1     "":.Iz*G:TVCCr    c                    |t         }t        |       }|j                  d|       t        j	                  |       |rt        |      }n!t        j                         }t        ||      }t        j                  ||       |j                         }|j                          	 t        j                          |S # t        $ r Y |S w xY w)aL  
    Request the given URL and return the response page, using the cookie jar.

    :param str url: URL to retrieve.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :rtype: str
    :return: Web page retrieved for the given URL.

    :raises IOError: An exception is raised on error.
    :raises urllib2.URLError: An exception is raised on error.
    :raises urllib2.HTTPError: An exception is raised on error.
    z
User-Agent)context)
USER_AGENTr   
add_header
cookie_jaradd_cookie_headerr   ssl_create_unverified_contextextract_cookiesreadclosesave	Exception)url
user_agent
verify_sslrequestresponser(   htmls          r!   get_pager:      s    " 
clG|Z0  )7#0027G4x1==?DNN K  Ks   !B7 7	CCc                     	 | j                  d      r't        | d      }t        |j                        d   d   } t        | d      }|j                  rd|j                  vr| S y y # t
        $ r Y y w xY w)Nz/url?httpr   r   google)
startswithr   r	   querynetlocr3   )linkos     r!   filter_resultrC      sy     ??7#v&AAGG$S)!,D
 T6"880K 18  s   AA$ $	A0/A0c              #   ~  K   t               }d}t        |       } |
si }
t        D ]   }||
j                         v st	        d|       t        t        t               z  ||       |r)|dk(  rt        t               z  }n:t        t               z  }n(|dk(  rt        t               z  }nt        t               z  }|r||k  r+|}|
j                         D ]&  \  }}t        |      }t        |      }|d|d|z   }( t        j                  |       t        |||      }t        rt!        |d      }nt!        |      }	 |j#                  d      j%                  d	      }|D ]J  }	 |d   }t-        |      }|st/        |      }||v r'|j1                  |       | |dz  }|sD||k\  sJ y ||k(  ry||z  }|dk(  rt        t               z  }nt        t               z  }|s#||k  r*yy# t&        $ r8 |j#                  d
      }|r|j)                          |j%                  d	      }Y w xY w# t*        $ r Y w xY ww)a  
    Search the given query string using Google.

    :param str query: Query string. Must NOT be url-encoded.
    :param str tld: Top level domain.
    :param str lang: Language.
    :param str tbs: Time limits (i.e "qdr:h" => last hour,
        "qdr:d" => last 24 hours, "qdr:m" => last month).
    :param str safe: Safe search.
    :param int num: Number of results per page.
    :param int start: First result to retrieve.
    :param int stop: Last result to retrieve.
        Use None to keep searching forever.
    :param float pause: Lapse to wait between HTTP requests.
        A lapse too long will make the search slow, but a lapse too short may
        cause Google to block your IP. Your mileage may vary!
    :param str country: Country or region to focus the search on. Similar to
        changing the TLD, but does not yield exactly the same results.
        Only Google knows why...
    :param dict extra_params: A dictionary of extra HTTP GET
        parameters, which must be URL encoded. For example if you don't want
        Google to filter similar results you can set the extra_params to
        {'filter': '0'} which will append '&filter=0' to every query.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :rtype: generator of str
    :return: Generator (iterator) that yields found URLs.
        If the stop parameter is None the iterator will loop forever.
    r   zQGET parameter "%s" is overlapping with                 the built-in GET parameter
   &=zhtml.parserr   )idagbarhref   N)setr   url_parameterskeys
ValueErrorr:   url_homer$   url_next_pageurl_next_page_num
url_searchurl_search_numitemstimesleepis_bs4r
   findfindAllAttributeErrorclearKeyErrorrC   hashadd)r?   tldlangr   r   r   r   stoppausecountryextra_paramsr5   r6   hashescountbuiltin_paramr4   
last_countkvr9   soupanchorsrJ   rI   rA   hs                              r!   r   r      se    J UF E uE
  ( L--//,  X
J7 "9$&(C#df,C"9tv%C 46)C edl 

 !&&( 	,DAq1A1AQ*+C	, 	

5 Z4  }5D &D		(ii8i,44S9G  	Ay
 !&D T
AF{JJqM J QJE5	<  	"9$&(C#df,CW edl:  	(999'D

ll3'G	(  sk   7H=C?H=:!G* H=!H.&:H=!H='8H=!H=(H=*>H+(H=*H++H=.	H:7H=9H::H=c                  *    t        t        | i |      S )z
    Shortcut to single-item search.

    Same arguments as the main search function, but the return value changes.

    :rtype: str
    :return: URL found by Google.
    )nextr   )argskwargss     r!   r   r   o  s     ''((r    )NT)comen0offrE   r   Ng       @ NNT)7osr   sysrW   r-   version_infohttp.cookiejarr   urllib.requestr   r   urllib.parser   r   r	   	cookieliburlliburllib2bs4r
   rY   ImportError__all__rQ   rT   rR   rU   rS   rN   getenvhome_folderpathjoinr+   loadr3   r)   abspathsplit__file__install_folderuser_agents_filegzipopenfp	readlinesstripr   r1   r   r   r:   rC   r   r   )_s   0r!   <module>r      sY  < 
  
  
A+/;;&!(+!F

 )
!"3 <
 bii"))J'K"'',,{4DEF
	OO
 B
$WW__RWW]]8%<Q%?@NC77<<8LMTYY'.	35<<>Ba	BHHJ+D""N* LM:>'+W-x	)}	  +FV  		$  CHHJ C77<<8IJ"# 	Cr35<<>Ba	BB	C 	C 	CC  $"|$s   F F+ 7I 3G F; F6*F; -G >I F('F(+F32F36F; ;GG )I:H6H%$	H6-	I6H?	;II II II