
    o[i/                       U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$  ee      d2d       Z% ee      d3d       Z& ee      d4d       Z' e(d  ejR                         D              Z*de+d<   e*D  cg c]  } | d    	 c} Z,de+d<    ee      d5d       Z- ee      d3d       Z. ee      d3d       Z/ ee      d3d       Z0 ee      d3d       Z1 ee      d3d       Z2 ee      d3d       Z3 ee      d3d       Z4 ee      d3d       Z5 ee      d3d       Z6 ee      d3d       Z7 ee      d3d        Z8 ee      d3d!       Z9 ee      d3d"       Z: ee      d3d#       Z; e e<e            d6d$       Z= ee      d3d%       Z>d7d8d&Z? ed'      d9d(       Z@d:d)ZAd;d*ZBd<d=d+ZCd>d,ZDd?d-ZEd.ej                  d/f	 	 	 	 	 	 	 d@d0ZG	 dA	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dBd1ZHyc c} w )C    )annotationsN)bisect_right)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS_LATIN_CJK_HANGUL	_KATAKANA	_HIRAGANA_THAI_ARABIC_ARABIC_ISOLATED_FORM_ACCENT_KEYWORDS_ACCENTUATED)maxsizec                Z   	 t        j                  |       }d}d|v r	|t        z  }d|v r	|t        z  }d|v r	|t
        z  }d|v r	|t        z  }d|v r	|t        z  }d|v r	|t        z  }d|v r|t        z  }d	|v r	|t        z  }t        D ]  }||v s|t        z  } |S  |S # t        $ r Y yw xY w)
zRCompute all name-based classification flags with a single unicodedata.name() call.r   LATINCJKHANGULKATAKANAHIRAGANATHAIARABICzISOLATED FORM)unicodedataname
ValueErrorr   r   r   r   r   r   r   r   r   r   )	characterdescflagskws       Q/mnt/data/GridWatch/venv/lib/python3.12/site-packages/charset_normalizer/utils.py_character_flagsr.   &   s    $$Y/ E$}4TT~4d"**E :\!EL
 L7  s   B 	B*)B*c                8    t        t        |       t        z        S N)boolr.   r   r)   s    r-   is_accentuatedr3   I   s     +l:;;    c                    t        j                  |       }|s| S |j                  d      }t        t	        |d   d            S )N r      )r&   decompositionsplitchrint)r)   
decomposedcodess      r-   remove_accentr>   N   sA    !//	:J!'',Es58R !!r4   c              #  T   K   | ]   \  }}|j                   |j                  |f " y wr0   )startstop).0r'   	ord_ranges      r-   	<genexpr>rD   [   s+      <i __innd+<s   &(zlist[tuple[int, int, str]]_UNICODE_RANGES_SORTEDz	list[int]_UNICODE_RANGE_STARTSc                r    t        |       }t        t        |      dz
  }|dk\  rt        |   \  }}}||k  r|S y)zK
    Retrieve the Unicode range official name from a single character.
    r   r   N)ordr   rF   rE   )r)   character_ordidxr@   rA   r'   s         r-   unicode_rangerK   b   sI    
 YM ,m
<q
@C
ax237tT4Kr4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_latinrM   s   s     +f455r4   c                Z    t        j                  |       }d|v ryt        |       }|yd|v S )NPTFPunctuationr&   categoryrK   r)   character_categorycharacter_ranges      r-   is_punctuationrV   x   s=    )229=
  "/	":OO++r4   c                p    t        j                  |       }d|v sd|v ryt        |       }|yd|v xr |dk7  S )NSNTFFormsLorQ   rS   s      r-   	is_symbolr\      sP    )229=
  C+=$="/	":Oo%D*<*DDr4   c                2    t        |       }|yd|v xs d|v S )NF	EmoticonsPictographs)rK   )r)   rU   s     r-   is_emoticonr`      s*    "/	":O/)M]o-MMr4   c                j    | j                         s| dv ryt        j                  |       }d|v xs |dv S )N>      ｜+<>TZ>   PcPdPo)isspacer&   rR   )r)   rT   s     r-   is_separatorrk      sB    i+AA)229=$$P(:>P(PPr4   c                D    | j                         | j                         k7  S r0   )islowerisupperr2   s    r-   is_case_variablero      s    )"3"3"555r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_cjkrq      s     +d233r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_hiraganars           +i788r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_katakanarv      rt   r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   	is_hangulrx           +g566r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_thair{      s     +e344r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   	is_arabicr}      ry   r4   c                8    t        t        |       t        z        S r0   )r1   r.   r   r2   s    r-   is_arabic_isolated_formr      s     +.CCDDr4   c                    | t         vS r0   )r   r2   s    r-   is_cjk_uncommonr      s    111r4   c                4     t         fdt        D              S )Nc              3  &   K   | ]  }|v  
 y wr0    )rB   keyword
range_names     r-   rD   z-is_unicode_range_secondary.<locals>.<genexpr>   s     Tw*$Ts   )anyr   )r   s   `r-   is_unicode_range_secondaryr      s    T4STTTr4   c                j    | j                         du xr  | j                         du xr | dk7  xr | dk7  S )NFu   ﻿)rj   isprintabler2   s    r-   is_unprintabler      sL     	u$ 	"!!#u,	"	" !	r4   c           	     ~   t        | t        t        f      st        t	        |       }t        t        | dt        ||       j                  dd            }t	        |      dk(  ry|D ]T  }|j                         j                  dd      }t        j                         D ]  \  }}||k(  r|c c S ||k(  s|c c S  V y)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancebytes	bytearray	TypeErrorlenr   r   mindecodelowerreplacer   items)sequencesearch_zoneseq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r-   any_specified_encodingr      s     h	 23x=G ',3w,-44WX4NG
 7|q% 
%/557??SI
 .5]]_ 	%)NM!33$$ 22$$		%
% r4      c                n    | dv xs0 t        t        j                  d|        j                  t              S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sig
encodings.)
issubclass	importlibimport_moduler   r
   )r'   s    r-   is_multi_byte_encodingr     sC    
  
 
  
*TF 34GG#
r4   c                    t         D ]>  }t         |   }t        |t              r|g}|D ]  }| j                  |      s||fc c S  @ y)z9
    Identify and extract SIG/BOM in given sequence.
    )Nr4   )r   r   r   
startswith)r   iana_encodingmarksmarks       r-   identify_sig_or_bomr     s[    
 ( +%3M%BeU#GE 	+D""4($d**	++ r4   c                
    | dvS )N>   r   r   r   )r   s    r-   should_strip_sig_or_bomr   .  s     444r4   c                    | j                         j                  dd      } t        j                         D ]  \  }}| ||fv s|c S  |rt	        d|  d      | S )zIReturns the Python normalized encoding name (Not the IANA official name).r   r   zUnable to retrieve IANA for '')r   r   r   r   r(   )cp_namestrictr   r   s       r-   	iana_namer   2  sh    mmo%%c3/G
 *1 !%~}55  ! 8	CDDNr4   c                t   t        |       st        |      ryt        j                  d|        j                  }t        j                  d|       j                  } |d      } |d      }d}t	        d      D ]7  }t        |g      }|j                  |      |j                  |      k(  s3|dz  }9 |dz  S )Ng        r   r   r   r      r   )r   r   r   r   ranger   r   )	iana_name_aiana_name_b	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r-   cp_similarityr   C  s    k*.D[.Q''*[M(BCVVI''*[M(BCVVI(9D(9D!"3Z '$aSz;;}%])CC!Q&!'
 !3&&r4   c                ,    | t         v xr |t         |    v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r-   is_cp_similarr   W  s%     	-- 	?1+>>r4   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)sc                    t        j                  |       }|j                  |       t        j                         }|j	                  t        j
                  |             |j                  |       y r0   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r'   levelformat_stringloggerhandlers        r-   set_logging_handlerr   b  sU    
 t$F
OOE##%G**=9:
gr4   c	              #    K   |r|du r|D ]  }	||	|	|z    }
|
s y |
  y |D ]  }	|	|z   }|t        |       dz   kD  r| |	|	|z    }|r	|du r||z   }|j                  ||rdnd      }
|r[|	dkD  rVt        |d      }|rH|
d | |vrAt        |	|	dz
  d	      D ].  }| || }|r	|du r||z   }|j                  |d      }
|
d | |v s. n |
  y w)
NF   r   r   r   r   r7      )r   r   r   r   )	sequencesr   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadr   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r-   cut_sequence_chunksr   o  sK     0E9 	A#AJ7EK		  #	AJI3y>A--$QZ8L#(8E(A*\9 ''#8xh ( E %Q.1*b.A& $556oM"1a!eR0 	"'09'=/4D4M+6+EL , 3 3M( 3 S !8"89_L!	" KG#	s   B>C
C)r)   strreturnr;   )r)   r   r   r1   )r)   r   r   r   )r)   r   r   
str | None)r   r   r   r1   )i    )r   r   r   r;   r   r   )r'   r   r   r1   )r   r   r   ztuple[str | None, bytes])r   r   r   r1   )T)r   r   r   r1   r   r   )r   r   r   r   r   float)r   r   r   r   r   r1   )r'   r   r   r;   r   r   r   Noner0   )r   r   r   r   r   r   r   r;   r   r1   r   r1   r   r   r   r1   r   r   r   zGenerator[str, None, None])I
__future__r   r   r   r&   bisectr   codecsr   encodings.aliasesr   	functoolsr   rer   typingr	   _multibytecodecr
   constantr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   r3   r>   sortedr   rE   __annotations__rF   rK   rM   rV   r\   r`   rk   ro   rq   rs   rv   rx   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   INFOr   r   )es   0r-   <module>r     s   "     % %       * *+ ,D *+< ,< *+" ," 6< <8288:< 6 2  3I#IQAaD#I y I *+ ,  *+6 ,6 *+, ,, *+E ,E *+N ,N *+Q ,Q *+6 ,6 *+4 ,4 *+9 ,9 *+9 ,9 *+7 ,7 *+5 ,5 *+7 ,7 *+E ,E *+2 ,2 3./0U 1U *+ ,@ 3 ($5"'( %D



 
 
	
, #'555 5 	5
 5 5 5  5  5  5a $Js   ;I