
    o[iY                       U d dl mZ d dlZd dlmZ d dlmZ ej                  dk\  rd dlm	Z	 n	 d dl
m	Z	 dd	lmZmZmZmZmZmZmZmZmZmZ dd
lmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& eez  ez  ez  ez  Z'de(d<    G d d      Z)e	 G d de)             Z*e	 G d de)             Z+e	 G d de)             Z,e	 G d de)             Z-e	 G d de)             Z.e	 G d de)             Z/e	 G d de)             Z0e	 G d de)             Z1e	 G d d e)             Z2 ed!"      	 	 	 	 	 	 d)d#       Z3 e4d$ e)jk                         D              Z6d%e(d&<    ed'"      	 d*	 	 	 	 	 	 	 d+d(       Z7y# e$ r d Z	Y Dw xY w),    )annotationsN)	lru_cache)	getLogger)      )finalc                    | S N )clss    N/mnt/data/GridWatch/venv/lib/python3.12/site-packages/charset_normalizer/md.pyr   r      s    J       )
COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD_ACCENTUATED_CJK_HANGUL	_HIRAGANA	_KATAKANA_LATIN_THAI)_character_flagsis_accentuated	is_arabicis_arabic_isolated_formis_case_variableis_cjkis_emoticonis_latinis_punctuationis_separator	is_symbolis_unprintableremove_accentunicode_rangeis_cjk_uncommonint_GLYPH_MASKc                  >    e Zd ZdZdZddZd	dZd
dZedd       Z	y)MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    r   c                    t         )z@
        Determine if given character should be fed in.
        NotImplementedErrorself	characters     r   eligiblezMessDetectorPlugin.eligible<   
     "!r   c                    t         )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        r.   r0   s     r   feedzMessDetectorPlugin.feedB   s
    
 "!r   c                    t         )zB
        Permit to reset the plugin to the initial state.
        r.   r1   s    r   resetzMessDetectorPlugin.resetI   r4   r   c                    t         )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r.   r8   s    r   ratiozMessDetectorPlugin.ratioO   s
     "!r   Nr2   strreturnboolr2   r=   r>   Noner>   rA   r>   float)
__name__
__module____qualname____doc__	__slots__r3   r6   r9   propertyr;   r   r   r   r,   r,   4   s1    
 I""" " "r   r,   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y) TooManySymbolOrPunctuationPlugin_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordc                J    d| _         d| _        d| _        d | _        d| _        y Nr   FrM   r8   s    r   __init__z)TooManySymbolOrPunctuationPlugin.__init__b   s*    '("#%&04!,1#r   c                "    |j                         S r
   isprintabler0   s     r   r3   z)TooManySymbolOrPunctuationPlugin.eligiblej       $$&&r   c                0   | xj                   dz  c_         || j                  k7  rk|t        vrct        |      r| xj                  dz  c_        || _        y |j                         s+t        |      r t        |      s| xj                  dz  c_        || _        y )Nr      )	rP   rQ   r   r"   rN   isdigitr$   r    rO   r0   s     r   r6   z%TooManySymbolOrPunctuationPlugin.feedm   s    " 222!==i(''1,' %.! %%'i(#I.""a'"$-!r   c                .    d| _         d| _        d| _        y Nr   )rN   rP   rO   r8   s    r   r9   z&TooManySymbolOrPunctuationPlugin.reset   s    "# !r   c                    | j                   dk(  ry| j                  | j                  z   | j                   z  }|dk\  r|S dS )Nr           333333?)rP   rN   rO   )r1   ratio_of_punctuations     r   r;   z&TooManySymbolOrPunctuationPlugin.ratio   sO      A% ##d&8&88!!'" (<s'B#KKr   NrB   r<   r@   rC   
rE   rF   rG   rI   rU   r3   r6   r9   rJ   r;   r   r   r   rL   rL   X   s3    I2'.$
 L Lr   rL   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)TooManyAccentuatedPluginrP   _accentuated_countc                     d| _         d| _        y r^   rf   r8   s    r   rU   z!TooManyAccentuatedPlugin.__init__   s    %&'(r   c                "    |j                         S r
   )isalphar0   s     r   r3   z!TooManyAccentuatedPlugin.eligible   s      ""r   c                p    | xj                   dz  c_         t        |      r| xj                  dz  c_        y y Nr   )rP   r   rg   r0   s     r   r6   zTooManyAccentuatedPlugin.feed   s1    ")$##q(# %r   c                     d| _         d| _        y r^   rf   r8   s    r   r9   zTooManyAccentuatedPlugin.reset   s     !"#r   c                f    | j                   dk  ry| j                  | j                   z  }|dk\  r|S dS )Nr   r`   gffffff?rf   )r1   ratio_of_accentuations     r   r;   zTooManyAccentuatedPlugin.ratio   s=      1$'+'>'>AVAV'V(=(E$N3Nr   NrB   r<   r@   rC   rc   r   r   r   re   re      s1    :I)#)$ O Or   re   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)UnprintablePlugin_unprintable_countrP   c                     d| _         d| _        y r^   rr   r8   s    r   rU   zUnprintablePlugin.__init__   s    '(%&r   c                     yNTr   r0   s     r   r3   zUnprintablePlugin.eligible       r   c                n    t        |      r| xj                  dz  c_        | xj                  dz  c_        y rl   )r%   rs   rP   r0   s     r   r6   zUnprintablePlugin.feed   s,    )$##q(#"r   c                    d| _         y r^   )rs   r8   s    r   r9   zUnprintablePlugin.reset   s
    "#r   c                Z    | j                   dk(  ry| j                  dz  | j                   z  S )Nr   r`   r   )rP   rs   r8   s    r   r;   zUnprintablePlugin.ratio   s/      A%''!+t/D/DDDr   NrB   r<   r@   rC   rc   r   r   r   rq   rq      s1    :I'#
$ E Er   rq   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)SuspiciousDuplicateAccentPlugin_successive_countrP   _last_latin_character_last_was_accentuatedc                <    d| _         d| _        d | _        d| _        y rT   r}   r8   s    r   rU   z(SuspiciousDuplicateAccentPlugin.__init__   s"    &'%&15"+0"r   c                <    |j                         xr t        |      S r
   )rj   r!   r0   s     r   r3   z(SuspiciousDuplicateAccentPlugin.eligible   s      ":x	'::r   c                ~   | xj                   dz  c_         t        |      }| j                  |r| j                  ru|j	                         r/| j                  j	                         r| xj
                  dz  c_        t        |      t        | j                        k(  r| xj
                  dz  c_        || _        || _        y rl   )rP   r   r   r   isupperr~   r&   )r1   r2   current_accentuateds      r   r6   z$SuspiciousDuplicateAccentPlugin.feed   s    "$29$=&&2#**  "t'A'A'I'I'K&&!+&Y'=9S9S+TT&&!+&%."%8"r   c                <    d| _         d| _        d | _        d| _        y rT   r}   r8   s    r   r9   z%SuspiciousDuplicateAccentPlugin.reset   s"    !" !%)"%*"r   c                Z    | j                   dk(  ry| j                  dz  | j                   z  S )Nr   r`   r[   )rP   r~   r8   s    r   r;   z%SuspiciousDuplicateAccentPlugin.ratio   s/      A%&&*d.C.CCCr   NrB   r<   r@   rC   rc   r   r   r   r|   r|      s3    I1;9 + D Dr   r|   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)SuspiciousRange"_suspicious_successive_range_countrP   _last_printable_seen_last_printable_rangec                <    d| _         d| _        d | _        d | _        y r^   r   r8   s    r   rU   zSuspiciousRange.__init__  s"    78/%&04!15"r   c                "    |j                         S r
   rW   r0   s     r   r3   zSuspiciousRange.eligible	  rY   r   c                f   | xj                   dz  c_         |j                         st        |      s|t        v rd | _        d | _        y | j                  || _        t        |      | _        y | j
                  }t        |      }t        ||      r| xj                  dz  c_        || _        || _        y rl   )	rP   isspacer"   r   r   r   r'    is_suspiciously_successive_ranger   )r1   r2   unicode_range_aunicode_range_bs       r   r6   zSuspiciousRange.feed  s    " i(88(,D%)-D&$$,(1D%)6y)AD&&*&@&@&3I&>+O_M33q83$-!%4"r   c                <    d| _         d| _        d | _        d | _        y r^   )rP   r   r   r   r8   s    r   r9   zSuspiciousRange.reset&  s"     !23/$(!%)"r   c                ^    | j                   dk  ry| j                  dz  | j                   z  }|S )N   r`   r[   )rP   r   )r1   ratio_of_suspicious_range_usages     r   r;   zSuspiciousRange.ratio,  s<      B& 33a7!!2"' /.r   NrB   r<   r@   rC   rc   r   r   r   r   r      s1    I6'54* / /r   r   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)SuperWeirdWordPlugin_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchrP   _bad_character_count_buffer_length_buffer_last_char_buffer_last_char_accentuated_buffer_accent_count_buffer_glyph_count_buffer_upper_countc                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d | _        d| _	        d| _
        d| _        d| _        y rT   r   r8   s    r   rU   zSuperWeirdWordPlugin.__init__J  sj     !$%() */!). %&)*!#$-138*)*!() () r   c                     yrv   r   r0   s     r   r3   zSuperWeirdWordPlugin.eligible\  rw   r   c                   |j                         r| xj                  dz  c_        || _        |j                         r| xj                  dz  c_        t        |      }t        |t        z        }|| _        |r| xj                  dz  c_	        | j                  s|t        z  r|r|t        z  sd| _
        |t        z  r| xj                  dz  c_        y | j                  sy |j                         st        |      st!        |      r| j                  r| xj"                  dz  c_        | j                  }| xj$                  |z  c_        |dk\  r| j                  |z  dk\  rd| _        n}| j                  rF| j                  j                         r,| j                  |k7  r| xj(                  dz  c_        d| _        n+| j                  dk(  rd| _        | xj(                  dz  c_        |dk\  rM| j                  rA| j                  dkD  xr | j                  |z  dk  }|s| xj(                  dz  c_        d| _        | j&                  r1| xj*                  dz  c_        | xj,                  |z  c_        d| _        d| _
        d| _        d | _        d| _        d| _	        d| _        d| _        y |d	vrH|j/                         s7t1        |      r+d| _        | xj                  dz  c_        || _        d| _        y y y y )
Nr   T         ?   r   ra   F>   _-<=>|~)rj   r   r   r   r   r   r?   r   r   r   r   r   r*   r   r   r"   r#   r   rP   r   r   r   r   r\   r$   )r1   r2   flagschar_accentuatedbuffer_lengthprobable_camel_caseds         r   r6   zSuperWeirdWordPlugin.feed_  s   1$%.D"  "((A-())4E%)%,*>%?1AD.))Q.),,&.-=,+/({"((A-("">)#<Y@W!!!!%!4!4M!!]2!!,,}<C04D- 66..66800MA,,1,04D---204D-,,1,"t'?'?,,q0 H00=@CG %
 ,,,1,04D-(($$)$))]:),1)',D$"#D%)D"16D.()D%'(D$'(D$@@%%')$(,D%1$%.D"16D. % ( Ar   c                    d| _         d | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        y rT   )r   r   r   r   r   r   r   rP   r   r   r   r   r   r8   s    r   r9   zSuperWeirdWordPlugin.reset  sj    !%-2*$)!#(   !$%!#$ $%!#$ #$ r   c                r    | j                   dk  r| j                  dk(  ry| j                  | j                  z  S )N
   r   r`   )r   r   r   rP   r8   s    r   r;   zSuperWeirdWordPlugin.ratio  s7    r!d&>&>!&C((4+@+@@@r   NrB   r<   r@   rC   rc   r   r   r   r   r   8  s5    I *$N7`% A Ar   r   c                  F    e Zd ZdZdZd	dZd
dZddZd	dZe	dd       Z
y)CjkUncommonPluginz<
    Detect messy CJK text that probably means nothing.
    rP   _uncommon_countc                     d| _         d| _        y r^   r   r8   s    r   rU   zCjkUncommonPlugin.__init__  s    %&$%r   c                    t        |      S r
   )r   r0   s     r   r3   zCjkUncommonPlugin.eligible  s    i  r   c                p    | xj                   dz  c_         t        |      r| xj                  dz  c_        y y rl   )rP   r(   r   r0   s     r   r6   zCjkUncommonPlugin.feed  s4    "9%  A%  &r   c                     d| _         d| _        y r^   r   r8   s    r   r9   zCjkUncommonPlugin.reset  s     ! r   c                l    | j                   dk  ry| j                  | j                   z  }|dkD  r|dz  S dS )Nr   r`   r   r   r   )r1   uncommon_form_usages     r   r;   zCjkUncommonPlugin.ratio  sD      1$%)%9%9D<Q<Q%Q ,?+D"R'M#Mr   NrB   r<   r@   rC   )rE   rF   rG   rH   rI   rU   r3   r6   r9   rJ   r;   r   r   r   r   r     s8     8I&!! N Nr   r   c                  B    e Zd ZdZddZd	dZd
dZddZedd       Z	y)ArchaicUpperLowerPlugin_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalrP   _last_alpha_seen_current_ascii_onlyc                f    d| _         d| _        d| _        d| _        d| _        d | _        d| _        y )NFr   Tr   r8   s    r   rU   z ArchaicUpperLowerPlugin.__init__  s9    	45,23*890%&,0)- r   c                     yrv   r   r0   s     r   r3   z ArchaicUpperLowerPlugin.eligible  rw   r   c                D   |j                         xr t        |      }| }|r| j                  dkD  r| j                  dk  r;|j                         s+| j                  s| xj
                  | j                  z  c_        d| _        d| _        d | _        d| _        | xj                  dz  c_	        d| _        y | j                  r|j                         sd| _        | j                  |j                         r| j                  j                         s*|j                         rK| j                  j                         r1| j                  r| xj                  dz  c_        d| _        nd| _        nd| _        | xj                  dz  c_	        | xj                  dz  c_        || _        y )Nr   @   Fr   Tr[   )rj   r   r   r\   r   r   r   r   r   rP   isasciir   islower)r1   r2   is_concerned	chunk_seps       r   r6   zArchaicUpperLowerPlugin.feed  sf   &..0P5Ei5P**	==A44:!))+0088668 23D.34D0$(D!DI!!Q&!'+D$##I,=,=,?',D$  ,!!#(=(=(E(E(G!!#(=(=(E(E(G9966!;6 %DI $DI!	",,1, )r   c                f    d| _         d| _        d| _        d| _        d | _        d| _        d| _        y )Nr   FT)rP   r   r   r   r   r   r   r8   s    r   r9   zArchaicUpperLowerPlugin.reset2  s9     !/0,-.*340 $	#' r   c                T    | j                   dk(  ry| j                  | j                   z  S )Nr   r`   )rP   r   r8   s    r   r;   zArchaicUpperLowerPlugin.ratio;  s*      A%77$:O:OOOr   NrB   r<   r@   rC   rc   r   r   r   r   r     s4    I.(*T( P Pr   r   c                  B    e Zd ZdZddZddZd	dZd
dZedd       Z	y)ArabicIsolatedFormPluginrP   _isolated_form_countc                     d| _         d| _        y r^   r   r8   s    r   rU   z!ArabicIsolatedFormPlugin.__init__G  s    %&)*!r   c                     d| _         d| _        y r^   r   r8   s    r   r9   zArabicIsolatedFormPlugin.resetK  s     !$%!r   c                    t        |      S r
   )r   r0   s     r   r3   z!ArabicIsolatedFormPlugin.eligibleO  s    ##r   c                p    | xj                   dz  c_         t        |      r| xj                  dz  c_        y y rl   )rP   r   r   r0   s     r   r6   zArabicIsolatedFormPlugin.feedR  s1    ""9-%%*% .r   c                X    | j                   dk  ry| j                  | j                   z  }|S )Nr   r`   r   )r1   isolated_form_usages     r   r;   zArabicIsolatedFormPlugin.ratioX  s0      1$%)%>%>AVAV%V""r   NrB   r<   r@   rC   )
rE   rF   rG   rI   rU   r9   r3   r6   rJ   r;   r   r   r   r   r   C  s/    <I+&$+ # #r   r      )maxsizec                   | |y| |k(  ryd| v rd|v ryd| v sd|v ryd| v sd|v r	d| v sd|v ry| j                  d      |j                  d      }}|D ]  }|t        v r||v s y | dv |dv }}|s|r	d| v sd|v ry|r|ryd	| v sd	|v rd| v sd|v ry| d
k(  s|d
k(  ryd| v sd|v s| dv r!|dv rd| v sd|v ryd| v sd|v ry| d
k(  s|d
k(  ryy)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    TFLatin	Emoticons	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationForms)splitr   )r   r   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r   r   r   b  s    /"9/)/!g&@o%)G 	?"g&@&+*H 	c"c" '
  00!!	 	
	

 	33 ' 	, E_$<,?"h/&AO#u'?m+-/O 	 E_$<3377O+}/Oo%O)Cm+-/Or   c              #      K   | ]  }|  y wr
   r   ).0md_classs     r   	<genexpr>r     s      @H@s   z$tuple[type[MessDetectorPlugin], ...]_DETECTOR_CLASSESi   c           	        t         D cg c]	  } |        }}t        |       }|dk  rd}n
|dk  rd}nd}t        d||      D ]R  }| |||z    D ],  }|D ]%  }	|	j                  |      s|	j	                  |       ' . t        d |D              }
|
|k\  sR n= |D ]%  }	|	j                  d      s|	j	                  d       ' t        d	 |D              }
|rt        d
      }|j                  t        d| d|
 d|        |dkD  r8|j                  t        d| dd         |j                  t        d| dd         |D ]1  }|j                  t        |j                   d|j                          3 t        |
d      S c c}w )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    i      r   r      r   c              3  4   K   | ]  }|j                     y wr
   r;   r   dts     r   r   zmess_ratio.<locals>.<genexpr>       ;2bhh;   
c              3  4   K   | ]  }|j                     y wr
   r   r   s     r   r   zmess_ratio.<locals>.<genexpr>  r  r  charset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz: r   )r   lenranger3   r6   sumr   logr   	__class__r;   round)decoded_sequencemaximum_thresholddebugr   	detectorsseq_lenstepblock_startr2   detectormean_mess_ratiologgerr  s                r   
mess_ratior    s    GX*X(8:*XI*X '(G}	4Q. <)+d8JK 	-I% -$$Y/MM),-	-
 ;;;//< " 	$H  &d#	$ ;;;/0

1156GGX Y!!2 35	
 R<JJu0@"0E/FGHJJu.>su.E-FGH 	=BJJub
;<	= !$$] +Ys   E?)r   
str | Noner   r  r>   r?   )g?F)r  r=   r  rD   r  r?   r>   rD   )8
__future__r   sys	functoolsr   loggingr   version_infotypingr   typing_extensionsImportErrorconstantr   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r*   __annotations__r,   rL   re   rq   r|   r   r   r   r   r   r   tuple__subclasses__r   r  r   r   r   <module>r'     sQ   " 
  v+      & '>I-	9EAS A!" !"H 4L'9 4L 4Ln O1 O O: E* E E4 -D&8 -D -D` :/( :/ :/z JA- JA JAZ "N* "N "NJ SP0 SP SPl #1 # #< 4FF2<F	F FT ;@ @/>>@@ ; 7 
 4IN5%5%.35%BF5%
5% 5%O  	s   E. .E:9E: