
    o[i 0                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZ  G d	 d
      Z G d d      Zeeef   Zee   Z G d d      Zy)    )annotations)aliases)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZedd       ZddZddZ	ddZ
edd	       Zed d
       Zed!d       Zed!d       Zed d       Zedd       Zedd       Zedd       Zedd       Zedd       Zed"d       Zed#d       Zed!d       Zed d       Zed d       Zd$d%dZed&d       Zy)'CharsetMatchNc                    || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        y )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesdecoded_payloadpreemptive_declarations           R/mnt/data/GridWatch/venv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s_      '.'6,5%315+-,/"-1,0#23I$    c                    t        |t              s)t        |t              rt        |      | j                  k(  S y| j                  |j                  k(  xr | j
                  |j
                  k(  S )NF)
isinstancer   strr   encodingfingerprintr!   others     r)   __eq__zCharsetMatch.__eq__)   sV    %.%% '4==88}}.X43C3CuGXGX3XXr+   c                   t        |t              st        t        | j                  |j                  z
        }t        | j
                  |j
                  z
        }|dk  r|dkD  r| j
                  |j
                  kD  S |dk  rS|dk  rNt        | j                        t        k\  r| j                  |j                  k  S | j                  |j                  kD  S | j                  |j                  k  S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r-   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r!   r2   chaos_differencecoherence_differences       r)   __lt__zCharsetMatch.__lt__0   s     %."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r+   c                \    dt        t        |             t        | j                        z  z
  S )Ng      ?)r9   r.   rawr!   s    r)   r:   zCharsetMatch.multi_byte_usageF   s"    c#d)ns488}455r+   c                ~    | j                   &t        | j                  | j                  d      | _         | j                   S )Nstrict)r   r.   r   r   r@   s    r)   __str__zCharsetMatch.__str__J   s.    <<t}}dnnhGDL||r+   c                <    d| j                    d| j                   dS )Nz<CharsetMatch 'z' fp(z)>)r/   r0   r@   s    r)   __repr__zCharsetMatch.__repr__P   s"     uT5E5E4FbIIr+   c                    t        |t              r|| k(  r$t        dj                  |j                              d |_        | j                  j                  |       y )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r-   r   r5   format	__class__r   r   appendr1   s     r)   add_submatchzCharsetMatch.add_submatchS   sO    %.%4-MTTOO  E"r+   c                    | j                   S N)r   r@   s    r)   r/   zCharsetMatch.encoding^   s    ~~r+   c                    g }t        j                         D ]G  \  }}| j                  |k(  r|j                  |       '| j                  |k(  s7|j                  |       I |S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr/   rI   )r!   also_known_asups       r)   encoding_aliaseszCharsetMatch.encoding_aliasesb   s^    
 $&MMO 	(DAq}}!$$Q'!#$$Q'		(
 r+   c                    | j                   S rL   r   r@   s    r)   bomzCharsetMatch.bomo       ###r+   c                    | j                   S rL   rT   r@   s    r)   byte_order_markzCharsetMatch.byte_order_marks   rV   r+   c                F    | j                   D cg c]  }|d   	 c}S c c}w )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        r   r   )r!   es     r)   r&   zCharsetMatch.languagesw   s      #oo.!...s   c                   | j                   shd| j                  v ryddlm}m} t        | j                        r || j                        n || j                        }t        |      dk(  sd|v ry|d   S | j                   d   d   S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r   could_be_from_charsetcharset_normalizer.cdr_   r`   r   r/   r9   )r!   r_   r`   r&   s       r)   languagezCharsetMatch.language   s      $444  X *$--8 &dmm4'6  9~"my&@ Q<q!!$$r+   c                    | j                   S rL   )r   r@   s    r)   r7   zCharsetMatch.chaos   s    $$$r+   c                @    | j                   sy| j                   d   d   S )Nr   r   r   rZ   r@   s    r)   r8   zCharsetMatch.coherence   s     q!!$$r+   c                6    t        | j                  dz  d      S Nd      )ndigits)roundr7   r@   s    r)   percent_chaoszCharsetMatch.percent_chaos   s    TZZ#%q11r+   c                6    t        | j                  dz  d      S rh   )rl   r8   r@   s    r)   percent_coherencezCharsetMatch.percent_coherence   s    T^^c)155r+   c                    | j                   S )z+
        Original untouched bytes.
        )r   r@   s    r)   r?   zCharsetMatch.raw   s    
 }}r+   c                    | j                   S rL   )r   r@   s    r)   submatchzCharsetMatch.submatch   s    ||r+   c                2    t        | j                        dkD  S Nr   )r9   r   r@   s    r)   has_submatchzCharsetMatch.has_submatch   s    4<< 1$$r+   c                    | j                   | j                   S t        |       D cg c]  }t        |       }}t        t	        |D ch c]  }|s|	 c}            | _         | j                   S c c}w c c}w rL   )r   r.   r   sortedlist)r!   chardetected_rangesrs       r)   	alphabetszCharsetMatch.alphabets   sk    +'''MPQUY,WT]4-@,W,W%d+L!!A+L&MN### -X+Ls   A0A5A5c                p    | j                   g| j                  D cg c]  }|j                   c}z   S c c}w )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        )r   r   r/   )r!   ms     r)   rb   z"CharsetMatch.could_be_from_charset   s,     t||"D!1::"DDD"Ds   3c                6     j                    j                   |k7  rr| _         t               } j                  = j                  j                         dvr!t	        t
         fd|dd d      }||dd z   }|j                  |d       _         j                  S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8utf_8c                    | j                   | j                         d   | j                         d    j                  | j                         d   t	        j
                        j                  dd            S )Nr   r   _-)stringspanreplacegroupsr   r   )r~   r!   s    r)   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sY    ahhqvvx{QVVXa[AII
1!$"7"78@@cJ r+   i    r   )countr   )r   r.   r    lowerr   r   encoder   )r!   r/   decoded_stringpatched_headers   `   r)   outputzCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "0.2G!G#1#8#89#MD ###r+   c                *    t        t        |             S )z]
        Retrieve a hash fingerprint of the decoded payload, used for deduplication.
        )hashr.   r@   s    r)   r0   zCharsetMatch.fingerprint   s    
 CIr+   )NN)r"   bytesr#   r.   r$   floatr%   boolr&   CoherenceMatchesr'   
str | Noner(   r   )r2   objectreturnr   )r   r   r   r.   )r2   r   r   None)r   	list[str]r   r   )r   r   )r   zlist[CharsetMatch])r   )r/   r.   r   r   r   int)__name__
__module____qualname__r*   r3   r=   propertyr:   rC   rE   rJ   r/   rR   rU   rX   r&   rd   r7   r8   rm   ro   r?   rr   ru   r|   rb   r   r0    r+   r)   r   r      s    '+-1JJ J 	J
 J $J $J !+J8Y(, 6 6J	#   
 
 $ $ $ $ / / % %6 % % % %
 2 2 6 6     % % $ $ E E$:  r+   r   c                  R    e Zd ZdZdddZddZddZddZddZddZ	dd	Z
dd
Zy)CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nc                8    |rt        |      | _        y g | _        y rL   )rw   _results)r!   resultss     r)   r*   zCharsetMatches.__init__   s    ?FF7OBr+   c              #  8   K   | j                   E d {    y 7 wrL   r   r@   s    r)   __iter__zCharsetMatches.__iter__   s     ==  s   c                    t        |t              r| j                  |   S t        |t              r/t	        |d      }| j                  D ]  }||j
                  v s|c S  t        )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r-   r   r   r.   r   rb   KeyError)r!   itemresults      r)   __getitem__zCharsetMatches.__getitem__   s`    
 dC ==&&dC T5)D-- "6777!M" r+   c                ,    t        | j                        S rL   r9   r   r@   s    r)   __len__zCharsetMatches.__len__  s    4==!!r+   c                2    t        | j                        dkD  S rt   r   r@   s    r)   __bool__zCharsetMatches.__bool__  s    4==!A%%r+   c                   t        |t              s-t        dj                  t	        |j
                                    t        |j                        t        k  rW| j                  D ]H  }|j                  |j                  k(  s|j                  |j                  k(  s7|j                  |        y | j                  j                  |       t        | j                        | _	        y)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r-   r   r5   rG   r.   rH   r9   r?   r   r   r0   r7   rJ   rI   rw   )r!   r   matchs      r)   rI   zCharsetMatches.append  s    
 $-?FF'  txx=++ $$(8(88U[[DJJ=V&&t, 	T"t}}-r+   c                :    | j                   sy| j                   d   S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   r@   s    r)   bestzCharsetMatches.best(  s     }}}}Qr+   c                "    | j                         S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   r@   s    r)   firstzCharsetMatches.first0  s     yy{r+   rL   )r   zlist[CharsetMatch] | None)r   zIterator[CharsetMatch])r   z	int | strr   r   r   r   )r   r   r   r   )r   zCharsetMatch | None)r   r   r   __doc__r*   r   r   r   r   rI   r   r   r   r+   r)   r   r      s0    
O!"&.( r+   r   c                  Z    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd       ZddZy)CliDetectionResultc                    || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        y rL   )pathunicode_pathr/   rR   alternative_encodingsrd   r|   r%   r7   r8   is_preferred)r!   r   r/   rR   r   rd   r|   r%   r7   r8   r   r   s               r)   r*   zCliDetectionResult.__init__<  sV     	(4$,+;0E"%$-$2!
 )".r+   c                    | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  dS )Nr   r/   rR   r   rd   r|   r%   r7   r8   r   r   r   r@   s    r)   __dict__zCliDetectionResult.__dict__V  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r+   c                2    t        | j                  dd      S )NT   )ensure_asciiindent)r   r   r@   s    r)   to_jsonzCliDetectionResult.to_jsonf  s    T]]a@@r+   N)r   r.   r/   r   rR   r   r   r   rd   r.   r|   r   r%   r   r7   r   r8   r   r   r   r   r   )r   zdict[str, Any]r   )r   r   r   r*   r   r   r   r   r+   r)   r   r   ;  s    // / $	/
  )/ / / / / / !/ /4 
 
Ar+   r   N)
__future__r   encodings.aliasesr   jsonr   rer   typingr   r   r	   r
   constantr   r   utilsr   r   r   r   r   r.   r   CoherenceMatchr   r   r   r+   r)   <module>r      sa    " %   - - G C Ce eP@ @F sEz"' ,A ,Ar+   