
    `i1                        S SK Jr  S SKJr  S SKJr  S SKJr  S SKJ	r	J
r
JrJr  SSKJrJr  SSKJrJrJr   " S	 S
5      r " S S5      r\\\4   r\\   r " S S5      rg)    )annotations)aliases)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     \ rS rSr  S             SS jjrSS jrSS jr\SS j5       rS S jr	S S jr
S!S	 jr\S S
 j5       r\S"S j5       r\S#S j5       r\S#S j5       r\S"S j5       r\S S j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\S$S j5       r\S%S j5       r\S#S j5       r\S"S j5       r\S"S j5       rS&S'S jjr\S(S j5       rSrg))CharsetMatch   Nc                    Xl         X l        X0l        XPl        X@l        S U l        / U l        SU l        S U l        S U l	        X`l
        Xpl        g )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesdecoded_payloadpreemptive_declarations           WC:\Users\13-19\Desktop\python_web\travel\Lib\site-packages\charset_normalizer/models.py__init__CharsetMatch.__init__   sQ      '.'6,5%315+-,/"-1,0#23I$    c                    [        U[        5      (       d.  [        U[        5      (       a  [        U5      U R                  :H  $ gU R                  UR                  :H  =(       a    U R
                  UR
                  :H  $ )NF)
isinstancer   strr   encodingfingerprintr"   others     r*   __eq__CharsetMatch.__eq__)   s\    %..%%% '4==88}}.X43C3CuGXGX3XXr-   c                   [        U[        5      (       d  [        e[        U R                  UR                  -
  5      n[        U R
                  UR
                  -
  5      nUS:  a  US:  a  U R
                  UR
                  :  $ US:  aU  US::  aO  [        U R                  5      [        :  a  U R                  UR                  :  $ U R                  UR                  :  $ U R                  UR                  :  $ )zA
Implemented to make sorted available upon CharsetMatches items.
g{Gz?g{Gz?)
r/   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r"   r4   chaos_differencecoherence_differences       r*   __lt__CharsetMatch.__lt__0   s     %.."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r-   c                \    S[        [        U 5      5      [        U R                  5      -  -
  $ )Ng      ?)r<   r0   rawr"   s    r*   r=   CharsetMatch.multi_byte_usageF   s"    c#d)ns488}455r-   c                    U R                   c&  [        U R                  U R                  S5      U l         U R                   $ )Nstrict)r    r0   r   r   rD   s    r*   __str__CharsetMatch.__str__J   s.    <<t}}dnnhGDL||r-   c                <    SU R                    SU R                   S3$ )Nz<CharsetMatch 'z' fp(z)>)r1   r2   rD   s    r*   __repr__CharsetMatch.__repr__P   s"     uT5E5E4FbIIr-   c                    [        U[        5      (       a  X:X  a$  [        SR                  UR                  5      5      eS Ul        U R                  R                  U5        g )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r/   r   r8   format	__class__r    r   appendr3   s     r*   add_submatchCharsetMatch.add_submatchS   sP    %..%-MTTOO  E"r-   c                    U R                   $ N)r   rD   s    r*   r1   CharsetMatch.encoding^   s    ~~r-   c                    / n[         R                  " 5        HK  u  p#U R                  U:X  a  UR                  U5        M(  U R                  U:X  d  M:  UR                  U5        MM     U$ )zr
Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
)r   itemsr1   rP   )r"   also_known_asups       r*   encoding_aliasesCharsetMatch.encoding_aliasesb   sW    
 $&MMODA}}!$$Q'!#$$Q'	 $
 r-   c                    U R                   $ rT   r   rD   s    r*   bomCharsetMatch.bomo       ###r-   c                    U R                   $ rT   r^   rD   s    r*   byte_order_markCharsetMatch.byte_order_marks   ra   r-   c                J    U R                    Vs/ s H  oS   PM	     sn$ s  snf )z
Return the complete list of possible languages found in decoded sequence.
Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
r   r   )r"   es     r*   r'   CharsetMatch.languagesw   s"     #oo.o!o...s    c                ,   U R                   (       dr  SU R                  ;   a  gSSKJnJn  [        U R                  5      (       a  U" U R                  5      OU" U R                  5      n[        U5      S:X  d  SU;   a  gUS   $ U R                   S   S   $ )zz
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r   could_be_from_charsetcharset_normalizer.cdrl   rm   r   r1   r<   )r"   rl   rm   r'   s       r*   languageCharsetMatch.language   s      $444  X *$--88 &dmm4'6  9~"my&@ Q<q!!$$r-   c                    U R                   $ rT   )r   rD   s    r*   r:   CharsetMatch.chaos   s    $$$r-   c                J    U R                   (       d  gU R                   S   S   $ )Nr   r   r   rf   rD   s    r*   r;   CharsetMatch.coherence   s     q!!$$r-   c                0    [        U R                  S-  SS9$ Nd      )ndigits)roundr:   rD   s    r*   percent_chaosCharsetMatch.percent_chaos   s    TZZ#%q11r-   c                0    [        U R                  S-  SS9$ rx   )r|   r;   rD   s    r*   percent_coherenceCharsetMatch.percent_coherence   s    T^^c)155r-   c                    U R                   $ )z
Original untouched bytes.
)r   rD   s    r*   rC   CharsetMatch.raw   s    
 }}r-   c                    U R                   $ rT   )r   rD   s    r*   submatchCharsetMatch.submatch   s    ||r-   c                2    [        U R                  5      S:  $ Nr   )r<   r   rD   s    r*   has_submatchCharsetMatch.has_submatch   s    4<< 1$$r-   c                   U R                   b  U R                   $ [        U 5       Vs/ s H  n[        U5      PM     nn[        [	        U Vs1 s H  o3(       d  M  UiM     sn5      5      U l         U R                   $ s  snf s  snf rT   )r   r0   r   sortedlist)r"   chardetected_rangesrs       r*   	alphabetsCharsetMatch.alphabets   sp    +'''MPQUY,WYT]4-@Y,W%d+L!!A+L&MN### -X+Ls   A<
BBc                t    U R                   /U R                   Vs/ s H  oR                  PM     sn-   $ s  snf )z
The complete list of encoding that output the exact SAME str result and therefore could be the originating
encoding.
This list does include the encoding available in property 'encoding'.
)r   r   r1   )r"   ms     r*   ro   "CharsetMatch.could_be_from_charset   s.     t||"D|!::|"DDD"Ds   5c                :  ^  T R                   b  T R                   U:w  ar  UT l         [        T 5      nT R                  b<  T R                  R                  5       S;  a  [	        [
        U 4S jUSS SS9nX2SS -   nUR                  US5      T l        T R                  $ )z
Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
Any errors will be simply ignored by the encoder NOT replaced.
N)zutf-8utf8utf_8c                   > U R                   U R                  5       S   U R                  5       S    R                  U R                  5       S   [	        TR
                  5      R                  SS5      5      $ )Nr   r   _-)stringspanreplacegroupsr   r   )r   r"   s    r*   <lambda>%CharsetMatch.output.<locals>.<lambda>   sW    ahhqvvx{QVVXa[AII
1!$"7"78@@cJr-   i    r   )countr   )r   r0   r!   lowerr   r   encoder   )r"   r1   decoded_stringpatched_headers   `   r*   outputCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "02G!G#1#8#89#MD ###r-   c                *    [        [        U 5      5      $ )zM
Retrieve a hash fingerprint of the decoded payload, used for deduplication.
)hashr0   rD   s    r*   r2   CharsetMatch.fingerprint   s    
 CIr-   )r   r   r   r   r   r   r   r   r   r!   r    r   )NN)r#   bytesr$   r0   r%   floatr&   boolr'   CoherenceMatchesr(   
str | Noner)   r   )r4   objectreturnr   )r   r   r   r0   )r4   r   r   None)r   	list[str]r   r   )r   r   )r   zlist[CharsetMatch])r   )r1   r0   r   r   r   int)__name__
__module____qualname____firstlineno__r+   r5   r@   propertyr=   rH   rK   rQ   r1   r[   r_   rc   r'   rq   r:   r;   r}   r   rC   r   r   r   ro   r   r2   __static_attributes__ r-   r*   r   r      s    '+-1JJ J 	J
 J $J $J !+J8Y(, 6 6J	#   
 
 $ $ $ $ / / % %6 % % % %
 2 2 6 6     % % $ $ E E$:  r-   r   c                  l    \ rS rSrSrSSS jjrSS jrSS jrSS jrSS jr	SS	 jr
SS
 jrSS jrSrg)CharsetMatches   z
Container with every CharsetMatch items ordered by default from most probable to the less one.
Act like a list(iterable) but does not implements all related methods.
Nc                B    U(       a  [        U5      U l        g / U l        g rT   )r   _results)r"   resultss     r*   r+   CharsetMatches.__init__   s    ?FF7OBr-   c              #  8   #    U R                    S h  vN   g  N7frT   r   rD   s    r*   __iter__CharsetMatches.__iter__   s     ==  s   c                    [        U[        5      (       a  U R                  U   $ [        U[        5      (       a2  [	        US5      nU R                   H  nXR
                  ;   d  M  Us  $    [        e)z
Retrieve a single item either by its position or encoding name (alias may be used here).
Raise KeyError upon invalid index or encoding not present in results.
F)r/   r   r   r0   r   ro   KeyError)r"   itemresults      r*   __getitem__CharsetMatches.__getitem__   s_    
 dC  ==&&dC  T5)D--777!M ( r-   c                ,    [        U R                  5      $ rT   r<   r   rD   s    r*   __len__CharsetMatches.__len__  s    4==!!r-   c                2    [        U R                  5      S:  $ r   r   rD   s    r*   __bool__CharsetMatches.__bool__  s    4==!A%%r-   c                   [        U[        5      (       d-  [        SR                  [	        UR
                  5      5      5      e[        UR                  5      [        :  a\  U R                   HL  nUR                  UR                  :X  d  M  UR                  UR                  :X  d  M;  UR                  U5          g   U R                  R                  U5        [        U R                  5      U l	        g)zf
Insert a single match. Will be inserted accordingly to preserve sort.
Can be inserted as a submatch.
z-Cannot append instance '{}' to CharsetMatchesN)r/   r   r8   rN   r0   rO   r<   rC   r   r   r2   r:   rQ   rP   r   )r"   r   matchs      r*   rP   CharsetMatches.append  s    
 $--?FF'  txx=++$$(8(88U[[DJJ=V&&t, ' 	T"t}}-r-   c                D    U R                   (       d  gU R                   S   $ )zA
Simply return the first match. Strict equivalent to matches[0].
Nr   r   rD   s    r*   bestCharsetMatches.best(  s     }}}}Qr-   c                "    U R                  5       $ )z@
Redundant method, call the method best(). Kept for BC reasons.
)r   rD   s    r*   firstCharsetMatches.first0  s     yy{r-   r   rT   )r   zlist[CharsetMatch] | None)r   zIterator[CharsetMatch])r   z	int | strr   r   r   r   )r   r   r   r   )r   zCharsetMatch | None)r   r   r   r   __doc__r+   r   r   r   r   rP   r   r   r   r   r-   r*   r   r      s0    
O!"&.( r-   r   c                  h    \ rS rSr                      SS jr\SS j5       rS	S jrSrg)
CliDetectionResulti;  c                    Xl         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl	        Xl
        g rT   )pathunicode_pathr1   r[   alternative_encodingsrq   r   r&   r:   r;   is_preferred)r"   r   r1   r[   r   rq   r   r&   r:   r;   r   r   s               r*   r+   CliDetectionResult.__init__<  s@     	(4$,+;0E"%$-$2!
 )".r-   c                    U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.$ )Nr   r1   r[   r   rq   r   r&   r:   r;   r   r   r   rD   s    r*   __dict__CliDetectionResult.__dict__V  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r-   c                ,    [        U R                  SSS9$ )NT   )ensure_asciiindent)r   r   rD   s    r*   to_jsonCliDetectionResult.to_jsonf  s    T]]a@@r-   )r   r   r:   r;   r1   r[   r&   r   rq   r   r   N)r   r0   r1   r   r[   r   r   r   rq   r0   r   r   r&   r   r:   r   r;   r   r   r   r   r   )r   zdict[str, Any]r   )	r   r   r   r   r+   r   r   r   r   r   r-   r*   r   r   ;  s    // / $	/
  )/ / / / / / !/ /4 
 
Ar-   r   N)
__future__r   encodings.aliasesr   jsonr   rer   typingr   r   r	   r
   constantr   r   utilsr   r   r   r   r   r0   r   CoherenceMatchr   r   r   r-   r*   <module>r      sa    " %   - - G C Ce eP@ @F sEz"' ,A ,Ar-   