<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- A <title> is required in every HTML document; replace this placeholder with a page-specific title. -->
    <title>Document</title>
    <!-- Bootstrap 5.3.0-alpha1 stylesheet from jsDelivr, pinned with a Subresource Integrity hash. -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD" crossorigin="anonymous">
    <!-- Font Awesome 6.3.0 stylesheet from cdnjs, pinned with a Subresource Integrity hash. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.3.0/css/all.min.css"
        integrity="sha512-SzlrxWUlpfuzQ+pcUCosxcglQRNAq/DZjVsC0lE40xsADsfeQoEypE+enwcOiGjk/bSuGGKHEyjSoQ1zVisanQ=="
        crossorigin="anonymous" referrerpolicy="no-referrer">
</head>
</html>
U
    ʗRe3                     @   s   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ G dd dZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N   )CharSetGroupProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MBCSGroupProber)SBCSGroupProber)UTF1632Proberc                	   @   s   e Zd ZdZdZedZedZedZ	dddd	d
ddddZ
ejfddZedd Zedd Zedd Zdd Zdd Zdd ZdS )UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13c                 C   sT   d | _ d | _g | _d | _d | _d | _d | _d | _|| _t	
t| _d | _|   d S N)_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_data_input_state
_last_charlang_filterlogging	getLogger__name__logger_has_win_bytesreset)selfr    r   /builddir/build/BUILDROOT/alt-python38-pip-22.2.1-2.el8.x86_64/opt/alt/python38/lib/python3.8/site-packages/pip/_vendor/chardet/universaldetector.py__init__T   s    zUniversalDetector.__init__c                 C   s   | j S r   )r   r   r   r   r   input_stateb   s    zUniversalDetector.input_statec                 C   s   | j S r   )r   r!   r   r   r   has_win_bytesf   s    zUniversalDetector.has_win_bytesc                 C   s   | j S r   )r   r!   r   r   r   charset_probersj   s    z!UniversalDetector.charset_probersc                 C   sf   dddd| _ d| _d| _d| _tj| _d| _| jr>| j	  | j
rN| j
	  | jD ]}|	  qTdS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        N        encoding
confidencelanguageF    )r   r   r   r   r   
PURE_ASCIIr   r   r   r   r   r   )r   proberr   r   r   r   n   s    


zUniversalDetector.resetc                 C   s  | j r
dS |sdS t|ts$t|}| js|tjrFdddd| _nv|tjtj	frhdddd| _nT|drdddd| _n:|d	rd
ddd| _n |tj
tjfrdddd| _d| _| jd dk	rd| _ dS | jtjkr(| j|rtj| _n*| jtjkr(| j| j| r(tj| _|dd | _| jsFt | _| jjtjkr| j|tjkr| jj| j dd| _d| _ dS | jtjkr| jst| j | _| j|tjkr| jj| j | jj!d| _d| _ n| jtjkr| j"s4t#| j g| _"| j t$j%@ r&| j"&t'  | j"&t(  | j"D ]:}||tjkr:|j| |j!d| _d| _  qvq:| j)|rd| _*dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r&   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143zUTF-16Tr'   )+r   
isinstance	bytearrayr   
startswithcodecsBOM_UTF8r   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr   r   r+   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr   	ESC_ASCIIr   r   stater   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer   r   r   r)   r   r	   r   NON_CJKappendr
   r   WIN_BYTE_DETECTORr   )r   byte_strr,   r   r   r   r@      s    









zUniversalDetector.feedc           	   	   C   st  | j r| jS d| _ | js&| jd n| jtjkrBdddd| _n| jtjkrd}d}d}| j	D ]"}|sjq`|
 }||kr`|}|}q`|r|| jkr|j}|j }|
 }|d	r| jr| j||}|||jd| _| j tjkrn| jd
 dkrn| jd | j	D ]`}|sqt|trP|jD ] }| jd|j|j|
  q,n| jd|j|j|
  q| jS )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!asciir-   r.   r&   Nr%   ziso-8859r'   z no probers hit minimum thresholdz%s %s confidence = %s)r   r   r   r   debugr   r   r+   r;   r   rC   MINIMUM_THRESHOLDrB   lowerr2   r   ISO_WIN_MAPgetr)   getEffectiveLevelr   DEBUGr0   r   probers)	r   prober_confidencemax_prober_confidence
max_proberr,   rB   lower_charset_namer(   group_proberr   r   r   close  sj    	


 


zUniversalDetector.closeN)r   
__module____qualname____doc__rJ   recompiler9   r<   rF   rL   r   ALLr    propertyr"   r#   r$   r   r@   rV   r   r   r   r   r   4   s2   





 r   )rY   r3   r   rZ   charsetgroupproberr   enumsr   r   r   	escproberr   latin1proberr   mbcsgroupproberr	   sbcsgroupproberr
   utf1632proberr   r   r   r   r   r   <module>   s   