<!DOCTYPE html>
<html lang="en">

<head>
    <!-- Character encoding is declared first so it falls within the first 1024 bytes. -->
    <meta charset="UTF-8">
    <!-- Legacy hint asking Internet Explorer to use its newest rendering engine. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Responsive viewport; zooming is intentionally left enabled. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- TODO: replace this placeholder with a descriptive, page-specific title. -->
    <title>Untitled page</title>
    <!-- Bootstrap 5.3.0-alpha1 stylesheet from jsDelivr, pinned with Subresource Integrity. -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD" crossorigin="anonymous">
    <!-- Font Awesome 6.3.0 icon stylesheet from cdnjs, pinned with Subresource Integrity. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.3.0/css/all.min.css"
        integrity="sha512-SzlrxWUlpfuzQ+pcUCosxcglQRNAq/DZjVsC0lE40xsADsfeQoEypE+enwcOiGjk/bSuGGKHEyjSoQ1zVisanQ=="
        crossorigin="anonymous" referrerpolicy="no-referrer">
</head>
</html>
B
    <_9Yö  ã               @   s0   d dl Z d dlZddlmZ G dd„ deƒZdS )é    Né   )ÚProbingStatec               @   sn   e Zd ZdZddd„Zdd„ Zedd„ ƒZd	d
„ Zedd„ ƒZ	dd„ Z
edd„ ƒZedd„ ƒZedd„ ƒZdS )ÚCharSetProbergffffffî?Nc             C   s   d | _ || _t t¡| _d S )N)Ú_stateÚlang_filterÚloggingZ	getLoggerÚ__name__Zlogger)Úselfr   © r
   úF/opt/alt/python37/lib/python3.7/site-packages/chardet/charsetprober.pyÚ__init__'   s    zCharSetProber.__init__c             C   s   t j| _d S )N)r   Z	DETECTINGr   )r	   r
   r
   r   Úreset,   s    zCharSetProber.resetc             C   s   d S )Nr
   )r	   r
   r
   r   Úcharset_name/   s    zCharSetProber.charset_namec             C   s   d S )Nr
   )r	   Úbufr
   r
   r   Úfeed3   s    zCharSetProber.feedc             C   s   | j S )N)r   )r	   r
   r
   r   Ústate6   s    zCharSetProber.statec             C   s   dS )Ng        r
   )r	   r
   r
   r   Úget_confidence:   s    zCharSetProber.get_confidencec             C   s   t  dd| ¡} | S )Ns   ([ -])+ó    )ÚreÚsub)r   r
   r
   r   Úfilter_high_byte_only=   s    z#CharSetProber.filter_high_byte_onlyc             C   s`   t ƒ }t d| ¡}xH|D ]@}| |dd… ¡ |dd… }| ¡ sN|dk rNd}| |¡ qW |S )u9  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [Â€-Ã¿]
        marker: everything else [^a-zA-ZÂ€-Ã¿]

        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.

        This filter applies to all scripts which do not use English characters.
        s%   [a-zA-Z]*[€-ÿ]+[a-zA-Z]*[^a-zA-Z€-ÿ]?Néÿÿÿÿó   €r   )Ú	bytearrayr   ÚfindallÚextendÚisalpha)r   ÚfilteredÚwordsZwordZ	last_charr
   r
   r   Úfilter_international_wordsB   s    
z(CharSetProber.filter_international_wordsc             C   s¨   t ƒ }d}d}x~tt| ƒƒD ]n}| ||d … }|dkr>d}n|dkrJd}|dk r| ¡ s||kr‚|s‚| | ||… ¡ | d¡ |d }qW |s¤| | |d	… ¡ |S )
aÈ  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        Also retains English alphabet and high byte characters immediately
        before occurrences of >.

        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        Fr   r   ó   >ó   <Tr   r   N)r   ÚrangeÚlenr   r   )r   r   Zin_tagÚprevZcurrZbuf_charr
   r
   r   Úfilter_with_english_lettersg   s"    
z)CharSetProber.filter_with_english_letters)N)r   Ú
__module__Ú__qualname__ZSHORTCUT_THRESHOLDr   r   Úpropertyr   r   r   r   Ústaticmethodr   r   r%   r
   r
   r
   r   r   #   s   
%r   )r   r   Zenumsr   Úobjectr   r
   r
   r
   r   Ú<module>   s   