<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.3.0/css/all.min.css"
        integrity="sha512-SzlrxWUlpfuzQ+pcUCosxcglQRNAq/DZjVsC0lE40xsADsfeQoEypE+enwcOiGjk/bSuGGKHEyjSoQ1zVisanQ=="
        crossorigin="anonymous" referrerpolicy="no-referrer" />
</head>
</html>
3
Re,                @   s   d dl mZmZmZ d dlmZ d dlmZm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ eeZedkreZne	ZG dd deZdS )    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie      c                   sd  e Zd ZdZd fdd	Zdd Zdd Zdd
dZdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)dPdQ Z*dRdS Z+dTdU Z,dVdW Z-dXdY Z.dZd[ Z/d\d] Z0d^d_ Z1d`da Z2dbdc Z3ddde Z4dfdg Z5dhdi Z6djdk Z7dldm Z8dndo Z9dpdq Z:drds Z;dtdu Z<dvdw Z=dxdy Z>dzd{ Z?d|d} Z@d~d ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZL  ZMS )HTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    Nc                sF   t |f|| _|| _d| _g | _| j| _d| _d | _t	t
| j  d S )NF)r   streamparser
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs)	__class__ /builddir/build/BUILDROOT/alt-python36-pip-20.2.4-5.el8.x86_64/opt/alt/python36/lib/python3.6/site-packages/pip/_vendor/html5lib/_tokenizer.pyr"   (   s    zHTMLTokenizer.__init__c             c   s\   t g | _xL| j rVx&| jjr:td | jjjddV  qW x| jrR| jj V  q>W qW dS )z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr   )typedataN)r   
tokenQueuer   r   errorsr   poppopleft)r#   r&   r&   r'   __iter__7   s    


zHTMLTokenizer.__iter__c       	   %   C   s(  t }d}|rt}d}g }| jj }x(||krJ|tk	rJ|j| | jj }q$W tdj||}|tkrt| }| j	jt
d dd|id nld|  kod	kn  s|d
krd}| j	jt
d dd|id n(d|  kodkn  sd|  kodkn  sd|  kodkn  sd|  ko4dkn  s|tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#kr| j	jt
d dd|id yt|}W n> tk
r   |d6 }td|d? B td7|d8@ B  }Y nX |d9kr$| j	jt
d d:d; | jj| |S )<zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r(   z$illegal-codepoint-for-numeric-entity	charAsInt)r)   r*   datavarsi   i  i u   �r	                  i  i     i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i   i   i  ;z numeric-entity-without-semicolon)r)   r*   )r   r   r   charr   appendintjoinr   r+   r   	frozensetchr
ValueErrorunget)	r#   isHexallowedradix	charStackcr3   r<   vr&   r&   r'   consumeNumberEntityG   s`    

&

z!HTMLTokenizer.consumeNumberEntityFc       	      C   s  d}| j j g}|d tksB|d tddfksB|d k	rV||d krV| j j|d  n"|d dkrd}|j| j j  |d dkrd	}|j| j j  |r|d tks| r|d tkr| j j|d  | j|}n4| j	jt
d
 dd | j j|j  ddj| }njx8|d tk	rFtjdj|s2P |j| j j  qW y$tjdj|d d }t|}W n tk
r   d }Y nX |d k	rD|d dkr| j	jt
d
 dd |d dkr|r|| tks|| tks|| dkr| j j|j  ddj| }n.t| }| j j|j  |dj||d  7 }n4| j	jt
d
 dd | j j|j  ddj| }|r| jd d d  |7  < n*|tkrd}nd}| j	jt
| |d d S )N&r   <#Fr	   xXTr(   zexpected-numeric-entity)r)   r*   r2   r;   znamed-entity-without-semicolon=zexpected-named-entityr*   SpaceCharacters
Characters)rN   rO   rS   rS   rS   rS   rS   rS   rS   rS   )r   r<   r
   r   rC   r=   r   r   rJ   r+   r   r-   r?   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr   r   r    )	r#   allowedCharfromAttributeoutputrG   hex
entityNameentityLength	tokenTyper&   r&   r'   consumeEntity   sf    





zHTMLTokenizer.consumeEntityc             C   s   | j |dd dS )zIThis method replaces the need for "entityInAttributeValueState".
        T)rY   rZ   N)r`   )r#   rY   r&   r&   r'   processEntityInAttribute   s    z&HTMLTokenizer.processEntityInAttributec             C   s   | j }|d tkr|d jt|d< |d td krp|d }t|}t|t|krh|j|ddd  ||d< |d td kr|d r| jj	td d	d
 |d r| jj	td dd
 | jj	| | j
| _dS )zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r)   nameStartTagr*   Nr	   EndTagr(   zattributes-in-end-tag)r)   r*   selfClosingzself-closing-flag-on-end-tagrS   )r    r   	translater   r   attributeMaprW   updater+   r=   r   r   )r#   tokenrawr*   r&   r&   r'   emitCurrentToken   s$    

zHTMLTokenizer.emitCurrentTokenc             C   s   | j j }|dkr| j| _n|dkr.| j| _n|dkrd| jjtd dd | jjtd dd n`|tkrpdS |t	kr| jjtd	 || j j
t	d
 d n&| j j
d}| jjtd || d d
S )NrK   rL    r(   zinvalid-codepoint)r)   r*   rR   FrQ   T)rK   rL   rl   )r   r<   entityDataStater   tagOpenStater+   r=   r   r   r
   
charsUntil)r#   r*   charsr&   r&   r'   r      s&    



zHTMLTokenizer.dataStatec             C   s   | j   | j| _dS )NT)r`   r   r   )r#   r&   r&   r'   rm     s    zHTMLTokenizer.entityDataStatec             C   s   | j j }|dkr| j| _n|dkr.| j| _n|tkr:dS |dkrp| jjtd dd | jjtd d	d nT|t	kr| jjtd
 || j j
t	d d n&| j j
d}| jjtd || d dS )NrK   rL   Frl   r(   zinvalid-codepoint)r)   r*   rR   u   �rQ   T)rK   rL   rl   )r   r<   characterReferenceInRcdatar   rcdataLessThanSignStater   r+   r=   r   r
   ro   )r#   r*   rp   r&   r&   r'   rcdataState"  s&    



zHTMLTokenizer.rcdataStatec             C   s   | j   | j| _dS )NT)r`   rs   r   )r#   r&   r&   r'   rq   ?  s    z(HTMLTokenizer.characterReferenceInRcdatac             C   s   | j j }|dkr| j| _nh|dkrR| jjtd dd | jjtd dd n2|tkr^dS | j jd
}| jjtd || d d	S )NrL   rl   r(   zinvalid-codepoint)r)   r*   rR   u   �FT)rL   rl   )	r   r<   rawtextLessThanSignStater   r+   r=   r   r   ro   )r#   r*   rp   r&   r&   r'   rawtextStateD  s    


zHTMLTokenizer.rawtextStatec             C   s   | j j }|dkr| j| _nh|dkrR| jjtd dd | jjtd dd n2|tkr^dS | j jd
}| jjtd || d d	S )NrL   rl   r(   zinvalid-codepoint)r)   r*   rR   u   �FT)rL   rl   )	r   r<   scriptDataLessThanSignStater   r+   r=   r   r   ro   )r#   r*   rp   r&   r&   r'   scriptDataStateV  s    


zHTMLTokenizer.scriptDataStatec             C   sr   | j j }|tkrdS |dkrL| jjtd dd | jjtd dd n"| jjtd || j jd d dS )	NFrl   r(   zinvalid-codepoint)r)   r*   rR   u   �T)r   r<   r   r+   r=   r   ro   )r#   r*   r&   r&   r'   plaintextStateh  s    

zHTMLTokenizer.plaintextStatec             C   s  | j j }|dkr| j| _n|dkr.| j| _n|tkrVtd |g ddd| _| j| _n|dkr| j	j
td dd	 | j	j
td
 dd	 | j| _nt|dkr| j	j
td dd	 | j j| | j| _n@| j	j
td dd	 | j	j
td
 dd	 | j j| | j| _dS )N!/rc   F)r)   rb   r*   re   selfClosingAcknowledged>r(   z'expected-tag-name-but-got-right-bracket)r)   r*   rR   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerL   T)r   r<   markupDeclarationOpenStater   closeTagOpenStater   r   r    tagNameStater+   r=   r   rC   bogusCommentState)r#   r*   r&   r&   r'   rn   w  s6    









zHTMLTokenizer.tagOpenStatec             C   s   | j j }|tkr0td |g dd| _| j| _n|dkrX| jjtd dd | j	| _nn|t
kr| jjtd dd | jjtd	 d
d | j	| _n0| jjtd dd|id | j j| | j| _dS )Nrd   F)r)   rb   r*   re   r|   r(   z*expected-closing-tag-but-got-right-bracket)r)   r*   z expected-closing-tag-but-got-eofrR   z</z!expected-closing-tag-but-got-charr*   )r)   r*   r4   T)r   r<   r   r   r    r   r   r+   r=   r   r   rC   r   )r#   r*   r&   r&   r'   r     s(    





zHTMLTokenizer.closeTagOpenStatec             C   s   | j j }|tkr| j| _n|dkr.| j  n~|tkrV| jjt	d dd | j
| _nV|dkrh| j| _nD|dkr| jjt	d dd | jd  d	7  < n| jd  |7  < d
S )Nr|   r(   zeof-in-tag-name)r)   r*   rz   rl   zinvalid-codepointrb   u   �T)r   r<   r
   beforeAttributeNameStater   rk   r   r+   r=   r   r   selfClosingStartTagStater    )r#   r*   r&   r&   r'   r     s"    






zHTMLTokenizer.tagNameStatec             C   sP   | j j }|dkr"d| _| j| _n*| jjtd dd | j j| | j	| _dS )Nrz   r2   rR   rL   )r)   r*   T)
r   r<   temporaryBufferrcdataEndTagOpenStater   r+   r=   r   rC   rs   )r#   r*   r&   r&   r'   rr     s    

z%HTMLTokenizer.rcdataLessThanSignStatec             C   sX   | j j }|tkr*|  j|7  _| j| _n*| jjtd dd | j j	| | j
| _dS )NrR   z</)r)   r*   T)r   r<   r   r   rcdataEndTagNameStater   r+   r=   r   rC   rs   )r#   r*   r&   r&   r'   r     s    

z#HTMLTokenizer.rcdataEndTagOpenStatec             C   s   | j o| j d j | jj k}| jj }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | j
  | j| _nH|tkr|  j|7  _n0| jjtd d| j d	 | jj| | j| _d
S )Nrb   rd   F)r)   rb   r*   re   rz   r|   rR   z</)r)   r*   T)r    lowerr   r   r<   r
   r   r   r   r   rk   r   r   r+   r=   rC   rs   )r#   appropriater*   r&   r&   r'   r     s2    



z#HTMLTokenizer.rcdataEndTagNameStatec             C   sP   | j j }|dkr"d| _| j| _n*| jjtd dd | j j| | j	| _dS )Nrz   r2   rR   rL   )r)   r*   T)
r   r<   r   rawtextEndTagOpenStater   r+   r=   r   rC   ru   )r#   r*   r&   r&   r'   rt     s    

z&HTMLTokenizer.rawtextLessThanSignStatec             C   sX   | j j }|tkr*|  j|7  _| j| _n*| jjtd dd | j j	| | j
| _dS )NrR   z</)r)   r*   T)r   r<   r   r   rawtextEndTagNameStater   r+   r=   r   rC   ru   )r#   r*   r&   r&   r'   r     s    

z$HTMLTokenizer.rawtextEndTagOpenStatec             C   s   | j o| j d j | jj k}| jj }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | j
  | j| _nH|tkr|  j|7  _n0| jjtd d| j d	 | jj| | j| _d
S )Nrb   rd   F)r)   rb   r*   re   rz   r|   rR   z</)r)   r*   T)r    r   r   r   r<   r
   r   r   r   r   rk   r   r   r+   r=   rC   ru   )r#   r   r*   r&   r&   r'   r     s2    



z$HTMLTokenizer.rawtextEndTagNameStatec             C   sx   | j j }|dkr"d| _| j| _nR|dkrJ| jjtd dd | j| _n*| jjtd dd | j j	| | j
| _dS )	Nrz   r2   ry   rR   z<!)r)   r*   rL   T)r   r<   r   scriptDataEndTagOpenStater   r+   r=   r   scriptDataEscapeStartStaterC   rw   )r#   r*   r&   r&   r'   rv   ,  s    


z)HTMLTokenizer.scriptDataLessThanSignStatec             C   sX   | j j }|tkr*|  j|7  _| j| _n*| jjtd dd | j j	| | j
| _dS )NrR   z</)r)   r*   T)r   r<   r   r   scriptDataEndTagNameStater   r+   r=   r   rC   rw   )r#   r*   r&   r&   r'   r   :  s    

z'HTMLTokenizer.scriptDataEndTagOpenStatec             C   s   | j o| j d j | jj k}| jj }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | j
  | j| _nH|tkr|  j|7  _n0| jjtd d| j d	 | jj| | j| _d
S )Nrb   rd   F)r)   rb   r*   re   rz   r|   rR   z</)r)   r*   T)r    r   r   r   r<   r
   r   r   r   r   rk   r   r   r+   r=   rC   rw   )r#   r   r*   r&   r&   r'   r   E  s2    



z'HTMLTokenizer.scriptDataEndTagNameStatec             C   sJ   | j j }|dkr2| jjtd dd | j| _n| j j| | j| _dS )N-rR   )r)   r*   T)	r   r<   r+   r=   r   scriptDataEscapeStartDashStater   rC   rw   )r#   r*   r&   r&   r'   r   a  s    

z(HTMLTokenizer.scriptDataEscapeStartStatec             C   sJ   | j j }|dkr2| jjtd dd | j| _n| j j| | j| _dS )Nr   rR   )r)   r*   T)	r   r<   r+   r=   r   scriptDataEscapedDashDashStater   rC   rw   )r#   r*   r&   r&   r'   r   k  s    

z,HTMLTokenizer.scriptDataEscapeStartDashStatec             C   s   | j j }|dkr2| jjtd dd | j| _n|dkrD| j| _nn|dkrz| jjtd dd | jjtd dd n8|tkr| j	| _n&| j j
d
}| jjtd || d d	S )Nr   rR   )r)   r*   rL   rl   r(   zinvalid-codepointu   �T)rL   r   rl   )r   r<   r+   r=   r   scriptDataEscapedDashStater   "scriptDataEscapedLessThanSignStater   r   ro   )r#   r*   rp   r&   r&   r'   scriptDataEscapedStateu  s"    




z$HTMLTokenizer.scriptDataEscapedStatec             C   s   | j j }|dkr2| jjtd dd | j| _n|dkrD| j| _nn|dkr| jjtd dd | jjtd dd | j| _n0|t	kr| j
| _n| jjtd |d | j| _d	S )
Nr   rR   )r)   r*   rL   rl   r(   zinvalid-codepointu   �T)r   r<   r+   r=   r   r   r   r   r   r   r   )r#   r*   r&   r&   r'   r     s"    






z(HTMLTokenizer.scriptDataEscapedDashStatec             C   s   | j j }|dkr*| jjtd dd n|dkr<| j| _n|dkrd| jjtd dd | j| _nn|dkr| jjtd dd | jjtd d	d | j| _n0|t	kr| j
| _n| jjtd |d | j| _d
S )Nr   rR   )r)   r*   rL   r|   rl   r(   zinvalid-codepointu   �T)r   r<   r+   r=   r   r   r   rw   r   r   r   )r#   r*   r&   r&   r'   r     s&    






z,HTMLTokenizer.scriptDataEscapedDashDashStatec             C   s   | j j }|dkr"d| _| j| _n\|tkrT| jjtd d| d || _| j	| _n*| jjtd dd | j j
| | j| _dS )Nrz   r2   rR   rL   )r)   r*   T)r   r<   r    scriptDataEscapedEndTagOpenStater   r   r+   r=   r    scriptDataDoubleEscapeStartStaterC   r   )r#   r*   r&   r&   r'   r     s    


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatec             C   sP   | j j }|tkr"|| _| j| _n*| jjtd dd | j j	| | j
| _dS )NrR   z</)r)   r*   T)r   r<   r   r    scriptDataEscapedEndTagNameStater   r+   r=   r   rC   r   )r#   r*   r&   r&   r'   r     s    

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatec             C   s   | j o| j d j | jj k}| jj }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | j
  | j| _nH|tkr|  j|7  _n0| jjtd d| j d	 | jj| | j| _d
S )Nrb   rd   F)r)   rb   r*   re   rz   r|   rR   z</)r)   r*   T)r    r   r   r   r<   r
   r   r   r   r   rk   r   r   r+   r=   rC   r   )r#   r   r*   r&   r&   r'   r     s2    



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatec             C   s   | j j }|ttdB krR| jjtd |d | jj dkrH| j	| _
q| j| _
nB|tkr| jjtd |d |  j|7  _n| j j| | j| _
dS )Nrz   r|   rR   )r)   r*   scriptT)rz   r|   )r   r<   r
   r@   r+   r=   r   r   r   scriptDataDoubleEscapedStater   r   r   rC   )r#   r*   r&   r&   r'   r     s    


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatec             C   s   | j j }|dkr2| jjtd dd | j| _n|dkrZ| jjtd dd | j| _nt|dkr| jjtd dd | jjtd dd n>|tkr| jjtd d	d | j	| _n| jjtd |d d
S )Nr   rR   )r)   r*   rL   rl   r(   zinvalid-codepointu   �zeof-in-script-in-scriptT)
r   r<   r+   r=   r    scriptDataDoubleEscapedDashStater   (scriptDataDoubleEscapedLessThanSignStater   r   )r#   r*   r&   r&   r'   r     s$    





z*HTMLTokenizer.scriptDataDoubleEscapedStatec             C   s   | j j }|dkr2| jjtd dd | j| _n|dkrZ| jjtd dd | j| _n|dkr| jjtd dd | jjtd dd | j| _nF|t	kr| jjtd d	d | j
| _n| jjtd |d | j| _d
S )Nr   rR   )r)   r*   rL   rl   r(   zinvalid-codepointu   �zeof-in-script-in-scriptT)r   r<   r+   r=   r   $scriptDataDoubleEscapedDashDashStater   r   r   r   r   )r#   r*   r&   r&   r'   r     s(    







z.HTMLTokenizer.scriptDataDoubleEscapedDashStatec             C   s  | j j }|dkr*| jjtd dd n|dkrR| jjtd dd | j| _n|dkrz| jjtd dd | j| _n|dkr| jjtd dd | jjtd d	d | j| _nF|t	kr| jjtd d
d | j
| _n| jjtd |d | j| _dS )Nr   rR   )r)   r*   rL   r|   rl   r(   zinvalid-codepointu   �zeof-in-script-in-scriptT)r   r<   r+   r=   r   r   r   rw   r   r   r   )r#   r*   r&   r&   r'   r   %  s,    







z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatec             C   sP   | j j }|dkr8| jjtd dd d| _| j| _n| j j| | j	| _dS )Nrz   rR   )r)   r*   r2   T)
r   r<   r+   r=   r   r   scriptDataDoubleEscapeEndStater   rC   r   )r#   r*   r&   r&   r'   r   >  s    

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatec             C   s   | j j }|ttdB krR| jjtd |d | jj dkrH| j	| _
q| j| _
nB|tkr| jjtd |d |  j|7  _n| j j| | j| _
dS )Nrz   r|   rR   )r)   r*   r   T)rz   r|   )r   r<   r
   r@   r+   r=   r   r   r   r   r   r   r   rC   )r#   r*   r&   r&   r'   r   I  s    


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatec             C   s0  | j j }|tkr$| j jtd n|tkrJ| jd j|dg | j| _n|dkr\| j	  n|dkrn| j
| _n|dkr| jjtd
 dd | jd j|dg | j| _n|dkr| jjtd
 dd | jd jddg | j| _nF|tkr| jjtd
 dd | j| _n| jd j|dg | j| _dS )NTr*   r2   r|   rz   '"rP   rL   r(   z#invalid-character-in-attribute-name)r)   r*   rl   zinvalid-codepointu   �z#expected-attribute-name-but-got-eof)r   r   rP   rL   )r   r<   r
   ro   r   r    r=   attributeNameStater   rk   r   r+   r   r   r   )r#   r*   r&   r&   r'   r   Y  s6    










z&HTMLTokenizer.beforeAttributeNameStatec             C   s  | j j }d}d}|dkr&| j| _n0|tkr^| jd d d  || j jtd 7  < d} n|dkrld}n|tkr~| j| _n|dkr| j	| _n|d	kr| j
jtd
 dd | jd d d  d7  < d}n|dkr| j
jtd
 dd | jd d d  |7  < d}nH|tkr8| j
jtd
 dd | j| _n| jd d d  |7  < d}|r| jd d d jt| jd d d< xP| jd d d D ]:\}}| jd d d |kr| j
jtd
 dd P qW |r| j  dS )NTFrP   r*   r	   r   r|   rz   rl   r(   zinvalid-codepoint)r)   r*   u   �r   r   rL   z#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerS   rS   )r   r   rL   rS   rS   rS   rS   rS   rS   )r   r<   beforeAttributeValueStater   r   r    ro   r
   afterAttributeNameStater   r+   r=   r   r   r   rf   r   rk   )r#   r*   leavingThisState	emitTokenrb   _r&   r&   r'   r   w  sR    








&
z HTMLTokenizer.attributeNameStatec             C   sF  | j j }|tkr$| j jtd n|dkr8| j| _n
|dkrJ| j  n|tkrp| jd j	|dg | j
| _n|dkr| j| _n|dkr| jj	td d	d
 | jd j	ddg | j
| _n|dk r| jj	td dd
 | jd j	|dg | j
| _nF|tkr&| jj	td dd
 | j| _n| jd j	|dg | j
| _dS )NTrP   r|   r*   r2   rz   rl   r(   zinvalid-codepoint)r)   r*   u   �r   r   rL   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   r   rL   )r   r<   r
   ro   r   r   rk   r   r    r=   r   r   r+   r   r   r   )r#   r*   r&   r&   r'   r     s:    











z%HTMLTokenizer.afterAttributeNameStatec             C   sj  | j j }|tkr$| j jtd nB|dkr8| j| _n.|dkrX| j| _| j j| n|dkrl| j| _ n|dkr| j	j
td dd | j  n|d	kr| j	j
td d
d | jd d d  d7  < | j| _n|dkr| j	j
td dd | jd d d  |7  < | j| _nL|tkrD| j	j
td dd | j| _n"| jd d d  |7  < | j| _dS )NTr   rK   r   r|   r(   z.expected-attribute-value-but-got-right-bracket)r)   r*   rl   zinvalid-codepointr*   r	   u   �rP   rL   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrS   )rP   rL   r   rS   rS   )r   r<   r
   ro   attributeValueDoubleQuotedStater   attributeValueUnQuotedStaterC   attributeValueSingleQuotedStater+   r=   r   rk   r    r   r   )r#   r*   r&   r&   r'   r     s>    










z'HTMLTokenizer.beforeAttributeValueStatec             C   s   | j j }|dkr| j| _n|dkr0| jd n|dkrj| jjtd dd | jd d d  d	7  < nN|t	kr| jjtd d
d | j
| _n&| jd d d  || j jd 7  < dS )Nr   rK   rl   r(   zinvalid-codepoint)r)   r*   r*   r	   u   �z#eof-in-attribute-value-double-quoteTrS   rS   )r   rK   rl   )r   r<   afterAttributeValueStater   ra   r+   r=   r   r    r   r   ro   )r#   r*   r&   r&   r'   r     s     




z-HTMLTokenizer.attributeValueDoubleQuotedStatec             C   s   | j j }|dkr| j| _n|dkr0| jd n|dkrj| jjtd dd | jd d d  d	7  < nN|t	kr| jjtd d
d | j
| _n&| jd d d  || j jd 7  < dS )Nr   rK   rl   r(   zinvalid-codepoint)r)   r*   r*   r	   u   �z#eof-in-attribute-value-single-quoteTrS   rS   )r   rK   rl   )r   r<   r   r   ra   r+   r=   r   r    r   r   ro   )r#   r*   r&   r&   r'   r     s     




z-HTMLTokenizer.attributeValueSingleQuotedStatec             C   s  | j j }|tkr| j| _ n|dkr2| jd n|dkrD| j  n|dkr~| jjt	d d	d
 | j
d d d  |7  < n|dkr| jjt	d dd
 | j
d d d  d7  < nV|tkr| jjt	d dd
 | j| _n.| j
d d d  || j jtdtB  7  < dS )NrK   r|   r   r   rP   rL   r   r(   z0unexpected-character-in-unquoted-attribute-value)r)   r*   r*   r	   rl   zinvalid-codepointu   �z eof-in-attribute-value-no-quotesT)r   r   rP   rL   r   rS   rS   rS   )rK   r|   r   r   rP   rL   r   rl   )r   r<   r
   r   r   ra   rk   r+   r=   r   r    r   r   ro   r@   )r#   r*   r&   r&   r'   r     s,    





z)HTMLTokenizer.attributeValueUnQuotedStatec             C   s   | j j }|tkr| j| _n|dkr.| j  np|dkr@| j| _n^|tkrt| jj	t
d dd | j j| | j| _n*| jj	t
d dd | j j| | j| _dS )Nr|   rz   r(   z$unexpected-EOF-after-attribute-value)r)   r*   z*unexpected-character-after-attribute-valueT)r   r<   r
   r   r   rk   r   r   r+   r=   r   rC   r   )r#   r*   r&   r&   r'   r   .  s"    






z&HTMLTokenizer.afterAttributeValueStatec             C   s   | j j }|dkr&d| jd< | j  n^|tkrZ| jjtd dd | j j| | j	| _
n*| jjtd dd | j j| | j| _
dS )Nr|   Tre   r(   z#unexpected-EOF-after-solidus-in-tag)r)   r*   z)unexpected-character-after-solidus-in-tag)r   r<   r    rk   r   r+   r=   r   rC   r   r   r   )r#   r*   r&   r&   r'   r   B  s    





z&HTMLTokenizer.selfClosingStartTagStatec             C   sD   | j jd}|jdd}| jjtd |d | j j  | j| _dS )Nr|   rl   u   �Comment)r)   r*   T)	r   ro   replacer+   r=   r   r<   r   r   )r#   r*   r&   r&   r'   r   T  s    
zHTMLTokenizer.bogusCommentStatec             C   s  | j j g}|d dkrT|j| j j  |d dkrPtd dd| _| j| _dS  n|d dkrd}x.d&D ]&}|j| j j  |d' |krjd}P qjW |rtd dd d dd| _| j| _dS n|d( dkrH| jd k	rH| jj	j
rH| jj	j
d) j| jj	jkrHd}x2d*D ]*}|j| j j  |d+ |krd}P qW |rH| j| _dS | jjtd dd x|rz| j j|j  q`W | j| _dS ),Nr	   r   r   r2   )r)   r*   TdDoOrH   CtTyYpPeEFDoctype)r)   rb   publicIdsystemIdcorrect[Ar(   zexpected-dashes-or-doctyperS   rS   rS   )r   r   r   r   rH   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   rS   rS   rS   )r   r   r   r   r   r   rS   )r   r<   r=   r   r    commentStartStater   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater+   rC   r-   r   )r#   rG   matchedexpectedr&   r&   r'   r~   c  sR    


z(HTMLTokenizer.markupDeclarationOpenStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d | jj| j | j| _nP|t	kr| jjtd d
d | jj| j | j| _n| jd  |7  < | j
| _dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r*   u   �r|   zincorrect-commentzeof-in-commentT)r   r<   commentStartDashStater   r+   r=   r   r    r   r   commentState)r#   r*   r&   r&   r'   r     s(    






zHTMLTokenizer.commentStartStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d | jj| j | j| _nT|t	kr| jjtd d
d | jj| j | j| _n| jd  d| 7  < | j
| _dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r*   u   -�r|   zincorrect-commentzeof-in-commentT)r   r<   commentEndStater   r+   r=   r   r    r   r   r   )r#   r*   r&   r&   r'   r     s(    






z#HTMLTokenizer.commentStartDashStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < nT|tkr| jjtd dd | jj| j | j	| _n| jd  || j j
d
 7  < d	S )Nr   rl   r(   zinvalid-codepoint)r)   r*   r*   u   �zeof-in-commentT)r   rl   )r   r<   commentEndDashStater   r+   r=   r   r    r   r   ro   )r#   r*   r&   r&   r'   r     s    




zHTMLTokenizer.commentStatec             C   s   | j j }|dkr| j| _n|dkrV| jjtd dd | jd  d7  < | j| _nT|t	kr| jjtd dd | jj| j | j
| _n| jd  d| 7  < | j| _d	S )
Nr   rl   r(   zinvalid-codepoint)r)   r*   r*   u   -�zeof-in-comment-end-dashT)r   r<   r   r   r+   r=   r   r    r   r   r   )r#   r*   r&   r&   r'   r     s     





z!HTMLTokenizer.commentEndDashStatec             C   s,  | j j }|dkr*| jj| j | j| _n|dkrd| jjtd dd | jd  d7  < | j| _n|dkr| jjtd d	d | j	| _n|d
kr| jjtd dd | jd  |7  < nj|t
kr| jjtd dd | jj| j | j| _n4| jjtd dd | jd  d| 7  < | j| _dS )Nr|   rl   r(   zinvalid-codepoint)r)   r*   r*   u   --�ry   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   r<   r+   r=   r    r   r   r   r   commentEndBangStater   )r#   r*   r&   r&   r'   r     s6    









zHTMLTokenizer.commentEndStatec             C   s   | j j }|dkr*| jj| j | j| _n|dkrN| jd  d7  < | j| _n|dkr| jjtd dd | jd  d	7  < | j	| _nT|t
kr| jjtd d
d | jj| j | j| _n| jd  d| 7  < | j	| _dS )Nr|   r   r*   z--!rl   r(   zinvalid-codepoint)r)   r*   u   --!�zeof-in-comment-end-bang-stateT)r   r<   r+   r=   r    r   r   r   r   r   r   )r#   r*   r&   r&   r'   r     s(    






z!HTMLTokenizer.commentEndBangStatec             C   s   | j j }|tkr| j| _nj|tkr\| jjtd dd d| j	d< | jj| j	 | j
| _n*| jjtd dd | j j| | j| _dS )Nr(   z!expected-doctype-name-but-got-eof)r)   r*   Fr   zneed-space-after-doctypeT)r   r<   r
   beforeDoctypeNameStater   r   r+   r=   r   r    r   rC   )r#   r*   r&   r&   r'   r     s    





zHTMLTokenizer.doctypeStatec             C   s   | j j }|tkrn|dkrT| jjtd dd d| jd< | jj| j | j| _n|dkr| jjtd dd d	| jd
< | j	| _nR|t
kr| jjtd dd d| jd< | jj| j | j| _n|| jd
< | j	| _dS )Nr|   r(   z+expected-doctype-name-but-got-right-bracket)r)   r*   Fr   rl   zinvalid-codepointu   �rb   z!expected-doctype-name-but-got-eofT)r   r<   r
   r+   r=   r   r    r   r   doctypeNameStater   )r#   r*   r&   r&   r'   r   *  s.    










z$HTMLTokenizer.beforeDoctypeNameStatec             C   s  | j j }|tkr2| jd jt| jd< | j| _n|dkrh| jd jt| jd< | jj	| j | j
| _n|dkr| jj	td dd | jd  d7  < | j| _nh|tkr| jj	td dd d	| jd
< | jd jt| jd< | jj	| j | j
| _n| jd  |7  < dS )Nrb   r|   rl   r(   zinvalid-codepoint)r)   r*   u   �zeof-in-doctype-nameFr   T)r   r<   r
   r    rf   r   afterDoctypeNameStater   r+   r=   r   r   r   r   )r#   r*   r&   r&   r'   r   D  s,    







zHTMLTokenizer.doctypeNameStatec             C   sR  | j j }|tkrn8|dkr8| jj| j | j| _n|tkrd| jd< | j j	| | jjt
d dd | jj| j | j| _ n|d!krd	}x$d'D ]}| j j }||krd}P qW |r| j| _d	S nJ|d(krd	}x(d.D ] }| j j }||k rd}P  qW |r| j| _d	S | j j	| | jjt
d dd|id  d| jd< | j| _d	S )/Nr|   Fr   r(   zeof-in-doctype)r)   r*   r   r   TuUbBlLiIrH   r   sSr   r   r   r   r   r   mMz*expected-space-or-right-bracket-in-doctyper*   )r)   r*   r4   )r   r   r   r   r   r   r   r   r   r   rH   r   )r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   )r   r<   r
   r+   r=   r    r   r   r   rC   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r#   r*   r   r   r&   r&   r'   r   ]  sT    







z#HTMLTokenizer.afterDoctypeNameStatec             C   s   | j j }|tkr| j| _n|d
krP| jjtd dd | j j| | j| _nT|t	kr| jjtd dd d| j
d< | jj| j
 | j| _n| j j| | j| _d	S )Nr   r   r(   zunexpected-char-in-doctype)r)   r*   zeof-in-doctypeFr   T)r   r   )r   r<   r
   "beforeDoctypePublicIdentifierStater   r+   r=   r   rC   r   r    r   )r#   r*   r&   r&   r'   r     s"    






z,HTMLTokenizer.afterDoctypePublicKeywordStatec             C   s   | j j }|tkrn|dkr0d| jd< | j| _n|dkrLd| jd< | j| _n|dkr| jjt	d dd d	| jd
< | jj| j | j
| _nh|tkr| jjt	d dd d	| jd
< | jj| j | j
| _n(| jjt	d dd d	| jd
< | j| _dS )Nr   r2   r   r   r|   r(   zunexpected-end-of-doctype)r)   r*   Fr   zeof-in-doctypezunexpected-char-in-doctypeT)r   r<   r
   r    (doctypePublicIdentifierDoubleQuotedStater   (doctypePublicIdentifierSingleQuotedStater+   r=   r   r   r   r   )r#   r*   r&   r&   r'   r     s4    












z0HTMLTokenizer.beforeDoctypePublicIdentifierStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d d
| jd< | jj| j | j| _nR|t	kr| jjtd dd d
| jd< | jj| j | j| _n| jd  |7  < dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r   u   �r|   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r<   !afterDoctypePublicIdentifierStater   r+   r=   r   r    r   r   )r#   r*   r&   r&   r'   r     s*    








z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d d
| jd< | jj| j | j| _nR|t	kr| jjtd dd d
| jd< | jj| j | j| _n| jd  |7  < dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r   u   �r|   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r<   r   r   r+   r=   r   r    r   r   )r#   r*   r&   r&   r'   r     s*    








z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatec             C   s  | j j }|tkr| j| _n|dkr<| jj| j | j| _n|dkrn| jjt	d dd d| jd< | j
| _n|dkr| jjt	d dd d| jd< | j| _nh|tkr| jjt	d d	d d
| jd< | jj| j | j| _n(| jjt	d dd d
| jd< | j| _dS )Nr|   r   r(   zunexpected-char-in-doctype)r)   r*   r2   r   r   zeof-in-doctypeFr   T)r   r<   r
   -betweenDoctypePublicAndSystemIdentifiersStater   r+   r=   r    r   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   r   )r#   r*   r&   r&   r'   r     s6    













z/HTMLTokenizer.afterDoctypePublicIdentifierStatec             C   s   | j j }|tkrn|dkr4| jj| j | j| _n|dkrPd| jd< | j| _n|dkrld| jd< | j	| _nh|t
kr| jjtd dd d	| jd
< | jj| j | j| _n(| jjtd dd d	| jd
< | j| _dS )Nr|   r   r2   r   r   r(   zeof-in-doctype)r)   r*   Fr   zunexpected-char-in-doctypeT)r   r<   r
   r+   r=   r    r   r   r   r   r   r   r   )r#   r*   r&   r&   r'   r     s.    










z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatec             C   s   | j j }|tkr| j| _n|d
krP| jjtd dd | j j| | j| _nT|t	kr| jjtd dd d| j
d< | jj| j
 | j| _n| j j| | j| _d	S )Nr   r   r(   zunexpected-char-in-doctype)r)   r*   zeof-in-doctypeFr   T)r   r   )r   r<   r
   "beforeDoctypeSystemIdentifierStater   r+   r=   r   rC   r   r    r   )r#   r*   r&   r&   r'   r   )  s"    






z,HTMLTokenizer.afterDoctypeSystemKeywordStatec             C   s   | j j }|tkrn|dkr0d| jd< | j| _n|dkrLd| jd< | j| _n|dkr| jjt	d dd d	| jd
< | jj| j | j
| _nh|tkr| jjt	d dd d	| jd
< | jj| j | j
| _n(| jjt	d dd d	| jd
< | j| _dS )Nr   r2   r   r   r|   r(   zunexpected-char-in-doctype)r)   r*   Fr   zeof-in-doctypeT)r   r<   r
   r    r   r   r   r+   r=   r   r   r   r   )r#   r*   r&   r&   r'   r   =  s4    












z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d d
| jd< | jj| j | j| _nR|t	kr| jjtd dd d
| jd< | jj| j | j| _n| jd  |7  < dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r   u   �r|   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r<   !afterDoctypeSystemIdentifierStater   r+   r=   r   r    r   r   )r#   r*   r&   r&   r'   r   Z  s*    








z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatec             C   s   | j j }|dkr| j| _n|dkrN| jjtd dd | jd  d7  < n|dkr| jjtd d	d d
| jd< | jj| j | j| _nR|t	kr| jjtd dd d
| jd< | jj| j | j| _n| jd  |7  < dS )Nr   rl   r(   zinvalid-codepoint)r)   r*   r   u   �r|   zunexpected-end-of-doctypeFr   zeof-in-doctypeT)
r   r<   r   r   r+   r=   r   r    r   r   )r#   r*   r&   r&   r'   r   r  s*    








z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatec             C   s   | j j }|tkrn~|dkr4| jj| j | j| _n^|tkrt| jjt	d dd d| jd< | jj| j | j| _n| jjt	d dd | j
| _dS )	Nr|   r(   zeof-in-doctype)r)   r*   Fr   zunexpected-char-in-doctypeT)r   r<   r
   r+   r=   r    r   r   r   r   r   )r#   r*   r&   r&   r'   r     s     





z/HTMLTokenizer.afterDoctypeSystemIdentifierStatec             C   sZ   | j j }|dkr*| jj| j | j| _n,|tkrV| j j| | jj| j | j| _n dS )Nr|   T)	r   r<   r+   r=   r    r   r   r   rC   )r#   r*   r&   r&   r'   r     s    


zHTMLTokenizer.bogusDoctypeStatec             C   s   g }x|j | jjd |j | jjd | jj }|tkr@P q|dksLt|d dd  dkrx|d d d |d< P q|j | qW dj|}|jd}|dkrx&t|D ]}| j	j t
d	 d
d qW |jdd}|r| j	j t
d |d | j| _dS )N]r|   r	      z]]r2   rl   r   r(   zinvalid-codepoint)r)   r*   u   �rR   TrS   rS   r   rS   )r=   r   ro   r<   r   AssertionErrorr?   countranger+   r   r   r   r   )r#   r*   r<   	nullCountr   r&   r&   r'   r     s0    



zHTMLTokenizer.cdataSectionState)N)NF)N__name__
__module____qualname____doc__r"   r/   rJ   r`   ra   rk   r   rm   rs   rq   ru   rw   rx   rn   r   r   rr   r   r   rt   r   r   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r&   r&   )r%   r'   r      s   H
P#

6 "-3r   N)r   r   ) 
__future__r   r   r   Zpip._vendor.sixr   rA   collectionsr   r   sysr   	constantsr
   r   r   r   r   r   r   r   r   r   _inputstreamr   _trier   rT   dictrg   objectr   r&   r&   r&   r'   <module>   s    