
    fZ                        d Z ddlZddlZddlZ ej                  dded       ddlmZmZ ddlm	Z	 dd	l
mZmZ dd
l
mZ ddlmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZm Z  ddl!m"Z" e	jF                  Z$ e"e$      Z%e	jL                  d   e	jL                  d   e	jL                  d   hZ'e	jL                  d   Z(e	jL                  d   Z)e	jL                  d   Z*e	jL                  d   Z+ e,d      Z- e,d      Z. G d d      Z/ G d de       Z0 G d de      Z1d Z2d Z3d  Z4 ejj                  d!      Z6d" Z7 G d# d$e      Z8y)%z
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
    Nignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)messagecategorymodule)
HTMLParsergetTreeWalker)	constants)
namespacesprefixes)_ReparseException)Filter)allowed_protocolsallowed_css_propertiesallowed_svg_propertiesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href)HTMLInputStream)escapeHTMLSerializer)attributeMapHTMLTokenizer)TrieStartTagEndTagEmptyTag
Characters
ParseError)paabbraddressareaarticleasideaudiobbasebdibdo
blockquotebodybrbuttoncanvascaptioncitecodecolcolgroupdatadatalistdddeldetailsdfndialogdivdldtemembedfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headheaderhgrouphrhtmliiframeimginputinskbdkeygenlabellegendlilinkmapmarkmenumetameternavnoscriptobjectoloptgroupoptionoutputpparampicturepreprogressqrprtrubyssampscriptsectionselectslotsmallsourcespanstrongstylesubsummarysuptabletbodytdtemplatetextareatfootththeadtimetitletrtrackuulvarvideowbr)!r!   r#   r$   r*   r8   r:   r6   r;   r<   r=   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rY   mainr`   rc   rg   rj   rs   r~   r   c                   f    e Zd ZdZd Zed        Zed        Zed        Zd Z	ddZ
d Zd	 Zd
 Zy)InputStreamWithMemoryzWraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    c                     || _         | j                   j                  | _        | j                   j                  | _        g | _        y N)_inner_streamresetposition_buffer)selfinner_streams     O/var/www/cvtools/html/venv/lib/python3.12/site-packages/bleach/html5lib_shim.py__init__zInputStreamWithMemory.__init__   s7    )''--
**33    c                 .    | j                   j                  S r   )r   errorsr   s    r   r   zInputStreamWithMemory.errors  s    !!(((r   c                 .    | j                   j                  S r   )r   charEncodingr   s    r   r   z"InputStreamWithMemory.charEncoding  s    !!...r   c                 .    | j                   j                  S r   )r   changeEncodingr   s    r   r   z$InputStreamWithMemory.changeEncoding
  s    !!000r   c                 t    | j                   j                         }|r| j                  j                  |       |S r   )r   charr   append)r   cs     r   r   zInputStreamWithMemory.char  s0    ##%LL"r   c                     | j                   j                  ||      }| j                  j                  t	        |             |S )N)opposite)r   
charsUntilr   extendlist)r   
charactersr   charss       r   r   z InputStreamWithMemory.charsUntil  s8    ""--j8-LDK(r   c                     | j                   r| j                   j                  d       | j                  j                  |      S )N)r   popr   unget)r   r   s     r   r   zInputStreamWithMemory.unget  s2    <<LLR !!''--r   c                 8    dj                  | j                        S )zReturns the stream history since last '<'

        Since the buffer starts at the last '<' as as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

         )joinr   r   s    r   get_tagzInputStreamWithMemory.get_tag  s     wwt||$$r   c                     dg| _         y)zResets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        <N)r   r   s    r   	start_tagzInputStreamWithMemory.start_tag)  s     ur   NF)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r    r   r   r   r      sa     ) ) / / 1 1
.
%r   r   c                   N     e Zd ZdZd fd	Z fdZd fd	Z fdZ fdZ xZ	S )	BleachHTMLTokenizerz1Tokenizer that doesn't consume character entitiesc                 t    t        |   di | || _        t        | j                        | _        d | _        y )Nr   )superr   consume_entitiesr   streamemitted_last_token)r   r   kwargs	__class__s      r   r   zBleachHTMLTokenizer.__init__6  s7    "6" 0 ,DKK8 #'r   c              #     K   d }t         |          D ]
  }||d   dk(  rI|d   t        v r>|j                  d      r-t	        d |d   j                         D              |d<   d }| n|d   dk(  rz| j                  j                  d|d   j                         j                         | j                  j                  vr-| j                  j                         |d<   t        |d<   d }| n|d   t        k(  r| |}n
| | d }|d   t        k(  r|}|  |r\|d   dk(  r$t        | j                  j                         d y |d   dv r$t        | j                  j                         d y | y y w)	Nr4   z#invalid-character-in-attribute-nametypec              3   B   K   | ]  \  }}d |vrd|vr
d|vr||f  yw)"'r   Nr   ).0	attr_name
attr_values      r   	<genexpr>z/BleachHTMLTokenizer.__iter__.<locals>.<genexpr>Q  s9      11Izy0 #9 4 #9 4 #J/1s   z!expected-closing-tag-but-got-charzeof-in-tag-namer   r4   )zeof-in-attribute-namez eof-in-attribute-value-no-quotes)r   __iter__TAG_TOKEN_TYPESgetr   itemsparsertagslowerstripr   r   TAG_TOKEN_TYPE_CHARACTERSTAG_TOKEN_TYPE_PARSEERROR)r   last_error_tokentokenr   s      r   r   zBleachHTMLTokenizer.__iter__A  s    W%' B	E+$V,0UUf8		&) %1 15:6]5H5H5J1 %E&M (,$K %V,0SS((4f++-335T[[=M=MM %)KK$7$7$9E&M$=E&M'+$K6]&?? +*',$ +*K'+$ V} 99#( KEB	H '+<<
  9$++BUBUBWXX!&) .   9$++BUBUBWXX&&% s   E>Fc                     | j                   rt        | 	  ||      S |r| j                  d   d   dxx   dz  cc<   y | j                  j                  t        dd       y )Nr4   r      &r   )r   r   consumeEntitycurrentToken
tokenQueuer   r   )r   allowedCharfromAttributer   s      r   r   z!BleachHTMLTokenizer.consumeEntity  s_       7(mDD f%b)!,3, OO"",Es#STr   c                 T    | j                   j                          t        |          S r   )r   r   r   tagOpenState)r   r   s    r   r   z BleachHTMLTokenizer.tagOpenState  s#    
 	w#%%r   c                 F   | j                   }| j                  j                  |d   t        v r|d   j	                         | j                  j                  vr| j                  j
                  r7| j                  r(|d   t        k(  r|d   j	                         t        v rd}nd}n| j                  j                         }t        |d}|x| _         | _        | j                  j                  |       | j                  | _        y | j                   | _        t         | E          y )Nr   name
r   r   )r   r   r   r   r   r   r   TAG_TOKEN_TYPE_STARTHTML_TAGS_BLOCK_LEVELr   r   r   r   r   	dataStatestater   emitCurrentToken)r   r   new_data	new_tokenr   s       r   r   z$BleachHTMLTokenizer.emitCurrentToken  s    !! KK(f0f##%T[[-=-==
 {{  ++f)==f++-1FF
  $H  "H  ;;..0!:HMI:CCD 7OO""9-DJ"&"3"3 "r   r   )NF)
r   r   r   r   r   r   r   r   r   __classcell__r   s   @r   r   r   3  s'    ;	'Y'vU$&*# *#r   r   c                   ,     e Zd ZdZ fdZ	 ddZ xZS )BleachHTMLParserz$Parser that uses BleachHTMLTokenizerc                 v    |t        d |D              nd| _        || _        || _        t	        |   di | y)a  
        :arg tags: set of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

        Nc              3   <   K   | ]  }|j                           y wr   )r   )r   tags     r   r   z,BleachHTMLParser.__init__.<locals>.<genexpr>  s     3ssyy{3s   r   )	frozensetr   r   r   r   r   )r   r   r   r   r   r   s        r   r   zBleachHTMLParser.__init__  sB     9=8HI3d34d 		 
 0"6"r   c                 
   || _         || _        || _        t        d|| j                  | d|| _        | j                          	 | j                          y # t        $ r# | j                          | j                          Y y w xY w)N)r   r   r   r   )	innerHTMLMode	container	scriptingr   r   	tokenizerr   mainLoopReparseException)r   r   	innerHTMLr   r   r   s         r   _parsezBleachHTMLParser._parse  sz     '"", 
D,A,A$
RX
 	

	MMO 	JJLMMO	s   A )BB)Fr;   T)r   r   r   r   r   r  r   r   s   @r   r   r     s    .#$ CGr   r   c                     | d   dk(  rPt        |       dk  ry| d   dv r| dd d}}n| dd d}}|d	k(  ryt        ||      }d|cxk  rd
k  rt        |      S  yyt        j	                  | d      S )a9  Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity

    r   #   Nr   xX   
   r   i   )lenintchrENTITIESr   )valueint_as_stringr'   
code_points       r   convert_entityr    s     Qx3u:>8z!"')R4M #()R4MB-
z$H$z?" % <<t$$r   c                 >   d| vr| S g }t        |       D ]w  }|s|j                  d      rPt        |      }|Ct        |      }|6|j	                  |       |t        |      dz   d }|r|j	                  |       g|j	                  |       y dj                  |      S )zConverts all found entities in the text

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    r   Nr  r   )next_possible_entity
startswithmatch_entityr  r   r  r   )textnew_textpartentity	converted	remainders         r   convert_entitiesr  1  s     $H$T* ??3!$'F!*62	 (OOI. $S[1_%6 7I 	2%( 778r   c                    | d   dk7  rt        d      | dd } t        |       } d}dt        j                  z   }| rz| d   dk(  rrd}| j	                  d       | r| d   d	v rd
}|| j	                  d      z  }nd}| r,| d   |vr%| j	                  d      }||vrn||z  }| r| d   |vr%|r| r
| d   dk(  r|S y| r=| d   |vr6| j	                  d      }||z  }t
        j                  |      sy| r| d   |vr6|r| r
| d   dk(  r|S y)av  Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with a
    ";". This ignores ambiguous character entities that have no ";" at the end.

    :arg stream: the character stream

    :returns: the entity string without "&" or ";" if it's a valid character
        entity; ``None`` otherwise

    r   r   zStream should begin with "&"r   Nr   z<&=;r  r  0123456789abcdefABCDEF
0123456789;)
ValueErrorr   string
whitespacer   ENTITIES_TRIEhas_keys_with_prefix)r   possible_entityend_charactersallowedr   s        r   r  r  T  sQ    ayC788ABZF&\FOf///N &)s"

1fQi:-.Gvzz!},O"G .8

1Aq O	 .8 v&)s*:"" VAYn4JJqM111/B  VAYn4 6fQi3&6r   z(&)c              #      K   t        t        j                  |             D ]  \  }}|dk(  r| |dz  dk(  sd|z    ! yw)zTakes a text and generates a list of possible entities

    :arg text: the text to look at

    :returns: generator where each part (except the first) starts with an
        "&"

    r   r  r   N)	enumerateAMP_SPLIT_REsplit)r  rP   r  s      r   r  r    sL      \//56 46JUaZ*	s
   7A
Ac                   .     e Zd ZdZdZd Zd fd	Z xZS )BleachHTMLSerializerz[HTMLSerializer that undoes & -> &amp; in attributes and sets
    escape_rcdata to True
    Tc              #     K   |j                  dd      }t        |      D ]b  }|s|j                  d      r8t        |      }|+t	        |       d| d |t        |      dz   d }|r| O|j                  dd       d yw)z,Escapes just bare & in HTML attribute valuesz&amp;r   Nr#  r  )replacer  r  r  r  r  )r   stokenr  r  s       r   escape_base_ampz$BleachHTMLSerializer.escape_base_amp  s      - )0 	-Ds#%d+ %.*@*LfXQ-'  Fa 12D"
,,sG,,%	-s   BBc              #      K   d}d}t         |   ||      D ]R  }|r7|dk(  rd}n*|r!|dk7  r#| j                  |      E d{    d}0|dk(  rd}| <|j                  d      rd}| T y7 -w)zWrap HTMLSerializer.serialize and conver & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        F>r   N=Tr   )r   	serializer5  r  )r   
treewalkerencodingin_tagafter_equalsr4  r   s         r   r9  zBleachHTMLSerializer.serialize  s      g'
H= 	FS="F!}#'#7#7#???', s]#'L$$S)!F'	 @s   =A0 A..A0r   )r   r   r   r   escape_rcdatar5  r9  r   r   s   @r   r1  r1    s     M-> r   r1  )9r   rer%  warningsfilterwarningsDeprecationWarningbleach._vendor.html5libr   r   r	   !bleach._vendor.html5lib.constantsr
   r   r   r  $bleach._vendor.html5lib.filters.baser   )bleach._vendor.html5lib.filters.sanitizerr   r   r   r   r   r   SanitizerFilter$bleach._vendor.html5lib._inputstreamr   "bleach._vendor.html5lib.serializerr   r   "bleach._vendor.html5lib._tokenizerr   r   bleach._vendor.html5lib._trier   entitiesr  r'  
tokenTypesr   r   TAG_TOKEN_TYPE_ENDr   r   r   	HTML_TAGSr   r   r   r   r  r  r  compiler.  r  r1  r   r   r   <module>rQ     s  
 
     0$	   X $"$
 !++J7 ))(3 %00> %00> 
 qs	r ""$ N< <~m#- m#`)z )X%D F9x rzz%  I> Ir   