
    fU              
          d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ  e	d      Z
ddgdgdgd	Z e	d
      Zdj                   e ed d       edd       edd            D  cg c]
  }  e|        c}       Z ej"                  dez   dz   ej$                        ZdZ G d de      Z G d d      Zd Z G d dej2                        Zyc c} w )    )chainN)unescape)html5lib_shim)
parse_shim)aabbracronymb
blockquotecodeemiliolstrongulhreftitle)r   r   r	   )httphttpsmailto 	                []?c                       e Zd Zy)NoCssSanitizerWarningN)__name__
__module____qualname__     K/var/www/cvtools/html/venv/lib/python3.12/site-packages/bleach/sanitizer.pyr"   r"   5   s    r'   r"   c                   ,    e Zd ZdZeeeddddfdZd Zy)Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc                 T   || _         || _        || _        || _        || _        |xs g | _        || _        t        j                  | j                   | j                  dd      | _	        t        j                  d      | _        t        j                  dddddd      | _        |g }t        |t              r|}nOt        |t               r?g }|j#                         D ]*  }	t        |	t        t$        f      s|j'                  |	       , d|v rt)        j*                  d	t,        
       yyy)a:  Initializes a Cleaner

        :arg set tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealwaysT)quote_attr_valuesomit_optional_tagsescape_lt_in_attrsresolve_entitiessanitizealphabetical_attributesNstylez7'style' attribute specified, but css_sanitizer not set.)category)r,   
attributes	protocolsr-   strip_commentsfilterscss_sanitizerr   BleachHTMLParserparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer
isinstancelistdictvaluestupleextendwarningswarnr"   )
selfr,   r:   r;   r-   r<   r=   r>   attributes_valuesrH   s
             r(   __init__zCleaner.__init__V   s'   L 	$"
,}"*#44**""'	
 $11':'<<&$# #$)
   !#*d+$.!J-$&!(//1 9F!&4-8)0089 ++M2 , !r'   c           	         t        |t              s(d|j                  j                  ddz   }t	        |      |sy| j
                  j                  |      }t        | j                  |      | j                  | j                  | j                  | j                  | j                  | j                        }| j                  D ]  } ||      } | j                   j#                  |      S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        zargument cannot be of z type, zmust be of text typer   )sourceallowed_tagsr:   strip_disallowed_tagsstrip_html_commentsr>   allowed_protocols)rQ   )rE   str	__class__r#   	TypeErrorr@   parseFragmentBleachSanitizerFilterrB   r,   r:   r-   r<   r>   r;   r=   rD   render)rM   textmessagedomfilteredfilter_classs         r(   cleanzCleaner.clean   s     $$()@)@(C7K()  G$$kk''-(;;s#"&** $ 3 3,,"nn
 !LL 	5L#84H	5 %%h//r'   )	r#   r$   r%   __doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_PROTOCOLSrO   ra   r&   r'   r(   r*   r*   9   s*    < %#Sj#0r'   r*   c                      t               r S t         t              r fd}|S t         t              r fd}|S t	        d      )a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                     | v r|    }t        |      r
 || ||      S ||v rydv rd   }t        |      r
 || ||      S ||v S y)NT*F)callable)tagattrvalueattr_valr:   s       r(   _attr_filterz.attribute_filter_factory.<locals>._attr_filter   sl    j %c?H%#Cu558#j %c?H%#Cu55x''r'   c                     |v S Nr&   )rj   rk   rl   r:   s      r(   rn   z.attribute_filter_factory.<locals>._attr_filter   s    :%%r'   z3attributes needs to be a callable, a list or a dict)ri   rE   rG   rF   
ValueError)r:   rn   s   ` r(   attribute_filter_factoryrr      sM     
*d#	$ *d#	& 
J
KKr'   c            	           e Zd ZdZeeeej                  ej                  ej                  dddf	dZd Zd Zd Zd	 Zd
 Zd Zd Zd Zy)rZ   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTNc                     t         j                  j                  | |       t        |      | _        t        |      | _        t        |      | _        || _        |	| _	        || _
        || _        |
| _        || _        y)a_  Creates a BleachSanitizerFilter instance

        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

        :arg set allowed_tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list allowed_protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg attr_val_is_uri: set of attributes that have URI values

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs

        :arg bool strip_disallowed_tags: whether or not to strip disallowed
            tags

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        N)r   FilterrO   	frozensetrR   rU   rr   attr_filterrS   rT   attr_val_is_urisvg_attr_val_allows_refr>   svg_allow_local_href)rM   rQ   rR   r:   rU   rx   ry   rz   rS   rT   r>   s              r(   rO   zBleachSanitizerFilter.__init__  ss    ` 	%%dF3%l3!*+<!=3J?%:"#6 .'>$*$8!r'   c              #      K   |D ]5  }| j                  |      }|st        |t              r|E d {    2| 7 y 7 wrp   )sanitize_tokenrE   rF   )rM   token_iteratortokenrets       r(   sanitize_streamz%BleachSanitizerFilter.sanitize_streamA  sF     # 		E%%e,C#t$			 s   /A >A c              #   P  K   g }|D ]h  }|rF|d   dk(  r|j                  |       dj                  |D cg c]  }|d   	 c}      dd}g }| n|d   dk(  r|j                  |       e| j dj                  |D cg c]  }|d   	 c}      dd}| yc c}w c c}w w)z/Merge consecutive Characters tokens in a streamtype
Charactersr   data)r   r   N)appendjoin)rM   r}   characters_bufferr~   
char_token	new_tokens         r(   merge_charactersz&BleachSanitizerFilter.merge_charactersM  s     # 	E =L0%,,U3
 !#BSTJZ/T! !-	!I )+%#Ov,.!((/K+	0 GGBSTJZ/TU 
	 # U Us   3B&B
A B&B!B&c                 |    | j                  | j                  t        j                  j	                  |                   S rp   )r   r   r   ru   __iter__)rM   s    r(   r   zBleachSanitizerFilter.__iter__n  s4    $$  !5!5!>!>t!DE
 	
r'   c                 ,   |d   }|dv r@|d   | j                   v r| j                  |      S | j                  ry| j                  |      S |dk(  r/| j                  s"t        j                  |d   ddd	
      |d<   |S y|dk(  r| j                  |      S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        r   )StartTagEndTagEmptyTagnameNCommentr   z&quot;z&#x27;)"')entitiesr   )rR   allow_tokenrS   disallowed_tokenrT   r   escapesanitize_characters)rM   r~   
token_types      r(   r|   z$BleachSanitizerFilter.sanitize_tokens  s      6]
;;V} 1 11''..++ ,,U339$++ - 4 4&M(,J!f <'++E22 Lr'   c                    |j                  dd      }|s|S t        j                  t        |      }||d<   d|vr|S g }t	        j
                  |      D ]  }|s|j                  d      rmt	        j                  |      }|V|dk(  r|j                  ddd       n|j                  d|d	       |t        |      d
z   d }|r|j                  d|d       |j                  d|d        |S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        r   r   &Nampr   )r   r   Entity)r   r      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   next_possible_entity
startswithmatch_entityr   len)rM   r~   r   
new_tokenspartentity	remainders          r(   r   z)BleachSanitizerFilter.sanitize_characters  s    yy$L&**+EtLf d?L
 "66t< 	DDs#&33D9% #))<*MN"))8V*LM !%S[1_%6 7I "))<*ST|TBC5	D8 r'   c                    t        j                  |      }t        j                  dd|      }|j	                  dd      }|j                         }	 t        j                  |      }|j                  r|j                  |v r|S y|j                  d      r|S d|v r|j                  d      d   |v r|S d|v sd	|v r|S y# t        $ r Y yw xY w)
zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   r   )r   convert_entitiesrer   replacelowerr   urlparserq   schemer   split)rM   rl   rU   normalized_uriparseds        r(   sanitize_uri_valuez(BleachSanitizerFilter.sanitize_uri_value  s     '77>  ;RP (//"= (--/	  ((8F
 ==}} 11&  ((- ~%"((-a04EE **g9J.J5  		s   B< <	CCc                 b   d|v r)i }|d   j                         D ]  \  }}|\  }}| j                  |d   ||      s#|| j                  v r!| j                  || j                        }|P|}|| j
                  v r5t        j                  ddt        |            }|j                         }|s|}d|d   f| j                  v r0|dt        j                  d   dffv rt        j                  d	|      r|d
k(  r*| j                  r| j                  j                  |      }nd}|||<    ||d<   |S )z-Handles the case where we're allowing the tagr   r   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   xlinkr   z
^\s*[^#\s])Nr8   r   )itemsrw   rx   r   rU   ry   r   r   r   r-   rz   r   
namespacessearchr>   sanitize_css)	rM   r~   attrsnamespaced_nameval	namespacer   	new_valuenew_vals	            r(   r   z!BleachSanitizerFilter.allow_token  sh   U? E(-f(;(;(= 5-$"1	4 ''ftSA #d&:&:: $ 7 7T=S=S TI ( #C #d&B&BB ff%A3QTVG%mmoG" 
 & %-(D,E,EE&&&11':FC+  99]C8$ #o5))"00==cB ! *-o&k5-n "E&Mr'   c                    |d   }|dk(  rd|d    d|d<   n|d   r|dv sJ g }|d   j                         D ]W  \  \  }}}|r|s||}}||t        j                  vr|}nt        j                  |    d| }|j                  d	| d
| d       Y d|d    dj	                  |       d|d<   nd|d    d|d<   |j                  d      r|d   d d  d|d<   d|d<   |d= |S )Nr   r   z</r   >r   )r   r   r   r   z="r   <r   selfClosingz/>r   )r   r   prefixesr   r   r   )rM   r~   r   r   nsr   vr   s           r(   r   z&BleachSanitizerFilter.disallowed_tokenZ  sE   6]
! vq1E&M6]!9999E!&v!4!4!6 :
TA d#RB :=+A+A!A&*O)6)?)?)C(DAdV&LO
 q 1A3a89!:"  frwwu~.>a@E&M  fa0E&M99]#$V}Sb12"5E&M$f&Mr'   )r#   r$   r%   rb   rc   rd   re   r   rx   ry   rz   rO   r   r   r   r|   r   r   r   r   r&   r'   r(   rZ   rZ      sj     "%+%55 - E E*??# <9|
B

)V;z8tCJ$r'   rZ   )	itertoolsr   r   rK   xml.sax.saxutilsr   bleachr   r   rv   rc   rd   re   r   rangechrINVISIBLE_CHARACTERScompileUNICODEr   r   UserWarningr"   r*   rr   SanitizerFilterrZ   )cs   0r(   <module>r      s     	  %    ( '	Iy  9:  ww5A;b"uR}EFSVF 
 %"**S+?%?#%ErzzR  ! 	K 	U0 U0p(LVBM99 Be Gs   'C