B
    ê¹`K  ã               @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	„ Zd#dd„ZG dd„ deƒZdd„ ZdZdZd$dd„Zd%dd„Zd&dd„Zd'dd„Zd(d d!„Zed"kreej  ¡ ƒ dS ))z=Diagnostic functions, mainly for use when doing tech support.ÚMITé    N)ÚStringIO)Ú
HTMLParser)ÚBeautifulSoupÚ__version__)Úbuilder_registryc             C   sV  t dt ƒ t dtj ƒ dddg}x>|D ]6}x0tjD ]}||jkr6P q6W | |¡ t d| ƒ q*W d|krÌ| d¡ y*dd	l	m
} t d
d tt|jƒ¡ ƒ W n* tk
rÊ } zt dƒ W dd}~X Y nX d|kryddl}t d|j ƒ W n, tk
r } zt dƒ W dd}~X Y nX t| dƒr4|  ¡ } nŠ|  d¡sL|  d¡rdt d|  ƒ t dƒ dS y:tj | ¡rœt d|  ƒ t| ƒ}| ¡ } W dQ R X W n tk
r´   Y nX t dƒ x’|D ]Š}t d| ƒ d}	yt| |d}
d}	W n8 tk
r$ } zt d| ƒ t ¡  W dd}~X Y nX |	rDt d| ƒ t |
 ¡ ƒ t dƒ qÄW dS )z¼Diagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %szhtml.parserÚhtml5libÚlxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr   )ÚetreezFound lxml version %sÚ.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.Úreadzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file.Ú z#Trying to parse your markup with %sF)ÚfeaturesTz%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)Úprintr   ÚsysÚversionr   Zbuildersr   ÚremoveÚappendr	   r
   ÚjoinÚmapÚstrZLXML_VERSIONÚImportErrorr   Úhasattrr   Ú
startswithÚosÚpathÚexistsÚopenÚ
ValueErrorr   Ú	ExceptionÚ	tracebackÚ	print_excZprettify)ÚdataZbasic_parsersÚnameZbuilderr
   Úer   ÚfpÚparserÚsuccessÚsoup© r)   úT/home/kop/projects/devel/pgwui/test_venv/lib/python3.7/site-packages/bs4/diagnose.pyÚdiagnose   sj    








r+   Tc             K   sN   ddl m} x<|jt| ƒfd|i|—ŽD ]\}}td||j|jf ƒ q(W dS )a´  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   )r
   Úhtmlz%s, %4s, %sN)r	   r
   Ú	iterparser   r   ÚtagÚtext)r"   r,   Úkwargsr
   ÚeventÚelementr)   r)   r*   Ú
lxml_trace]   s    $r3   c               @   s`   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ ZdS )ÚAnnouncingParserzèSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c             C   s   t |ƒ d S )N)r   )ÚselfÚsr)   r)   r*   Ú_pu   s    zAnnouncingParser._pc             C   s   |   d| ¡ d S )Nz%s START)r7   )r5   r#   Úattrsr)   r)   r*   Úhandle_starttagx   s    z AnnouncingParser.handle_starttagc             C   s   |   d| ¡ d S )Nz%s END)r7   )r5   r#   r)   r)   r*   Úhandle_endtag{   s    zAnnouncingParser.handle_endtagc             C   s   |   d| ¡ d S )Nz%s DATA)r7   )r5   r"   r)   r)   r*   Úhandle_data~   s    zAnnouncingParser.handle_datac             C   s   |   d| ¡ d S )Nz
%s CHARREF)r7   )r5   r#   r)   r)   r*   Úhandle_charref   s    zAnnouncingParser.handle_charrefc             C   s   |   d| ¡ d S )Nz%s ENTITYREF)r7   )r5   r#   r)   r)   r*   Úhandle_entityref„   s    z!AnnouncingParser.handle_entityrefc             C   s   |   d| ¡ d S )Nz
%s COMMENT)r7   )r5   r"   r)   r)   r*   Úhandle_comment‡   s    zAnnouncingParser.handle_commentc             C   s   |   d| ¡ d S )Nz%s DECL)r7   )r5   r"   r)   r)   r*   Úhandle_declŠ   s    zAnnouncingParser.handle_declc             C   s   |   d| ¡ d S )Nz%s UNKNOWN-DECL)r7   )r5   r"   r)   r)   r*   Úunknown_decl   s    zAnnouncingParser.unknown_declc             C   s   |   d| ¡ d S )Nz%s PI)r7   )r5   r"   r)   r)   r*   Ú	handle_pi   s    zAnnouncingParser.handle_piN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r7   r9   r:   r;   r<   r=   r>   r?   r@   rA   r)   r)   r)   r*   r4   m   s   r4   c             C   s   t ƒ }| | ¡ dS )zÂPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r4   Úfeed)r"   r&   r)   r)   r*   Úhtmlparser_trace“   s    rG   ZaeiouZbcdfghjklmnpqrstvwxyzé   c             C   s>   d}x4t | ƒD ](}|d dkr$t}nt}|t |¡7 }qW |S )z#Generate a random word-like string.r   é   r   )ÚrangeÚ_consonantsÚ_vowelsÚrandomÚchoice)Úlengthr6   ÚiÚtr)   r)   r*   Úrword¡   s    rR   é   c             C   s   d  dd„ t| ƒD ƒ¡S )z'Generate a random sentence-like string.ú c             s   s   | ]}t t d d¡ƒV  qdS )rS   é	   N)rR   rM   Úrandint)Ú.0rP   r)   r)   r*   ú	<genexpr>®   s    zrsentence.<locals>.<genexpr>)r   rJ   )rO   r)   r)   r*   Ú	rsentence¬   s    rY   éè  c             C   s¨   dddddddg}g }x~t | ƒD ]r}t dd	¡}|dkrRt |¡}| d
| ¡ q |dkrr| tt dd¡ƒ¡ q |dkr t |¡}| d| ¡ q W dd |¡ d S )z+Randomly generate an invalid HTML document.ÚpÚdivÚspanrP   ÚbÚscriptÚtabler   é   z<%s>é   rS   rI   z</%s>z<html>Ú
z</html>)rJ   rM   rV   rN   r   rY   r   )Únum_elementsZ	tag_namesÚelementsrP   rN   Ztag_namer)   r)   r*   Úrdoc°   s    

rf   é † c       
      C   s(  t dt ƒ t| ƒ}t dt|ƒ ƒ xŽdddgddgD ]z}d}y"t ¡ }t||ƒ}t ¡ }d}W n6 tk
r– } zt d	| ƒ t ¡  W d
d
}~X Y nX |r6t d||| f ƒ q6W ddl	m
} t ¡ }| |¡ t ¡ }t d||  ƒ dd
l}	|	 ¡ }t ¡ }| |¡ t ¡ }t d||  ƒ d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r	   r,   r   zhtml.parserFTz%s could not parse the markup.Nz"BS4+%s parsed the markup in %.2fs.r   )r
   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   rf   ÚlenÚtimer   r   r    r!   r	   r
   ZHTMLr   r   Úparse)
rd   r"   r&   r'   Úar(   r^   r$   r
   r   r)   r)   r*   Úbenchmark_parsersÂ   s4    


rl   r	   c             C   sX   t  ¡ }|j}t| ƒ}tt||d}t d|||¡ t 	|¡}| 
d¡ | dd¡ dS )z7Use Python's profiler on a randomly generated document.)Úbs4r"   r&   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs4é2   N)ÚtempfileÚNamedTemporaryFiler#   rf   Údictrm   ÚcProfileZrunctxÚpstatsZStatsZ
sort_statsZprint_stats)rd   r&   Z
filehandleÚfilenamer"   ÚvarsÚstatsr)   r)   r*   Úprofileâ   s    

rw   Ú__main__)T)rH   )rS   )rZ   )rg   )rg   r	   )!rE   Ú__license__rr   Úior   Úhtml.parserr   rm   r   r   Zbs4.builderr   r   rs   rM   ro   ri   r    r   r+   r3   r4   rG   rL   rK   rR   rY   rf   rl   rw   rB   Ústdinr   r)   r)   r)   r*   Ú<module>   s8   G
&



 

