B
    c-                 @   s  yd dl ZW n ek
r(   d dlZY nX d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ e
ed
eedddZe
ed
eedddZ e
ed
eee dddZ!e
ed
eedddZ"e
ed
eedddZ#e
ed
eedddZ$e
ed
eedddZ%e
ed
eedddZ&e
ed
eedddZ'e
ed
eedddZ(eedd d!Z)e
ed
eedd"d#Z*e
ed
eedd$d%Z+e
ed
eedd&d'Z,e
ed
eedd(d)Z-e
ed
eedd*d+Z.e
e/ed
eed,d-d.Z0e
ed
eedd/d0Z1dSe2e3ee d2d3d4Z4e
d5d
eed6d7d8Z5e2eee e2f d9d:d;Z6eed<d=d>Z7dTeeed@dAdBZ8eee dCdDdEZ9eee:dFdGdHZ;eeedFdIdJZ<dKej=dLfee3eddMdNdOZ>dUe2ee?e3eee2eee eeddf dP
dQdRZ@dS )V    N)IncrementalDecoder)aliases)	lru_cache)findall)	GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize)	characterreturnc             C   sT   yt | }W n tk
r"   dS X d|kpRd|kpRd|kpRd|kpRd|kpRd|kS )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDE)unicodedataname
ValueError)r   description r   H/tmp/pip-install-h9fvvp4j/charset-normalizer/charset_normalizer/utils.pyis_accentuated   s    r   c             C   s.   t | }|s| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   Z
decomposedcodesr   r   r   remove_accent,   s
    

r%   c             C   s.   t | }x t D ]\}}||kr|S qW dS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   Zcharacter_ord
range_nameZ	ord_ranger   r   r   unicode_range7   s
    r)   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFZLATIN)r   r   r   )r   r   r   r   r   is_latinE   s
    r*   c             C   s(   y|  d W n tk
r"   dS X dS )NasciiFT)encodeUnicodeEncodeError)r   r   r   r   is_asciiN   s
    r.   c             C   s2   t | }d|krdS t| }|d kr*dS d|kS )NPTFZPunctuation)r   categoryr)   )r   character_categorycharacter_ranger   r   r   is_punctuationW   s    
r3   c             C   s:   t | }d|ksd|krdS t| }|d kr2dS d|kS )NSNTFZForms)r   r0   r)   )r   r1   r2   r   r   r   	is_symbolf   s    
r6   c             C   s   t | }|d krdS d|kS )NFZ	Emoticons)r)   )r   r2   r   r   r   is_emoticonu   s    r7   c             C   s&   |   s| dkrdS t| }d|kS )N>   +><   ｜,;TZ)isspacer   r0   )r   r1   r   r   r   is_separator   s    
r@   c             C   s   |   |  kS )N)islowerisupper)r   r   r   r   is_case_variable   s    rC   c             C   s   t | }|dkS )NCo)r   r0   )r   r1   r   r   r   is_private_use_only   s    
rE   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFCJK)r   r   r   )r   character_namer   r   r   is_cjk   s
    rH   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFZHIRAGANA)r   r   r   )r   rG   r   r   r   is_hiragana   s
    rI   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFZKATAKANA)r   r   r   )r   rG   r   r   r   is_katakana   s
    rJ   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFZHANGUL)r   r   r   )r   rG   r   r   r   	is_hangul   s
    rK   c             C   s,   yt | }W n tk
r"   dS X d|kS )NFZTHAI)r   r   r   )r   rG   r   r   r   is_thai   s
    rL   )r(   r   c                s   t  fddtD S )Nc             3   s   | ]}| kV  qd S )Nr   ).0keyword)r(   r   r   	<genexpr>   s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   )r(   r   )r(   r   is_unicode_range_secondary   s    rQ   c             C   s(   |   dko&|  dko&| dko&| dkS )NFu   ﻿)r?   isprintable)r   r   r   r   is_unprintable   s    rT      )sequencesearch_zoner   c             C   s   t | tstt| }tt| dt|| jddd}t|dkrHdS xJ|D ]B}| 	dd}x,t
 D ] \}}||kr|S ||krl|S qlW qNW dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr+   ignore)errorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r'   )rV   rW   Zseq_lenresultsZspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s     

rg      )r   r   c             C   s    | dkpt td| jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   	utf_16_be	utf_32_beutf_16	utf_16_leutf_8	utf_8_sigutf_7utf_32	utf_32_lezencodings.{})
issubclass	importlibimport_moduleformatr   r   )r   r   r   r   is_multi_byte_encoding   s    
rv   )rV   r   c             C   sJ   xDt D ]<}t | }t|tr"|g}x|D ]}| |r(||fS q(W qW dS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r   r\   r]   
startswith)rV   iana_encodingZmarksmarkr   r   r   identify_sig_or_bom  s    



r{   )ry   r   c             C   s   | dkS )N>   rp   rk   r   )ry   r   r   r   should_strip_sig_or_bom  s    r|   T)cp_namestrictr   c             C   sL   |   dd} x$t D ]\}}| ||gkr|S qW |rHtd| | S )NrZ   r[   z Unable to retrieve IANA for '{}')rb   rc   r   r'   r   ru   )r}   r~   re   rf   r   r   r   	iana_name!  s    r   )decoded_sequencer   c             C   s8   t  }x(| D ] }t|}|d kr"q|| qW t|S )N)setr)   addlist)r   rangesr   r2   r   r   r   
range_scan1  s    
r   )iana_name_aiana_name_br   c       	      C   s   t | st |rdS td| j}td|j}|dd}|dd}d}x6tdD ]*}t|g}||||krZ|d7 }qZW |d S )	Ng        zencodings.{}rX   )rY   r      r      )rv   rs   rt   ru   r   ranger]   ra   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bZcharacter_match_countiZto_be_decodedr   r   r   cp_similarity?  s    


r   c             C   s   | t ko|t |  kS )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similarX  s    r   Zcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)r   levelformat_stringr   c             C   s:   t | }|| t  }|t | || d S )N)logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handlerc  s
    

r   )
	sequencesrf   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadr   c	             c   sF  |r:|dkr:x(|D ] }	||	|	|  }
|
s,P |
V  qW nx|D ]}	|	| }|t | d kr`qB| |	|	|  }|r|dkr|| }|j||rdndd}
|r8|	dkr8| |	 dkr8t|d}|r8|
d | |kr8x\t|	|	d	 d
D ]H}| || }|r|dkr|| }|j|dd}
|
d | |krP qW |
V  qBW d S )NF   rX   r~   )rY   r   rh   r      )r_   ra   r`   r   )r   rf   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr   r   r   cut_sequence_chunksq  s8    

r   )rU   )T)N)AZunicodedata2r   ImportErrorrs   r   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   r   r   r	   r
   r   Z_multibytecodecr   Zconstantr   r   r   r   r   r   strboolr   r%   r)   r*   r.   r3   r6   r7   r@   rC   rE   rH   rI   rJ   rK   rL   r_   rQ   rT   r]   r#   rg   rv   r{   r|   r   r   floatr   r   INFOr   r   r   r   r   r   r   <module>   s|     

							
 
