|
list | LEADING_PUNCTUATION = ['(', '<', '&lt;'] |
|
list | TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;'] |
|
list | DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;'] |
|
tuple | unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') |
|
tuple | word_split_re = re.compile(r'(\s+)') |
|
tuple | punctuation_re |
|
tuple | simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') |
|
tuple | link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') |
|
tuple | html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) |
|
tuple | hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) |
|
tuple | trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z') |
|
tuple | escape = allow_lazy(escape, unicode) |
|
HTML utilities suitable for global use.