"""String-like wrapper around an lxml HTML tree."""
import logging

from django.utils.safestring import SafeData, mark_safe
from lxml import html
from lxml.etree import ParserError, XMLSyntaxError

logger = logging.getLogger('judge.html')


class HTMLTreeString(SafeData):
    """Wrap a parsed lxml HTML tree so it can be used like a string.

    The markup is parsed once into an lxml element stored in ``_tree``.
    Attribute reads are forwarded to that element first and then to the
    serialized string; public attribute writes are forwarded to the element.
    String-style operations (``str()``, ``+``, indexing, pickling) work on
    the serialized markup, which is passed through ``mark_safe``.
    """

    def __init__(self, str):
        """Parse ``str`` as HTML, falling back to an empty ``<div>``.

        Args:
            str: The HTML source to parse. The name shadows the builtin
                inside this method only; it is kept for compatibility with
                existing callers.
        """
        try:
            self._tree = html.fromstring(str, parser=html.HTMLParser(recover=True))
        except (XMLSyntaxError, ParserError) as e:
            # An empty document is an expected, uninteresting failure, so it
            # is not logged. Slicing ``args`` keeps this check from raising
            # if the exception carries no message at all.
            empty_document = (
                isinstance(e, ParserError) and e.args[:1] == ('Document is empty',)
            )
            if str and not empty_document:
                logger.exception('Failed to parse HTML string')
            self._tree = html.Element('div')

    def __getattr__(self, attr):
        """Look up ``attr`` on the tree, then on the serialized string.

        Only called when normal attribute lookup on the wrapper fails.

        Raises:
            AttributeError: If neither the tree nor the string has ``attr``,
                or if ``_tree`` itself has not been set yet.
        """
        if attr == '_tree':
            # Reaching here means ``_tree`` is unset (e.g. an instance made
            # via ``__new__``). Reading ``self._tree`` below would re-enter
            # this method forever, so fail cleanly instead.
            raise AttributeError(attr)
        try:
            return getattr(self._tree, attr)
        except AttributeError:
            return getattr(str(self), attr)

    def __setattr__(self, key, value):
        """Store private names on the wrapper and everything else on the tree.

        Names starting with an underscore are wrapper state (such as
        ``_tree``) and are no longer mirrored onto the element, which
        previously left the element holding a reference to itself.
        """
        if key.startswith('_'):
            super(HTMLTreeString, self).__setattr__(key, value)
        else:
            setattr(self._tree, key, value)

    def __repr__(self):
        """Return a debug representation containing the serialized markup."""
        # The format string needs a placeholder; an empty one makes the
        # ``%`` operator raise TypeError for any argument.
        return '<HTMLTreeString %r>' % str(self)

    def __str__(self):
        """Serialize the tree to markup and mark the result as safe."""
        return mark_safe(html.tostring(self._tree, encoding='unicode'))

    def __radd__(self, other):
        """Return ``other`` concatenated with the serialized markup."""
        return other + str(self)

    def __add__(self, other):
        """Return the serialized markup concatenated with ``other``."""
        return str(self) + other

    def __getitem__(self, item):
        """Index or slice the serialized markup."""
        return str(self)[item]

    def __getstate__(self):
        """Return the serialized markup as the pickled state."""
        return str(self)

    def __setstate__(self, state):
        """Rebuild the tree by re-parsing the pickled markup."""
        self._tree = html.fromstring(state)

    @property
    def tree(self):
        """The underlying lxml element."""
        return self._tree


def fromstring(str):
    """Return ``str`` as an ``HTMLTreeString``.

    An existing ``HTMLTreeString`` is returned unchanged; anything else is
    passed to the ``HTMLTreeString`` constructor to be parsed.
    """
    if isinstance(str, HTMLTreeString):
        return str
    return HTMLTreeString(str)