NDOJ/judge/lxml_tree.py

62 lines
1.6 KiB
Python
Raw Normal View History

2020-01-21 06:35:58 +00:00
import logging
from django.utils.safestring import SafeData, mark_safe
from lxml import html
from lxml.etree import ParserError, XMLSyntaxError
2022-05-14 17:57:27 +00:00
# Module-level logger; HTMLTreeString reports unexpected HTML parse failures
# through it under the "judge.html" logger name.
logger = logging.getLogger("judge.html")
2020-01-21 06:35:58 +00:00
class HTMLTreeString(SafeData):
    """Safe-string wrapper around a parsed lxml HTML element.

    The wrapper keeps the parsed element in ``_tree`` and renders it back to
    markup whenever a string is needed. Attribute reads the wrapper cannot
    satisfy itself are forwarded to the element first and to the rendered
    string second; writes to public attribute names are forwarded to the
    element.
    """

    def __init__(self, str):
        """Parse ``str`` into an HTML tree, falling back to an empty ``<div>``.

        Args:
            str: Markup to parse. The name shadows the builtin; it is kept so
                existing keyword callers continue to work.
        """
        try:
            self._tree = html.fromstring(str, parser=html.HTMLParser(recover=True))
        except (XMLSyntaxError, ParserError) as exc:
            # A ParserError whose message is "Document is empty" (or a falsy
            # input) is treated as an expected case and is not logged.
            # Slicing args avoids an IndexError if the exception has no args.
            is_empty_document = isinstance(exc, ParserError) and exc.args[:1] == (
                "Document is empty",
            )
            if str and not is_empty_document:
                logger.exception("Failed to parse HTML string")
            self._tree = html.Element("div")

    def __getattr__(self, attr):
        """Look up ``attr`` on the element, then on the rendered string.

        Only invoked when normal attribute lookup on the wrapper fails.

        Raises:
            AttributeError: if neither the element nor the rendered string
                has the attribute, or if ``_tree`` itself is not set yet.
        """
        if attr == "_tree":
            # Reaching here means the instance has no tree (for example it
            # was created without running __init__). Reading self._tree
            # below would re-enter this method and recurse without bound.
            raise AttributeError(attr)
        try:
            return getattr(self._tree, attr)
        except AttributeError:
            return getattr(str(self), attr)

    def __setattr__(self, key, value):
        """Store private names on the wrapper and forward the rest to the element."""
        if key.startswith("_"):
            super(HTMLTreeString, self).__setattr__(key, value)
        else:
            # Only non-private names are forwarded. Forwarding private names
            # too would, for ``_tree``, store the element on itself and
            # create a self-reference cycle.
            setattr(self._tree, key, value)

    def __repr__(self):
        """Return a debug representation containing the rendered markup."""
        return "<HTMLTreeString %r>" % str(self)

    def __str__(self):
        """Serialize the element to markup and mark the result as safe."""
        return mark_safe(html.tostring(self._tree, encoding="unicode"))

    def __radd__(self, other):
        """Return ``other`` concatenated with the rendered markup."""
        return other + str(self)

    def __add__(self, other):
        """Return the rendered markup concatenated with ``other``."""
        return str(self) + other

    def __getitem__(self, item):
        """Index or slice the rendered markup string."""
        return str(self)[item]

    def __getstate__(self):
        """Return the rendered markup as the pickled state."""
        return str(self)

    def __setstate__(self, state):
        """Rebuild the element by parsing the pickled markup.

        Note: this uses lxml's default parser, not the recovering parser
        that ``__init__`` uses, and does not catch parse errors.
        """
        self._tree = html.fromstring(state)

    @property
    def tree(self):
        """The underlying lxml element."""
        return self._tree
def fromstring(str):
    """Return ``str`` as an ``HTMLTreeString``.

    An argument that is already an ``HTMLTreeString`` is returned unchanged
    (same object); anything else is passed to the ``HTMLTreeString``
    constructor. The parameter name shadows the builtin and is kept for
    compatibility with existing callers.
    """
    already_wrapped = isinstance(str, HTMLTreeString)
    return str if already_wrapped else HTMLTreeString(str)