NDOJ/judge/jinja2/markdown/__init__.py

184 lines
6.5 KiB
Python
Raw Normal View History

2020-01-21 06:35:58 +00:00
import logging
import re
from html.parser import HTMLParser
from urllib.parse import urlparse
import mistune
from django.conf import settings
2022-09-16 23:02:53 +00:00
from markupsafe import Markup
2020-01-21 06:35:58 +00:00
from lxml import html
from lxml.etree import ParserError, XMLSyntaxError
from judge.highlight_code import highlight_code
from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor
from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer
from judge.utils.camo import client as camo_client
from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer
from .. import registry
2022-05-14 17:57:27 +00:00
# Module-level logger; markdown() uses it to report rendered HTML that lxml
# fails to parse during post-processing.
logger = logging.getLogger("judge.html")
2020-01-21 06:35:58 +00:00
# Collection of hosts, taken from settings.NOFOLLOW_EXCLUDED, that a link's
# netloc is checked against: links to these hosts do not get rel="nofollow".
NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED
2020-01-21 06:35:58 +00:00
class CodeSafeInlineGrammar(mistune.InlineGrammar):
    """Inline grammar whose emphasis rules only recognise asterisk delimiters.

    Both patterns below match ``*``-delimited spans exclusively; presumably
    this keeps underscores inside identifiers from being read as emphasis
    markers (verify against the base grammar's defaults).

    NOTE(review): the empty ``()`` group in each pattern looks like a
    placeholder that keeps group numbering compatible with the patterns
    being overridden -- confirm against mistune's inline lexer.
    """

    # Matches: *word*
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)()\*(?!\*)")

    # Matches: **word**
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)()\*{2}(?!\*)")
2020-01-21 06:35:58 +00:00
class AwesomeInlineGrammar(MathInlineGrammar, CodeSafeInlineGrammar):
    """Combine MathInlineGrammar and CodeSafeInlineGrammar into one grammar.

    The class adds no rules of its own; everything comes from the two bases
    through multiple inheritance (MathInlineGrammar takes precedence in the
    method resolution order).
    """
2022-07-30 10:21:08 +00:00
class AwesomeInlineLexer(MathInlineLexer, mistune.InlineLexer):
    """Inline lexer built from MathInlineLexer and mistune's InlineLexer.

    Its only customisation is selecting AwesomeInlineGrammar as the grammar
    the lexer tokenises with.
    """

    # Grammar class this lexer uses for inline tokenisation.
    grammar_class = AwesomeInlineGrammar
2022-07-30 10:21:08 +00:00
class AwesomeRenderer(MathRenderer, mistune.Renderer):
    """HTML renderer with link ``rel`` handling, styled tables, highlighted
    code blocks and optional ``<latex>`` block rendering through Texoid.
    """

    def __init__(self, *args, **kwargs):
        """Consume the renderer-specific options and forward everything else.

        Keyword Args:
            nofollow: when true (the default), links to hosts outside
                NOFOLLOW_WHITELIST are emitted with ``rel="nofollow"``.
            texoid: when true, a TexoidRenderer is created and used to render
                ``<latex>`` HTML blocks; otherwise such blocks are left to
                the parent renderer.

        All remaining positional and keyword arguments are passed unchanged
        to the parent initialisers.
        """
        self.nofollow = kwargs.pop("nofollow", True)
        self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None
        # This class no longer uses the parser itself (HTMLParser.unescape was
        # removed in Python 3.9); the attribute is kept only so that any
        # external code reading ``renderer.parser`` keeps working.
        self.parser = HTMLParser()
        super().__init__(*args, **kwargs)

    def _link_rel(self, href):
        """Return the ``rel`` attribute fragment to append to a link tag.

        Args:
            href: the (already escaped) link target; may be empty.

        Returns:
            ``' rel="nofollow"'`` (with leading space) when nofollow is
            enabled and *href* either cannot be parsed or has a network
            location that is not in NOFOLLOW_WHITELIST; ``""`` otherwise.
        """
        # Honour the ``nofollow`` option accepted by __init__; previously the
        # flag was stored but never consulted.
        if not href or not self.nofollow:
            return ""
        try:
            netloc = urlparse(href).netloc
        except ValueError:
            # Unparseable URLs are treated as untrusted.
            return ' rel="nofollow"'
        # Relative links and schemes without a host (e.g. mailto:) have an
        # empty netloc and are never marked.
        if netloc and netloc not in NOFOLLOW_WHITELIST:
            return ' rel="nofollow"'
        return ""

    def autolink(self, link, is_email=False):
        """Render a bare URL or e-mail address as an anchor.

        Args:
            link: the raw URL or address; it is HTML-escaped before use.
            is_email: when true the href gets a ``mailto:`` prefix while the
                visible text stays the plain address.
        """
        text = link = mistune.escape(link)
        if is_email:
            link = "mailto:%s" % link
        return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)

    def table(self, header, body):
        """Render a table from pre-rendered *header* and *body* HTML,
        tagging the ``<table>`` element with the ``table`` CSS class.
        """
        return (
            '<table class="table">\n<thead>%s</thead>\n'
            "<tbody>\n%s</tbody>\n</table>\n"
        ) % (header, body)

    def link(self, link, title, text):
        """Render an explicit ``[text](link "title")`` link.

        Args:
            link: the target; passed through ``mistune.escape_link``.
            title: optional title; HTML-escaped (quotes included) when given.
            text: already-rendered inner HTML of the anchor.
        """
        link = mistune.escape_link(link)
        rel = self._link_rel(link)
        if not title:
            return '<a href="%s"%s>%s</a>' % (link, rel, text)
        title = mistune.escape(title, quote=True)
        return '<a href="%s" title="%s"%s>%s</a>' % (link, title, rel, text)

    def block_code(self, code, lang=None):
        """Render a code block.

        Without a language the code is escaped and wrapped in
        ``<pre><code>``; with one it is delegated to ``highlight_code``.
        """
        if not lang:
            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()
        return highlight_code(code, lang)

    def block_html(self, html):
        """Render a raw HTML block, turning ``<latex>`` blocks into images.

        When Texoid is enabled and the block starts with ``<latex``, the text
        between the opening tag and the last ``<`` is unescaped and sent to
        ``self.texoid.get_result``:

        * a falsy result yields the LaTeX source inside ``<pre>``;
        * a result containing ``"error"`` yields that error inside ``<pre>``;
        * otherwise an ``<img>`` (SVG with PNG fallback) is returned, wrapped
          in a ``<span>`` if the opening tag's attributes contain ``inline``
          and in a centred ``<div>`` if not.

        Any other block is handled by the parent renderer.
        """
        if not (self.texoid and html.startswith("<latex")):
            return super().block_html(html)

        # Imported locally under another name: in this module the bare name
        # ``html`` refers to lxml.html (and, here, to the parameter).
        from html import unescape as unescape_entities

        tag_end = html.index(">")
        attr = html[6:tag_end]  # everything after "<latex" in the opening tag
        latex = unescape_entities(html[tag_end + 1 : html.rindex("<")])

        result = self.texoid.get_result(latex)
        if not result:
            return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)
        if "error" in result:
            return "<pre>%s</pre>" % mistune.escape(result["error"], smart_amp=False)

        meta = result["meta"]
        # The trailing space after the onerror attribute separates it from
        # ``width``; without it the two attributes run together.
        img = (
            '''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null" '''
            'width="%(width)s" height="%(height)s"%(tail)s>'
        ) % {
            "svg": result["svg"],
            "png": result["png"],
            "width": meta["width"],
            "height": meta["height"],
            "tail": " /" if self.options.get("use_xhtml") else "",
        }
        style = [
            "max-width: 100%",
            "height: %s" % meta["height"],
            "max-height: %s" % meta["height"],
            # Uses the reported width; the height was used here before.
            "width: %s" % meta["width"],
        ]
        if "inline" in attr:
            tag = "span"
        else:
            tag = "div"
            style.append("text-align: center")
        return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)

    def header(self, text, level, *args, **kwargs):
        """Render a heading two levels deeper than written (level + 2)."""
        return super().header(text, level + 2, *args, **kwargs)
2022-07-30 10:21:08 +00:00
def create_spoiler(value, style):
    """Expand ``||summary ... ||`` spoiler blocks in *value* into HTML.

    A spoiler starts with ``||`` at the beginning of a line, followed by the
    summary text, whitespace, the detail text and a closing ``||``.  The
    detail text is rendered with ``markdown(detail, style)`` and the whole
    block is replaced by a ``<details>`` element whose ``<summary>`` holds
    the summary text as written.

    Args:
        value: the markdown source to scan.
        style: markdown style name, forwarded when rendering the detail text.

    Returns:
        *value* with every matched spoiler block substituted; text without
        spoilers is returned unchanged.
    """
    spoiler_pattern = re.compile(r"(^\|\|(.+)\s+([\s\S]+?)\s*\|\|)", re.MULTILINE)
    details_template = (
        '<details><summary style="color: brown">'
        '<span class="spoiler-summary">{summary}</span>'
        "</summary>{detail}</details>"
    )

    rendered = value
    # Matches are collected from the original text up front; each one is then
    # substituted wherever its exact source text still occurs.
    for block, summary, body in spoiler_pattern.findall(value):
        replacement = details_template.format(
            summary=summary,
            detail=markdown(body, style),
        )
        rendered = rendered.replace(block, replacement)
    return rendered
2020-01-21 06:35:58 +00:00
@registry.filter
def markdown(value, style, math_engine=None, lazy_load=False, hard_wrap=False):
    """Render markdown *value* to HTML according to a named style.

    Args:
        value: markdown source text.
        style: key into ``settings.MARKDOWN_STYLES``; unknown keys fall back
            to ``settings.MARKDOWN_DEFAULT_STYLE``.
        math_engine: forwarded to the renderer; math rendering is requested
            only when this is not None, the style enables ``math`` and
            ``settings.MATHOID_URL`` exists.
        lazy_load: when true, the lazy-load processor is run on the output.
        hard_wrap: forwarded to ``mistune.Markdown``.

    Returns:
        The rendered HTML wrapped in ``Markup``.
    """
    options = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)
    safe_mode = options.get("safe_mode", True)
    add_nofollow = options.get("nofollow", True)
    use_texoid = TEXOID_ENABLED and options.get("texoid", False)
    use_math = hasattr(settings, "MATHOID_URL") and options.get("math", False)

    # Spoiler blocks are expanded to HTML before the main markdown pass.
    value = create_spoiler(value, style)

    # Callables applied, in order, to the parsed lxml tree of the output.
    tree_processors = []
    if options.get("use_camo", False) and camo_client is not None:
        tree_processors.append(camo_client.update_tree)
    if lazy_load:
        tree_processors.append(lazy_load_processor)

    render = mistune.Markdown(
        renderer=AwesomeRenderer(
            escape=safe_mode,
            nofollow=add_nofollow,
            texoid=use_texoid,
            math=use_math and math_engine is not None,
            math_engine=math_engine,
        ),
        inline=AwesomeInlineLexer,
        parse_block_html=1,
        parse_inline_html=1,
        hard_wrap=hard_wrap,
    )
    rendered = render(value)

    # Without post-processors the HTML is returned as produced, skipping the
    # lxml round-trip entirely.
    if not tree_processors:
        return Markup(rendered)

    try:
        tree = html.fromstring(rendered, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as exc:
        # An empty document is expected for empty output and is not logged;
        # every other parse failure on non-empty output is.
        if rendered and not (
            isinstance(exc, ParserError) and exc.args[0] == "Document is empty"
        ):
            logger.exception("Failed to parse HTML string")
        # Fall back to an empty container so the processors still have a tree.
        tree = html.Element("div")

    for process in tree_processors:
        process(tree)
    return Markup(html.tostring(tree, encoding="unicode"))