NDOJ/judge/jinja2/markdown/__init__.py

182 lines
6.5 KiB
Python
Raw Normal View History

2020-01-21 06:35:58 +00:00
import logging
import re
from html.parser import HTMLParser
from urllib.parse import urlparse
import mistune
from django.conf import settings
from jinja2 import Markup
from lxml import html
from lxml.etree import ParserError, XMLSyntaxError
from judge.highlight_code import highlight_code
from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor
from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer
from judge.utils.camo import client as camo_client
from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer
from .. import registry
2022-05-14 17:57:27 +00:00
# Logger used by markdown() below to report rendered HTML that lxml fails to parse.
logger = logging.getLogger("judge.html")
2020-01-21 06:35:58 +00:00
# URL hosts (compared against urlparse(...).netloc) whose links are emitted
# without rel="nofollow"; read from the NOFOLLOW_EXCLUDED Django setting.
NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED
class CodeSafeInlineGrammar(mistune.InlineGrammar):
    """Inline grammar whose emphasis rules are delimited by asterisks only.

    Both patterns below match ``*`` delimiters exclusively; underscores are
    never treated as emphasis markers by these two rules.
    """

    # Strong emphasis: **word**
    # NOTE(review): the empty ``()`` group presumably keeps the group
    # numbering expected by the lexer -- confirm against mistune.InlineLexer.
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)()\*{2}(?!\*)")

    # Regular emphasis: *word*
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)()\*(?!\*)")
2020-01-21 06:35:58 +00:00
2022-05-14 17:57:27 +00:00
class AwesomeInlineGrammar(MathInlineGrammar, CodeSafeInlineGrammar):
    """Inline grammar combining the math rules with the asterisk-only emphasis rules.

    Adds nothing of its own; it only merges the two parent grammars.
    """
2022-07-30 10:21:08 +00:00
class AwesomeInlineLexer(MathInlineLexer, mistune.InlineLexer):
    """Inline lexer that tokenizes using ``AwesomeInlineGrammar``."""

    # Grammar class used by this lexer in place of the default one.
    grammar_class = AwesomeInlineGrammar
2022-07-30 10:21:08 +00:00
class AwesomeRenderer(MathRenderer, mistune.Renderer):
    """HTML renderer used by the ``markdown`` filter.

    On top of the parent renderers it:

    * adds ``rel="nofollow"`` to links pointing at hosts outside
      ``NOFOLLOW_WHITELIST`` (unless the ``nofollow`` option is false);
    * gives tables the ``table`` CSS class;
    * sends fenced code with a language through ``highlight_code``;
    * renders ``<latex>`` blocks through Texoid when the ``texoid`` option
      is true;
    * passes ``level + 2`` to the parent ``header`` implementation.

    Extra keyword options (removed from ``kwargs`` before the parent
    constructor runs):

    ``nofollow`` (default ``True``)
        Whether ``rel="nofollow"`` may be added to links.
    ``texoid`` (default ``False``)
        Whether a ``TexoidRenderer`` is created for ``<latex>`` blocks.
    """

    def __init__(self, *args, **kwargs):
        self.nofollow = kwargs.pop("nofollow", True)
        self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None
        # Kept only for code that may still read ``renderer.parser``; entity
        # decoding no longer goes through it because ``HTMLParser.unescape``
        # was removed in Python 3.9.
        self.parser = HTMLParser()
        super().__init__(*args, **kwargs)

    def _link_rel(self, href):
        """Return the ``rel`` attribute text (with leading space) for *href*.

        Returns an empty string when nofollow is disabled, when *href* is
        empty, has no network location, or its host is whitelisted.
        """
        # Honor the renderer option; previously it was stored but ignored.
        if not self.nofollow or not href:
            return ""
        try:
            netloc = urlparse(href).netloc
        except ValueError:
            # Unparseable URL: treat it as untrusted.
            return ' rel="nofollow"'
        if netloc and netloc not in NOFOLLOW_WHITELIST:
            return ' rel="nofollow"'
        return ""

    def autolink(self, link, is_email=False):
        """Render an autolink (``<http://...>`` or ``<user@host>``)."""
        text = mistune.escape(link)
        # The href sits inside a double-quoted attribute, so use the same
        # link escaping as ``link()`` below; plain ``mistune.escape(link)``
        # was used here before, which let a quote in the autolink text
        # terminate the attribute.
        href = mistune.escape_link(link)
        if is_email:
            href = "mailto:%s" % href
        return '<a href="%s"%s>%s</a>' % (href, self._link_rel(href), text)

    def table(self, header, body):
        """Render a table carrying the ``table`` CSS class."""
        return (
            '<table class="table">\n<thead>%s</thead>\n'
            "<tbody>\n%s</tbody>\n</table>\n"
        ) % (header, body)

    def link(self, link, title, text):
        """Render an inline link, with an optional ``title`` attribute."""
        link = mistune.escape_link(link)
        rel = self._link_rel(link)
        if not title:
            return '<a href="%s"%s>%s</a>' % (link, rel, text)
        title = mistune.escape(title, quote=True)
        return '<a href="%s" title="%s"%s>%s</a>' % (link, title, rel, text)

    def block_code(self, code, lang=None):
        """Render a code block; blocks with a language go to ``highlight_code``."""
        if not lang:
            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()
        return highlight_code(code, lang)

    def block_html(self, html):
        """Render a raw HTML block, intercepting ``<latex ...>...</latex>``.

        When Texoid is enabled and the block starts with ``<latex``, the text
        between the opening tag and the last ``<`` is entity-decoded and sent
        to Texoid. A falsy result yields the LaTeX source in ``<pre>``; a
        result containing ``"error"`` yields that error in ``<pre>``;
        otherwise an ``<img>`` wrapped in a ``span`` (when the tag attributes
        contain ``inline``) or a centered ``div`` is returned. Every other
        block is delegated to the parent renderer.

        Raises ``ValueError`` (from ``str.index``/``str.rindex``) if a
        ``<latex`` block contains no ``>`` or no ``<`` after it.
        """
        if not (self.texoid and html.startswith("<latex")):
            return super().block_html(html)

        # Local import: at module level the name ``html`` refers to lxml's
        # module, and here it is the method parameter.
        from html import unescape as unescape_entities

        open_tag_end = html.index(">")
        attr = html[6:open_tag_end]  # text after "<latex" up to ">"
        latex = unescape_entities(html[open_tag_end + 1 : html.rindex("<")])

        result = self.texoid.get_result(latex)
        if not result:
            return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)
        if "error" in result:
            return "<pre>%s</pre>" % mistune.escape(result["error"], smart_amp=False)

        meta = result["meta"]
        # A space now separates the onerror and width attributes; they were
        # previously emitted directly adjacent to each other.
        img = (
            '''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null" '''
            'width="%(width)s" height="%(height)s"%(tail)s>'
        ) % {
            "svg": result["svg"],
            "png": result["png"],
            "width": meta["width"],
            "height": meta["height"],
            "tail": " /" if self.options.get("use_xhtml") else "",
        }
        style = [
            "max-width: 100%",
            "height: %s" % meta["height"],
            "max-height: %s" % meta["height"],
            # Was populated from meta["height"], which looks like a
            # copy-paste slip; the wrapper width now uses the width value.
            "width: %s" % meta["width"],
        ]
        if "inline" in attr:
            tag = "span"
        else:
            tag = "div"
            style.append("text-align: center")
        return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)

    def header(self, text, level, *args, **kwargs):
        """Render a heading, passing ``level + 2`` to the parent renderer."""
        return super().header(text, level + 2, *args, **kwargs)
2022-07-30 10:21:08 +00:00
# A spoiler starts with "||" at the beginning of a line, followed by the
# summary text, whitespace, the detail text, and a closing "||".
# Group 1 is the whole match, group 2 the summary, group 3 the detail.
_SPOILER_RE = re.compile(r"(^\|\|(.+)\s+([\s\S]+?)\s*\|\|)", re.MULTILINE)

_SPOILER_TEMPLATE = (
    '<details><summary style="color: brown">'
    '<span class="spoiler-summary">{summary}</span>'
    "</summary>{detail}</details>"
)


def create_spoiler(value, style):
    """Replace ``||summary ... detail||`` blocks in *value* with ``<details>`` HTML.

    Each detail part is rendered with ``markdown(detail, style)``; the summary
    is inserted as-is. Text without any spoiler block is returned unchanged.

    :param value: Markdown source text.
    :param style: Markdown style name forwarded to ``markdown`` for the detail.
    :return: *value* with every spoiler block substituted.
    """

    def render_spoiler(match):
        summary, detail = match.group(2), match.group(3)
        return _SPOILER_TEMPLATE.format(
            summary=summary, detail=markdown(detail, style)
        )

    # One substitution pass over the real matches only: str.replace() on the
    # matched text would also rewrite identical text at positions the anchored
    # pattern never matched (e.g. not at the start of a line).
    return _SPOILER_RE.sub(render_spoiler, value)
2020-01-21 06:35:58 +00:00
@registry.filter
def markdown(value, style, math_engine=None, lazy_load=False, hard_wrap=False):
    """Render Markdown text to HTML and return it wrapped in ``Markup``.

    The options for *style* are looked up in ``settings.MARKDOWN_STYLES``,
    falling back to ``settings.MARKDOWN_DEFAULT_STYLE``. Spoiler blocks are
    expanded by ``create_spoiler`` before rendering. When the style enables
    camo (and a camo client exists) and/or *lazy_load* is true, the rendered
    HTML is parsed with lxml, handed to each post-processor, and serialized
    again.

    :param value: Markdown source text.
    :param style: Key into ``settings.MARKDOWN_STYLES``.
    :param math_engine: Forwarded to the renderer; math rendering is only
        switched on when this is not ``None``.
    :param lazy_load: Whether to run the lazy-load post-processor.
    :param hard_wrap: Forwarded to ``mistune.Markdown``.
    """
    style_config = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)
    safe_mode = style_config.get("safe_mode", True)
    add_nofollow = style_config.get("nofollow", True)
    use_texoid = TEXOID_ENABLED and style_config.get("texoid", False)
    use_math = hasattr(settings, "MATHOID_URL") and style_config.get("math", False)

    value = create_spoiler(value, style)

    # Callables applied, in order, to the parsed HTML tree after rendering.
    tree_passes = []
    if style_config.get("use_camo", False) and camo_client is not None:
        tree_passes.append(camo_client.update_tree)
    if lazy_load:
        tree_passes.append(lazy_load_processor)

    render = mistune.Markdown(
        renderer=AwesomeRenderer(
            escape=safe_mode,
            nofollow=add_nofollow,
            texoid=use_texoid,
            math=use_math and math_engine is not None,
            math_engine=math_engine,
        ),
        inline=AwesomeInlineLexer,
        parse_block_html=True,
        parse_inline_html=True,
        hard_wrap=hard_wrap,
    )
    rendered = render(value)

    # Nothing to post-process: skip the lxml round trip entirely.
    if not tree_passes:
        return Markup(rendered)

    try:
        tree = html.fromstring(rendered, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as exc:
        # An empty document is not worth logging; any other parse failure
        # on non-empty output is.
        if rendered and not (
            isinstance(exc, ParserError) and exc.args[0] == "Document is empty"
        ):
            logger.exception("Failed to parse HTML string")
        # Fall back to an empty container so the post-processors still run.
        tree = html.Element("div")

    for tree_pass in tree_passes:
        tree_pass(tree)
    return Markup(html.tostring(tree, encoding="unicode"))