2020-01-21 06:35:58 +00:00
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
from html.parser import HTMLParser
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
|
|
import mistune
|
|
|
|
from django.conf import settings
|
|
|
|
from jinja2 import Markup
|
|
|
|
from lxml import html
|
|
|
|
from lxml.etree import ParserError, XMLSyntaxError
|
|
|
|
|
|
|
|
from judge.highlight_code import highlight_code
|
|
|
|
from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor
|
|
|
|
from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer
|
2022-05-14 17:57:27 +00:00
|
|
|
from judge.jinja2.markdown.spoiler import (
|
|
|
|
SpoilerInlineGrammar,
|
|
|
|
SpoilerInlineLexer,
|
|
|
|
SpoilerRenderer,
|
|
|
|
)
|
2020-01-21 06:35:58 +00:00
|
|
|
from judge.utils.camo import client as camo_client
|
|
|
|
from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer
|
|
|
|
from .. import registry
|
|
|
|
|
2022-05-14 17:57:27 +00:00
|
|
|
# Module-level logger, registered under the name "judge.html".
logger = logging.getLogger("judge.html")
|
2020-01-21 06:35:58 +00:00
|
|
|
|
|
|
|
# Taken from settings.NOFOLLOW_EXCLUDED; AwesomeRenderer._link_rel checks each
# link's network location (netloc) for membership in this collection.
NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED
|
|
|
|
|
|
|
|
|
|
|
|
class CodeSafeInlineGrammar(mistune.InlineGrammar):
    """Inline grammar whose emphasis rules only recognise asterisk delimiters.

    Both patterns below match ``*``-delimited text only; neither contains an
    underscore alternative.
    """

    # NOTE(review): the empty ``()`` group in each pattern presumably keeps the
    # group numbering compatible with the stock mistune patterns -- confirm
    # against the mistune version in use.

    # **word**
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)()\*{2}(?!\*)")

    # *word*
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)()\*(?!\*)")
|
2020-01-21 06:35:58 +00:00
|
|
|
|
|
|
|
|
2022-05-14 17:57:27 +00:00
|
|
|
class AwesomeInlineGrammar(
    MathInlineGrammar,
    SpoilerInlineGrammar,
    CodeSafeInlineGrammar,
):
    """Inline grammar combining the math, spoiler and code-safe grammars.

    Defines no rules of its own; every attribute is inherited from the bases
    in the order listed.
    """
|
|
|
|
|
|
|
|
|
2021-12-16 20:16:49 +00:00
|
|
|
class AwesomeInlineLexer(
    MathInlineLexer,
    SpoilerInlineLexer,
    mistune.InlineLexer,
):
    """Inline lexer mixing in the math and spoiler lexers.

    Uses ``AwesomeInlineGrammar`` as its grammar class.
    """

    grammar_class = AwesomeInlineGrammar
|
|
|
|
|
|
|
|
|
2021-12-16 20:16:49 +00:00
|
|
|
class AwesomeRenderer(MathRenderer, SpoilerRenderer, mistune.Renderer):
    """HTML renderer layering math, spoiler and site-specific output on mistune.

    Extra keyword arguments consumed here (all others are forwarded to the
    parent renderers):

    * ``nofollow`` (default ``True``) -- whether ``rel="nofollow"`` may be
      added to links whose host is not in ``NOFOLLOW_WHITELIST``.
    * ``texoid`` (default ``False``) -- when truthy, a ``TexoidRenderer`` is
      created and ``<latex>`` HTML blocks are rendered through it.
    """

    def __init__(self, *args, **kwargs):
        # Pop our own options before delegating so the parents never see them.
        self.nofollow = kwargs.pop("nofollow", True)
        self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None
        # Retained for backward compatibility with code that reads
        # ``renderer.parser``; this class no longer relies on it.
        self.parser = HTMLParser()
        super(AwesomeRenderer, self).__init__(*args, **kwargs)

    def _link_rel(self, href):
        """Return the ``rel`` attribute fragment to append to an ``<a>`` tag.

        Returns ``' rel="nofollow"'`` (with leading space) when nofollow is
        enabled and *href* either cannot be parsed or points at a host that is
        not in ``NOFOLLOW_WHITELIST``; otherwise returns ``""``.
        """
        # Honour the ``nofollow`` option; previously it was stored but ignored.
        if not self.nofollow or not href:
            return ""
        try:
            url = urlparse(href)
        except ValueError:
            # Unparseable URL: treat it as untrusted.
            return ' rel="nofollow"'
        # Links without a netloc (relative links) never get nofollow.
        if url.netloc and url.netloc not in NOFOLLOW_WHITELIST:
            return ' rel="nofollow"'
        return ""

    def autolink(self, link, is_email=False):
        """Render an automatically detected link or e-mail address.

        The escaped link is used as both target and text; e-mail targets are
        prefixed with ``mailto:``.
        """
        text = link = mistune.escape(link)
        if is_email:
            link = "mailto:%s" % link
        return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)

    def table(self, header, body):
        """Render a table, tagging the ``<table>`` element with class ``table``."""
        return (
            '<table class="table">\n<thead>%s</thead>\n'
            "<tbody>\n%s</tbody>\n</table>\n"
        ) % (header, body)

    def link(self, link, title, text):
        """Render an explicit link, with an escaped ``title`` when one is given."""
        link = mistune.escape_link(link)
        if not title:
            return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)
        title = mistune.escape(title, quote=True)
        return '<a href="%s" title="%s"%s>%s</a>' % (
            link,
            title,
            self._link_rel(link),
            text,
        )

    def block_code(self, code, lang=None):
        """Render a code block.

        Without a language the code is escaped into a plain ``<pre><code>``
        block; with one, the result of ``highlight_code(code, lang)`` is
        returned.
        """
        if not lang:
            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()
        return highlight_code(code, lang)

    def block_html(self, html):
        """Render a raw HTML block, handling ``<latex>`` blocks through Texoid.

        When Texoid is enabled and *html* starts with ``<latex``, the text
        between the opening tag and the last ``<`` is unescaped and sent to
        ``self.texoid.get_result``:

        * a falsy result yields the escaped LaTeX source in a ``<pre>``;
        * a result containing ``"error"`` yields that error in a ``<pre>``;
        * otherwise an ``<img>`` (SVG with PNG fallback) is returned, wrapped
          in a ``<span>`` if the opening tag's attributes contain ``inline``,
          or in a centred ``<div>`` otherwise.

        Every other block is delegated to the parent renderer.
        """
        if not (self.texoid and html.startswith("<latex")):
            return super(AwesomeRenderer, self).block_html(html)

        # ``HTMLParser.unescape`` was removed in Python 3.9; the stdlib
        # ``html.unescape`` is its replacement.  Imported locally under an
        # alias because the name ``html`` is taken here (by the parameter, and
        # by ``lxml.html`` at module level).
        from html import unescape as html_unescape

        tag_end = html.index(">")
        # Everything after "<latex" up to ">" is the raw attribute text.
        attr = html[6:tag_end]
        latex = html_unescape(html[tag_end + 1 : html.rindex("<")])

        result = self.texoid.get_result(latex)
        if not result:
            return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)
        if "error" in result:
            return "<pre>%s</pre>" % mistune.escape(result["error"], smart_amp=False)

        meta = result["meta"]
        # The trailing space after the ``onerror`` attribute keeps it separated
        # from ``width``; the two used to run together.
        img = (
            '''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null" '''
            'width="%(width)s" height="%(height)s"%(tail)s>'
        ) % {
            "svg": result["svg"],
            "png": result["png"],
            "width": meta["width"],
            "height": meta["height"],
            "tail": " /" if self.options.get("use_xhtml") else "",
        }
        style = [
            "max-width: 100%",
            "height: %s" % meta["height"],
            "max-height: %s" % meta["height"],
            # Was mistakenly filled with the height value.
            "width: %s" % meta["width"],
        ]
        if "inline" in attr:
            tag = "span"
        else:
            tag = "div"
            style += ["text-align: center"]
        return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)

    def header(self, text, level, *args, **kwargs):
        """Render a heading by delegating to the parent with ``level + 2``."""
        return super(AwesomeRenderer, self).header(text, level + 2, *args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
@registry.filter
def markdown(value, style, math_engine=None, lazy_load=False):
    """Render Markdown text to HTML and return it wrapped in ``Markup``.

    Args:
        value: Markdown source passed to the mistune renderer.
        style: Key looked up in ``settings.MARKDOWN_STYLES``; when missing,
            ``settings.MARKDOWN_DEFAULT_STYLE`` is used instead.
        math_engine: Forwarded to the renderer. Math rendering is only
            requested when this is not ``None``, the settings define
            ``MATHOID_URL`` and the style enables ``"math"``.
        lazy_load: When truthy, ``lazy_load_processor`` is applied to the
            parsed HTML tree.

    Returns:
        The rendered HTML as a ``Markup`` instance.
    """
    style_config = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)
    safe_mode = style_config.get("safe_mode", True)
    add_nofollow = style_config.get("nofollow", True)
    use_texoid = TEXOID_ENABLED and style_config.get("texoid", False)
    use_math = hasattr(settings, "MATHOID_URL") and style_config.get("math", False)

    # Callables that each receive the parsed lxml tree and modify it in place.
    tree_passes = []
    if style_config.get("use_camo", False) and camo_client is not None:
        tree_passes.append(camo_client.update_tree)
    if lazy_load:
        tree_passes.append(lazy_load_processor)

    render = mistune.Markdown(
        renderer=AwesomeRenderer(
            escape=safe_mode,
            nofollow=add_nofollow,
            texoid=use_texoid,
            math=use_math and math_engine is not None,
            math_engine=math_engine,
        ),
        inline=AwesomeInlineLexer,
        parse_block_html=1,
        parse_inline_html=1,
        hard_wrap=True,
    )
    rendered = render(value)

    # Without post-processors the HTML is returned without an lxml round trip.
    if not tree_passes:
        return Markup(rendered)

    try:
        tree = html.fromstring(rendered, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as exc:
        # A ParserError reading "Document is empty" (or empty output) is not
        # logged; any other parse failure is. Either way, continue with an
        # empty <div>.
        if rendered and not (
            isinstance(exc, ParserError) and exc.args[0] == "Document is empty"
        ):
            logger.exception("Failed to parse HTML string")
        tree = html.Element("div")

    for apply_pass in tree_passes:
        apply_pass(tree)
    return Markup(html.tostring(tree, encoding="unicode"))
|