Migrate mistune to markdown
This commit is contained in:
parent
412945626b
commit
77aaae6735
46 changed files with 5112 additions and 420 deletions
|
@ -1,183 +1,36 @@
|
|||
import logging
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import mistune
|
||||
from django.conf import settings
|
||||
from markupsafe import Markup
|
||||
from lxml import html
|
||||
from lxml.etree import ParserError, XMLSyntaxError
|
||||
|
||||
from judge.highlight_code import highlight_code
|
||||
from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor
|
||||
from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer
|
||||
from judge.utils.camo import client as camo_client
|
||||
from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer
|
||||
from .. import registry
|
||||
|
||||
# Module-level logger; markdown() below uses it to report HTML that lxml
# could not parse before post-processing.
logger = logging.getLogger("judge.html")
|
||||
|
||||
# Hosts (compared against urlparse(...).netloc in AwesomeRenderer._link_rel)
# whose links are emitted without rel="nofollow".
NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED
|
||||
import markdown as _markdown
|
||||
import bleach
|
||||
from django.utils.html import escape
|
||||
|
||||
|
||||
class CodeSafeInlineGrammar(mistune.InlineGrammar):
    """Inline grammar whose emphasis rules only recognise ``*`` delimiters.

    Both patterns below match asterisk-delimited text only, so underscores
    (common in identifiers) never open or close emphasis.
    NOTE(review): the empty ``()`` group presumably keeps the group numbering
    that mistune's default emphasis handlers expect -- confirm against
    mistune 0.8.
    """

    # **word**
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)()\*{2}(?!\*)")

    # *word*
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)()\*(?!\*)")
|
||||
# Python-Markdown extensions enabled for every call to markdown().
# Kept as a list (not a tuple) because markdown() builds the hard-wrap
# variant with ``EXTENSIONS + ["nl2br"]``. Order is unchanged.
EXTENSIONS = [
    # pymdown-extensions
    "pymdownx.magiclink",
    "pymdownx.details",
    "pymdownx.emoji",
    "pymdownx.inlinehilite",
    "pymdownx.superfences",
    "pymdownx.tasklist",
    # extensions bundled with Python-Markdown
    "markdown.extensions.footnotes",
    "markdown.extensions.attr_list",
    "markdown.extensions.def_list",
    "markdown.extensions.tables",
    "markdown.extensions.admonition",
    # math delimiters
    "pymdownx.arithmatex",
]
|
||||
|
||||
# Tags that bleach.clean() keeps in markdown(): bleach's defaults plus a few
# extras. ``list(...)`` is required because bleach >= 6.0 exposes
# ``bleach.sanitizer.ALLOWED_TAGS`` as a frozenset, and ``frozenset + list``
# raises TypeError; on older releases (where it is a list) this is a no-op copy.
ALLOWED_TAGS = list(bleach.sanitizer.ALLOWED_TAGS) + ["img", "center", "iframe"]
|
||||
|
||||
class AwesomeInlineGrammar(MathInlineGrammar, CodeSafeInlineGrammar):
    """Inline grammar combining the math rules with asterisk-only emphasis.

    Adds nothing of its own; it exists only to merge the two parent grammars
    (math rules take precedence in the MRO).
    """
|
||||
|
||||
|
||||
class AwesomeInlineLexer(MathInlineLexer, mistune.InlineLexer):
    """Inline lexer that tokenises with :class:`AwesomeInlineGrammar`."""

    # Grammar used by this lexer instead of the one set on MathInlineLexer.
    grammar_class = AwesomeInlineGrammar
|
||||
|
||||
|
||||
class AwesomeRenderer(MathRenderer, mistune.Renderer):
    """mistune renderer with site-specific HTML output.

    On top of the math rendering inherited from ``MathRenderer`` it:

    * adds ``rel="nofollow"`` to links whose host is not in
      ``NOFOLLOW_WHITELIST``;
    * emits tables with ``class="table"``;
    * sends fenced code with a language through ``highlight_code``;
    * renders ``<latex ...>`` blocks through Texoid when enabled;
    * shifts every heading down by two levels.
    """

    def __init__(self, *args, **kwargs):
        """Consume the ``nofollow`` and ``texoid`` options; forward the rest.

        ``texoid`` truthy creates a ``TexoidRenderer``; otherwise
        ``self.texoid`` is ``None`` and ``<latex>`` blocks are left alone.
        """
        self.nofollow = kwargs.pop("nofollow", True)
        self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None
        # Retained so existing code reading ``renderer.parser`` keeps working;
        # block_html() no longer relies on it (see below).
        self.parser = HTMLParser()
        super(AwesomeRenderer, self).__init__(*args, **kwargs)

    def _link_rel(self, href):
        """Return the ``rel`` attribute text (with leading space) for *href*.

        Returns ``' rel="nofollow"'`` when *href* cannot be parsed or points
        at a host outside ``NOFOLLOW_WHITELIST``; returns ``""`` for empty,
        relative or whitelisted links.

        NOTE(review): ``self.nofollow`` is stored by ``__init__`` but no
        method of this class reads it -- confirm whether it should gate this.
        """
        if href:
            try:
                url = urlparse(href)
            except ValueError:
                # Unparseable target: treat it as untrusted.
                return ' rel="nofollow"'
            else:
                if url.netloc and url.netloc not in NOFOLLOW_WHITELIST:
                    return ' rel="nofollow"'
        return ""

    def autolink(self, link, is_email=False):
        """Render a bare URL or e-mail address as an anchor."""
        text = link = mistune.escape(link)
        if is_email:
            link = "mailto:%s" % link
        return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)

    def table(self, header, body):
        """Render a table, tagging it with ``class="table"``."""
        return (
            '<table class="table">\n<thead>%s</thead>\n'
            "<tbody>\n%s</tbody>\n</table>\n"
        ) % (header, body)

    def link(self, link, title, text):
        """Render ``[text](link "title")``; *title* is optional."""
        link = mistune.escape_link(link)
        if not title:
            return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)
        title = mistune.escape(title, quote=True)
        return '<a href="%s" title="%s"%s>%s</a>' % (
            link,
            title,
            self._link_rel(link),
            text,
        )

    def block_code(self, code, lang=None):
        """Render a code block; highlight it when *lang* is given."""
        if not lang:
            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()
        return highlight_code(code, lang)

    def block_html(self, html):
        """Render a raw HTML block, turning ``<latex>`` blocks into images.

        When Texoid is enabled and the block starts with ``<latex``, the text
        between the opening and closing tag is HTML-unescaped and sent to
        Texoid. A successful result becomes an ``<img>`` (SVG with PNG
        fallback) wrapped in a ``<span>`` (when the tag attributes contain
        ``inline``) or a centred ``<div>``. A missing result or an error is
        shown in a ``<pre>``. Every other block goes to the parent renderer.
        """
        if not (self.texoid and html.startswith("<latex")):
            return super(AwesomeRenderer, self).block_html(html)

        # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
        # is the supported replacement. Imported locally under an alias
        # because the name ``html`` is the parameter here.
        from html import unescape as html_unescape

        tag_end = html.index(">")
        attr = html[6:tag_end]
        latex = html_unescape(html[tag_end + 1 : html.rindex("<")])

        result = self.texoid.get_result(latex)
        if not result:
            return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)
        if "error" in result:
            return "<pre>%s</pre>" % mistune.escape(result["error"], smart_amp=False)

        meta = result["meta"]
        # A space now separates the onerror and width attributes; they were
        # previously emitted back to back.
        img = (
            '''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null"'''
            ' width="%(width)s" height="%(height)s"%(tail)s>'
        ) % {
            "svg": result["svg"],
            "png": result["png"],
            "width": meta["width"],
            "height": meta["height"],
            "tail": " /" if self.options.get("use_xhtml") else "",
        }
        style = [
            "max-width: 100%",
            "height: %s" % meta["height"],
            "max-height: %s" % meta["height"],
            # Was meta["height"], which forced a square box.
            "width: %s" % meta["width"],
        ]
        if "inline" in attr:
            tag = "span"
        else:
            tag = "div"
            style += ["text-align: center"]
        return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)

    def header(self, text, level, *args, **kwargs):
        """Render a heading two levels lower than written (``#`` -> ``<h3>``)."""
        return super(AwesomeRenderer, self).header(text, level + 2, *args, **kwargs)
|
||||
|
||||
|
||||
def create_spoiler(value, style):
    """Expand ``||summary ... ||`` spoiler blocks in *value* into ``<details>``.

    A spoiler starts with ``||`` at the beginning of a line; the rest of that
    line is the summary and the following text up to the closing ``||`` is
    the detail, which is rendered with ``markdown(detail, style)``.

    Returns *value* with every matched block replaced; text without spoilers
    is returned unchanged.
    """
    spoiler_re = re.compile(r"(^\|\|(.+)\s+([\s\S]+?)\s*\|\|)", re.MULTILINE)
    template = (
        '<details><summary style="color: brown">'
        '<span class="spoiler-summary">{summary}</span>'
        "</summary>{detail}</details>"
    )

    # Matches are collected once, against the untouched input, before any
    # replacement is made.
    for whole, title, body in spoiler_re.findall(value):
        rendered_body = markdown(body, style)
        value = value.replace(
            whole, template.format(summary=title, detail=rendered_body)
        )
    return value
|
||||
# Attribute names handed to bleach.clean() as ``attributes`` in markdown();
# a flat list, so the same names apply to every tag.
ALLOWED_ATTRS = ["src", "width", "height", "href"]
|
||||
|
||||
|
||||
@registry.filter
def markdown(value, style, math_engine=None, lazy_load=False, hard_wrap=False):
    """Render *value* with mistune according to the named markdown *style*.

    *style* selects an entry of ``settings.MARKDOWN_STYLES`` (falling back to
    ``settings.MARKDOWN_DEFAULT_STYLE``) that controls safe mode, nofollow,
    Texoid, math and Camo. Spoiler blocks are expanded first. When Camo
    rewriting or *lazy_load* is active, the rendered HTML is parsed with lxml,
    passed through those processors and serialised again.

    Returns the result wrapped in ``Markup``.
    """
    options = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)
    safe_mode = options.get("safe_mode", True)
    add_nofollow = options.get("nofollow", True)
    use_texoid = TEXOID_ENABLED and options.get("texoid", False)
    use_math = hasattr(settings, "MATHOID_URL") and options.get("math", False)

    value = create_spoiler(value, style)

    tree_processors = []
    if options.get("use_camo", False) and camo_client is not None:
        tree_processors.append(camo_client.update_tree)
    if lazy_load:
        tree_processors.append(lazy_load_processor)

    parser = mistune.Markdown(
        renderer=AwesomeRenderer(
            escape=safe_mode,
            nofollow=add_nofollow,
            texoid=use_texoid,
            math=use_math and math_engine is not None,
            math_engine=math_engine,
        ),
        inline=AwesomeInlineLexer,
        parse_block_html=1,
        parse_inline_html=1,
        hard_wrap=hard_wrap,
    )
    rendered = parser(value)

    # No tree processors: skip the lxml round trip.
    if not tree_processors:
        return Markup(rendered)

    try:
        tree = html.fromstring(rendered, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as e:
        # An empty document is expected for empty output; anything else is
        # worth logging. Either way, continue with an empty <div>.
        empty_document = isinstance(e, ParserError) and e.args[0] == "Document is empty"
        if rendered and not empty_document:
            logger.exception("Failed to parse HTML string")
        tree = html.Element("div")

    for process in tree_processors:
        process(tree)
    return Markup(html.tostring(tree, encoding="unicode"))
|
||||
def markdown(value, hard_wrap=False):
    """Render *value* with Python-Markdown and wrap it in ``div.md-typeset``.

    The input is first cleaned with ``bleach.clean`` (``ALLOWED_TAGS`` /
    ``ALLOWED_ATTRS``), then rendered with ``EXTENSIONS``; ``hard_wrap`` adds
    the ``nl2br`` extension. If rendering yields an empty string, the
    HTML-escaped original text is used instead.

    NOTE(review): sanitisation runs on the Markdown source, not on the
    rendered HTML, so markup generated by the renderer itself is not passed
    through bleach -- confirm this is intended.
    """
    active_extensions = (EXTENSIONS + ["nl2br"]) if hard_wrap else EXTENSIONS

    cleaned = bleach.clean(value, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRS)
    rendered = _markdown.markdown(cleaned, extensions=active_extensions)

    # Fall back to the escaped raw input when nothing was rendered.
    body = rendered or escape(value)
    return '<div class="md-typeset">%s</div>' % body
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
from copy import deepcopy
|
||||
|
||||
from django.templatetags.static import static
|
||||
from lxml import html
|
||||
|
||||
|
||||
def lazy_load(tree):
    """Rewrite ``<img>`` elements under *tree* in place for deferred loading.

    For every image that is neither a ``data:`` URI nor carries ``-math`` in
    its class attribute:

    * a ``<noscript>`` holding an unmodified copy of the image is inserted
      just before it;
    * the real URL moves to ``data-src`` and ``src`` becomes the static
      ``blank.gif``;
    * ``unveil`` is appended to (or set as) the ``class`` attribute.

    *tree* is an lxml element; nothing is returned.
    """
    blank = static("blank.gif")
    for img in tree.xpath(".//img"):
        src = img.get("src", "")
        css_class = img.get("class", "")
        # Test for the ``data:`` scheme itself (case-insensitively). A bare
        # "data" prefix also matched ordinary URLs such as "data/plot.png"
        # or "database.png" and left them eagerly loaded.
        if src.lower().startswith("data:") or "-math" in css_class:
            continue

        noscript = html.Element("noscript")
        fallback = deepcopy(img)
        # deepcopy keeps the trailing text; that text must stay with the
        # original image only.
        fallback.tail = ""
        noscript.append(fallback)
        img.addprevious(noscript)

        img.set("data-src", src)
        img.set("src", blank)
        img.set("class", (css_class + " unveil") if css_class else "unveil")
|
|
@ -1,69 +0,0 @@
|
|||
import re
|
||||
|
||||
import mistune
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from judge.utils.mathoid import MathoidMathParser
|
||||
|
||||
# Import-time side effect: adds "latex" to mistune's private ``_pre_tags``
# list. NOTE(review): presumably this makes mistune treat <latex> like <pre>,
# so its content reaches block_html() unparsed -- confirm against mistune 0.8.
mistune._pre_tags.append("latex")
|
||||
|
||||
|
||||
class MathInlineGrammar(mistune.InlineGrammar):
    """Inline grammar extended with math delimiters.

    Adds two rules and overrides ``text`` so that plain text also stops before
    ``$``, ``~`` and the ``\\(`` / ``\\[`` openers.
    """

    # Display math: $$...$$ or \[...\]
    block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\[(.*?)\\\]", re.DOTALL)

    # Inline math: ~...~, $...$ or \(...\)
    math = re.compile(r"^~(.*?)~|^\$(.*?)\$|^\\\((.*?)\\\)", re.DOTALL)

    # Plain-text run, ending just before anything that may start another rule.
    text = re.compile(r"^[\s\S]+?(?=[\\<!\[_*`$~]|\\[\[(]|https?://| {2,}\n|$)")
|
||||
|
||||
|
||||
class MathInlineLexer(mistune.InlineLexer):
    """Inline lexer that recognises the math rules of ``MathInlineGrammar``."""

    grammar_class = MathInlineGrammar

    def __init__(self, *args, **kwargs):
        """Register ``block_math`` and ``math`` right after ``strikethrough``.

        The rule list is copied onto the instance first so the class-level
        list is not mutated; the same list is also used for inline HTML.
        """
        rules = self.default_rules[:]
        self.default_rules = rules
        self.inline_html_rules = rules
        slot = rules.index("strikethrough") + 1
        # Resulting order: strikethrough, block_math, math.
        rules[slot:slot] = ["block_math", "math"]
        super(MathInlineLexer, self).__init__(*args, **kwargs)

    def output_block_math(self, m):
        """Render a display-math match (``$$...$$`` or ``\\[...\\]``)."""
        formula = m.group(1) or m.group(2)
        return self.renderer.block_math(formula)

    def output_math(self, m):
        """Render an inline-math match (``~...~``, ``$...$`` or ``\\(...\\)``)."""
        formula = m.group(1) or m.group(2) or m.group(3)
        return self.renderer.math(formula)

    def output_inline_html(self, m):
        """Render an inline HTML match, lexing its inner text when enabled.

        When inline-HTML parsing is on and the tag has inner text, that text
        is run through the lexer (with ``_in_link`` set while inside ``<a>``)
        and the tag is rebuilt around it; otherwise the match is passed
        through verbatim.
        """
        tag = m.group(1)
        inner = m.group(3)
        if not (self._parse_inline_html and inner):
            return self.renderer.inline_html(m.group(0))

        inside_anchor = tag == "a"
        if inside_anchor:
            self._in_link = True
        inner = self.output(inner)
        if inside_anchor:
            self._in_link = False

        attrs = m.group(2) or ""
        return self.renderer.inline_html("<%s%s>%s</%s>" % (tag, attrs, inner, tag))
|
||||
|
||||
|
||||
class MathRenderer(mistune.Renderer):
    """Renderer mixin that outputs math through Mathoid when enabled."""

    def __init__(self, *args, **kwargs):
        """Consume the ``math`` option (and ``math_engine`` when math is on).

        ``self.mathoid`` is a ``MathoidMathParser`` (engine defaults to
        ``"svg"``) only if ``math`` is truthy and ``settings.MATHOID_URL`` is
        not ``False``; otherwise it is ``None``. ``math_engine`` is popped only
        in the enabled case, exactly as before.
        """
        self.mathoid = None
        if kwargs.pop("math", False) and settings.MATHOID_URL != False:
            engine = kwargs.pop("math_engine", None) or "svg"
            self.mathoid = MathoidMathParser(engine)
        super(MathRenderer, self).__init__(*args, **kwargs)

    def block_math(self, math):
        """Render display math; without Mathoid, emit escaped ``$$...$$``."""
        if self.mathoid is not None and math:
            return self.mathoid.display_math(math)
        return r"$$%s$$" % mistune.escape(str(math))

    def math(self, math):
        """Render inline math; without Mathoid, emit escaped ``~...~``."""
        if self.mathoid is not None and math:
            return self.mathoid.inline_math(math)
        return r"~%s~" % mistune.escape(str(math))
|
Loading…
Add table
Add a link
Reference in a new issue