NDOJ/judge/jinja2/markdown/__init__.py

import logging
import re
from html.parser import HTMLParser
from urllib.parse import urlparse

import mistune
from django.conf import settings
from markupsafe import Markup
from lxml import html
from lxml.etree import ParserError, XMLSyntaxError

from judge.highlight_code import highlight_code
from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor
from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer
from judge.utils.camo import client as camo_client
from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer
from .. import registry

# Logger used by markdown() below to report HTML that lxml fails to parse.
logger = logging.getLogger("judge.html")

# Collection of URL netloc values that AwesomeRenderer._link_rel exempts from
# rel="nofollow"; taken verbatim from the Django setting NOFOLLOW_EXCLUDED.
NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED


class CodeSafeInlineGrammar(mistune.InlineGrammar):
    """Inline grammar whose emphasis rules only recognise asterisk delimiters.

    Both patterns below match ``*``-delimited text exclusively, so text
    delimited by other characters is not treated as emphasis by these rules.
    """

    # NOTE(review): the empty "()" group in each pattern presumably keeps the
    # group numbering the base lexer expects for emphasis matches -- confirm
    # against mistune's InlineGrammar before changing it.
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)()\*{2}(?!\*)")  # **word**
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)()\*(?!\*)")  # *word*


class AwesomeInlineGrammar(MathInlineGrammar, CodeSafeInlineGrammar):
    """Inline grammar combining the math grammar with the asterisk-only emphasis rules.

    Adds nothing of its own; it exists only to merge the two parent grammars.
    """

    pass


class AwesomeInlineLexer(MathInlineLexer, mistune.InlineLexer):
    """Inline lexer that tokenises with AwesomeInlineGrammar."""

    # Grammar class this lexer uses in place of the one set by its parents.
    grammar_class = AwesomeInlineGrammar


class AwesomeRenderer(MathRenderer, mistune.Renderer):
    """Mistune renderer with site-specific links, tables, code and LaTeX blocks.

    Extra keyword options (removed from ``kwargs`` before the parent
    initialisers run):

    * ``nofollow`` (default ``True``): when true, links to hosts that are not
      in ``NOFOLLOW_WHITELIST`` get ``rel="nofollow"``.
    * ``texoid`` (default ``False``): when true, ``<latex>`` HTML blocks are
      rendered to images through ``TexoidRenderer``.
    """

    def __init__(self, *args, **kwargs):
        # Pop our own options so they are not forwarded to the parent classes.
        self.nofollow = kwargs.pop("nofollow", True)
        self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None
        # Retained only for backward compatibility with code that may read this
        # attribute; block_html() no longer uses it because
        # HTMLParser.unescape() was removed in Python 3.9.
        self.parser = HTMLParser()
        super(AwesomeRenderer, self).__init__(*args, **kwargs)

    def _link_rel(self, href):
        """Return the ``rel`` attribute fragment (with leading space) for *href*.

        Returns ``' rel="nofollow"'`` when nofollow is enabled and *href* either
        cannot be parsed or points at a host outside ``NOFOLLOW_WHITELIST``;
        otherwise returns an empty string.
        """
        # Honour the ``nofollow`` option passed to the constructor; previously
        # it was stored but never consulted.
        if not self.nofollow or not href:
            return ""
        try:
            url = urlparse(href)
        except ValueError:
            # Unparseable URLs are treated as untrusted.
            return ' rel="nofollow"'
        # Relative links have no netloc and are never marked nofollow.
        if url.netloc and url.netloc not in NOFOLLOW_WHITELIST:
            return ' rel="nofollow"'
        return ""

    def autolink(self, link, is_email=False):
        """Render a bare URL or e-mail address as an anchor element."""
        text = link = mistune.escape(link)
        if is_email:
            link = "mailto:%s" % link
        return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)

    def table(self, header, body):
        """Render a table with the ``table`` CSS class on the ``<table>`` tag."""
        return (
            '<table class="table">\n<thead>%s</thead>\n'
            "<tbody>\n%s</tbody>\n</table>\n"
        ) % (header, body)

    def link(self, link, title, text):
        """Render an inline link, adding ``title`` and ``rel`` when applicable."""
        link = mistune.escape_link(link)
        if not title:
            return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)
        title = mistune.escape(title, quote=True)
        return '<a href="%s" title="%s"%s>%s</a>' % (
            link,
            title,
            self._link_rel(link),
            text,
        )

    def block_code(self, code, lang=None):
        """Render a code block; blocks with a language go through highlight_code."""
        if not lang:
            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()
        return highlight_code(code, lang)

    def block_html(self, html):
        """Render a raw HTML block, converting ``<latex>`` blocks via Texoid.

        When Texoid is enabled and *html* starts with ``<latex``, the text
        between the opening tag and the last ``<`` is unescaped and sent to
        Texoid. A successful result becomes an ``<img>`` wrapped in a ``span``
        (if the opening tag's attributes contain ``inline``) or a centred
        ``div``. A missing result or an error is shown inside ``<pre>``.
        Anything else is delegated to the parent renderer.
        """
        if self.texoid and html.startswith("<latex"):
            # Function-scope import: the name ``html`` is shadowed here by the
            # parameter (and by lxml at module level). html.unescape replaces
            # HTMLParser.unescape, which no longer exists on Python 3.9+.
            from html import unescape

            attr = html[6 : html.index(">")]
            latex = html[html.index(">") + 1 : html.rindex("<")]
            latex = unescape(latex)
            result = self.texoid.get_result(latex)
            if not result:
                return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)
            elif "error" not in result:
                meta = result["meta"]
                # Note the trailing space after the onerror attribute so that
                # it is properly separated from the width attribute.
                img = (
                    '''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null" '''
                    'width="%(width)s" height="%(height)s"%(tail)s>'
                ) % {
                    "svg": result["svg"],
                    "png": result["png"],
                    "width": meta["width"],
                    "height": meta["height"],
                    "tail": " /" if self.options.get("use_xhtml") else "",
                }
                style = [
                    "max-width: 100%",
                    "height: %s" % meta["height"],
                    "max-height: %s" % meta["height"],
                    # Use the reported width (the height was used here before).
                    "width: %s" % meta["width"],
                ]
                if "inline" in attr:
                    tag = "span"
                else:
                    tag = "div"
                    style += ["text-align: center"]
                return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)
            else:
                return "<pre>%s</pre>" % mistune.escape(
                    result["error"], smart_amp=False
                )
        return super(AwesomeRenderer, self).block_html(html)

    def header(self, text, level, *args, **kwargs):
        """Render a heading, passing ``level + 2`` to the parent renderer."""
        return super(AwesomeRenderer, self).header(text, level + 2, *args, **kwargs)


def create_spoiler(value, style):
    """Replace ``||summary ... ||`` spoiler blocks in *value* with ``<details>`` HTML.

    A spoiler starts with ``||`` at the beginning of a line; the rest of that
    text up to the first whitespace run accepted by the pattern is the summary,
    and everything up to the closing ``||`` is the detail. Each detail is
    rendered with ``markdown(detail, style)`` before being embedded.

    Returns *value* with every matched spoiler substituted; text without
    spoilers is returned unchanged.
    """
    pattern = re.compile(r"(^\|\|(.+)\s+([\s\S]+?)\s*\|\|)", re.MULTILINE)
    template = (
        '<details><summary style="color: brown">'
        '<span class="spoiler-summary">{summary}</span>'
        "</summary>{detail}</details>"
    )

    # Collect every spoiler from the original text before any substitution.
    spoilers = [found.groups() for found in pattern.finditer(value)]
    for block, title, body in spoilers:
        rendered_body = markdown(body, style)
        replacement = template.format(summary=title, detail=rendered_body)
        value = value.replace(block, replacement)
    return value


@registry.filter
def markdown(value, style, math_engine=None, lazy_load=False, hard_wrap=False):
    """Render markdown text *value* to HTML using the named *style*.

    The style is looked up in ``settings.MARKDOWN_STYLES`` (falling back to
    ``settings.MARKDOWN_DEFAULT_STYLE``) and controls escaping, nofollow,
    Texoid, math and camo handling. Spoiler blocks are expanded first, then
    the text is rendered with mistune. If camo rewriting or *lazy_load* is
    active, the rendered HTML is parsed with lxml, passed through each
    post-processor and serialised again.

    Returns the resulting HTML wrapped in ``Markup``.
    """
    style_config = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)
    safe_mode = style_config.get("safe_mode", True)
    add_nofollow = style_config.get("nofollow", True)
    use_texoid = TEXOID_ENABLED and style_config.get("texoid", False)
    use_math = hasattr(settings, "MATHOID_URL") and style_config.get("math", False)

    value = create_spoiler(value, style)

    # Tree transformations applied to the rendered HTML, in this order.
    tree_processors = []
    if style_config.get("use_camo", False) and camo_client is not None:
        tree_processors.append(camo_client.update_tree)
    if lazy_load:
        tree_processors.append(lazy_load_processor)

    render = mistune.Markdown(
        renderer=AwesomeRenderer(
            escape=safe_mode,
            nofollow=add_nofollow,
            texoid=use_texoid,
            math=use_math and math_engine is not None,
            math_engine=math_engine,
        ),
        inline=AwesomeInlineLexer,
        parse_block_html=1,
        parse_inline_html=1,
        hard_wrap=hard_wrap,
    )
    rendered = render(value)

    # Without post-processors the mistune output is returned as-is.
    if not tree_processors:
        return Markup(rendered)

    try:
        tree = html.fromstring(rendered, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as exc:
        # An empty document is expected for empty input and is not logged.
        if rendered and not (
            isinstance(exc, ParserError) and exc.args[0] == "Document is empty"
        ):
            logger.exception("Failed to parse HTML string")
        # Fall back to an empty container so the processors still have a tree.
        tree = html.Element("div")
    for process in tree_processors:
        process(tree)
    return Markup(html.tostring(tree, encoding="unicode"))
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`import logging`
			`import re`
			`from html.parser import HTMLParser`
			`from urllib.parse import urlparse`

			`import mistune`
			`from django.conf import settings`
Revert last 3 commits 2022-10-07 12:17:07 -05:00			`from markupsafe import Markup`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`from lxml import html`
			`from lxml.etree import ParserError, XMLSyntaxError`

			`from judge.highlight_code import highlight_code`
			`from judge.jinja2.markdown.lazy_load import lazy_load as lazy_load_processor`
			`from judge.jinja2.markdown.math import MathInlineGrammar, MathInlineLexer, MathRenderer`
			`from judge.utils.camo import client as camo_client`
			`from judge.utils.texoid import TEXOID_ENABLED, TexoidRenderer`
			`from .. import registry`

Reformat using black 2022-05-14 12:57:27 -05:00			`logger = logging.getLogger("judge.html")`
Cloned DMOJ 2020-01-21 15:35:58 +09:00
			`NOFOLLOW_WHITELIST = settings.NOFOLLOW_EXCLUDED`

Temporary change to wait for cached frontend 2022-07-30 21:01:37 +07:00
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`class CodeSafeInlineGrammar(mistune.InlineGrammar):`
Reformat using black 2022-05-14 12:57:27 -05:00			`double_emphasis = re.compile(r"^\{2}([\s\S]+?)()\{2}(?!\)") # word*`
			`emphasis = re.compile(r"^\((?:\\\|[^\])+?)()\(?!\)") # word`
Cloned DMOJ 2020-01-21 15:35:58 +09:00

Temporary change to wait for cached frontend 2022-07-30 21:01:37 +07:00			`class AwesomeInlineGrammar(MathInlineGrammar, CodeSafeInlineGrammar):`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`pass`


Improve spoiler 2022-07-30 17:21:08 +07:00			`class AwesomeInlineLexer(MathInlineLexer, mistune.InlineLexer):`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`grammar_class = AwesomeInlineGrammar`


Improve spoiler 2022-07-30 17:21:08 +07:00			`class AwesomeRenderer(MathRenderer, mistune.Renderer):`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`def __init__(self, args, *kwargs):`
Reformat using black 2022-05-14 12:57:27 -05:00			`self.nofollow = kwargs.pop("nofollow", True)`
			`self.texoid = TexoidRenderer() if kwargs.pop("texoid", False) else None`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`self.parser = HTMLParser()`
			`super(AwesomeRenderer, self).__init__(args, *kwargs)`

			`def _link_rel(self, href):`
			`if href:`
			`try:`
			`url = urlparse(href)`
			`except ValueError:`
			`return ' rel="nofollow"'`
			`else:`
			`if url.netloc and url.netloc not in NOFOLLOW_WHITELIST:`
			`return ' rel="nofollow"'`
Reformat using black 2022-05-14 12:57:27 -05:00			`return ""`
Cloned DMOJ 2020-01-21 15:35:58 +09:00
			`def autolink(self, link, is_email=False):`
			`text = link = mistune.escape(link)`
			`if is_email:`
Reformat using black 2022-05-14 12:57:27 -05:00			`link = "mailto:%s" % link`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)`

			`def table(self, header, body):`
			`return (`
			`'<table class="table">\n<thead>%s</thead>\n'`
Reformat using black 2022-05-14 12:57:27 -05:00			`"<tbody>\n%s</tbody>\n</table>\n"`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`) % (header, body)`

			`def link(self, link, title, text):`
			`link = mistune.escape_link(link)`
			`if not title:`
			`return '<a href="%s"%s>%s</a>' % (link, self._link_rel(link), text)`
			`title = mistune.escape(title, quote=True)`
Reformat using black 2022-05-14 12:57:27 -05:00			`return '<a href="%s" title="%s"%s>%s</a>' % (`
			`link,`
			`title,`
			`self._link_rel(link),`
			`text,`
			`)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00
			`def block_code(self, code, lang=None):`
			`if not lang:`
Reformat using black 2022-05-14 12:57:27 -05:00			`return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code).rstrip()`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`return highlight_code(code, lang)`

			`def block_html(self, html):`
Reformat using black 2022-05-14 12:57:27 -05:00			`if self.texoid and html.startswith("<latex"):`
			`attr = html[6 : html.index(">")]`
			`latex = html[html.index(">") + 1 : html.rindex("<")]`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`latex = self.parser.unescape(latex)`
			`result = self.texoid.get_result(latex)`
			`if not result:`
Reformat using black 2022-05-14 12:57:27 -05:00			`return "<pre>%s</pre>" % mistune.escape(latex, smart_amp=False)`
			`elif "error" not in result:`
			`img = (`
			`'''<img src="%(svg)s" onerror="this.src='%(png)s';this.onerror=null"'''`
			`'width="%(width)s" height="%(height)s"%(tail)s>'`
			`) % {`
			`"svg": result["svg"],`
			`"png": result["png"],`
			`"width": result["meta"]["width"],`
			`"height": result["meta"]["height"],`
			`"tail": " /" if self.options.get("use_xhtml") else "",`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`}`
Reformat using black 2022-05-14 12:57:27 -05:00			`style = [`
			`"max-width: 100%",`
			`"height: %s" % result["meta"]["height"],`
			`"max-height: %s" % result["meta"]["height"],`
			`"width: %s" % result["meta"]["height"],`
			`]`
			`if "inline" in attr:`
			`tag = "span"`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`else:`
Reformat using black 2022-05-14 12:57:27 -05:00			`tag = "div"`
			`style += ["text-align: center"]`
			`return '<%s style="%s">%s</%s>' % (tag, ";".join(style), img, tag)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`else:`
Reformat using black 2022-05-14 12:57:27 -05:00			`return "<pre>%s</pre>" % mistune.escape(`
			`result["error"], smart_amp=False`
			`)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`return super(AwesomeRenderer, self).block_html(html)`

			`def header(self, text, level, args, *kwargs):`
			`return super(AwesomeRenderer, self).header(text, level + 2, args, *kwargs)`


Improve spoiler 2022-07-30 17:21:08 +07:00			`def create_spoiler(value, style):`
			`respoiler = re.compile(r"(^\\|\\|(.+)\s+([\s\S]+?)\s*\\|\\|)", re.MULTILINE)`
			`matches = re.findall(respoiler, value)`
Temporary change to wait for cached frontend 2022-07-30 21:01:37 +07:00			`html = (`
			`'<details><summary style="color: brown">'`
			`+ '<span class="spoiler-summary">{summary}</span>'`
			`+ "</summary>{detail}</details>"`
			`)`
Improve spoiler 2022-07-30 17:21:08 +07:00
			`for entire, summary, detail in matches:`
			`detail = markdown(detail, style)`
			`new_html = html.format(summary=summary, detail=detail)`
			`value = value.replace(entire, new_html)`
			`return value`

Temporary change to wait for cached frontend 2022-07-30 21:01:37 +07:00
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`@registry.filter`
Improve spoiler 2022-07-30 17:21:08 +07:00			`def markdown(value, style, math_engine=None, lazy_load=False, hard_wrap=False):`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`styles = settings.MARKDOWN_STYLES.get(style, settings.MARKDOWN_DEFAULT_STYLE)`
Reformat using black 2022-05-14 12:57:27 -05:00			`escape = styles.get("safe_mode", True)`
			`nofollow = styles.get("nofollow", True)`
			`texoid = TEXOID_ENABLED and styles.get("texoid", False)`
			`math = hasattr(settings, "MATHOID_URL") and styles.get("math", False)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00
Improve spoiler 2022-07-30 17:21:08 +07:00			`value = create_spoiler(value, style)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`post_processors = []`
Reformat using black 2022-05-14 12:57:27 -05:00			`if styles.get("use_camo", False) and camo_client is not None:`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`post_processors.append(camo_client.update_tree)`
			`if lazy_load:`
			`post_processors.append(lazy_load_processor)`

Reformat using black 2022-05-14 12:57:27 -05:00			`renderer = AwesomeRenderer(`
			`escape=escape,`
			`nofollow=nofollow,`
			`texoid=texoid,`
			`math=math and math_engine is not None,`
			`math_engine=math_engine,`
			`)`
			`markdown = mistune.Markdown(`
			`renderer=renderer,`
			`inline=AwesomeInlineLexer,`
			`parse_block_html=1,`
			`parse_inline_html=1,`
Improve spoiler 2022-07-30 17:21:08 +07:00			`hard_wrap=hard_wrap,`
Reformat using black 2022-05-14 12:57:27 -05:00			`)`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`result = markdown(value)`
			`if post_processors:`
			`try:`
			`tree = html.fromstring(result, parser=html.HTMLParser(recover=True))`
			`except (XMLSyntaxError, ParserError) as e:`
Reformat using black 2022-05-14 12:57:27 -05:00			`if result and (`
			`not isinstance(e, ParserError) or e.args[0] != "Document is empty"`
			`):`
			`logger.exception("Failed to parse HTML string")`
			`tree = html.Element("div")`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`for processor in post_processors:`
			`processor(tree)`
Reformat using black 2022-05-14 12:57:27 -05:00			`result = html.tostring(tree, encoding="unicode")`
Cloned DMOJ 2020-01-21 15:35:58 +09:00			`return Markup(result)`