NDOJ/judge/jinja2/markdown/__init__.py

113 lines
2.6 KiB
Python
Raw Permalink Normal View History

2020-01-21 06:35:58 +00:00
from .. import registry
2022-10-25 04:59:04 +00:00
import markdown as _markdown
import bleach
from django.utils.html import escape
2022-10-29 06:25:22 +00:00
from bs4 import BeautifulSoup
2022-11-01 03:26:26 +00:00
from pymdownx import superfences
2020-01-21 06:35:58 +00:00
2022-10-25 04:59:04 +00:00
# Markdown extensions enabled for every render, in registration order.
EXTENSIONS = [
    # PyMdown Extensions.
    "pymdownx.arithmatex",
    "pymdownx.magiclink",
    "pymdownx.betterem",
    "pymdownx.details",
    "pymdownx.emoji",
    "pymdownx.inlinehilite",
    "pymdownx.superfences",
    "pymdownx.tasklist",
    # Extensions bundled with Python-Markdown.
    "markdown.extensions.footnotes",
    "markdown.extensions.attr_list",
    "markdown.extensions.def_list",
    "markdown.extensions.tables",
    "markdown.extensions.admonition",
    "nl2br",
    # Separately installed extension.
    "mdx_breakless_lists",
]
2020-01-21 06:35:58 +00:00
2022-11-01 03:26:26 +00:00
# Per-extension options, keyed by extension name.
EXTENSION_CONFIGS = {
    "pymdownx.superfences": {
        # Custom fence named "sample": given the "no-border" class and
        # formatted by superfences' own code-fence formatter.
        "custom_fences": [
            {
                "name": "sample",
                "class": "no-border",
                "format": superfences.fence_code_format,
            },
        ],
    },
}
2022-10-25 23:07:45 +00:00
# HTML tags allowed through sanitization: bleach's defaults plus the extras
# listed below.
# ``list(...)`` is required because bleach >= 6 exposes its default tags as a
# frozenset, and ``frozenset + list`` raises TypeError; with older releases
# (where the default is already a list) the wrap is a harmless copy.
ALLOWED_TAGS = list(bleach.sanitizer.ALLOWED_TAGS) + [
    "img",
    "center",
    "iframe",
    "div",
    "span",
    "table",
    "tr",
    "td",
    "th",
    "pre",
    "code",
    "p",
    "hr",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "thead",
    "tbody",
    "sup",
    "dl",
    "dt",
    "dd",
    "br",
    "details",
    "summary",
]
2020-01-21 06:35:58 +00:00
2022-10-26 01:26:49 +00:00
# Attribute names allowed through sanitization.
ALLOWED_ATTRS = [
    "src",
    "width",
    "height",
    "href",
    "class",
    "open",
]
2022-07-30 10:21:08 +00:00
2020-01-21 06:35:58 +00:00
@registry.filter
def markdown(value, lazy_load=False):
    """Render Markdown text to sanitized HTML wrapped in an ``md-typeset`` div.

    Args:
        value: Markdown source text.
        lazy_load: When true, move the ``src`` of every ``<img>`` and
            ``<iframe>`` that has one into ``data-src`` and blank ``src``.
            What later restores ``src`` is not visible from this function.

    Returns:
        The HTML string ``<div class="md-typeset">...</div>``.
    """
    html = _markdown.markdown(
        value, extensions=EXTENSIONS, extension_configs=EXTENSION_CONFIGS
    )

    # Don't clean mathjax: swap each math <script> tag for a placeholder
    # before sanitizing, then swap it back afterwards.
    # NOTE: the swap only happens when BeautifulSoup's serialization of the
    # tag matches the markup in ``html`` exactly; otherwise ``replace`` is a
    # no-op and the tag goes through bleach like any other markup.
    allow_math_types = ("math/tex", "math/tex; mode=display")
    hash_script_tag = {}
    for script_tag in BeautifulSoup(html, "html.parser").find_all("script"):
        if script_tag.attrs.get("type", False) in allow_math_types:
            serialized = str(script_tag)
            hash_script_tag[str(hash(serialized))] = serialized
    for hashed_tag, tag in hash_script_tag.items():
        html = html.replace(tag, hashed_tag)

    html = bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRS)

    for hashed_tag, tag in hash_script_tag.items():
        html = html.replace(hashed_tag, tag)

    # Fall back to the escaped source text if rendering produced nothing.
    if not html:
        html = escape(value)

    if lazy_load:
        soup = BeautifulSoup(html, features="html.parser")
        # One pass over both tag kinds; each element is handled independently.
        for element in soup.find_all(["img", "iframe"]):
            if element.get("src"):
                element["data-src"] = element["src"]
                element["src"] = ""
        html = str(soup)

    return '<div class="md-typeset">%s</div>' % html