NDOJ/judge/utils/problem_data.py

366 lines
13 KiB
Python
Raw Normal View History

2021-06-28 04:44:25 +00:00
import hashlib
2020-01-21 06:35:58 +00:00
import json
import os
import re
import yaml
2021-06-01 20:34:42 +00:00
import zipfile
import shutil
2021-06-01 20:34:42 +00:00
2020-01-21 06:35:58 +00:00
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage
from django.urls import reverse
from django.utils.translation import gettext as _
2021-06-01 20:34:42 +00:00
from django.core.cache import cache
2020-03-18 02:41:09 +00:00
2023-10-27 23:02:02 +00:00
from judge.logging import log_exception
2020-01-21 06:35:58 +00:00
if not os.altsep:

    def split_path_first(path):
        """Split *path* at its first ``os.sep`` into at most two parts.

        Returns a list: ``[first_component, remainder]`` when a separator is
        present, otherwise a one-element list holding *path* unchanged.
        """
        return path.split(os.sep, maxsplit=1)

else:

    def split_path_first(
        path, repath=re.compile("[%s]" % re.escape(os.sep + os.altsep))
    ):
        """Split *path* at its first separator into at most two parts.

        This variant is defined when the platform has an alternative
        separator (``os.altsep``); either separator ends the first component.
        The pattern is compiled once, as the default value of *repath*.

        Returns a list: ``[first_component, remainder]`` when a separator is
        present, otherwise a one-element list holding *path* unchanged.
        """
        return repath.split(path, maxsplit=1)
class ProblemDataStorage(FileSystemStorage):
    """File-system storage rooted at ``settings.DMOJ_PROBLEM_DATA_ROOT``.

    Saving under an existing name replaces the stored file: ``_save`` deletes
    any existing file first and ``get_available_name`` hands back the
    requested name unchanged.
    """

    def __init__(self):
        super().__init__(settings.DMOJ_PROBLEM_DATA_ROOT)

    def url(self, name):
        """Return the ``problem_data_file`` URL for the stored file *name*.

        *name* must contain a path separator; its first component and the
        remainder are passed as the two URL arguments.

        Raises:
            ValueError: if *name* has no separator to split on.
        """
        parts = split_path_first(name)
        if len(parts) == 2:
            return reverse("problem_data_file", args=parts)
        raise ValueError("This file is not accessible via a URL.")

    def _save(self, name, content):
        """Store *content* under *name*, deleting any existing file first."""
        if self.exists(name):
            self.delete(name)
        return super()._save(name, content)

    def get_available_name(self, name, max_length=None):
        """Return *name* as given; *max_length* is ignored."""
        return name

    def rename(self, old, new):
        """Rename the stored file *old* to *new* using ``os.rename``."""
        source = self.path(old)
        target = self.path(new)
        return os.rename(source, target)

    def delete_directory(self, name):
        """Recursively remove the directory *name*; a missing one is ignored."""
        target = self.path(name)
        try:
            shutil.rmtree(target)
        except FileNotFoundError:
            # Nothing to remove: the directory is already gone.
            pass
2020-01-21 06:35:58 +00:00
class ProblemDataError(Exception):
    """Error describing why a problem's data could not be compiled.

    The text is available both through the normal exception arguments and
    through the ``message`` attribute.
    """

    def __init__(self, message):
        self.message = message
        super().__init__(message)
class ProblemDataCompiler(object):
    """Compile a problem's data record and test-case rows into ``init.yml``.

    ``make_init`` builds the init mapping, ``compile`` serialises it with
    ``yaml.safe_dump`` and stores it (or records the failure reason on the
    data record), and ``generate`` is a one-call convenience wrapper.

    Attributes:
        problem: object whose ``code`` names the directory of ``init.yml``.
        data: the problem data record (global limits, checker, archive, ...).
        cases: iterable of case rows; each row's ``type`` is ``"C"`` (a test
            case), ``"S"`` (starts a batch) or ``"E"`` (ends a batch).
        files: collection of file names that cases may reference.
        generator: ``data.generator``; when truthy, case input/output files
            are not required to appear in ``files``.
    """

    # Checker ids backed by an uploaded C++ source, mapped to the "type"
    # written into the arguments of the resulting "bridged" checker.
    _BRIDGED_CHECKER_TYPES = {"customcpp": "lqdoj", "testlib": "testlib"}

    def __init__(self, problem, data, cases, files):
        self.problem = problem
        self.data = data
        self.cases = cases
        self.files = files
        self.generator = data.generator

    @staticmethod
    def _relative_data_path(name, error_message):
        """Return stored file *name* without its first path component.

        Raises:
            ProblemDataError: with *error_message* when *name* contains no
                path separator.
        """
        parts = split_path_first(name)
        if len(parts) != 2:
            raise ProblemDataError(error_message)
        return parts[1]

    def _make_checker(self, case):
        """Return the checker entry for *case* (a case row or the data record).

        The result is a file path for ``"custom"``, a ``"bridged"`` mapping
        for the C++-backed checkers, a name/args mapping when
        ``checker_args`` is set, and the bare checker name otherwise.
        """
        checker = case.checker
        if checker == "custom":
            return self._relative_data_path(
                case.custom_checker.name,
                _("How did you corrupt the custom checker path?"),
            )

        bridged_type = self._BRIDGED_CHECKER_TYPES.get(checker)
        if bridged_type is not None:
            return {
                "name": "bridged",
                "args": {
                    "files": self._relative_data_path(
                        case.custom_checker_cpp.name,
                        _("How did you corrupt the custom checker path?"),
                    ),
                    "lang": "CPP14",
                    "type": bridged_type,
                },
            }

        if case.checker_args:
            return {
                "name": checker,
                "args": json.loads(case.checker_args),
            }
        return checker

    def _apply_case_options(self, case, target):
        """Copy the options shared by cases and batch starts into *target*.

        When the row has no checker its ``checker_args`` is reset to ``""``
        (the caller is responsible for saving the row).
        """
        if case.generator_args:
            target["generator_args"] = case.generator_args.splitlines()
        if case.output_limit is not None:
            target["output_limit_length"] = case.output_limit
        if case.output_prefix is not None:
            target["output_prefix_length"] = case.output_prefix
        if case.checker:
            target["checker"] = self._make_checker(case)
        else:
            case.checker_args = ""

    def _compile_cases(self):
        """Turn the case rows into the list of top-level test-case entries.

        Every entry (a single case or a batch) carries an ``"is_pretest"``
        key. Rows are normalised and saved as a side effect.

        Raises:
            ProblemDataError: on missing points, missing files, empty
                batches, or a batch end without a matching start.
        """
        cases = []
        batch = None

        def end_batch():
            if not batch["batched"]:
                raise ProblemDataError(_("Empty batches not allowed."))
            cases.append(batch)

        for i, case in enumerate(self.cases, 1):
            if case.type == "C":
                data = {}
                if batch:
                    # Inside a batch the case inherits the batch's pretest
                    # flag and defaults to zero points.
                    if case.points is None:
                        case.points = 0
                    case.is_pretest = batch["is_pretest"]
                else:
                    if case.points is None:
                        raise ProblemDataError(
                            _("Points must be defined for non-batch case #%d.") % i
                        )
                    data["is_pretest"] = case.is_pretest

                # Files are only checked when no generator is configured.
                if not self.generator:
                    if case.input_file not in self.files:
                        raise ProblemDataError(
                            _("Input file for case %d does not exist: %s")
                            % (i, case.input_file)
                        )
                    if case.output_file not in self.files:
                        raise ProblemDataError(
                            _("Output file for case %d does not exist: %s")
                            % (i, case.output_file)
                        )

                if case.input_file:
                    data["in"] = case.input_file
                if case.output_file:
                    data["out"] = case.output_file
                if case.points is not None:
                    data["points"] = case.points
                self._apply_case_options(case, data)
                case.save(update_fields=("checker_args", "is_pretest"))
                (batch["batched"] if batch else cases).append(data)
            elif case.type == "S":
                # A new batch start implicitly closes a still-open batch.
                if batch:
                    end_batch()
                if case.points is None:
                    raise ProblemDataError(
                        _("Batch start case #%d requires points.") % i
                    )
                batch = {
                    "points": case.points,
                    "batched": [],
                    "is_pretest": case.is_pretest,
                }
                self._apply_case_options(case, batch)
                case.input_file = ""
                case.output_file = ""
                case.save(update_fields=("checker_args", "input_file", "output_file"))
            elif case.type == "E":
                if not batch:
                    raise ProblemDataError(
                        _("Attempt to end batch outside of one in case #%d") % i
                    )
                case.is_pretest = batch["is_pretest"]
                case.input_file = ""
                case.output_file = ""
                case.generator_args = ""
                case.checker = ""
                case.checker_args = ""
                case.save()
                end_batch()
                batch = None

        # A batch left open at the end of the list is closed implicitly.
        if batch:
            end_batch()
        return cases

    def make_init(self):
        """Build and return the init mapping for this problem.

        Case rows are normalised and saved along the way, and
        ``data.checker_args`` is cleared when no global checker is set.

        Raises:
            ProblemDataError: when the cases or any stored file path are
                invalid.
        """
        cases = self._compile_cases()
        init = {}

        if self.data.zipfile:
            init["archive"] = self._relative_data_path(
                self.data.zipfile.name, _("How did you corrupt the zip path?")
            )

        if self.generator:
            init["generator"] = self._relative_data_path(
                self.generator.name, _("How did you corrupt the generator path?")
            )

        # Select pretests first, then drop the bookkeeping flag; the selected
        # entries are the same dict objects, so they lose the flag as well.
        pretests = [case for case in cases if case["is_pretest"]]
        for case in cases:
            del case["is_pretest"]
        if pretests:
            init["pretest_test_cases"] = pretests
        if cases:
            init["test_cases"] = cases

        if self.data.output_limit is not None:
            init["output_limit_length"] = self.data.output_limit
        if self.data.output_prefix is not None:
            init["output_prefix_length"] = self.data.output_prefix

        if self.data.checker:
            if self.data.checker == "interact":
                init["interactive"] = {
                    "files": self._relative_data_path(
                        self.data.interactive_judge.name,
                        _("Invalid interactor judge"),
                    ),
                    "feedback": True,
                    "type": "lqdoj",
                }
                init["unbuffered"] = True
            else:
                init["checker"] = self._make_checker(self.data)
        else:
            self.data.checker_args = ""

        if self.data.fileio_input:
            init.setdefault("file_io", {})["input"] = self.data.fileio_input
        if self.data.fileio_output:
            init.setdefault("file_io", {})["output"] = self.data.fileio_output

        if self.data.output_only:
            init["output_only"] = True

        if self.data.use_ioi_signature:
            handler = self._relative_data_path(
                self.data.signature_handler.name, _("Invalid signature handler")
            )
            header = self._relative_data_path(
                self.data.signature_header.name, _("Invalid signature header")
            )
            init["signature_grader"] = {
                "entry": handler,
                "header": header,
            }
        return init

    def compile(self):
        """Write ``<problem code>/init.yml``, or delete it when data is invalid.

        On ``ProblemDataError`` the message is stored in ``data.feedback`` and
        any existing init file is removed; on success the feedback is cleared
        and the dumped YAML is saved. The data record is saved in both cases.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import with judge.models; confirm before moving it).
        from judge.models import problem_data_storage

        yml_file = "%s/init.yml" % self.problem.code
        try:
            init = yaml.safe_dump(self.make_init())
        except ProblemDataError as e:
            self.data.feedback = e.message
            self.data.save()
            problem_data_storage.delete(yml_file)
        else:
            self.data.feedback = ""
            self.data.save()
            problem_data_storage.save(yml_file, ContentFile(init))

    @classmethod
    def generate(cls, *args, **kwargs):
        """Construct a compiler from the given arguments and run ``compile``."""
        compiler = cls(*args, **kwargs)
        compiler.compile()
2021-06-01 20:34:42 +00:00
def get_visible_content(data):
    """Return *data* as display text, truncated to the visible length.

    *data* is a bytes object (a falsy value is treated as empty). Line
    endings are normalised to ``\\n``, the bytes are decoded as UTF-8, and
    text longer than ``settings.TESTCASE_VISIBLE_LENGTH`` characters is cut
    to that length with three dots appended.

    Raises:
        UnicodeDecodeError: if the bytes are not valid UTF-8.
    """
    raw = data or b""
    # CRLF is collapsed to a lone CR first, then every CR becomes LF.
    normalized = raw.replace(b"\r\n", b"\r").replace(b"\r", b"\n")
    text = normalized.decode("utf-8")

    limit = settings.TESTCASE_VISIBLE_LENGTH
    if len(text) <= limit:
        return text
    return text[:limit] + "." * 3
def get_file_cachekey(file):
    """Return the hexadecimal SHA-1 digest of the encoded string *file*."""
    digest = hashlib.sha1()
    digest.update(file.encode())
    return digest.hexdigest()
2021-06-01 20:34:42 +00:00
2022-05-14 17:57:27 +00:00
2021-06-01 20:34:42 +00:00
def _read_utf8_prefix(stream, length):
    """Read *length* bytes from *stream*, extended to a UTF-8 boundary.

    A multi-byte character cut by the read boundary is completed by reading
    further single bytes. Returns the bytes once they decode cleanly, or
    ``None`` when they cannot be valid UTF-8.
    """
    chunk = stream.read(length)
    extra_bytes = 0
    while True:
        try:
            chunk.decode("utf-8")
        except UnicodeDecodeError:
            # A UTF-8 character is at most 4 bytes long, so at most 3 more
            # bytes can complete a truncated one; failing beyond that means
            # the data itself is invalid and reading on cannot help.
            if extra_bytes == 3:
                return None
            next_byte = stream.read(1)
            if not next_byte:
                return None
            chunk += next_byte
            extra_bytes += 1
        else:
            return chunk


def get_problem_case(problem, files):
    """Return visible previews of *files* from the problem's data archive.

    Previews are cached for 86400 seconds under
    ``problem_archive:<problem code>:<file cache key>``; only files missing
    from the cache are read from the archive.

    Args:
        problem: object providing ``code`` and ``data_files.zipfile``.
        files: iterable of member names inside the archive.

    Returns:
        A dict mapping each file name to its preview text. An empty dict is
        returned (after logging) when uncached files are requested but the
        archive is missing or is not a valid zip file; previews already
        found in the cache are not returned in that case.
    """
    result = {}
    uncached = []
    for file in files:
        cache_key = "problem_archive:%s:%s" % (problem.code, get_file_cachekey(file))
        qs = cache.get(cache_key)
        if qs is None:
            uncached.append((file, cache_key))
        else:
            result[file] = qs

    if not uncached:
        return result

    archive_path = os.path.join(
        settings.DMOJ_PROBLEM_DATA_ROOT, str(problem.data_files.zipfile)
    )
    if not os.path.exists(archive_path):
        log_exception('archive file "%s" does not exist' % archive_path)
        return {}

    try:
        archive = zipfile.ZipFile(archive_path, "r")
    except zipfile.BadZipFile:
        log_exception('bad archive: "%s"' % archive_path)
        return {}

    # The context manager closes the archive even if a member read fails.
    with archive:
        for file, cache_key in uncached:
            with archive.open(file) as f:
                # Read a little more than the visible length so truncation
                # in get_visible_content sees whether more content follows.
                s = _read_utf8_prefix(f, settings.TESTCASE_VISIBLE_LENGTH + 3)
            if s is None:
                s = f"File {file} is not able to decode in utf-8"
                s = s.encode("utf-8")

            qs = get_visible_content(s)
            cache.set(cache_key, qs, 86400)
            result[file] = qs

    return result