diff --git a/dmoj/urls.py b/dmoj/urls.py index 585a890..2b2c52f 100644 --- a/dmoj/urls.py +++ b/dmoj/urls.py @@ -45,6 +45,7 @@ from judge.views import ( license, mailgun, markdown_editor, + test_formatter, notification, organization, preview, @@ -434,6 +435,26 @@ urlpatterns = [ markdown_editor.MarkdownEditor.as_view(), name="markdown_editor", ), + url( + r"^test_formatter/", + include( + [ + url( + r"^$", test_formatter.TestFormatter.as_view(), name="test_formatter" + ), + url( + r"^edit_page$", + test_formatter.EditTestFormatter.as_view(), + name="edit_page", + ), + url( + r"^download_page$", + test_formatter.DownloadTestFormatter.as_view(), + name="download_page", + ), + ] + ), + ), url( r"^submission_source_file/(?P(\w|\.)+)", submission.SubmissionSourceFileView.as_view(), diff --git a/judge/forms.py b/judge/forms.py index 2d7c92d..8daeaba 100644 --- a/judge/forms.py +++ b/judge/forms.py @@ -38,6 +38,7 @@ from judge.models import ( Submission, BlogPost, ContestProblem, + TestFormatterModel, ) from judge.widgets import ( diff --git a/judge/migrations/0174_auto_20231121_1422.py b/judge/migrations/0174_auto_20231121_1422.py new file mode 100644 index 0000000..4407316 --- /dev/null +++ b/judge/migrations/0174_auto_20231121_1422.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.20 on 2023-11-21 07:22 + +from django.db import migrations, models +import judge.models.test_formatter + + +class Migration(migrations.Migration): + + dependencies = [ + ("judge", "0173_fulltext"), + ] + + operations = [ + migrations.CreateModel( + name="TestFormatterModel", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "file", + models.FileField( + blank=True, + null=True, + upload_to=judge.models.test_formatter.test_formatter_path, + verbose_name="testcase file", + ), + ), + ], + ) + ] diff --git a/judge/models/__init__.py b/judge/models/__init__.py index be073b0..bf2360a 100644 --- 
def get_renamed_archive(file_str, file_name, file_path, bef, aft):
    """Rewrite an uploaded zip archive with its entries renamed.

    Each entry named in ``bef`` is copied into a fresh archive under the
    matching name from ``aft`` (the two lists are paired positionally). The
    original archive is then deleted and the new one is moved to
    ``test_formatter/<file_name>`` under ``settings.MEDIA_ROOT``.

    Args:
        file_str: Path of the source archive, relative to ``MEDIA_ROOT``.
        file_name: File name the rewritten archive should end up with.
        file_path: Unused; kept so existing callers keep working.
        bef: Entry names to read from the source archive.
        aft: New entry names, in the same order as ``bef``.

    Returns:
        ``{"file_path": "test_formatter/<file_name>"}``, i.e. the location of
        the rewritten archive relative to ``MEDIA_ROOT``.

    Raises:
        KeyError: if a name in ``bef`` is not present in the source archive.
        OSError / zipfile.BadZipFile: if an archive cannot be opened/written.
    """
    target_file_id = str(uuid.uuid4())
    source_path = os.path.join(settings.MEDIA_ROOT, file_str)
    # The unique suffix keeps concurrent conversions of the same upload from
    # writing to the same temporary file.
    target_path = os.path.join(settings.MEDIA_ROOT, file_str + "_" + target_file_id)
    new_path = os.path.join(settings.MEDIA_ROOT, "test_formatter/" + file_name)

    try:
        # Both archives are closed (and the central directory of the target
        # flushed) before any file is removed or moved.
        with ZipFile(source_path, "r") as source, ZipFile(
            target_path, "w", ZIP_DEFLATED
        ) as target:
            for bef_name, aft_name in zip(bef, aft):
                target.writestr(aft_name, source.read(bef_name))
    except Exception:
        # Do not leave a half-written temporary archive behind; the source
        # archive is still untouched at this point.
        if os.path.exists(target_path):
            os.remove(target_path)
        raise

    os.remove(source_path)
    # os.replace overwrites an existing destination on every platform.
    os.replace(target_path, new_path)

    return {"file_path": "test_formatter/" + file_name}

{bef} => {aft}

\n" + + return render( + request, + "test_formatter/edit_test_formatter.html", + { + "title": _("Test Formatter"), + "check": 0, + "files_list": bef_file, + "file_name": filename, + "res": response, + }, + ) + + def post(self, request, *args, **kwargs): + action = request.POST.get("action") + if action == "convert": + try: + file = TestFormatterModel.objects.last() + filestr = str(file.file) + filename = filestr.split("/")[-1] + filepath = filestr.split("/")[0] + bef_inp_format = request.POST["bef_inp_format"] + bef_out_format = request.POST["bef_out_format"] + aft_inp_format = request.POST["aft_inp_format"] + aft_out_format = request.POST["aft_out_format"] + aft_file_name = request.POST["file_name"] + except KeyError: + return HttpResponseBadRequest("No data.") + + if filename != aft_file_name: + source_path = os.path.join(settings.MEDIA_ROOT, filestr) + new_path = os.path.join( + settings.MEDIA_ROOT, "test_formatter/" + aft_file_name + ) + os.rename(source_path, new_path) + filename = aft_file_name + + preview_data = { + "bef_inp_format": bef_inp_format, + "bef_out_format": bef_out_format, + "aft_inp_format": aft_inp_format, + "aft_out_format": aft_out_format, + "file_name": filename, + "file_path": filepath, + "file_str": filepath + "/" + filename, + } + + converted_zip = tf_logic.convert(preview_data) + + global file_path + file_path = converted_zip["file_path"] + + zip_instance = TestFormatterModel() + zip_instance.file = file_path + zip_instance.save() + + preview = tf_logic.preview(preview_data) + response = HttpResponse() + + for i in range(len(preview["bef_preview"])): + bef = preview["bef_preview"][i]["value"] + aft = preview["aft_preview"][i]["value"] + response.write(f"

{bef} => {aft}

") + + return response + + elif action == "download": + return HttpResponse(file_path) + + return HttpResponseBadRequest("Invalid action") + + +class DownloadTestFormatter(View): + def get(self, request): + file_path = request.GET.get("file_path") + file_name = file_path.split("/")[-1] + preview_file = tf_logic.preview_file(file_path) + + response = "" + for i in range(len(preview_file)): + response = response + (f"

{preview_file[i]}

def post(self, request):
    """Send a converted archive back to the client as a zip download.

    ``post`` method of ``DownloadTestFormatter``. The archive location is
    taken from the ``file_path`` POST field. Because that value is supplied
    by the client, it is resolved and must lie inside
    ``MEDIA_ROOT/test_formatter`` before anything is opened; otherwise any
    file readable by the server process could be downloaded.

    Returns:
        An ``application/zip`` attachment response on success, or
        ``HttpResponseBadRequest`` when the field is missing, points outside
        the test-formatter directory, or does not name an existing file.
    """
    file_path = request.POST.get("file_path")
    if not file_path:
        return HttpResponseBadRequest("No data.")

    allowed_root = os.path.realpath(
        os.path.join(settings.MEDIA_ROOT, "test_formatter")
    )
    # os.path.join keeps an absolute file_path unchanged and anchors a
    # relative one at MEDIA_ROOT, so both forms are accepted.
    # NOTE(review): the template is not visible here; presumably it posts
    # the absolute path passed to it by get() -- confirm.
    real_path = os.path.realpath(os.path.join(settings.MEDIA_ROOT, file_path))
    try:
        inside_root = os.path.commonpath([allowed_root, real_path]) == allowed_root
    except ValueError:
        # Raised e.g. for paths on different drives: definitely not inside.
        inside_root = False

    if not inside_root or not os.path.isfile(real_path):
        return HttpResponseBadRequest("Invalid file path.")

    with open(real_path, "rb") as zip_file:
        response = HttpResponse(zip_file.read(), content_type="application/zip")
    response[
        "Content-Disposition"
    ] = f"attachment; filename={os.path.basename(file_path)}"
    return response
def preview(params):
    """Build the before/after file-name preview for a rename request.

    Args:
        params: Mapping with the keys ``bef_inp_format``, ``bef_out_format``,
            ``aft_inp_format``, ``aft_out_format`` (pattern strings) and
            ``file_str`` (archive path handed to ``TestSuite.get_test_suite``).

    Returns:
        ``{"bef_preview": [...], "aft_preview": [...]}`` where each list holds
        the dicts produced by ``TestSuite.get_name_list(add_extra_info=True)``.
        ``aft_preview`` is empty when the "after" patterns are invalid. When
        the "before" patterns are invalid, ``bef_preview`` falls back to the
        listing produced by the ``"*"`` / ``"*"`` pattern pair.

    Raises:
        KeyError: if a required key is missing from ``params``.
    """
    bif = params["bef_inp_format"]
    bof = params["bef_out_format"]
    aif = params["aft_inp_format"]
    aof = params["aft_out_format"]
    file_str = params["file_str"]

    try:
        test_suite = TestSuite.get_test_suite(file_str, bif, bof)
        bef_preview = test_suite.get_name_list(add_extra_info=True)
    except Exception:
        # Invalid "before" patterns: fall back to the catch-all pattern pair.
        # (The original referenced an undefined ``file_id`` here, so this
        # branch always failed with NameError.)
        test_suite = TestSuite.get_test_suite(file_str, "*", "*")
        fallback = test_suite.get_name_list(add_extra_info=True)
        return {"bef_preview": fallback, "aft_preview": []}

    try:
        test_suite.pattern_pair = pattern.PatternPair.from_string_pair(aif, aof)
        aft_preview = test_suite.get_name_list(add_extra_info=True)
    except Exception:
        # Invalid "after" patterns: still show what the archive contains.
        return {"bef_preview": bef_preview, "aft_preview": []}

    return {"bef_preview": bef_preview, "aft_preview": aft_preview}
class Pattern:
    """A file-name pattern of the form ``<ll><mm><rr>``.

    ``ll`` and ``rr`` are the literal prefix and suffix; ``mm`` is the
    wildcard between them and must be one of ``VALID_MM`` (``"*"`` or one of
    the numbered wildcards in ``NUMBERED_MM``).
    """

    def __init__(self, ll, mm, rr):
        assert mm in VALID_MM, "Invalid wildcard"
        self.ll, self.mm, self.rr = ll, mm, rr

    def __repr__(self):
        parts = (self.ll, self.mm, self.rr)
        return "Pattern('{}', '{}', '{}')".format(*parts)

    def __eq__(self, other):
        # Equality (and hashing below) is defined through the repr string.
        mine = self.__repr__()
        theirs = other.__repr__()
        return mine == theirs

    def __hash__(self):
        return hash(self.__repr__())

    @classmethod
    def from_string(cls, text):
        """Parse ``text`` into a Pattern, splitting around its wildcard.

        ``"*"`` is tried first, then the numbered wildcards from longest to
        shortest. Raises ``Exception`` when the chosen wildcard occurs more
        than once or when no wildcard is present.
        """
        candidates = ["*"]
        candidates.extend(sorted(NUMBERED_MM, key=len, reverse=True))
        for wildcard in candidates:
            if wildcard not in text:
                continue
            if text.count(wildcard) > 1:
                raise Exception(MSG_TOO_MANY_OCCURRENCES.format(wildcard))
            head, _, tail = text.partition(wildcard)
            return cls(head, wildcard, tail)
        raise Exception(MSG_MM_NOT_FOUND.format(",".join(VALID_MM)))

    def to_string(self):
        """Return the pattern as text: prefix, wildcard, suffix."""
        return "".join((self.ll, self.mm, self.rr))

    def is_valid_test_id(self, test_id):
        """Tell whether ``test_id`` may stand in for this pattern's wildcard."""
        if self.mm == "*":
            return True
        if self.mm not in NUMBERED_MM:
            raise NotImplementedError
        # Numbered wildcard: digits only, at least as wide as the wildcard.
        return test_id.isdigit() and len(test_id) >= len(self.mm)

    def matched(self, name):
        """Tell whether ``name`` fits this pattern."""
        if not name.startswith(self.ll):
            return False
        if not name.endswith(self.rr):
            return False
        # Prefix and suffix must not overlap inside the name.
        if len(name) < len(self.ll) + len(self.rr):
            return False
        return self.is_valid_test_id(self.get_test_id(name))

    def get_test_id(self, name):
        """Return the part of ``name`` between the prefix and the suffix."""
        start = len(self.ll)
        stop = len(name) - len(self.rr)
        return name[start:stop]

    def get_test_id_from_index(self, index):
        """Return the zero-padded id for position ``index`` (numbered only)."""
        assert self.mm in NUMBERED_MM, "Wildcard is not a number"
        width = len(self.mm)
        number = int(self.mm) + index
        return str(number).zfill(width)

    def get_name(self, test_id, index=None, use_index=False):
        """Build a file name from ``test_id``, or from ``index`` when
        ``use_index`` is set and the wildcard is a numbered one."""
        middle = test_id
        if use_index and self.mm in NUMBERED_MM:
            middle = self.get_test_id_from_index(index)
        return self.ll + middle + self.rr

    def matches(self, names, returns):
        """Return the test ids of all ``names`` that fit this pattern.

        Only ``returns == "test_id"`` is supported; anything else raises
        ``NotImplementedError``.
        """
        if returns != "test_id":
            raise NotImplementedError
        return [self.get_test_id(n) for n in names if self.matched(n)]
def maximal(a, key):
    """Return the single element of ``a`` whose ``key`` value is highest.

    ``a`` is consumed exactly once and ``key`` is called exactly once per
    element, so one-shot iterators and expensive key functions both work.

    Args:
        a: Iterable of candidates.
        key: Callable mapping a candidate to a comparable score.

    Returns:
        The unique candidate with the maximum score.

    Raises:
        ValueError: if ``a`` is empty (propagated from ``max``).
        Exception: if more than one candidate reaches the maximum score.
    """
    scored = [(key(x), x) for x in a]
    max_score = max(score for score, _ in scored)
    result = [x for score, x in scored if score == max_score]
    if len(result) == 1:
        return result[0]
    raise Exception("More than one maximum values")
def list_dir_recursively(folder):
    """List every file below ``folder`` as a path relative to it.

    The walk is performed from inside ``folder``, so every returned path
    starts with ``"."`` (for example ``"./sub/name"``).

    Args:
        folder: Directory to scan.

    Returns:
        A list of relative file paths, in ``os.walk`` order.

    Raises:
        OSError: if ``folder`` cannot be entered (propagated from
            ``os.chdir``).
    """
    old_cwd = os.getcwd()
    os.chdir(folder)
    try:
        return [
            os.path.join(root, filename)
            for root, _, filenames in os.walk(".")
            for filename in filenames
        ]
    finally:
        # The working directory is process-wide state: always restore it,
        # even when the walk fails part-way through.
        os.chdir(old_cwd)
def get_char_kind(char):
    """Classify a character: 1 for a digit, 2 for a letter, 3 otherwise."""
    if char.isdigit():
        return 1
    if char.isalpha():
        return 2
    return 3


def natural_sorting_key(name):
    """Return a sort key that orders embedded numbers by value.

    ``name`` is cut into maximal runs of same-kind characters (digits,
    letters, everything else). Digit runs are left-padded with zeros to 16
    characters so that they compare numerically as strings; other runs are
    kept unchanged.
    """
    chunks = []
    start = 0
    length = len(name)
    for pos in range(1, length + 1):
        # Close the current run at the end of the string or whenever the
        # character kind changes.
        at_end = pos == length
        if at_end or get_char_kind(name[pos]) != get_char_kind(name[pos - 1]):
            chunks.append(name[start:pos])
            start = pos

    key = []
    for chunk in chunks:
        key.append(chunk.zfill(16) if chunk.isdigit() else chunk)
    return key