Test Formatter (#100)
This commit is contained in:
parent
14ecef649e
commit
04c6af1dff
19 changed files with 1624 additions and 0 deletions
268
judge/views/test_formatter/tf_pattern.py
Normal file
268
judge/views/test_formatter/tf_pattern.py
Normal file
|
@ -0,0 +1,268 @@
|
|||
import os
|
||||
import random
|
||||
from judge.views.test_formatter import tf_utils as utils
|
||||
|
||||
SAMPLE_SIZE = 16
|
||||
NUMBERED_MM = ["0", "1", "00", "01", "000", "001", "0000", "0001"]
|
||||
VALID_MM = ["*"] + NUMBERED_MM
|
||||
|
||||
MSG_TOO_MANY_OCCURRENCES = (
|
||||
"400: Invalid pattern: Pattern cannot have more than one '{}'"
|
||||
)
|
||||
MSG_MM_NOT_FOUND = "400: Invalid pattern: Wildcard not found. Wildcard list: {}"
|
||||
|
||||
|
||||
class Pattern:
|
||||
def __init__(self, ll, mm, rr):
|
||||
assert mm in VALID_MM, "Invalid wildcard"
|
||||
self.ll = ll
|
||||
self.mm = mm
|
||||
self.rr = rr
|
||||
|
||||
def __repr__(self):
|
||||
return "Pattern('{}', '{}', '{}')".format(self.ll, self.mm, self.rr)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.__repr__() == other.__repr__()
|
||||
|
||||
def __hash__(self):
|
||||
return self.__repr__().__hash__()
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, text):
|
||||
for mm in ["*"] + sorted(NUMBERED_MM, key=len, reverse=True):
|
||||
if mm in text:
|
||||
if text.count(mm) > 1:
|
||||
raise Exception(MSG_TOO_MANY_OCCURRENCES.format(mm))
|
||||
i = text.index(mm)
|
||||
return cls(text[:i], mm, text[i + len(mm) :])
|
||||
raise Exception(MSG_MM_NOT_FOUND.format(",".join(VALID_MM)))
|
||||
|
||||
def to_string(self):
|
||||
return self.ll + self.mm + self.rr
|
||||
|
||||
def is_valid_test_id(self, test_id):
|
||||
if self.mm == "*":
|
||||
return True
|
||||
if self.mm in NUMBERED_MM:
|
||||
return test_id.isdigit() and len(test_id) >= len(self.mm)
|
||||
raise NotImplementedError
|
||||
|
||||
def matched(self, name):
|
||||
return (
|
||||
name.startswith(self.ll)
|
||||
and name.endswith(self.rr)
|
||||
and len(name) >= len(self.ll) + len(self.rr)
|
||||
and self.is_valid_test_id(self.get_test_id(name))
|
||||
)
|
||||
|
||||
def get_test_id(self, name):
|
||||
return name[len(self.ll) : len(name) - len(self.rr)]
|
||||
|
||||
def get_test_id_from_index(self, index):
|
||||
assert self.mm in NUMBERED_MM, "Wildcard is not a number"
|
||||
return str(int(self.mm) + index).zfill(len(self.mm))
|
||||
|
||||
def get_name(self, test_id, index=None, use_index=False):
|
||||
if use_index and self.mm in NUMBERED_MM:
|
||||
return self.ll + self.get_test_id_from_index(index) + self.rr
|
||||
return self.ll + test_id + self.rr
|
||||
|
||||
def matches(self, names, returns):
|
||||
if returns == "test_id":
|
||||
result = [n for n in names]
|
||||
result = [n for n in result if self.matched(n)]
|
||||
result = [self.get_test_id(n) for n in result]
|
||||
return result
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class PatternPair:
|
||||
def __init__(self, x: Pattern, y: Pattern):
|
||||
assert x.mm == y.mm, "Input wildcard and output wildcard must be equal"
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
def __repr__(self):
|
||||
return "PatternPair({}, {})".format(self.x, self.y)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.__repr__() == other.__repr__()
|
||||
|
||||
def __hash__(self):
|
||||
return self.__repr__().__hash__()
|
||||
|
||||
@classmethod
|
||||
def from_string_pair(cls, inp_format, out_format):
|
||||
return cls(Pattern.from_string(inp_format), Pattern.from_string(out_format))
|
||||
|
||||
def matches(self, names, returns):
|
||||
x_test_ids = self.x.matches(names, returns="test_id")
|
||||
y_test_ids = self.y.matches(names, returns="test_id")
|
||||
|
||||
test_ids = set(x_test_ids) & set(y_test_ids)
|
||||
test_ids = list(sorted(test_ids, key=utils.natural_sorting_key))
|
||||
|
||||
if returns == "fast_count":
|
||||
if self.x.mm == "*":
|
||||
return len(test_ids)
|
||||
elif self.x.mm in NUMBERED_MM:
|
||||
count_valid = 0
|
||||
for t in test_ids:
|
||||
if t == self.x.get_test_id_from_index(count_valid):
|
||||
count_valid += 1
|
||||
|
||||
return count_valid
|
||||
|
||||
extra_files = list(names)
|
||||
valid_test_ids = []
|
||||
for t in test_ids:
|
||||
if self.x.mm in NUMBERED_MM:
|
||||
if t != self.x.get_test_id_from_index(len(valid_test_ids)):
|
||||
continue
|
||||
|
||||
inp_name = self.x.get_name(t)
|
||||
out_name = self.y.get_name(t)
|
||||
|
||||
if inp_name == out_name:
|
||||
continue
|
||||
if inp_name not in extra_files:
|
||||
continue
|
||||
if out_name not in extra_files:
|
||||
continue
|
||||
|
||||
valid_test_ids.append(t)
|
||||
extra_files.remove(inp_name)
|
||||
extra_files.remove(out_name)
|
||||
|
||||
if returns == "count":
|
||||
return len(valid_test_ids)
|
||||
elif returns == "test_id":
|
||||
return valid_test_ids
|
||||
elif returns == "test_id_with_extra_files":
|
||||
return valid_test_ids, extra_files
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
def score(self, names):
|
||||
def ls(s):
|
||||
return len(s) - s.count("0")
|
||||
|
||||
def zs(s):
|
||||
return -s.count("0")
|
||||
|
||||
def vs(s):
|
||||
return sum(
|
||||
s.lower().count(c) * w
|
||||
for c, w in [("a", -1), ("e", -1), ("i", +1), ("o", -1), ("u", -1)]
|
||||
)
|
||||
|
||||
count_score = self.matches(names, returns="fast_count")
|
||||
|
||||
len_score = ls(self.x.ll + self.x.rr + self.y.ll + self.y.rr)
|
||||
zero_score = zs(self.x.ll + self.x.rr + self.y.ll + self.y.rr)
|
||||
|
||||
assert self.x.mm in ["*"] + NUMBERED_MM
|
||||
specific_score = 0 if self.x.mm == "*" else len(self.x.mm)
|
||||
|
||||
vowel_score = vs(self.x.ll + self.x.rr) - vs(self.y.ll + self.y.rr)
|
||||
|
||||
return count_score, specific_score, len_score, zero_score, vowel_score
|
||||
|
||||
def is_string_safe(self):
|
||||
try:
|
||||
x = Pattern.from_string(self.x.to_string())
|
||||
y = Pattern.from_string(self.y.to_string())
|
||||
return self == PatternPair(x, y)
|
||||
except:
|
||||
return False
|
||||
|
||||
|
||||
def maximal(a, key):
|
||||
max_score = max(map(key, a))
|
||||
result = [x for x in a if key(x) == max_score]
|
||||
if len(result) == 1:
|
||||
return result[0]
|
||||
else:
|
||||
print(result)
|
||||
raise Exception("More than one maximum values")
|
||||
|
||||
|
||||
def get_all_star_pattern_pairs(names):
|
||||
sample = random.sample(names, min(len(names), SAMPLE_SIZE))
|
||||
|
||||
star_pattern_pairs = []
|
||||
|
||||
all_prefixes = [n[:i] for n in sample for i in range(len(n) + 1)]
|
||||
all_prefixes = list(sorted(set(all_prefixes)))
|
||||
all_suffixes = [n[i:] for n in sample for i in range(len(n) + 1)]
|
||||
all_suffixes = list(sorted(set(all_suffixes)))
|
||||
|
||||
for prefix in all_prefixes:
|
||||
matched_names = [n for n in names if n.startswith(prefix)]
|
||||
if len(matched_names) == 2:
|
||||
mn0, mn1 = matched_names
|
||||
for i in range(len(prefix) + 1):
|
||||
x = Pattern(prefix[:i], "*", mn0[len(prefix) :])
|
||||
y = Pattern(prefix[:i], "*", mn1[len(prefix) :])
|
||||
star_pattern_pairs.append(PatternPair(x, y))
|
||||
|
||||
for suffix in all_suffixes:
|
||||
matched_names = [n for n in names if n.endswith(suffix)]
|
||||
if len(matched_names) == 2:
|
||||
mn0, mn1 = matched_names
|
||||
for i in range(len(suffix) + 1):
|
||||
x = Pattern(mn0[: len(mn0) - len(suffix)], "*", suffix[i:])
|
||||
y = Pattern(mn1[: len(mn1) - len(suffix)], "*", suffix[i:])
|
||||
star_pattern_pairs.append(PatternPair(x, y))
|
||||
|
||||
star_pattern_pairs = list(set(star_pattern_pairs))
|
||||
return star_pattern_pairs
|
||||
|
||||
|
||||
def get_variant_pattern_pairs(pp):
|
||||
return [
|
||||
PatternPair(Pattern(pp.x.ll, mm, pp.x.rr), Pattern(pp.y.ll, mm, pp.y.rr))
|
||||
for mm in VALID_MM
|
||||
] + [
|
||||
PatternPair(Pattern(pp.y.ll, mm, pp.y.rr), Pattern(pp.x.ll, mm, pp.x.rr))
|
||||
for mm in VALID_MM
|
||||
]
|
||||
|
||||
|
||||
def find_best_pattern_pair(names):
|
||||
star_pattern_pairs = get_all_star_pattern_pairs(names)
|
||||
star_pattern_pairs = [
|
||||
pp for pp in star_pattern_pairs if pp.matches(names, returns="fast_count") >= 2
|
||||
]
|
||||
# for pp in star_pattern_pairs:
|
||||
# print(pp, pp.is_string_safe(), pp.score(names))
|
||||
|
||||
if len(star_pattern_pairs) == 0:
|
||||
return PatternPair(Pattern("", "*", ""), Pattern("", "*", ""))
|
||||
best_star_pattern_pair = maximal(star_pattern_pairs, key=lambda pp: pp.score(names))
|
||||
|
||||
pattern_pairs = get_variant_pattern_pairs(best_star_pattern_pair)
|
||||
# for pp in pattern_pairs:
|
||||
# print(pp, pp.is_string_safe(), pp.score(names))
|
||||
pattern_pairs = [pp for pp in pattern_pairs if pp.is_string_safe()]
|
||||
best_pattern_pair = maximal(pattern_pairs, key=lambda pp: pp.score(names))
|
||||
|
||||
return best_pattern_pair
|
||||
|
||||
|
||||
def list_dir_recursively(folder):
|
||||
old_cwd = os.getcwd()
|
||||
os.chdir(folder)
|
||||
result = []
|
||||
for root, _, filenames in os.walk("."):
|
||||
for filename in filenames:
|
||||
result.append(os.path.join(root, filename))
|
||||
os.chdir(old_cwd)
|
||||
return result
|
||||
|
||||
|
||||
def test_with_dir(folder):
|
||||
names = list_dir_recursively(folder)
|
||||
print(folder, find_best_pattern_pair(names))
|
Loading…
Add table
Add a link
Reference in a new issue