diff --git a/judge/bridge/judge_handler.py b/judge/bridge/judge_handler.py
index 06db794..e36c9f8 100644
--- a/judge/bridge/judge_handler.py
+++ b/judge/bridge/judge_handler.py
@@ -15,7 +15,7 @@ from django.core.cache import cache
 
 from judge import event_poster as event
 from judge.bridge.base_handler import ZlibPacketHandler, proxy_list
-from judge.caching import finished_submission
+from judge.utils.problems import finished_submission
 from judge.models import (
     Judge,
     Language,
diff --git a/judge/caching.py b/judge/caching.py
index 99bbf81..ad27f2f 100644
--- a/judge/caching.py
+++ b/judge/caching.py
@@ -1,10 +1,62 @@
+from functools import wraps
+from inspect import signature
 from django.core.cache import cache
+from django.db.models.query import QuerySet
+
+import hashlib
+
+MAX_NUM_CHAR = 15
 
 
-def finished_submission(sub):
-    keys = ["user_complete:%d" % sub.user_id, "user_attempted:%s" % sub.user_id]
-    if hasattr(sub, "contest"):
-        participation = sub.contest.participation
-        keys += ["contest_complete:%d" % participation.id]
-        keys += ["contest_attempted:%d" % participation.id]
-    cache.delete_many(keys)
+def cache_wrapper(prefix, timeout=86400):
+    """Cache a function's return value in the Django cache.
+
+    The key is ``prefix`` followed by one ``:``-separated part per argument
+    (see ``arg_to_str``). The wrapped function gains ``dirty(*args,
+    **kwargs)``, which deletes the entry for that call. ``None`` results
+    are never stored, so such calls are recomputed every time.
+    """
+
+    def digest(text):
+        return hashlib.sha1(text.encode()).hexdigest()[:MAX_NUM_CHAR]
+
+    def arg_to_str(arg):
+        if hasattr(arg, "id"):
+            return str(arg.id)
+        if isinstance(arg, (list, QuerySet)):
+            return digest(str(list(arg)))
+        text = str(arg)
+        # Hash long values: truncating them made distinct arguments that
+        # share their first MAX_NUM_CHAR characters collide on one key.
+        return digest(text) if len(text) > MAX_NUM_CHAR else text
+
+    def get_key(sig, args, kwargs):
+        # Bind as a real call would, so omitted defaults and keyword
+        # arguments give the same key as the equivalent positional call.
+        bound = sig.bind(*args, **kwargs)
+        bound.apply_defaults()
+        parts = [arg_to_str(value) for value in bound.arguments.values()]
+        return (prefix + ":" + ":".join(parts)).replace(" ", "_")
+
+    def decorator(func):
+        sig = signature(func)
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            cache_key = get_key(sig, args, kwargs)
+            result = cache.get(cache_key)
+            if result is not None:
+                return result
+
+            result = func(*args, **kwargs)
+            cache.set(cache_key, result, timeout)
+            return result
+
+        def dirty(*args, **kwargs):
+            cache.delete(get_key(sig, args, kwargs))
+
+        wrapper.dirty = dirty
+
+        return wrapper
+
+    return decorator
diff --git a/judge/ml/collab_filter.py b/judge/ml/collab_filter.py
index f06637f..1b0a1db 100644
--- a/judge/ml/collab_filter.py
+++ b/judge/ml/collab_filter.py
@@ -4,13 +4,15 @@ import os
 from django.core.cache import cache
 import hashlib
 
+from judge.caching import cache_wrapper
+
 
 class CollabFilter:
     DOT = "dot"
     COSINE = "cosine"
 
     # name = 'collab_filter' or 'collab_filter_time'
-    def __init__(self, name, **kwargs):
+    def __init__(self, name):
         embeddings = np.load(
             os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
             allow_pickle=True,
@@ -20,6 +22,9 @@ class CollabFilter:
         self.user_embeddings = embeddings[arr0]
         self.problem_embeddings = embeddings[arr1]
 
+    def __str__(self):
+        return self.name
+
     def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
         """Computes the scores of the candidates given a query.
         Args:
@@ -39,14 +44,9 @@ class CollabFilter:
         scores = u.dot(V.T)
         return scores
 
-    def user_recommendations(self, user, problems, measure=DOT, limit=None, **kwargs):
+    @cache_wrapper(prefix="user_recommendations", timeout=3600)
+    def user_recommendations(self, user, problems, measure=DOT, limit=None):
         uid = user.id
-        problems_hash = hashlib.sha1(str(list(problems)).encode()).hexdigest()
-        cache_key = ":".join(map(str, [self.name, uid, measure, limit, problems_hash]))
-        value = cache.get(cache_key)
-        if value:
-            return value
-
         if uid >= len(self.user_embeddings):
             uid = 0
         scores = self.compute_scores(
@@ -61,7 +61,6 @@ class CollabFilter:
 
         res.sort(reverse=True, key=lambda x: x[0])
         res = res[:limit]
-        cache.set(cache_key, res, 3600)
         return res
 
     # return a list of pid
diff --git a/judge/signals.py b/judge/signals.py
index e308f98..74ae833 100644
--- a/judge/signals.py
+++ b/judge/signals.py
@@ -8,7 +8,7 @@ from django.core.cache.utils import make_template_fragment_key
 from django.db.models.signals import post_delete, post_save
 from django.dispatch import receiver
 
-from .caching import finished_submission
+from judge.utils.problems import finished_submission
 from .models import (
     BlogPost,
     Comment,
diff --git a/judge/utils/problems.py b/judge/utils/problems.py
index 057edf0..6c351aa 100644
--- a/judge/utils/problems.py
+++ b/judge/utils/problems.py
@@ -13,7 +13,7 @@ from django.utils.translation import gettext as _, gettext_noop
 
 from judge.models import Problem, Submission
 from judge.ml.collab_filter import CollabFilter
-
+from judge.caching import cache_wrapper
 
 __all__ = [
     "contest_completed_ids",
@@ -57,61 +57,52 @@ def contest_completed_ids(participation):
     return result
 
 
+@cache_wrapper(prefix="user_complete", timeout=86400)
 def user_completed_ids(profile):
-    key = "user_complete:%d" % profile.id
-    result = cache.get(key)
-    if result is None:
-        result = set(
-            Submission.objects.filter(
-                user=profile, result="AC", points=F("problem__points")
-            )
-            .values_list("problem_id", flat=True)
-            .distinct()
+    result = set(
+        Submission.objects.filter(
+            user=profile, result="AC", points=F("problem__points")
         )
-        cache.set(key, result, 86400)
+        .values_list("problem_id", flat=True)
+        .distinct()
+    )
     return result
 
 
+@cache_wrapper(prefix="contest_attempted", timeout=86400)
 def contest_attempted_ids(participation):
-    key = "contest_attempted:%s" % participation.id
-    result = cache.get(key)
-    if result is None:
-        result = {
-            id: {"achieved_points": points, "max_points": max_points}
-            for id, max_points, points in (
-                participation.submissions.values_list(
-                    "problem__problem__id", "problem__points"
-                )
-                .annotate(points=Max("points"))
-                .filter(points__lt=F("problem__points"))
+    result = {
+        id: {"achieved_points": points, "max_points": max_points}
+        for id, max_points, points in (
+            participation.submissions.values_list(
+                "problem__problem__id", "problem__points"
             )
-        }
-        cache.set(key, result, 86400)
+            .annotate(points=Max("points"))
+            .filter(points__lt=F("problem__points"))
+        )
+    }
     return result
 
 
+@cache_wrapper(prefix="user_attempted", timeout=86400)
 def user_attempted_ids(profile):
-    key = "user_attempted:%s" % profile.id
-    result = cache.get(key)
-    if result is None:
-        result = {
-            id: {
-                "achieved_points": points,
-                "max_points": max_points,
-                "last_submission": last_submission,
-                "code": problem_code,
-                "name": problem_name,
-            }
-            for id, max_points, problem_code, problem_name, points, last_submission in (
-                Submission.objects.filter(user=profile)
-                .values_list(
-                    "problem__id", "problem__points", "problem__code", "problem__name"
-                )
-                .annotate(points=Max("points"), last_submission=Max("id"))
-                .filter(points__lt=F("problem__points"))
-            )
+    result = {
+        id: {
+            "achieved_points": points,
+            "max_points": max_points,
+            "last_submission": last_submission,
+            "code": problem_code,
+            "name": problem_name,
         }
-        cache.set(key, result, 86400)
+        for id, max_points, problem_code, problem_name, points, last_submission in (
+            Submission.objects.filter(user=profile)
+            .values_list(
+                "problem__id", "problem__points", "problem__code", "problem__name"
+            )
+            .annotate(points=Max("points"), last_submission=Max("id"))
+            .filter(points__lt=F("problem__points"))
+        )
+    }
     return result
 
 
@@ -174,77 +165,67 @@ def editable_problems(user, profile=None):
     return subquery
 
 
+@cache_wrapper(prefix="hp", timeout=900)
 def hot_problems(duration, limit):
-    cache_key = "hot_problems:%d:%d" % (duration.total_seconds(), limit)
-    qs = cache.get(cache_key)
-    if qs is None:
-        qs = Problem.get_public_problems().filter(
-            submission__date__gt=timezone.now() - duration
-        )
-        qs0 = (
-            qs.annotate(k=Count("submission__user", distinct=True))
-            .order_by("-k")
-            .values_list("k", flat=True)
-        )
+    qs = Problem.get_public_problems().filter(
+        submission__date__gt=timezone.now() - duration
+    )
+    qs0 = (
+        qs.annotate(k=Count("submission__user", distinct=True))
+        .order_by("-k")
+        .values_list("k", flat=True)
+    )
 
-        if not qs0:
-            return []
-        # make this an aggregate
-        mx = float(qs0[0])
+    if not qs0:
+        return []
+    # make this an aggregate
+    mx = float(qs0[0])
 
-        qs = qs.annotate(unique_user_count=Count("submission__user", distinct=True))
-        # fix braindamage in excluding CE
-        qs = qs.annotate(
-            submission_volume=Count(
-                Case(
-                    When(submission__result="AC", then=1),
-                    When(submission__result="WA", then=1),
-                    When(submission__result="IR", then=1),
-                    When(submission__result="RTE", then=1),
-                    When(submission__result="TLE", then=1),
-                    When(submission__result="OLE", then=1),
-                    output_field=FloatField(),
-                )
+    qs = qs.annotate(unique_user_count=Count("submission__user", distinct=True))
+    # fix braindamage in excluding CE
+    qs = qs.annotate(
+        submission_volume=Count(
+            Case(
+                When(submission__result="AC", then=1),
+                When(submission__result="WA", then=1),
+                When(submission__result="IR", then=1),
+                When(submission__result="RTE", then=1),
+                When(submission__result="TLE", then=1),
+                When(submission__result="OLE", then=1),
+                output_field=FloatField(),
             )
         )
-        qs = qs.annotate(
-            ac_volume=Count(
-                Case(
-                    When(submission__result="AC", then=1),
-                    output_field=FloatField(),
-                )
+    )
+    qs = qs.annotate(
+        ac_volume=Count(
+            Case(
+                When(submission__result="AC", then=1),
+                output_field=FloatField(),
             )
         )
-        qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))
+    )
+    qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))
 
-        qs = (
-            qs.annotate(
-                ordering=ExpressionWrapper(
-                    0.02
-                    * F("points")
-                    * (
-                        0.4 * F("ac_volume") / F("submission_volume")
-                        + 0.6 * F("ac_rate")
-                    )
-                    + 100 * e ** (F("unique_user_count") / mx),
-                    output_field=FloatField(),
-                )
+    qs = (
+        qs.annotate(
+            ordering=ExpressionWrapper(
+                0.02
+                * F("points")
+                * (0.4 * F("ac_volume") / F("submission_volume") + 0.6 * F("ac_rate"))
+                + 100 * e ** (F("unique_user_count") / mx),
+                output_field=FloatField(),
             )
-            .order_by("-ordering")
-            .defer("description")[:limit]
         )
-
-        cache.set(cache_key, qs, 900)
+        .order_by("-ordering")
+        .defer("description")[:limit]
+    )
     return qs
 
 
+@cache_wrapper(prefix="grp", timeout=21600)
 def get_related_problems(profile, problem, limit=8):
     if not profile or not settings.ML_OUTPUT_PATH:
         return None
-    cache_key = "related_problems:%d:%d" % (profile.id, problem.id)
-    qs = cache.get(cache_key)
-    if qs is not None:
-        return qs
     problemset = Problem.get_visible_problems(profile.user).values_list("id", flat=True)
     problemset = problemset.exclude(id__in=user_completed_ids(profile))
     problemset = problemset.exclude(id=problem.id)
@@ -254,8 +235,18 @@ def get_related_problems(profile, problem, limit=8):
     ) + cf_model.problem_neighbors(problem, problemset, CollabFilter.COSINE, limit)
     results = list(set([i[1] for i in results]))
     seed = datetime.now().strftime("%d%m%Y")
     random.Random(seed).shuffle(results)
     results = results[:limit]
     results = [Problem.objects.get(id=i) for i in results]
-    cache.set(cache_key, results, 21600)
     return results
+
+
+def finished_submission(sub):
+    # Keys are spelled out because only ids are at hand here; they must stay
+    # in sync with the "prefix:id" keys that cache_wrapper builds.
+    keys = ["user_complete:%d" % sub.user_id, "user_attempted:%s" % sub.user_id]
+    if hasattr(sub, "contest"):
+        participation = sub.contest.participation
+        keys += ["contest_complete:%d" % participation.id]
+        keys += ["contest_attempted:%d" % participation.id]
+    cache.delete_many(keys)