Add cache wrapper

2023-04-05 12:49:23 -05:00 · 2023-04-05 12:49:23 -05:00 · 57a6233779
commit 57a6233779
parent 9d645841ae
5 changed files with 145 additions and 119 deletions
--- a/judge/bridge/judge_handler.py
+++ b/judge/bridge/judge_handler.py
@ -15,7 +15,7 @@ from django.core.cache import cache

 from judge import event_poster as event
 from judge.bridge.base_handler import ZlibPacketHandler, proxy_list
-from judge.caching import finished_submission
+from judge.utils.problems import finished_submission
 from judge.models import (
    Judge,
    Language,
--- a/judge/caching.py
+++ b/judge/caching.py
@ -1,10 +1,48 @@
+from inspect import signature
 from django.core.cache import cache
+from django.db.models.query import QuerySet
+
+import hashlib
+
+MAX_NUM_CHAR = 15


-def finished_submission(sub):
-    keys = ["user_complete:%d" % sub.user_id, "user_attempted:%s" % sub.user_id]
-    if hasattr(sub, "contest"):
-        participation = sub.contest.participation
-        keys += ["contest_complete:%d" % participation.id]
-        keys += ["contest_attempted:%d" % participation.id]
-    cache.delete_many(keys)
+def cache_wrapper(prefix, timeout=86400):
+    """Return a decorator that caches a function's result in the Django cache.
+
+    The cache key is ``prefix`` followed by one ``:``-separated token per
+    argument of the decorated function, with spaces replaced by ``_``.
+    Parameters omitted from a call contribute their declared default, so
+    ``f(a)`` and ``f(a, limit=<default>)`` share a single entry and ``dirty``
+    does not have to mirror the exact call shape.
+
+    Args:
+        prefix: Leading component of every key produced for the function.
+        timeout: Passed unchanged as the timeout argument of ``cache.set``.
+
+    Returns:
+        A decorator. The decorated function gains a ``dirty(*args, **kwargs)``
+        attribute that deletes the entry the same call would have read.
+
+    Note:
+        A result of ``None`` is treated like a cache miss, so calls that
+        return ``None`` are recomputed every time.
+    """
+    # Function-scope import so this block does not rely on a new module import.
+    from functools import wraps
+
+    def short_digest(text):
+        # Fixed-width token derived from the whole text.
+        return hashlib.sha1(text.encode()).hexdigest()[:MAX_NUM_CHAR]
+
+    def arg_to_str(arg):
+        # Objects exposing ``id`` are identified by it.
+        if hasattr(arg, "id"):
+            return str(arg.id)
+        if isinstance(arg, (list, QuerySet)):
+            return short_digest(str(list(arg)))
+        text = str(arg)
+        if len(text) > MAX_NUM_CHAR:
+            # Hash instead of truncating: two long values that share their
+            # first MAX_NUM_CHAR characters must not map to the same key.
+            return short_digest(text)
+        return text
+
+    def decorator(func):
+        # Inspect the signature once at decoration time, not on every call.
+        params = list(signature(func).parameters.values())
+
+        def get_key(*args, **kwargs):
+            values = list(args)
+            # Fill the parameters not supplied positionally, in declaration
+            # order: explicit keyword, else declared default, else None.
+            for param in params[len(args) :]:
+                if param.name in kwargs:
+                    values.append(kwargs[param.name])
+                elif param.default is not param.empty:
+                    values.append(param.default)
+                else:
+                    values.append(None)
+            key = prefix + ":" + ":".join(arg_to_str(value) for value in values)
+            return key.replace(" ", "_")
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            cache_key = get_key(*args, **kwargs)
+            result = cache.get(cache_key)
+            if result is not None:
+                return result
+
+            result = func(*args, **kwargs)
+            cache.set(cache_key, result, timeout)
+            return result
+
+        def dirty(*args, **kwargs):
+            cache.delete(get_key(*args, **kwargs))
+
+        wrapper.dirty = dirty
+
+        return wrapper
+
+    return decorator
--- a/judge/ml/collab_filter.py
+++ b/judge/ml/collab_filter.py
@ -4,13 +4,15 @@ import os
 from django.core.cache import cache
 import hashlib

+from judge.caching import cache_wrapper
+

 class CollabFilter:
    DOT = "dot"
    COSINE = "cosine"

    # name = 'collab_filter' or 'collab_filter_time'
-    def __init__(self, name, **kwargs):
+    def __init__(self, name):
        embeddings = np.load(
            os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
            allow_pickle=True,
@ -20,6 +22,9 @@ class CollabFilter:
        self.user_embeddings = embeddings[arr0]
        self.problem_embeddings = embeddings[arr1]

+    def __str__(self):
+        """Return this filter's ``name`` attribute as its string form."""
+        return self.name
+
    def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
        """Computes the scores of the candidates given a query.
        Args:
@ -39,14 +44,9 @@ class CollabFilter:
        scores = u.dot(V.T)
        return scores

-    def user_recommendations(self, user, problems, measure=DOT, limit=None, **kwargs):
+    @cache_wrapper(prefix="user_recommendations", timeout=3600)
+    def user_recommendations(self, user, problems, measure=DOT, limit=None):
        uid = user.id
-        problems_hash = hashlib.sha1(str(list(problems)).encode()).hexdigest()
-        cache_key = ":".join(map(str, [self.name, uid, measure, limit, problems_hash]))
-        value = cache.get(cache_key)
-        if value:
-            return value
-
        if uid >= len(self.user_embeddings):
            uid = 0
        scores = self.compute_scores(
@ -61,7 +61,6 @@ class CollabFilter:

        res.sort(reverse=True, key=lambda x: x[0])
        res = res[:limit]
-        cache.set(cache_key, res, 3600)
        return res

    # return a list of pid
--- a/judge/signals.py
+++ b/judge/signals.py
@ -8,7 +8,7 @@ from django.core.cache.utils import make_template_fragment_key
 from django.db.models.signals import post_delete, post_save
 from django.dispatch import receiver

-from .caching import finished_submission
+from judge.utils.problems import finished_submission
 from .models import (
    BlogPost,
    Comment,
--- a/judge/utils/problems.py
+++ b/judge/utils/problems.py
@ -13,7 +13,7 @@ from django.utils.translation import gettext as _, gettext_noop

 from judge.models import Problem, Submission
 from judge.ml.collab_filter import CollabFilter
-
+from judge.caching import cache_wrapper

 __all__ = [
    "contest_completed_ids",
@ -57,61 +57,52 @@ def contest_completed_ids(participation):
    return result


+@cache_wrapper(prefix="user_complete", timeout=86400)
 def user_completed_ids(profile):
-    key = "user_complete:%d" % profile.id
-    result = cache.get(key)
-    if result is None:
-        result = set(
-            Submission.objects.filter(
-                user=profile, result="AC", points=F("problem__points")
-            )
-            .values_list("problem_id", flat=True)
-            .distinct()
+    result = set(
+        Submission.objects.filter(
+            user=profile, result="AC", points=F("problem__points")
        )
-        cache.set(key, result, 86400)
+        .values_list("problem_id", flat=True)
+        .distinct()
+    )
    return result


+@cache_wrapper(prefix="contest_attempted", timeout=86400)
 def contest_attempted_ids(participation):
-    key = "contest_attempted:%s" % participation.id
-    result = cache.get(key)
-    if result is None:
-        result = {
-            id: {"achieved_points": points, "max_points": max_points}
-            for id, max_points, points in (
-                participation.submissions.values_list(
-                    "problem__problem__id", "problem__points"
-                )
-                .annotate(points=Max("points"))
-                .filter(points__lt=F("problem__points"))
+    result = {
+        id: {"achieved_points": points, "max_points": max_points}
+        for id, max_points, points in (
+            participation.submissions.values_list(
+                "problem__problem__id", "problem__points"
            )
-        }
-        cache.set(key, result, 86400)
+            .annotate(points=Max("points"))
+            .filter(points__lt=F("problem__points"))
+        )
+    }
    return result


+@cache_wrapper(prefix="user_attempted", timeout=86400)
 def user_attempted_ids(profile):
-    key = "user_attempted:%s" % profile.id
-    result = cache.get(key)
-    if result is None:
-        result = {
-            id: {
-                "achieved_points": points,
-                "max_points": max_points,
-                "last_submission": last_submission,
-                "code": problem_code,
-                "name": problem_name,
-            }
-            for id, max_points, problem_code, problem_name, points, last_submission in (
-                Submission.objects.filter(user=profile)
-                .values_list(
-                    "problem__id", "problem__points", "problem__code", "problem__name"
-                )
-                .annotate(points=Max("points"), last_submission=Max("id"))
-                .filter(points__lt=F("problem__points"))
-            )
+    result = {
+        id: {
+            "achieved_points": points,
+            "max_points": max_points,
+            "last_submission": last_submission,
+            "code": problem_code,
+            "name": problem_name,
        }
-        cache.set(key, result, 86400)
+        for id, max_points, problem_code, problem_name, points, last_submission in (
+            Submission.objects.filter(user=profile)
+            .values_list(
+                "problem__id", "problem__points", "problem__code", "problem__name"
+            )
+            .annotate(points=Max("points"), last_submission=Max("id"))
+            .filter(points__lt=F("problem__points"))
+        )
+    }
    return result


@ -174,77 +165,67 @@ def editable_problems(user, profile=None):
    return subquery


+@cache_wrapper(prefix="hp", timeout=900)
 def hot_problems(duration, limit):
-    cache_key = "hot_problems:%d:%d" % (duration.total_seconds(), limit)
-    qs = cache.get(cache_key)
-    if qs is None:
-        qs = Problem.get_public_problems().filter(
-            submission__date__gt=timezone.now() - duration
-        )
-        qs0 = (
-            qs.annotate(k=Count("submission__user", distinct=True))
-            .order_by("-k")
-            .values_list("k", flat=True)
-        )
+    qs = Problem.get_public_problems().filter(
+        submission__date__gt=timezone.now() - duration
+    )
+    qs0 = (
+        qs.annotate(k=Count("submission__user", distinct=True))
+        .order_by("-k")
+        .values_list("k", flat=True)
+    )

-        if not qs0:
-            return []
-        # make this an aggregate
-        mx = float(qs0[0])
+    if not qs0:
+        return []
+    # make this an aggregate
+    mx = float(qs0[0])

-        qs = qs.annotate(unique_user_count=Count("submission__user", distinct=True))
-        # fix braindamage in excluding CE
-        qs = qs.annotate(
-            submission_volume=Count(
-                Case(
-                    When(submission__result="AC", then=1),
-                    When(submission__result="WA", then=1),
-                    When(submission__result="IR", then=1),
-                    When(submission__result="RTE", then=1),
-                    When(submission__result="TLE", then=1),
-                    When(submission__result="OLE", then=1),
-                    output_field=FloatField(),
-                )
+    qs = qs.annotate(unique_user_count=Count("submission__user", distinct=True))
+    # fix braindamage in excluding CE
+    qs = qs.annotate(
+        submission_volume=Count(
+            Case(
+                When(submission__result="AC", then=1),
+                When(submission__result="WA", then=1),
+                When(submission__result="IR", then=1),
+                When(submission__result="RTE", then=1),
+                When(submission__result="TLE", then=1),
+                When(submission__result="OLE", then=1),
+                output_field=FloatField(),
            )
        )
-        qs = qs.annotate(
-            ac_volume=Count(
-                Case(
-                    When(submission__result="AC", then=1),
-                    output_field=FloatField(),
-                )
+    )
+    qs = qs.annotate(
+        ac_volume=Count(
+            Case(
+                When(submission__result="AC", then=1),
+                output_field=FloatField(),
            )
        )
-        qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))
+    )
+    qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))

-        qs = (
-            qs.annotate(
-                ordering=ExpressionWrapper(
-                    0.02
-                    * F("points")
-                    * (
-                        0.4 * F("ac_volume") / F("submission_volume")
-                        + 0.6 * F("ac_rate")
-                    )
-                    + 100 * e ** (F("unique_user_count") / mx),
-                    output_field=FloatField(),
-                )
+    qs = (
+        qs.annotate(
+            ordering=ExpressionWrapper(
+                0.02
+                * F("points")
+                * (0.4 * F("ac_volume") / F("submission_volume") + 0.6 * F("ac_rate"))
+                + 100 * e ** (F("unique_user_count") / mx),
+                output_field=FloatField(),
            )
-            .order_by("-ordering")
-            .defer("description")[:limit]
        )
-
-        cache.set(cache_key, qs, 900)
+        .order_by("-ordering")
+        .defer("description")[:limit]
+    )
    return qs


+@cache_wrapper(prefix="grp", timeout=26400)
 def get_related_problems(profile, problem, limit=8):
    if not profile or not settings.ML_OUTPUT_PATH:
        return None
-    cache_key = "related_problems:%d:%d" % (profile.id, problem.id)
-    qs = cache.get(cache_key)
-    if qs is not None:
-        return qs
    problemset = Problem.get_visible_problems(profile.user).values_list("id", flat=True)
    problemset = problemset.exclude(id__in=user_completed_ids(profile))
    problemset = problemset.exclude(id=problem.id)
@ -254,8 +235,16 @@ def get_related_problems(profile, problem, limit=8):
    ) + cf_model.problem_neighbors(problem, problemset, CollabFilter.COSINE, limit)
    results = list(set([i[1] for i in results]))
    seed = datetime.now().strftime("%d%m%Y")
-    random.Random(seed).shuffle(results)
+    random.shuffle(results)
    results = results[:limit]
    results = [Problem.objects.get(id=i) for i in results]
-    cache.set(cache_key, results, 21600)
    return results
+
+
+def finished_submission(sub):
+    """Delete the cached completed/attempted entries affected by ``sub``.
+
+    Always removes the ``user_complete`` and ``user_attempted`` keys for
+    ``sub.user_id``. When ``sub`` has a ``contest`` attribute, the
+    ``contest_complete`` and ``contest_attempted`` keys for that contest
+    submission's participation are removed as well. All keys are deleted in
+    one ``cache.delete_many`` call.
+    """
+    stale_keys = [
+        "user_complete:%d" % sub.user_id,
+        "user_attempted:%s" % sub.user_id,
+    ]
+    if hasattr(sub, "contest"):
+        participation_id = sub.contest.participation.id
+        stale_keys.append("contest_complete:%d" % participation_id)
+        stale_keys.append("contest_attempted:%d" % participation_id)
+    cache.delete_many(stale_keys)