Add cache wrapper

This commit is contained in:
cuom1999 2023-04-05 12:49:23 -05:00
parent 9d645841ae
commit 57a6233779
5 changed files with 145 additions and 119 deletions

View file

@ -15,7 +15,7 @@ from django.core.cache import cache
from judge import event_poster as event from judge import event_poster as event
from judge.bridge.base_handler import ZlibPacketHandler, proxy_list from judge.bridge.base_handler import ZlibPacketHandler, proxy_list
from judge.caching import finished_submission from judge.utils.problems import finished_submission
from judge.models import ( from judge.models import (
Judge, Judge,
Language, Language,

View file

@ -1,10 +1,48 @@
from inspect import signature
from django.core.cache import cache from django.core.cache import cache
from django.db.models.query import QuerySet
import hashlib
MAX_NUM_CHAR = 15
def cache_wrapper(prefix, timeout=86400):
    """Build a decorator that memoizes a function's result in the Django cache.

    The cache key is ``prefix`` followed by one ``:``-separated component per
    parameter of the decorated function, with every space replaced by ``_``.
    The decorated function also gains a ``dirty(*args, **kwargs)`` attribute
    that deletes the entry belonging to the given arguments.

    A result of ``None`` is never served from the cache: ``cache.get`` returns
    ``None`` on a miss, so the two cases cannot be told apart and the wrapped
    function is simply called again.

    Args:
        prefix: String placed at the start of every key.
        timeout: Passed unchanged to ``cache.set`` as the entry timeout.

    Returns:
        A decorator to apply to the function whose results should be cached.
    """
    # Imported locally so the module-level import block needs no change.
    from functools import wraps

    def short_digest(text):
        """Return the first MAX_NUM_CHAR hex digits of the SHA-1 of ``text``."""
        return hashlib.sha1(text.encode()).hexdigest()[:MAX_NUM_CHAR]

    def arg_to_str(arg):
        """Reduce one argument to a short key component."""
        # Anything exposing ``id`` is keyed by that id alone.
        if hasattr(arg, "id"):
            return str(arg.id)
        if isinstance(arg, (list, QuerySet)):
            return short_digest(str(list(arg)))
        text = str(arg)
        if len(text) > MAX_NUM_CHAR:
            # Hash instead of truncating: two long values that share their
            # first MAX_NUM_CHAR characters must not map to the same key.
            return short_digest(text)
        return text

    def get_key(func, *args, **kwargs):
        """Compose the cache key for calling ``func`` with these arguments."""
        parameters = signature(func).parameters
        values = list(args)
        for name in list(parameters)[len(args):]:
            if name in kwargs:
                values.append(kwargs[name])
                continue
            # Use the declared default so that omitting an argument and
            # passing its default explicitly address the same entry.
            # Parameters without a default contribute None.
            parameter = parameters[name]
            if parameter.default is parameter.empty:
                values.append(None)
            else:
                values.append(parameter.default)
        key = prefix + ":" + ":".join(arg_to_str(value) for value in values)
        return key.replace(" ", "_")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = get_key(func, *args, **kwargs)
            result = cache.get(cache_key)
            if result is not None:
                return result
            result = func(*args, **kwargs)
            cache.set(cache_key, result, timeout)
            return result

        def dirty(*args, **kwargs):
            """Delete the cached entry for the given call arguments."""
            cache.delete(get_key(func, *args, **kwargs))

        wrapper.dirty = dirty
        return wrapper

    return decorator

View file

@ -4,13 +4,15 @@ import os
from django.core.cache import cache from django.core.cache import cache
import hashlib import hashlib
from judge.caching import cache_wrapper
class CollabFilter: class CollabFilter:
DOT = "dot" DOT = "dot"
COSINE = "cosine" COSINE = "cosine"
# name = 'collab_filter' or 'collab_filter_time' # name = 'collab_filter' or 'collab_filter_time'
def __init__(self, name, **kwargs): def __init__(self, name):
embeddings = np.load( embeddings = np.load(
os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"), os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
allow_pickle=True, allow_pickle=True,
@ -20,6 +22,9 @@ class CollabFilter:
self.user_embeddings = embeddings[arr0] self.user_embeddings = embeddings[arr0]
self.problem_embeddings = embeddings[arr1] self.problem_embeddings = embeddings[arr1]
def __str__(self):
return self.name
def compute_scores(self, query_embedding, item_embeddings, measure=DOT): def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
"""Computes the scores of the candidates given a query. """Computes the scores of the candidates given a query.
Args: Args:
@ -39,14 +44,9 @@ class CollabFilter:
scores = u.dot(V.T) scores = u.dot(V.T)
return scores return scores
def user_recommendations(self, user, problems, measure=DOT, limit=None, **kwargs): @cache_wrapper(prefix="user_recommendations", timeout=3600)
def user_recommendations(self, user, problems, measure=DOT, limit=None):
uid = user.id uid = user.id
problems_hash = hashlib.sha1(str(list(problems)).encode()).hexdigest()
cache_key = ":".join(map(str, [self.name, uid, measure, limit, problems_hash]))
value = cache.get(cache_key)
if value:
return value
if uid >= len(self.user_embeddings): if uid >= len(self.user_embeddings):
uid = 0 uid = 0
scores = self.compute_scores( scores = self.compute_scores(
@ -61,7 +61,6 @@ class CollabFilter:
res.sort(reverse=True, key=lambda x: x[0]) res.sort(reverse=True, key=lambda x: x[0])
res = res[:limit] res = res[:limit]
cache.set(cache_key, res, 3600)
return res return res
# return a list of pid # return a list of pid

View file

@ -8,7 +8,7 @@ from django.core.cache.utils import make_template_fragment_key
from django.db.models.signals import post_delete, post_save from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver from django.dispatch import receiver
from .caching import finished_submission from judge.utils.problems import finished_submission
from .models import ( from .models import (
BlogPost, BlogPost,
Comment, Comment,

View file

@ -13,7 +13,7 @@ from django.utils.translation import gettext as _, gettext_noop
from judge.models import Problem, Submission from judge.models import Problem, Submission
from judge.ml.collab_filter import CollabFilter from judge.ml.collab_filter import CollabFilter
from judge.caching import cache_wrapper
__all__ = [ __all__ = [
"contest_completed_ids", "contest_completed_ids",
@ -57,10 +57,8 @@ def contest_completed_ids(participation):
return result return result
@cache_wrapper(prefix="user_complete", timeout=86400)
def user_completed_ids(profile): def user_completed_ids(profile):
key = "user_complete:%d" % profile.id
result = cache.get(key)
if result is None:
result = set( result = set(
Submission.objects.filter( Submission.objects.filter(
user=profile, result="AC", points=F("problem__points") user=profile, result="AC", points=F("problem__points")
@ -68,14 +66,11 @@ def user_completed_ids(profile):
.values_list("problem_id", flat=True) .values_list("problem_id", flat=True)
.distinct() .distinct()
) )
cache.set(key, result, 86400)
return result return result
@cache_wrapper(prefix="contest_attempted", timeout=86400)
def contest_attempted_ids(participation): def contest_attempted_ids(participation):
key = "contest_attempted:%s" % participation.id
result = cache.get(key)
if result is None:
result = { result = {
id: {"achieved_points": points, "max_points": max_points} id: {"achieved_points": points, "max_points": max_points}
for id, max_points, points in ( for id, max_points, points in (
@ -86,14 +81,11 @@ def contest_attempted_ids(participation):
.filter(points__lt=F("problem__points")) .filter(points__lt=F("problem__points"))
) )
} }
cache.set(key, result, 86400)
return result return result
@cache_wrapper(prefix="user_attempted", timeout=86400)
def user_attempted_ids(profile): def user_attempted_ids(profile):
key = "user_attempted:%s" % profile.id
result = cache.get(key)
if result is None:
result = { result = {
id: { id: {
"achieved_points": points, "achieved_points": points,
@ -111,7 +103,6 @@ def user_attempted_ids(profile):
.filter(points__lt=F("problem__points")) .filter(points__lt=F("problem__points"))
) )
} }
cache.set(key, result, 86400)
return result return result
@ -174,10 +165,8 @@ def editable_problems(user, profile=None):
return subquery return subquery
@cache_wrapper(prefix="hp", timeout=900)
def hot_problems(duration, limit): def hot_problems(duration, limit):
cache_key = "hot_problems:%d:%d" % (duration.total_seconds(), limit)
qs = cache.get(cache_key)
if qs is None:
qs = Problem.get_public_problems().filter( qs = Problem.get_public_problems().filter(
submission__date__gt=timezone.now() - duration submission__date__gt=timezone.now() - duration
) )
@ -222,10 +211,7 @@ def hot_problems(duration, limit):
ordering=ExpressionWrapper( ordering=ExpressionWrapper(
0.02 0.02
* F("points") * F("points")
* ( * (0.4 * F("ac_volume") / F("submission_volume") + 0.6 * F("ac_rate"))
0.4 * F("ac_volume") / F("submission_volume")
+ 0.6 * F("ac_rate")
)
+ 100 * e ** (F("unique_user_count") / mx), + 100 * e ** (F("unique_user_count") / mx),
output_field=FloatField(), output_field=FloatField(),
) )
@ -233,18 +219,13 @@ def hot_problems(duration, limit):
.order_by("-ordering") .order_by("-ordering")
.defer("description")[:limit] .defer("description")[:limit]
) )
cache.set(cache_key, qs, 900)
return qs return qs
@cache_wrapper(prefix="grp", timeout=26400)
def get_related_problems(profile, problem, limit=8): def get_related_problems(profile, problem, limit=8):
if not profile or not settings.ML_OUTPUT_PATH: if not profile or not settings.ML_OUTPUT_PATH:
return None return None
cache_key = "related_problems:%d:%d" % (profile.id, problem.id)
qs = cache.get(cache_key)
if qs is not None:
return qs
problemset = Problem.get_visible_problems(profile.user).values_list("id", flat=True) problemset = Problem.get_visible_problems(profile.user).values_list("id", flat=True)
problemset = problemset.exclude(id__in=user_completed_ids(profile)) problemset = problemset.exclude(id__in=user_completed_ids(profile))
problemset = problemset.exclude(id=problem.id) problemset = problemset.exclude(id=problem.id)
@ -254,8 +235,16 @@ def get_related_problems(profile, problem, limit=8):
) + cf_model.problem_neighbors(problem, problemset, CollabFilter.COSINE, limit) ) + cf_model.problem_neighbors(problem, problemset, CollabFilter.COSINE, limit)
results = list(set([i[1] for i in results])) results = list(set([i[1] for i in results]))
seed = datetime.now().strftime("%d%m%Y") seed = datetime.now().strftime("%d%m%Y")
random.Random(seed).shuffle(results) random.shuffle(results)
results = results[:limit] results = results[:limit]
results = [Problem.objects.get(id=i) for i in results] results = [Problem.objects.get(id=i) for i in results]
cache.set(cache_key, results, 21600)
return results return results
def finished_submission(sub):
    """Delete the cache entries that depend on the given submission.

    The ``user_complete`` and ``user_attempted`` keys for ``sub.user_id`` are
    always removed.  When ``sub`` has a ``contest`` attribute, the
    ``contest_complete`` and ``contest_attempted`` keys for the id of
    ``sub.contest.participation`` are removed as well.  All keys go to the
    cache in a single ``delete_many`` call.
    """
    stale_keys = [
        "user_complete:%d" % sub.user_id,
        "user_attempted:%s" % sub.user_id,
    ]
    if hasattr(sub, "contest"):
        participation = sub.contest.participation
        stale_keys.extend(
            (
                "contest_complete:%d" % participation.id,
                "contest_attempted:%d" % participation.id,
            )
        )
    cache.delete_many(stale_keys)