Refactor problem feed code
This commit is contained in:
parent
b6c9ce4763
commit
0b4eeb8751
4 changed files with 134 additions and 67 deletions
|
@ -40,7 +40,10 @@ def cache_wrapper(prefix, timeout=None):
|
||||||
def _get(key):
|
def _get(key):
|
||||||
if not l0_cache:
|
if not l0_cache:
|
||||||
return cache.get(key)
|
return cache.get(key)
|
||||||
return l0_cache.get(key) or cache.get(key)
|
result = l0_cache.get(key)
|
||||||
|
if result is None:
|
||||||
|
result = cache.get(key)
|
||||||
|
return result
|
||||||
|
|
||||||
def _set_l0(key, value):
|
def _set_l0(key, value):
|
||||||
if l0_cache:
|
if l0_cache:
|
||||||
|
@ -56,7 +59,7 @@ def cache_wrapper(prefix, timeout=None):
|
||||||
result = _get(cache_key)
|
result = _get(cache_key)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
_set_l0(cache_key, result)
|
_set_l0(cache_key, result)
|
||||||
if result == NONE_RESULT:
|
if type(result) == str and result == NONE_RESULT:
|
||||||
result = None
|
result = None
|
||||||
return result
|
return result
|
||||||
result = func(*args, **kwargs)
|
result = func(*args, **kwargs)
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from django.conf import settings
|
|
||||||
import os
|
import os
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from judge.caching import cache_wrapper
|
from judge.caching import cache_wrapper
|
||||||
|
|
||||||
|
@ -12,14 +14,13 @@ class CollabFilter:
|
||||||
|
|
||||||
# name = 'collab_filter' or 'collab_filter_time'
|
# name = 'collab_filter' or 'collab_filter_time'
|
||||||
def __init__(self, name):
|
def __init__(self, name):
|
||||||
embeddings = np.load(
|
self.embeddings = np.load(
|
||||||
os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
|
os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
|
||||||
allow_pickle=True,
|
allow_pickle=True,
|
||||||
)
|
)
|
||||||
arr0, arr1 = embeddings.files
|
_, problem_arr = self.embeddings.files
|
||||||
self.name = name
|
self.name = name
|
||||||
self.user_embeddings = embeddings[arr0]
|
self.problem_embeddings = self.embeddings[problem_arr]
|
||||||
self.problem_embeddings = embeddings[arr1]
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
@ -43,18 +44,32 @@ class CollabFilter:
|
||||||
scores = u.dot(V.T)
|
scores = u.dot(V.T)
|
||||||
return scores
|
return scores
|
||||||
|
|
||||||
|
def _get_embedding_version(self):
    """Return a short fingerprint of the loaded problem embeddings.

    The fingerprint is the first five hexadecimal characters of the
    SHA-256 digest of the raw bytes of ``self.problem_embeddings[0]``.
    ``get_user_embedding`` forwards it to ``_get_user_embedding`` as the
    ``embedding_version`` argument.
    """
    # Only the first row is hashed; the remaining rows do not influence
    # the returned value.
    fingerprint = hashlib.sha256(self.problem_embeddings[0].tobytes())
    return fingerprint.hexdigest()[:5]
|
||||||
|
|
||||||
|
@cache_wrapper(prefix="CFgue", timeout=86400)
def _get_user_embedding(self, user_id, embedding_version):
    """Look up one user's embedding row in the loaded archive.

    Args:
        user_id: Row index into the user embedding array.
        embedding_version: Not read by the body. Presumably it exists only
            so that the ``cache_wrapper`` key changes when the embeddings
            change -- TODO confirm against ``cache_wrapper``'s key logic.

    Returns:
        The row at ``user_id``, or row 0 when ``user_id`` is greater than
        or equal to the number of rows.
    """
    # The archive is expected to hold exactly two arrays; the first one is
    # the user embedding array (unpacking fails otherwise).
    user_key, _problem_key = self.embeddings.files
    table = self.embeddings[user_key]
    # Ids past the end of the array fall back to the first row.
    row = 0 if user_id >= len(table) else user_id
    return table[row]
|
||||||
|
|
||||||
|
def get_user_embedding(self, user_id):
    """Return the embedding for ``user_id`` via ``_get_user_embedding``.

    The current value of ``_get_embedding_version()`` is passed along as
    the second argument of the lookup.
    """
    return self._get_user_embedding(user_id, self._get_embedding_version())
|
||||||
|
|
||||||
@cache_wrapper(prefix="user_recommendations", timeout=3600)
|
@cache_wrapper(prefix="user_recommendations", timeout=3600)
|
||||||
def user_recommendations(self, user, problems, measure=DOT, limit=None):
|
def user_recommendations(self, user_id, problems, measure=DOT, limit=None):
|
||||||
uid = user.id
|
user_embedding = self.get_user_embedding(user_id)
|
||||||
if uid >= len(self.user_embeddings):
|
scores = self.compute_scores(user_embedding, self.problem_embeddings, measure)
|
||||||
uid = 0
|
|
||||||
scores = self.compute_scores(
|
|
||||||
self.user_embeddings[uid], self.problem_embeddings, measure
|
|
||||||
)
|
|
||||||
|
|
||||||
res = [] # [(score, problem)]
|
res = [] # [(score, problem)]
|
||||||
for pid in problems:
|
for pid in problems:
|
||||||
# pid = problem.id
|
|
||||||
if pid < len(scores):
|
if pid < len(scores):
|
||||||
res.append((scores[pid], pid))
|
res.append((scores[pid], pid))
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from math import e
|
from math import e
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
import random
|
import random
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
@ -9,6 +10,7 @@ from django.db.models import Case, Count, ExpressionWrapper, F, Max, Q, When
|
||||||
from django.db.models.fields import FloatField
|
from django.db.models.fields import FloatField
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from django.utils.translation import gettext as _, gettext_noop
|
from django.utils.translation import gettext as _, gettext_noop
|
||||||
|
from django.http import Http404
|
||||||
|
|
||||||
from judge.models import Problem, Submission
|
from judge.models import Problem, Submission
|
||||||
from judge.ml.collab_filter import CollabFilter
|
from judge.ml.collab_filter import CollabFilter
|
||||||
|
@ -248,3 +250,72 @@ def finished_submission(sub):
|
||||||
keys += ["contest_complete:%d" % participation.id]
|
keys += ["contest_complete:%d" % participation.id]
|
||||||
keys += ["contest_attempted:%d" % participation.id]
|
keys += ["contest_attempted:%d" % participation.id]
|
||||||
cache.delete_many(keys)
|
cache.delete_many(keys)
|
||||||
|
|
||||||
|
|
||||||
|
class RecommendationType(Enum):
    """Sources of problem recommendations understood by the problem feed.

    The integer values are part of the interface: callers may pass the raw
    integer instead of the member and it is converted with
    ``RecommendationType(value)``.
    """

    # Problems returned by ``hot_problems``.
    HOT_PROBLEM = 1
    # "collab_filter" model, DOT / COSINE measure.
    CF_DOT = 2
    CF_COSINE = 3
    # "collab_filter_time" model, DOT / COSINE measure.
    CF_TIME_DOT = 4
    CF_TIME_COSINE = 5
|
||||||
|
|
||||||
|
|
||||||
|
# Return a flat, de-duplicated list of problem ids gathered from every type in recommendation_types
|
||||||
|
def get_user_recommended_problems(
    user_id,
    problem_ids,
    recommendation_types,
    limits,
    shuffle=False,
):
    """Collect recommended problem ids for a user from several recommenders.

    Args:
        user_id: Id handed to ``CollabFilter.user_recommendations``.
        problem_ids: Candidate problem ids. Hot problems are kept only if
            their id is in this collection; the same collection is passed
            to the collaborative-filtering models.
        recommendation_types: ``RecommendationType`` members, or their
            integer values.
        limits: One limit per entry of ``recommendation_types``, paired
            positionally with ``zip`` (surplus items on either side are
            ignored).
        shuffle: When true, shuffle the combined candidates with a seed
            derived from the current date (``%d%m%Y``) before
            de-duplicating.

    Returns:
        A flat list of problem ids without duplicates, in first-occurrence
        order of the (optionally shuffled) combined candidates.

    Raises:
        Http404: If an integer recommendation type is not a valid
            ``RecommendationType`` value.
    """
    cf_model = CollabFilter("collab_filter")
    cf_time_model = CollabFilter("collab_filter_time")

    # (model, similarity measure) for every collaborative-filtering type.
    cf_dispatch = {
        RecommendationType.CF_DOT: (cf_model, cf_model.DOT),
        RecommendationType.CF_COSINE: (cf_model, cf_model.COSINE),
        RecommendationType.CF_TIME_DOT: (cf_time_model, cf_time_model.DOT),
        RecommendationType.CF_TIME_COSINE: (
            cf_time_model,
            cf_time_model.COSINE,
        ),
    }

    def get_problem_ids_from_type(rec_type, limit):
        # Raw integers (e.g. from a query string) are mapped to the enum;
        # bool is excluded so True/False are not treated as type 1/0.
        if isinstance(rec_type, int) and not isinstance(rec_type, bool):
            try:
                rec_type = RecommendationType(rec_type)
            except ValueError:
                raise Http404()

        # Anything that is not a known type yields no recommendations.
        if not isinstance(rec_type, RecommendationType):
            return []

        if rec_type == RecommendationType.HOT_PROBLEM:
            # Build the membership set once instead of once per candidate.
            allowed_ids = set(problem_ids)
            return [
                problem.id
                for problem in hot_problems(timedelta(days=7), limit)
                if problem.id in allowed_ids
            ]

        model_and_measure = cf_dispatch.get(rec_type)
        if model_and_measure is None:
            return []
        model, measure = model_and_measure
        return model.user_recommendations(user_id, problem_ids, measure, limit)

    all_problems = []
    for rec_type, limit in zip(recommendation_types, limits):
        all_problems += get_problem_ids_from_type(rec_type, limit)

    if shuffle:
        # Date-based seed: the order is stable within a day.
        seed = datetime.now().strftime("%d%m%Y")
        random.Random(seed).shuffle(all_problems)

    # De-duplicate while keeping first-occurrence order. Entries may be
    # bare ids or tuples whose second element is the id (presumably
    # ``(score, problem_id)`` pairs from the collaborative-filtering model).
    res = []
    used_pid = set()
    for entry in all_problems:
        pid = entry[1] if isinstance(entry, tuple) else entry
        if pid not in used_pid:
            used_pid.add(pid)
            res.append(pid)
    return res
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import timedelta, datetime
|
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from random import randrange
|
from random import randrange
|
||||||
import random
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
@ -77,6 +75,8 @@ from judge.utils.problems import (
|
||||||
user_attempted_ids,
|
user_attempted_ids,
|
||||||
user_completed_ids,
|
user_completed_ids,
|
||||||
get_related_problems,
|
get_related_problems,
|
||||||
|
get_user_recommended_problems,
|
||||||
|
RecommendationType,
|
||||||
)
|
)
|
||||||
from judge.utils.strings import safe_float_or_none, safe_int_or_none
|
from judge.utils.strings import safe_float_or_none, safe_int_or_none
|
||||||
from judge.utils.tickets import own_ticket_filter
|
from judge.utils.tickets import own_ticket_filter
|
||||||
|
@ -834,24 +834,34 @@ class ProblemFeed(ProblemList, FeedView):
|
||||||
title = _("Problem feed")
|
title = _("Problem feed")
|
||||||
feed_type = None
|
feed_type = None
|
||||||
|
|
||||||
# arr = [[], [], ..]
|
def get_recommended_problem_ids(self, queryset):
|
||||||
def merge_recommendation(self, arr):
|
user_id = self.request.profile.id
|
||||||
seed = datetime.now().strftime("%d%m%Y")
|
problem_ids = queryset.values_list("id", flat=True)
|
||||||
merged_array = []
|
rec_types = [
|
||||||
for a in arr:
|
RecommendationType.CF_DOT,
|
||||||
merged_array += a
|
RecommendationType.CF_COSINE,
|
||||||
random.Random(seed).shuffle(merged_array)
|
RecommendationType.CF_TIME_DOT,
|
||||||
|
RecommendationType.CF_TIME_COSINE,
|
||||||
|
RecommendationType.HOT_PROBLEM,
|
||||||
|
]
|
||||||
|
limits = [100, 100, 100, 100, 20]
|
||||||
|
shuffle = True
|
||||||
|
|
||||||
res = []
|
allow_debug_type = (
|
||||||
used_pid = set()
|
self.request.user.is_impersonate or self.request.user.is_superuser
|
||||||
|
)
|
||||||
|
if allow_debug_type and "debug_type" in self.request.GET:
|
||||||
|
try:
|
||||||
|
debug_type = int(self.request.GET.get("debug_type"))
|
||||||
|
except ValueError:
|
||||||
|
raise Http404()
|
||||||
|
rec_types = [debug_type]
|
||||||
|
limits = [100]
|
||||||
|
shuffle = False
|
||||||
|
|
||||||
for obj in merged_array:
|
return get_user_recommended_problems(
|
||||||
if type(obj) == tuple:
|
user_id, problem_ids, rec_types, limits, shuffle
|
||||||
obj = obj[1]
|
)
|
||||||
if obj not in used_pid:
|
|
||||||
res.append(obj)
|
|
||||||
used_pid.add(obj)
|
|
||||||
return res
|
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
if self.feed_type == "volunteer":
|
if self.feed_type == "volunteer":
|
||||||
|
@ -885,40 +895,8 @@ class ProblemFeed(ProblemList, FeedView):
|
||||||
if not settings.ML_OUTPUT_PATH or not user:
|
if not settings.ML_OUTPUT_PATH or not user:
|
||||||
return queryset.order_by("?").add_i18n_name(self.request.LANGUAGE_CODE)
|
return queryset.order_by("?").add_i18n_name(self.request.LANGUAGE_CODE)
|
||||||
|
|
||||||
cf_model = CollabFilter("collab_filter")
|
q = self.get_recommended_problem_ids(queryset)
|
||||||
cf_time_model = CollabFilter("collab_filter_time")
|
|
||||||
|
|
||||||
queryset = queryset.values_list("id", flat=True)
|
|
||||||
hot_problems_recommendations = [
|
|
||||||
problem.id
|
|
||||||
for problem in hot_problems(timedelta(days=7), 20)
|
|
||||||
if problem.id in set(queryset)
|
|
||||||
]
|
|
||||||
|
|
||||||
q = self.merge_recommendation(
|
|
||||||
[
|
|
||||||
cf_model.user_recommendations(user, queryset, cf_model.DOT, 100),
|
|
||||||
cf_model.user_recommendations(
|
|
||||||
user,
|
|
||||||
queryset,
|
|
||||||
cf_model.COSINE,
|
|
||||||
100,
|
|
||||||
),
|
|
||||||
cf_time_model.user_recommendations(
|
|
||||||
user,
|
|
||||||
queryset,
|
|
||||||
cf_time_model.COSINE,
|
|
||||||
100,
|
|
||||||
),
|
|
||||||
cf_time_model.user_recommendations(
|
|
||||||
user,
|
|
||||||
queryset,
|
|
||||||
cf_time_model.DOT,
|
|
||||||
100,
|
|
||||||
),
|
|
||||||
hot_problems_recommendations,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
queryset = Problem.objects.filter(id__in=q)
|
queryset = Problem.objects.filter(id__in=q)
|
||||||
queryset = queryset.add_i18n_name(self.request.LANGUAGE_CODE)
|
queryset = queryset.add_i18n_name(self.request.LANGUAGE_CODE)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue