Optimize problem feed DB query

This commit is contained in:
cuom1999 2022-10-28 21:29:48 -05:00
parent cffb76e220
commit 45b844d6c9
6 changed files with 20 additions and 47 deletions

View file

@ -58,6 +58,7 @@ class UserRoom(models.Model):
class Meta:
unique_together = ("user", "room")
class Ignore(models.Model):
user = models.OneToOneField(
Profile,

View file

@ -1,14 +0,0 @@
import functools
import time
def timeit(method):
    """Decorate *method* so callers can opt in to recording its run time.

    The returned wrapper forwards every positional and keyword argument to
    *method* unchanged and returns its result. When the caller passes a
    ``log_time`` keyword (a mutable mapping), the elapsed time in whole
    milliseconds is stored in that mapping under ``log_name`` if given,
    otherwise under the upper-cased name of *method*.

    Note that ``log_time`` / ``log_name`` are also forwarded to *method*,
    so the decorated callable must accept them (e.g. via ``**kwargs``).
    Nothing is recorded when *method* raises.
    """

    # Preserve __name__/__doc__ of the wrapped callable so introspection,
    # logging and stacked decorators keep seeing the real function.
    @functools.wraps(method)
    def timed(*args, **kw):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed (or go negative) when the system clock is adjusted.
        started = time.perf_counter()
        result = method(*args, **kw)
        finished = time.perf_counter()
        if "log_time" in kw:
            name = kw.get("log_name", method.__name__.upper())
            kw["log_time"][name] = int((finished - started) * 1000)
        return result

    return timed

View file

@ -1090,6 +1090,7 @@ urlpatterns = [
]
),
),
# url("__debug__/", include("debug_toolbar.urls")), # debug_toolbar use
] + url_static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
favicon_paths = [

View file

@ -1,7 +1,6 @@
import numpy as np
from django.conf import settings
import os
from dmoj.decorators import timeit
class CollabFilter:
@ -9,7 +8,6 @@ class CollabFilter:
COSINE = "cosine"
# name = 'collab_filter' or 'collab_filter_time'
@timeit
def __init__(self, name, **kwargs):
embeddings = np.load(
os.path.join(settings.ML_OUTPUT_PATH, name + "/embeddings.npz"),
@ -38,7 +36,6 @@ class CollabFilter:
scores = u.dot(V.T)
return scores
@timeit
def user_recommendations(self, user, problems, measure=DOT, limit=None, **kwargs):
uid = user.id
if uid >= len(self.user_embeddings):
@ -48,10 +45,10 @@ class CollabFilter:
)
res = [] # [(score, problem)]
for problem in problems:
pid = problem.id
for pid in problems:
# pid = problem.id
if pid < len(scores):
res.append((scores[pid], problem))
res.append((scores[pid], pid))
res.sort(reverse=True, key=lambda x: x[0])
return res[:limit]

View file

@ -115,7 +115,6 @@ class ProblemMixin(object):
try:
return super(ProblemMixin, self).get(request, *args, **kwargs)
except Http404 as e:
print(e)
return self.no_such_problem()
@ -558,7 +557,6 @@ class ProblemList(QueryStringSortMixin, TitleMixin, SolvedProblemMixin, ListView
)
if self.org_query:
self.org_query = self.get_org_query(self.org_query)
print(self.org_query)
queryset = queryset.filter(
Q(organizations__in=self.org_query)
| Q(contests__contest__organizations__in=self.org_query)
@ -782,9 +780,6 @@ class ProblemList(QueryStringSortMixin, TitleMixin, SolvedProblemMixin, ListView
return HttpResponseRedirect(request.get_full_path())
cf_logger = logging.getLogger("judge.ml.collab_filter")
class ProblemFeed(ProblemList):
model = Problem
context_object_name = "problems"
@ -859,56 +854,49 @@ class ProblemFeed(ProblemList):
if not settings.ML_OUTPUT_PATH or not user:
return queryset.order_by("?")
# Logging
log_data = {
"user": self.request.user.username,
"cf": {
"dot": {},
"cosine": {},
},
"cf_time": {"dot": {}, "cosine": {}},
}
cf_model = CollabFilter("collab_filter")
cf_time_model = CollabFilter("collab_filter_time")
cf_model = CollabFilter("collab_filter", log_time=log_data["cf"])
cf_time_model = CollabFilter("collab_filter_time", log_time=log_data["cf_time"])
queryset = queryset.values_list("id", flat=True)
hot_problems_recommendations = [
problem
problem.id
for problem in hot_problems(timedelta(days=7), 20)
if problem in queryset
if problem.id in set(queryset)
]
q = self.merge_recommendation(
[
cf_model.user_recommendations(
user, queryset, cf_model.DOT, 100, log_time=log_data["cf"]["dot"]
),
cf_model.user_recommendations(user, queryset, cf_model.DOT, 100),
cf_model.user_recommendations(
user,
queryset,
cf_model.COSINE,
100,
log_time=log_data["cf"]["cosine"],
),
cf_time_model.user_recommendations(
user,
queryset,
cf_time_model.COSINE,
100,
log_time=log_data["cf_time"]["cosine"],
),
cf_time_model.user_recommendations(
user,
queryset,
cf_time_model.DOT,
100,
log_time=log_data["cf_time"]["dot"],
),
hot_problems_recommendations,
]
)
queryset = Problem.objects.filter(id__in=q)
queryset = queryset.add_i18n_name(self.request.LANGUAGE_CODE)
cf_logger.info(log_data)
return q
# Reorder results from database to correct positions
res = [None for _ in range(len(q))]
position_in_q = {i: idx for idx, i in enumerate(q)}
for problem in queryset:
res[position_in_q[problem.id]] = problem
return res
def get_context_data(self, **kwargs):
context = super(ProblemFeed, self).get_context_data(**kwargs)

View file

@ -26,7 +26,7 @@
<div class="blog-box">
<h3 class="problem-feed-name">
<a href="{{ url('problem_detail', problem.code) }}">
{{ problem.name }}
{{ problem.i18n_name }}
</a>
{% if problem.id in completed_problem_ids %}
<i class="solved-problem-color fa fa-check-circle"></i>