Add ML to problem feed
This commit is contained in:
parent
34523ab53f
commit
2fe571379c
8 changed files with 157 additions and 6 deletions
|
@ -15,3 +15,4 @@ class Command(BaseCommand):
|
|||
judge.name = options['name']
|
||||
judge.auth_key = options['auth_key']
|
||||
judge.save()
|
||||
|
||||
|
|
49
judge/management/commands/generate_data.py
Normal file
49
judge/management/commands/generate_data.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
from judge.models import *
|
||||
from collections import defaultdict
|
||||
import csv
|
||||
import os
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
def gen_submissions():
    """Export the (user, problem) interaction pairs to ``submissions.csv``.

    The file is written to ``settings.ML_DATA_PATH`` with the header
    ``uid,pid``. For every profile, each problem the user has submitted to
    is emitted exactly once, in order of the user's most recent submission
    to that problem (newest first).
    """
    path = os.path.join(settings.ML_DATA_PATH, 'submissions.csv')
    # newline='' is required by the csv module so that it controls line
    # endings itself; an explicit encoding keeps the output independent of
    # the server locale.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['uid', 'pid'])

        for u in Profile.objects.all():
            seen = set()
            print('Processing user', u.id)
            # Only the problem key is needed, so fetch just that column
            # instead of loading a related Problem row per submission.
            problem_ids = (Submission.objects.filter(user=u)
                           .order_by('-date')
                           .values_list('problem_id', flat=True))
            for pid in problem_ids:
                if pid not in seen:
                    seen.add(pid)
                    writer.writerow([u.id, pid])
|
||||
|
||||
def gen_users():
    """Export one row per profile to ``profiles.csv``.

    The file is written to ``settings.ML_DATA_PATH`` with the header
    ``uid,username,rating,points``; the ``points`` column is filled from
    each profile's ``performance_points``.
    """
    path = os.path.join(settings.ML_DATA_PATH, 'profiles.csv')
    # newline='' lets the csv module own the line endings; the explicit
    # encoding keeps non-ASCII usernames from failing on servers whose
    # locale encoding is not UTF-8.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['uid', 'username', 'rating', 'points'])

        for u in Profile.objects.all():
            writer.writerow([u.id, u.username, u.rating, u.performance_points])
|
||||
|
||||
def gen_problems():
    """Export one row per problem to ``problems.csv``.

    The file is written to ``settings.ML_DATA_PATH`` with the header
    ``pid,code,name,points,url``; the ``url`` column is the site prefix
    followed by the problem code.
    """
    path = os.path.join(settings.ML_DATA_PATH, 'problems.csv')
    # newline='' lets the csv module own the line endings; the explicit
    # encoding keeps non-ASCII problem names from failing on servers whose
    # locale encoding is not UTF-8.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['pid', 'code', 'name', 'points', 'url'])

        for p in Problem.objects.all():
            writer.writerow([p.id, p.code, p.name, p.points, 'lqdoj.edu.vn/problem/' + p.code])
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that dumps the CSV inputs used for ML training."""

    help = 'generate data for ML'

    def handle(self, *args, **options):
        """Run every exporter in turn: users, then problems, then submissions."""
        for export in (gen_users, gen_problems, gen_submissions):
            export()
|
64
judge/ml/collab_filter.py
Normal file
64
judge/ml/collab_filter.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import numpy as np
|
||||
from django.conf import settings
|
||||
import os
|
||||
|
||||
|
||||
class CollabFilter:
    """Recommender that scores problems from precomputed embeddings.

    The embeddings are read from ``collab_filter/embeddings.npz`` under
    ``settings.ML_OUTPUT_PATH``. Rows of both matrices are indexed directly
    by database id (``user.id`` / ``problem.id``).
    """

    DOT = 'dot'
    COSINE = 'cosine'

    def __init__(self):
        """Load the user and problem embedding matrices from disk."""
        # NOTE(security): allow_pickle=True makes np.load unpickle object
        # arrays, which can execute arbitrary code. The archive must come
        # from a trusted training pipeline only.
        embeddings = np.load(os.path.join(settings.ML_OUTPUT_PATH, 'collab_filter/embeddings.npz'),
                             allow_pickle=True)
        # The archive is expected to hold exactly two arrays, in the order
        # (user embeddings, problem embeddings).
        arr0, arr1 = embeddings.files
        self.user_embeddings = embeddings[arr0]
        self.problem_embeddings = embeddings[arr1]

    def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
        """Computes the scores of the candidates given a query.

        Args:
          query_embedding: a vector of shape [k], representing the query embedding.
          item_embeddings: a matrix of shape [N, k], such that row i is the embedding
            of item i.
          measure: a string specifying the similarity measure to be used. Can be
            either DOT or COSINE.
        Returns:
          scores: a vector of shape [N], such that scores[i] is the score of item i.
        """
        u = query_embedding
        V = item_embeddings
        if measure == self.COSINE:
            # Zero-norm vectors would otherwise divide by zero and poison
            # the ranking with NaN; leaving them unscaled gives score 0.
            item_norms = np.linalg.norm(V, axis=1, keepdims=True)
            V = V / np.where(item_norms == 0, 1, item_norms)
            query_norm = np.linalg.norm(u)
            if query_norm != 0:
                u = u / query_norm
        return u.dot(V.T)

    @staticmethod
    def _rank(scores, candidates, limit):
        """Pair each candidate with its score and return the best ``limit``.

        Candidates whose id has no row in ``scores`` are dropped. Sorting
        uses the score only, so tied scores never fall through to comparing
        the candidate objects themselves (which may not be orderable); ties
        keep the order in which the candidates were supplied.
        """
        ranked = [(scores[c.id], c) for c in candidates if c.id < len(scores)]
        ranked.sort(key=lambda pair: pair[0], reverse=True)
        return ranked[:limit]

    def user_recommendations(self, user, problems, measure=DOT, limit=None):
        """Rank ``problems`` for ``user``.

        Args:
          user: object with an ``id``; ids outside the embedding table fall
            back to row 0.
          problems: iterable of objects with an ``id``.
          measure: DOT or COSINE.
          limit: maximum number of results, or None for all.
        Returns:
          A list of ``(score, problem)`` tuples, highest score first.
        """
        uid = user.id
        if uid >= len(self.user_embeddings):
            uid = 0
        scores = self.compute_scores(
            self.user_embeddings[uid], self.problem_embeddings, measure)
        return self._rank(scores, problems, limit)

    def problems_neighbors(self, problem, problemset, measure=DOT, limit=None):
        """Rank ``problemset`` by similarity to ``problem``.

        Returns:
          A list of ``(score, problem)`` tuples, highest score first, or
          None when ``problem`` has no row in the embedding table.
        """
        pid = problem.id
        if pid >= len(self.problem_embeddings):
            return None
        scores = self.compute_scores(
            self.problem_embeddings[pid], self.problem_embeddings, measure)
        return self._rank(scores, problemset, limit)
|
|
@ -115,7 +115,7 @@ def hot_problems(duration, limit):
|
|||
qs = cache.get(cache_key)
|
||||
if qs is None:
|
||||
qs = Problem.get_public_problems() \
|
||||
.filter(submission__date__gt=timezone.now() - duration, points__gt=3, points__lt=25)
|
||||
.filter(submission__date__gt=timezone.now() - duration)
|
||||
qs0 = qs.annotate(k=Count('submission__user', distinct=True)).order_by('-k').values_list('k', flat=True)
|
||||
|
||||
if not qs0:
|
||||
|
@ -141,7 +141,7 @@ def hot_problems(duration, limit):
|
|||
qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))
|
||||
|
||||
qs = qs.annotate(ordering=ExpressionWrapper(
|
||||
0.5 * F('points') * (0.4 * F('ac_volume') / F('submission_volume') + 0.6 * F('ac_rate')) +
|
||||
0.02 * F('points') * (0.4 * F('ac_volume') / F('submission_volume') + 0.6 * F('ac_rate')) +
|
||||
100 * e ** (F('unique_user_count') / mx), output_field=FloatField(),
|
||||
)).order_by('-ordering').defer('description')[:limit]
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ from judge.utils.strings import safe_float_or_none, safe_int_or_none
|
|||
from judge.utils.tickets import own_ticket_filter
|
||||
from judge.utils.views import QueryStringSortMixin, SingleObjectFormView, TitleMixin, generic_message
|
||||
from judge.views.blog import FeedView
|
||||
from judge.ml.collab_filter import CollabFilter
|
||||
|
||||
|
||||
def get_contest_problem(problem, profile):
|
||||
|
@ -611,10 +612,45 @@ class ProblemFeed(FeedView):
|
|||
.values_list('problem__id', flat=True))
|
||||
return queryset.distinct()
|
||||
|
||||
# arr = [[], [], ..]
|
||||
def merge_recommendation(self, arr):
    """Interleave several recommendation lists into one de-duplicated list.

    Args:
      arr: a list of sequences. Entries that are tuples contribute their
        second element (the caller passes ``(score, problem)`` pairs);
        non-tuple entries are used as-is but are only taken on every third
        round, so they appear less often.
    Returns:
      A list of the merged objects, each appearing once, in round-robin
      order across the input sequences.
    """
    positions = [0] * len(arr)
    merged = []
    seen = set()
    round_no = 0
    pending = True
    while pending:
        round_no += 1
        pending = False
        for i, source in enumerate(arr):
            if positions[i] >= len(source):
                continue
            # Any unconsumed entry keeps the loop alive, including a
            # throttled one that is skipped this round; otherwise trailing
            # throttled entries would be dropped once the other lists
            # run out.
            pending = True
            item = source[positions[i]]
            if isinstance(item, tuple):
                item = item[1]
            elif round_no % 3 != 0:  # hot problems appear less
                continue
            if item not in seen:
                seen.add(item)
                merged.append(item)
            positions[i] += 1
    return merged
|
||||
|
||||
|
||||
def get_queryset(self):
    """Return the problems to show in the feed for the current request.

    Without a configured ``settings.ML_OUTPUT_PATH`` or without a profile
    on the request, the unsolved problems are returned in random order.
    Otherwise the result is a merged list built from dot-product and
    cosine recommendations (up to 100 each) plus up to 10 hot problems
    from the last 7 days.
    """
    unsolved = self.get_unsolved_queryset()
    profile = self.request.profile
    if not (settings.ML_OUTPUT_PATH and profile):
        return unsolved.order_by('?')

    # NOTE(review): a new CollabFilter is constructed on every call.
    model = CollabFilter()
    by_dot = model.user_recommendations(profile, unsolved, model.DOT, 100)
    by_cosine = model.user_recommendations(profile, unsolved, model.COSINE, 100)
    trending = hot_problems(timedelta(days=7), 10)

    return self.merge_recommendation([by_dot, by_cosine, trending])
|
||||
|
||||
def get_context_data(self, **kwargs):
|
||||
context = super(ProblemFeed, self).get_context_data(**kwargs)
|
||||
context['first_page_href'] = self.request.path
|
||||
|
|
|
@ -36,3 +36,4 @@ redis
|
|||
lupa
|
||||
websocket-client
|
||||
python-memcached
|
||||
numpy
|
|
@ -77,7 +77,7 @@
|
|||
})
|
||||
$('.blog-description').each(function() {
|
||||
if ($(this).prop('scrollHeight') > $(this).height() ) {
|
||||
$(this).parent().css('background-image', '-webkit-linear-gradient(bottom, lightgray, lightgray 3%, transparent 8%, transparent 100%)');
|
||||
$(this).parent().css('background-image', '-webkit-linear-gradient(bottom, gray, lightgray 3%, transparent 8%, transparent 100%)');
|
||||
$(this).parent().css('padding-bottom', '0');
|
||||
$(this).css('cursor', 'pointer');
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
<i class="fa fa-tag"></i>
|
||||
{% for type in problem.types_list %}
|
||||
<span class="type-tag">{{ type }}</span>{% if not loop.last %}, {% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}, {{problem.points | int}}
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class='blog-description content-description'>
|
||||
|
|
Loading…
Reference in a new issue