Add ML to problem feed

2022-04-11 21:18:01 -05:00 · 2022-04-11 21:18:01 -05:00 · 2fe571379c
commit 2fe571379c
parent 34523ab53f
8 changed files with 157 additions and 6 deletions
--- a/judge/management/commands/addjudge.py
+++ b/judge/management/commands/addjudge.py
@ -15,3 +15,4 @@ class Command(BaseCommand):
        judge.name = options['name']
        judge.auth_key = options['auth_key']
        judge.save()
--- a/judge/management/commands/generate_data.py
+++ b/judge/management/commands/generate_data.py
@ -0,0 +1,49 @@
 from django.core.management.base import BaseCommand
 from judge.models import *
 from collections import defaultdict
 import csv
 import os
 from django.conf import settings
 def gen_submissions():
    """Write one ``(uid, pid)`` row per distinct problem each user submitted to.

    The output file is ``submissions.csv`` inside ``settings.ML_DATA_PATH``.
    For every profile, submissions are walked newest first (``-date``) and a
    problem id is written only the first time it is seen for that user.
    """
    headers = ['uid', 'pid']
    path = os.path.join(settings.ML_DATA_PATH, 'submissions.csv')
    # The csv module requires files passed to csv.writer to be opened with
    # newline=''; otherwise platforms that translate line endings emit an
    # extra blank line after every row.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        f = csv.writer(csvfile)
        f.writerow(headers)
        for u in Profile.objects.all():
            used = set()
            print('Processing user', u.id)
            # Read only the foreign-key column instead of s.problem.id, which
            # would fetch a whole Problem row for every submission.
            # NOTE(review): assumes Submission.problem is a ForeignKey, so the
            # column is exposed as problem_id -- confirm against the model.
            pids = (Submission.objects.filter(user=u)
                    .order_by('-date')
                    .values_list('problem_id', flat=True))
            for pid in pids:
                if pid not in used:
                    used.add(pid)
                    f.writerow([u.id, pid])
 def gen_users():
    """Dump every profile to ``profiles.csv`` inside ``settings.ML_DATA_PATH``.

    Columns are ``uid``, ``username``, ``rating`` and ``points``; the last one
    is filled from the profile's ``performance_points``.
    """
    headers = ['uid', 'username', 'rating', 'points']
    path = os.path.join(settings.ML_DATA_PATH, 'profiles.csv')
    # newline='' is required by the csv module for writer targets, and an
    # explicit UTF-8 encoding keeps non-ASCII usernames from failing (or being
    # written differently) depending on the server's locale.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        f = csv.writer(csvfile)
        f.writerow(headers)
        for u in Profile.objects.all():
            f.writerow([u.id, u.username, u.rating, u.performance_points])
 def gen_problems():
    """Dump every problem to ``problems.csv`` inside ``settings.ML_DATA_PATH``.

    Columns are ``pid``, ``code``, ``name``, ``points`` and ``url``; the URL is
    the fixed prefix ``lqdoj.edu.vn/problem/`` followed by the problem code.
    """
    headers = ['pid', 'code', 'name', 'points', 'url']
    path = os.path.join(settings.ML_DATA_PATH, 'problems.csv')
    # newline='' is required by the csv module for writer targets, and an
    # explicit UTF-8 encoding keeps non-ASCII problem names from failing (or
    # being written differently) depending on the server's locale.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        f = csv.writer(csvfile)
        f.writerow(headers)
        for p in Problem.objects.all():
            f.writerow([p.id, p.code, p.name, p.points, 'lqdoj.edu.vn/problem/' + p.code])
 class Command(BaseCommand):
    """Management command that runs the ML data exporters of this module."""

    help = 'generate data for ML'

    def handle(self, *args, **options):
        """Run the exporters in order: users, then problems, then submissions."""
        for export in (gen_users, gen_problems, gen_submissions):
            export()
--- a/judge/ml/collab_filter.py
+++ b/judge/ml/collab_filter.py
@ -0,0 +1,64 @@
 import numpy as np
 from django.conf import settings
 import os
 class CollabFilter:
    """Recommender backed by precomputed user and problem embedding matrices.

    Both matrices are read from ``collab_filter/embeddings.npz`` under
    ``settings.ML_OUTPUT_PATH``. Rows are looked up directly by ``user.id`` and
    ``problem.id``.
    """
    DOT = 'dot'
    COSINE = 'cosine'

    def __init__(self):
        """Load the two embedding matrices from the ``.npz`` archive."""
        path = os.path.join(settings.ML_OUTPUT_PATH, 'collab_filter/embeddings.npz')
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which is only safe while this file is produced by a trusted
        # pipeline. Drop it if the arrays are plain numeric arrays.
        # The context manager closes the archive's file handle once both
        # arrays have been read into memory.
        with np.load(path, allow_pickle=True) as embeddings:
            # The archive must hold exactly two arrays: the first is used as
            # the user matrix, the second as the problem matrix.
            arr0, arr1 = embeddings.files
            self.user_embeddings = embeddings[arr0]
            self.problem_embeddings = embeddings[arr1]

    @staticmethod
    def _normalize(vectors):
        """Scale vectors to unit length along the last axis.

        All-zero vectors are returned unchanged instead of dividing by zero,
        so they score 0 rather than NaN under cosine similarity.
        """
        norms = np.linalg.norm(vectors, axis=-1, keepdims=True)
        return vectors / np.where(norms == 0, 1, norms)

    @staticmethod
    def _rank(scores, candidates, limit):
        """Pair each candidate with its score and sort by score, best first.

        Candidates whose ``id`` falls outside ``scores`` are skipped. Sorting
        uses the score only, so tied candidates keep their input order and the
        candidate objects themselves are never compared.
        """
        size = len(scores)
        res = [(scores[c.id], c) for c in candidates if 0 <= c.id < size]
        res.sort(key=lambda pair: pair[0], reverse=True)
        return res[:limit]

    def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
        """Computes the scores of the candidates given a query.

        Args:
            query_embedding: a vector of shape [k], representing the query
                embedding.
            item_embeddings: a matrix of shape [N, k], such that row i is the
                embedding of item i.
            measure: a string specifying the similarity measure to be used.
                Can be either DOT or COSINE.

        Returns:
            scores: a vector of shape [N], such that scores[i] is the score of
                item i.
        """
        u = query_embedding
        V = item_embeddings
        if measure == self.COSINE:
            V = self._normalize(V)
            u = self._normalize(u)
        return u.dot(V.T)

    def user_recommendations(self, user, problems, measure=DOT, limit=None):
        """Rank ``problems`` for ``user``.

        Args:
            user: object whose ``id`` selects the row of the user matrix. An
                id past the end of the matrix falls back to row 0.
            problems: iterable of objects with an ``id``; those without a row
                in the problem matrix are left out.
            measure: DOT or COSINE.
            limit: maximum number of results, or None for all of them.

        Returns:
            A list of ``(score, problem)`` tuples, highest score first.
        """
        uid = user.id
        if uid >= len(self.user_embeddings):
            uid = 0
        scores = self.compute_scores(
            self.user_embeddings[uid], self.problem_embeddings, measure)
        return self._rank(scores, problems, limit)

    def problems_neighbors(self, problem, problemset, measure=DOT, limit=None):
        """Rank ``problemset`` by similarity to ``problem``.

        Args:
            problem: object whose ``id`` selects the query row of the problem
                matrix.
            problemset: iterable of objects with an ``id``; those without a
                row in the problem matrix are left out.
            measure: DOT or COSINE.
            limit: maximum number of results, or None for all of them.

        Returns:
            A list of ``(score, problem)`` tuples, highest score first, or
            None when ``problem`` itself has no row in the problem matrix.
        """
        pid = problem.id
        if pid >= len(self.problem_embeddings):
            return None
        scores = self.compute_scores(
            self.problem_embeddings[pid], self.problem_embeddings, measure)
        return self._rank(scores, problemset, limit)
--- a/judge/utils/problems.py
+++ b/judge/utils/problems.py
@ -115,7 +115,7 @@ def hot_problems(duration, limit):
    qs = cache.get(cache_key)
    if qs is None:
        qs = Problem.get_public_problems() \
-                    .filter(submission__date__gt=timezone.now() - duration, points__gt=3, points__lt=25)
+                    .filter(submission__date__gt=timezone.now() - duration)
        qs0 = qs.annotate(k=Count('submission__user', distinct=True)).order_by('-k').values_list('k', flat=True)
        if not qs0:
@ -141,7 +141,7 @@ def hot_problems(duration, limit):
        qs = qs.filter(unique_user_count__gt=max(mx / 3.0, 1))
        qs = qs.annotate(ordering=ExpressionWrapper(
-            0.5 * F('points') * (0.4 * F('ac_volume') / F('submission_volume') + 0.6 * F('ac_rate')) +
+            0.02 * F('points') * (0.4 * F('ac_volume') / F('submission_volume') + 0.6 * F('ac_rate')) +
            100 * e ** (F('unique_user_count') / mx), output_field=FloatField(),
        )).order_by('-ordering').defer('description')[:limit]
--- a/judge/views/problem.py
+++ b/judge/views/problem.py
@ -39,6 +39,7 @@ from judge.utils.strings import safe_float_or_none, safe_int_or_none
 from judge.utils.tickets import own_ticket_filter
 from judge.utils.views import QueryStringSortMixin, SingleObjectFormView, TitleMixin, generic_message
 from judge.views.blog import FeedView
 from judge.ml.collab_filter import CollabFilter
 def get_contest_problem(problem, profile):
@ -611,10 +612,45 @@ class ProblemFeed(FeedView):
                                        .values_list('problem__id', flat=True))
        return queryset.distinct()
    # arr = [[], [], ..]
    def merge_recommendation(self, arr):
        """Interleave several ranked lists into one de-duplicated list.

        Args:
            arr: a list of ranked lists. Entries are either tuples, of which
                element 1 is kept (e.g. ``(score, problem)``), or bare
                objects that are kept as they are.

        The lists are visited round-robin. A tuple entry is consumed on every
        round; a bare entry is consumed only on every third round, so those
        lists contribute less often.

        Returns:
            A list of the merged objects, each appearing at most once, in the
            order they were first reached.
        """
        # Materialise each input once so that lazily evaluated sequences are
        # not indexed repeatedly.
        lists = [list(items) for items in arr]
        idx = [0] * len(lists)
        res = []
        used = set()
        cnt = 0
        stop = False
        while not stop:
            cnt += 1
            stop = True
            for i, items in enumerate(lists):
                if idx[i] >= len(items):
                    continue
                # Any list with entries left keeps the loop going, including
                # one that is throttled this round; otherwise its remaining
                # entries would be dropped once the other lists run out.
                stop = False
                obj = items[idx[i]]
                if isinstance(obj, tuple):
                    obj = obj[1]
                elif cnt % 3 != 0:  # hot problems appear less
                    continue
                if obj not in used:
                    res.append(obj)
                    used.add(obj)
                idx[i] += 1
        return res
    def get_queryset(self):
        """Return the problems to show in the feed.

        When ``settings.ML_OUTPUT_PATH`` is falsy or the request has no
        profile, the unsolved queryset is returned in random order. Otherwise
        the result is a merged list built from the top 100 dot-product
        recommendations, the top 100 cosine recommendations and 10 hot
        problems from the last 7 days.
        """
        unsolved = self.get_unsolved_queryset()
        profile = self.request.profile
        if not (settings.ML_OUTPUT_PATH and profile):
            return unsolved.order_by('?')

        model = CollabFilter()
        sources = [
            model.user_recommendations(profile, unsolved, measure, 100)
            for measure in (model.DOT, model.COSINE)
        ]
        sources.append(hot_problems(timedelta(days=7), 10))
        return self.merge_recommendation(sources)
    def get_context_data(self, **kwargs):
        context = super(ProblemFeed, self).get_context_data(**kwargs)
        context['first_page_href'] = self.request.path
--- a/requirements.txt
+++ b/requirements.txt
@ -36,3 +36,4 @@ redis
 lupa
 websocket-client
 python-memcached
 numpy
--- a/templates/blog/list.html
+++ b/templates/blog/list.html
@ -77,7 +77,7 @@
            })
            $('.blog-description').each(function() {
                if ($(this).prop('scrollHeight') > $(this).height() ) {
-                    $(this).parent().css('background-image', '-webkit-linear-gradient(bottom, lightgray, lightgray 3%, transparent 8%, transparent 100%)');
+                    $(this).parent().css('background-image', '-webkit-linear-gradient(bottom, gray, lightgray 3%, transparent 8%, transparent 100%)');
                    $(this).parent().css('padding-bottom', '0');
                    $(this).css('cursor', 'pointer');
                }
--- a/templates/problem/feed.html
+++ b/templates/problem/feed.html
@ -17,7 +17,7 @@
            <i class="fa fa-tag"></i>
            {% for type in problem.types_list %}
                <span class="type-tag">{{ type }}</span>{% if not loop.last %}, {% endif %}
-            {% endfor %}
+            {% endfor %}, {{problem.points | int}}
        </div>
    {% endif %}
    <div class='blog-description content-description'>