Add ML to problem feed

This commit is contained in:
cuom1999 2022-04-11 21:18:01 -05:00
parent 34523ab53f
commit 2fe571379c
8 changed files with 157 additions and 6 deletions

64
judge/ml/collab_filter.py Normal file
View file

@ -0,0 +1,64 @@
import numpy as np
from django.conf import settings
import os
class CollabFilter:
DOT = 'dot'
COSINE = 'cosine'
def __init__(self):
embeddings = np.load(os.path.join(settings.ML_OUTPUT_PATH, 'collab_filter/embeddings.npz'),
allow_pickle=True)
arr0, arr1 = embeddings.files
self.user_embeddings = embeddings[arr0]
self.problem_embeddings = embeddings[arr1]
def compute_scores(self, query_embedding, item_embeddings, measure=DOT):
"""Computes the scores of the candidates given a query.
Args:
query_embedding: a vector of shape [k], representing the query embedding.
item_embeddings: a matrix of shape [N, k], such that row i is the embedding
of item i.
measure: a string specifying the similarity measure to be used. Can be
either DOT or COSINE.
Returns:
scores: a vector of shape [N], such that scores[i] is the score of item i.
"""
u = query_embedding
V = item_embeddings
if measure == self.COSINE:
V = V / np.linalg.norm(V, axis=1, keepdims=True)
u = u / np.linalg.norm(u)
scores = u.dot(V.T)
return scores
def user_recommendations(self, user, problems, measure=DOT, limit=None):
uid = user.id
if uid >= len(self.user_embeddings):
uid = 0
scores = self.compute_scores(
self.user_embeddings[uid], self.problem_embeddings, measure)
res = [] # [(score, problem)]
for problem in problems:
pid = problem.id
if pid < len(scores):
res.append((scores[pid], problem))
res.sort(reverse=True)
return res[:limit]
# return a list of pid
def problems_neighbors(self, problem, problemset, measure=DOT, limit=None):
pid = problem.id
if pid >= len(self.problem_embeddings):
return None
scores = self.compute_scores(
self.problem_embeddings[pid], self.problem_embeddings, measure)
res = []
for p in problemset:
if p.id < len(scores):
res.append((scores[p.id], p))
res.sort(reverse=True)
return res[:limit]