The WALS algorithm requires periodic updates of its latent factor matrices. Here’s how to perform a standard update:
import torch
import torch.nn as nn
from implicit.als import AlternatingLeastSquares
from transformers import AutoModel, AutoTokenizer


class HybridRecoModel(nn.Module):
    """Score user/item pairs from WALS user factors and RoBERTa item embeddings.

    Both inputs are linearly projected into a shared 128-dimensional space
    and the score is the sum over ``dim=1`` of their element-wise product.

    Args:
        wals_factors_dim: Size of the incoming WALS user factor vectors.
        roberta_dim: Size of the incoming RoBERTa item embeddings.
    """

    def __init__(self, wals_factors_dim=50, roberta_dim=768):
        # The constructor must be named ``__init__`` and must call
        # ``nn.Module.__init__`` so the Linear layers below get registered.
        super().__init__()
        self.wals_proj = nn.Linear(wals_factors_dim, 128)
        self.roberta_proj = nn.Linear(roberta_dim, 128)
        # NOTE: the original assigned ``self.score = nn.DotProduct()``, but
        # torch.nn provides no such layer, so construction would raise
        # AttributeError. ``forward`` computes the dot product inline.

    def forward(self, user_wals_vec, item_roberta_vec):
        """Return one score per row for the given user and item vectors.

        Assumes both inputs are 2-D ``(batch, dim)`` tensors — TODO confirm
        against callers; the reduction runs over ``dim=1``.
        """
        u = self.wals_proj(user_wals_vec)
        i = self.roberta_proj(item_roberta_vec)
        return (u * i).sum(dim=1)


# Pretrained RoBERTa encoder and its tokenizer, loaded once at import time.
model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
roberta = AutoModel.from_pretrained(model_name)

# NOTE(review): the original text had the stray words "wals roberta sets upd"
# at this point — presumably a garbled "WALS + RoBERTa setup" heading; confirm.
def get_roberta_embedding(text):
    """Embed ``text`` with RoBERTa and return the first-token vector.

    The text is tokenized with truncation at 512 tokens, passed through the
    module-level ``roberta`` model with gradient tracking disabled, and the
    hidden state at sequence position 0 is returned as a NumPy array.

    Args:
        text: Input passed straight to the module-level ``tokenizer``.

    Returns:
        NumPy array taken from ``last_hidden_state[:, 0, :]`` — presumably
        shaped ``(batch, hidden_size)``; verify against the model config.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: no autograd graph is needed, which also lets the
    # result be converted with ``.numpy()`` directly.
    with torch.no_grad():
        outputs = roberta(**inputs)
    # Use CLS token embedding or mean pooling; this implementation takes the
    # first-token (CLS-position) embedding.
    cls_embedding = outputs.last_hidden_state[:, 0, :].numpy()
    return cls_embedding
# Dependencies (install from a shell before running this code):
#   pip install tensorflow    # or PyTorch
#   pip install transformers  # Hugging Face for RoBERTa
#   pip install implicit      # Fast WALS implementation (Python)
#   pip install numpy pandas scikit-learn

# Sample catalogue: one dict per movie. The original literal was missing the
# braces around each record, which is a syntax error inside a list.
movies = [
    {
        "title": "Inception",
        "description": "A thief who steals secrets...",
        "movie_id": "1",
    },
    {
        "title": "The Matrix",
        "description": "A computer hacker learns...",
        "movie_id": "2",
    },
]
Key parameters for WALS setup: