Source code for matching.scoring

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Shared SentenceTransformer instance. Starts as None and is populated lazily
# by get_embedding_model() on its first call, not at import time.
_embedding_model = None


def get_embedding_model():
    """
    Return the shared ``SentenceTransformer``, creating it on first use.

    The ``'all-MiniLM-L6-v2'`` model is cached in the module-level
    ``_embedding_model`` variable, so it is constructed at most once per
    process. Construction is deferred until the first call (rather than
    happening at import time) because the model is roughly 80 MB and takes
    several seconds to initialise.

    :returns: The cached ``SentenceTransformer`` model instance.
    :rtype: sentence_transformers.SentenceTransformer
    """
    global _embedding_model

    # Fast path: the model has already been built by an earlier call.
    if _embedding_model is not None:
        return _embedding_model

    _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    return _embedding_model
# ---------------------------------------------------------------------------
# Score breakdown (max 100 points):
#
#   Profile text similarity:   70 points  (semantic embedding of combined profile)
#   Year compatibility:        10 points
#   Special category overlap:  20 points
# ---------------------------------------------------------------------------
def build_profile_text(user):
    """
    Join a user's semantic profile fields into one labelled text block.

    Each non-empty field becomes one ``"<Label>: <value>"`` line; the lines
    are joined with newlines. ``major`` and ``minor`` are treated as
    sequences of strings and rendered comma-separated, while ``hobbies``,
    ``clubs`` and ``goals`` are inserted as-is. Falsy fields are skipped
    entirely so they do not dilute the text handed to the embedding model.

    :param user: The user whose profile fields are being combined.
    :type user: users.models.User
    :returns: Newline-separated labelled fields, or an empty string when
        every field is blank.
    :rtype: str

    Example::

        >>> build_profile_text(user)
        'Major: Computer Science\\nHobbies and interests: chess, hiking\\nGoals: SWE internship'
    """
    # (label, raw value, whether the value is a sequence to be comma-joined)
    field_specs = (
        ("Major", user.major, True),
        ("Minor", user.minor, True),
        ("Hobbies and interests", user.hobbies, False),
        ("Clubs and activities", user.clubs, False),
        ("Goals", user.goals, False),
    )

    lines = []
    for label, value, is_sequence in field_specs:
        if not value:
            continue
        rendered = ", ".join(value) if is_sequence else value
        lines.append(f"{label}: {rendered}")
    return "\n".join(lines)
def calculate_match_score(mentee, mentor):
    """
    Compute a compatibility score between a mentee and a mentor.

    The total is the sum of three sub-scores (nominal maximum 100):

    1. **Profile text similarity** (up to 70 points): the value returned by
       ``_text_similarity`` for the two :func:`build_profile_text` strings,
       multiplied by 70.
    2. **Year compatibility** (up to 10 points): the result of
       :func:`calculate_year_compatibility`.
    3. **Special category overlap** (up to 20 points): 5 points for each
       background category shared by both users (``international``,
       ``commuter``, ``firstgen``, ``outofstate``, ``transfer``), capped
       at 20.

    :param mentee: The mentee side of the pairing.
    :type mentee: users.models.User
    :param mentor: The mentor side of the pairing.
    :type mentor: users.models.User
    :returns: Total compatibility score, rounded to two decimal places.
    :rtype: float

    Example::

        >>> calculate_match_score(mentee_user, mentor_user)
        73.45
    """
    # 1. Profile text similarity, scaled to 70 points.
    mentee_profile = build_profile_text(mentee)
    mentor_profile = build_profile_text(mentor)
    text_points = _text_similarity(mentee_profile, mentor_profile) * 70

    # 2. Year compatibility, already expressed in points (max 10).
    year_points = calculate_year_compatibility(mentee.year, mentor.year)

    # 3. Shared background categories: 5 points each, capped at 20.
    #    An empty set on either side yields an empty intersection, i.e. 0.
    shared_categories = _special_categories(mentee) & _special_categories(mentor)
    category_points = min(len(shared_categories) * 5.0, 20.0)

    total = text_points + year_points + category_points
    return round(float(total), 2)
# ---------------------------------------------------------------------------
# Year compatibility
# ---------------------------------------------------------------------------
def calculate_year_compatibility(mentee_year, mentor_year):
    """
    Score the academic-year gap between a mentee and a mentor.

    Both inputs are normalised with ``_parse_year`` first. If either one
    comes back as ``0`` (missing or unparseable), a neutral ``5.0`` is
    returned. Otherwise the score depends on how many years the mentor is
    ahead of the mentee (``gap = mentor - mentee``):

    +------------------------+--------+
    | Condition              | Points |
    +========================+========+
    | Year 1 mentee / Year 3 | 10.0   |
    +------------------------+--------+
    | Gap ≥ 3 years          | 9.0    |
    | (incl. Year 1 / Year 4)|        |
    +------------------------+--------+
    | Gap = 2 years          | 8.0    |
    +------------------------+--------+
    | Gap = 1 year           | 6.0    |
    +------------------------+--------+
    | Same year (gap = 0)    | 3.0    |
    +------------------------+--------+
    | Mentor is junior       | 1.0    |
    +------------------------+--------+
    | Either year unknown    | 5.0    |
    +------------------------+--------+

    :param mentee_year: Academic year of the mentee (integer or string).
    :type mentee_year: int or str
    :param mentor_year: Academic year of the mentor (integer or string).
    :type mentor_year: int or str
    :returns: Year-compatibility sub-score in the range ``[1.0, 10.0]``.
    :rtype: float

    Example::

        >>> calculate_year_compatibility(1, 3)
        10.0
        >>> calculate_year_compatibility(2, 2)
        3.0
    """
    mentee_level = _parse_year(mentee_year)
    mentor_level = _parse_year(mentor_year)

    # 0 is the "unknown" marker: fall back to a neutral mid-range score.
    if 0 in (mentee_level, mentor_level):
        return 5.0

    # The first-year / third-year pairing is singled out as the best match.
    if (mentee_level, mentor_level) == (1, 3):
        return 10.0

    years_ahead = mentor_level - mentee_level
    if years_ahead >= 3:
        # Also covers the first-year / fourth-year pairing (gap of 3).
        return 9.0
    if years_ahead < 0:
        # Mentor is junior to the mentee.
        return 1.0

    # Only gaps of 0, 1 or 2 remain at this point.
    return {2: 8.0, 1: 6.0, 0: 3.0}[years_ahead]
# --------------------------------------------------------------------------- # Private helpers # --------------------------------------------------------------------------- def _text_similarity(text_a, text_b): """ Compute the cosine similarity between two text strings using sentence embeddings. Encodes each string independently with the shared ``SentenceTransformer`` model (loaded via :func:`get_embedding_model`) and returns the cosine similarity of the resulting vectors. Negative raw similarity values are clamped to ``0.0``. Returns ``0.0`` immediately if either input is empty or falsy, avoiding an unnecessary model call. :param text_a: First text string to compare. :type text_a: str :param text_b: Second text string to compare. :type text_b: str :returns: Cosine similarity in the range ``[0.0, 1.0]``. :rtype: float .. note:: This is an internal helper. Call via :func:`calculate_match_score` rather than directly. Example:: >>> _text_similarity('I enjoy chess and hiking', 'I like outdoor sports and board games') 0.61 """ if not text_a or not text_b: return 0.0 model = get_embedding_model() emb_a = model.encode([text_a]) emb_b = model.encode([text_b]) return float(max(0, cosine_similarity(emb_a, emb_b)[0][0])) def _special_categories(user): """ Return the set of active background category labels for a user. Reads the five boolean background flags directly from the user instance and returns only those whose value is truthy. :param user: The user whose background flags are being evaluated. :type user: users.models.User :returns: Set of label strings for all active background flags. Returns an empty set if no flags are set. :rtype: set[str] .. note:: This is an internal helper. Call via :func:`calculate_match_score` rather than directly. 
Example:: >>> _special_categories(user) # firstgen and transfer set {'firstgen', 'transfer'} """ mapping = { 'international': user.international, 'commuter': user.commuter, 'firstgen': user.firstgen, 'outofstate': user.outofstate, 'transfer': user.transfer, } return {label for label, active in mapping.items() if active} def _parse_year(year_val): """ Normalize an academic year value to an integer. Handles integer, string, and the special string ``'5+'``. Returns ``0`` for any value that cannot be meaningfully parsed (``None``, empty string, non-numeric text) so that callers can treat ``0`` as "unknown". :param year_val: Raw year value from a user's profile field. :type year_val: int or str or None :returns: Parsed year as an integer, or ``0`` if the value is absent or unparseable. :rtype: int .. note:: This is an internal helper. Call via :func:`calculate_year_compatibility` rather than directly. Example:: >>> _parse_year('3') 3 >>> _parse_year('5+') 5 >>> _parse_year(None) 0 >>> _parse_year('freshman') 0 """ if not year_val: return 0 s = str(year_val).strip() if s == '5+': return 5 try: return int(s) except (ValueError, TypeError): return 0