Source code for matching.scoring

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Module-level cache slot for the shared SentenceTransformer instance.
# It starts out as ``None`` and is filled in lazily by get_embedding_model()
# on its first call, so importing this module does not load the model.
_embedding_model = None



[docs]
def get_embedding_model():
    """
    Return the process-wide ``SentenceTransformer``, creating it on demand.

    The instance is cached in the module-level ``_embedding_model`` variable.
    The first call constructs the ``'all-MiniLM-L6-v2'`` model and stores it;
    every later call returns that same stored object, so the model is built
    at most once per process and never at import time.

    No locking is performed around the initialisation.

    :returns: The cached ``SentenceTransformer`` model instance.
    :rtype: sentence_transformers.SentenceTransformer
    """
    global _embedding_model

    # Fast path: the model has already been created by an earlier call.
    if _embedding_model is not None:
        return _embedding_model

    # First use: build the model and remember it for subsequent callers.
    _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    return _embedding_model



# ---------------------------------------------------------------------------
# Score breakdown (max 100 points):
#
#   Profile text similarity:     70 points  (semantic embedding of combined profile)
#   Year compatibility:          10 points
#   Special category overlap:    20 points
# ---------------------------------------------------------------------------



[docs]
def build_profile_text(user):
    """
    Concatenate a user's semantic profile fields into a single text block.

    Each non-empty field becomes one ``"<Label>: <value>"`` line; the lines
    are joined with newlines in the fixed order major, minor, hobbies, clubs,
    goals.  Fields that are empty or otherwise falsy are skipped entirely.

    ``major`` and ``minor`` are treated as collections and rendered as a
    comma-separated list.  If either is supplied as a plain string it is used
    verbatim instead of being split into individual characters, and
    non-string items in a collection are converted with ``str()`` rather
    than raising ``TypeError``.

    :param user: Object exposing ``major``, ``minor``, ``hobbies``, ``clubs``
        and ``goals`` attributes.
    :type user: users.models.User
    :returns: A newline-separated string of labelled profile fields, or an
        empty string if all fields are blank.
    :rtype: str

    Example::

        >>> build_profile_text(user)
        'Major: Computer Science\\nHobbies and interests: chess, hiking\\nGoals: SWE internship'
    """
    def _join_items(value):
        # A str is itself iterable, so ', '.join('Biology') would produce
        # 'B, i, o, l, o, g, y'.  Pass plain strings through unchanged.
        if isinstance(value, str):
            return value
        return ', '.join(str(item) for item in value)

    parts = []

    # Multi-valued fields: rendered as a comma-separated list.
    for label, raw in (("Major", user.major), ("Minor", user.minor)):
        if raw:
            parts.append(f"{label}: {_join_items(raw)}")

    # Free-text fields: inserted as-is.
    for label, raw in (
        ("Hobbies and interests", user.hobbies),
        ("Clubs and activities", user.clubs),
        ("Goals", user.goals),
    ):
        if raw:
            parts.append(f"{label}: {raw}")

    return "\n".join(parts)




[docs]
def calculate_match_score(mentee, mentor):
    """
    Compute a compatibility score between a mentee and a mentor.

    The total is the sum of three sub-scores:

    1. **Profile text similarity** (up to 70 points) — the value returned by
       :func:`_text_similarity` for the two :func:`build_profile_text`
       strings, multiplied by 70.
    2. **Year compatibility** (up to 10 points) — the value returned by
       :func:`calculate_year_compatibility` for the two ``year`` attributes.
    3. **Special category overlap** (up to 20 points) — 5 points for each
       label present in both users' :func:`_special_categories` sets,
       capped at 20.

    :param mentee: The mentee side of the pairing.
    :type mentee: users.models.User
    :param mentor: The mentor side of the pairing.
    :type mentor: users.models.User
    :returns: Sum of the three sub-scores, rounded to two decimal places.
    :rtype: float

    Example::

        >>> calculate_match_score(mentee_user, mentor_user)
        73.45
    """
    # 1. Profile text similarity, weighted to 70 points.
    similarity = _text_similarity(
        build_profile_text(mentee),
        build_profile_text(mentor),
    )
    text_points = similarity * 70

    # 2. Year compatibility, already expressed on a 10-point scale.
    year_points = calculate_year_compatibility(mentee.year, mentor.year)

    # 3. Shared background categories: 5 points each, at most 20.  An empty
    #    set on either side yields an empty intersection and thus 0 points.
    shared = _special_categories(mentee) & _special_categories(mentor)
    category_points = min(len(shared) * 5.0, 20.0)

    return round(float(text_points + year_points + category_points), 2)



# ---------------------------------------------------------------------------
# Year compatibility
# ---------------------------------------------------------------------------



[docs]
def calculate_year_compatibility(mentee_year, mentor_year):
    """
    Score the academic-year gap between a mentee and a mentor.

    Both inputs are first normalised with :func:`_parse_year`.  If either
    normalises to ``0`` (the "unknown" marker) the neutral score ``5.0`` is
    returned.  Otherwise the score depends on ``gap = mentor - mentee``:

    ============================  ======
    Condition                     Points
    ============================  ======
    Mentee year 1, mentor year 3  10.0
    Gap of 3 or more years        9.0
    Gap of exactly 2 years        8.0
    Gap of exactly 1 year         6.0
    Same year (gap of 0)          3.0
    Mentor junior (negative gap)  1.0
    Either year unknown           5.0
    ============================  ======

    The year-1 / year-4 pairing has a gap of 3 and so scores ``9.0``.

    :param mentee_year: Academic year of the mentee (integer or string).
    :type mentee_year: int or str
    :param mentor_year: Academic year of the mentor (integer or string).
    :type mentor_year: int or str
    :returns: Year-compatibility sub-score in the range ``[1.0, 10.0]``.
    :rtype: float

    Example::

        >>> calculate_year_compatibility(1, 3)
        10.0
        >>> calculate_year_compatibility(2, 2)
        3.0
    """
    mentee_n, mentor_n = _parse_year(mentee_year), _parse_year(mentor_year)

    # 0 is _parse_year's marker for "missing or unparseable".
    if 0 in (mentee_n, mentor_n):
        return 5.0

    # The single best pairing is checked before the generic gap rules.
    if (mentee_n, mentor_n) == (1, 3):
        return 10.0

    gap = mentor_n - mentee_n
    if gap < 0:
        return 1.0
    if gap >= 3:
        return 9.0

    # Only gaps of 0, 1 or 2 can reach this point.
    points_by_gap = {2: 8.0, 1: 6.0, 0: 3.0}
    return points_by_gap[gap]



# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------


def _text_similarity(text_a, text_b):
    """
    Compute the cosine similarity between two text strings using sentence embeddings.

    Each string is encoded separately with the model returned by
    :func:`get_embedding_model`, and the cosine similarity of the two
    resulting embeddings is returned.  The raw value is clamped at both
    ends: negatives become ``0.0``, and values that marginally exceed
    ``1.0`` (possible through floating-point rounding) become ``1.0``, so
    the documented range always holds.

    Returns ``0.0`` immediately if either input is empty or falsy, without
    touching the model.

    :param text_a: First text string to compare.
    :type text_a: str
    :param text_b: Second text string to compare.
    :type text_b: str
    :returns: Cosine similarity in the range ``[0.0, 1.0]``.
    :rtype: float

    .. note::
        This is an internal helper. Call via :func:`calculate_match_score`
        rather than directly.

    Example::

        >>> _text_similarity('I enjoy chess and hiking', 'I like outdoor sports and board games')
        0.61
    """
    if not text_a or not text_b:
        return 0.0

    model = get_embedding_model()
    emb_a = model.encode([text_a])
    emb_b = model.encode([text_b])

    # cosine_similarity yields a pairwise matrix; with one text on each side
    # the only entry is [0][0].
    raw = float(cosine_similarity(emb_a, emb_b)[0][0])

    # Clamp to [0.0, 1.0] so the weighted sub-score cannot exceed its cap.
    return min(1.0, max(0.0, raw))


def _special_categories(user):
    """
    Return the set of active background category labels for a user.

    Looks up the ``international``, ``commuter``, ``firstgen``,
    ``outofstate`` and ``transfer`` attributes on *user* and collects the
    names of those whose value is truthy.

    :param user: The user whose background flags are being evaluated.
    :type user: users.models.User
    :returns: Set of label strings for every truthy flag; an empty set when
        none are set.
    :rtype: set[str]

    .. note::
        This is an internal helper. Call via :func:`calculate_match_score`
        rather than directly.

    Example::

        >>> _special_categories(user)   # firstgen and transfer set
        {'firstgen', 'transfer'}
    """
    # Each label doubles as the attribute name on the user object.
    flag_names = (
        'international',
        'commuter',
        'firstgen',
        'outofstate',
        'transfer',
    )
    return {name for name in flag_names if getattr(user, name)}


def _parse_year(year_val):
    """
    Normalize an academic year value to an integer.

    Falsy input (``None``, ``0``, empty string) yields ``0``.  Anything else
    is converted to a string and stripped of surrounding whitespace; the
    exact text ``'5+'`` maps to ``5``, other text is passed to ``int()``,
    and text that ``int()`` rejects yields ``0``.  Callers can therefore
    treat ``0`` as "unknown".

    :param year_val: Raw year value from a user's profile field.
    :type year_val: int or str or None
    :returns: Parsed year as an integer, or ``0`` if the value is absent or
        unparseable.
    :rtype: int

    .. note::
        This is an internal helper. Call via :func:`calculate_year_compatibility`
        rather than directly.

    Example::

        >>> _parse_year('3')
        3
        >>> _parse_year('5+')
        5
        >>> _parse_year(None)
        0
        >>> _parse_year('freshman')
        0
    """
    if not year_val:
        return 0

    text = str(year_val).strip()

    # '5+' is the one accepted non-numeric spelling; reduce it to its digit
    # so that a single int() call handles every case.
    numeric_text = '5' if text == '5+' else text

    try:
        return int(numeric_text)
    except (ValueError, TypeError):
        return 0