from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Module-level cache for the shared SentenceTransformer instance.
# It starts out as None; get_embedding_model() constructs the model on its
# first call and stores it here so later calls reuse the same object.
_embedding_model = None
[docs]
def get_embedding_model():
    """
    Provide the shared ``SentenceTransformer`` used for profile scoring.

    The instance is cached in the module-level ``_embedding_model`` variable.
    The first call constructs the ``'all-MiniLM-L6-v2'`` model and stores it;
    every later call returns the stored object without constructing it again,
    so the loading cost is paid on first use rather than at import time.

    :returns: The cached ``SentenceTransformer`` model instance.
    :rtype: sentence_transformers.SentenceTransformer
    """
    global _embedding_model

    # Fast path: the model has already been built by an earlier call.
    if _embedding_model is not None:
        return _embedding_model

    _embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    return _embedding_model
# ---------------------------------------------------------------------------
# Score breakdown (max 100 points):
#
# Profile text similarity: 70 points (semantic embedding of combined profile)
# Year compatibility: 10 points
# Special category overlap: 20 points
# ---------------------------------------------------------------------------
[docs]
def _join_profile_values(value):
    """Flatten a major/minor value (string or iterable) into one comma-separated string."""
    if isinstance(value, str):
        # A plain string must be used as-is; ``', '.join("CS")`` would
        # otherwise split it into individual characters.
        return value.strip()
    cleaned = (str(item).strip() for item in value if item is not None)
    return ', '.join(text for text in cleaned if text)


def build_profile_text(user):
    """
    Concatenate a user's semantic profile fields into a single text block.

    Each non-empty field becomes one labelled line (``"Major: ..."``,
    ``"Minor: ..."``, ``"Hobbies and interests: ..."``,
    ``"Clubs and activities: ..."``, ``"Goals: ..."``) and the lines are
    joined with newlines. Fields that are empty or falsy are omitted.

    ``major`` and ``minor`` may be either a single string or an iterable of
    entries. Iterables are joined with ``", "``; blank or ``None`` entries
    are dropped, and a field whose entries are all blank is omitted.

    :param user: The user whose profile fields are being combined. Must
        expose ``major``, ``minor``, ``hobbies``, ``clubs`` and ``goals``.
    :type user: users.models.User
    :returns: A newline-separated string of labelled profile fields, or an
        empty string if all fields are blank.
    :rtype: str

    Example::

        >>> build_profile_text(user)
        'Major: Computer Science\\nHobbies and interests: chess, hiking\\nGoals: SWE internship'
    """
    parts = []

    # Multi-valued fields: normalise to a single comma-separated string.
    for label, value in (("Major", user.major), ("Minor", user.minor)):
        if not value:
            continue
        joined = _join_profile_values(value)
        if joined:
            parts.append(f"{label}: {joined}")

    # Free-text fields are included verbatim when present.
    if user.hobbies:
        parts.append(f"Hobbies and interests: {user.hobbies}")
    if user.clubs:
        parts.append(f"Clubs and activities: {user.clubs}")
    if user.goals:
        parts.append(f"Goals: {user.goals}")

    return "\n".join(parts)
[docs]
def calculate_match_score(mentee, mentor):
    """
    Compute a compatibility score between a mentee and a mentor.

    The total is the sum of three sub-scores:

    1. **Profile text similarity** (up to 70 points): the result of
       ``_text_similarity`` on the two :func:`build_profile_text` outputs,
       multiplied by 70.
    2. **Year compatibility** (up to 10 points): the value returned by
       :func:`calculate_year_compatibility` for the two ``year`` attributes.
    3. **Special category overlap** (up to 20 points): 5 points for each
       background category both users share, capped at 20.

    :param mentee: The mentee side of the pairing.
    :type mentee: users.models.User
    :param mentor: The mentor side of the pairing.
    :type mentor: users.models.User
    :returns: Total compatibility score rounded to two decimal places.
    :rtype: float

    Example::

        >>> calculate_match_score(mentee_user, mentor_user)
        73.45
    """
    # 1. Profile text similarity, scaled to 70 points.
    mentee_profile = build_profile_text(mentee)
    mentor_profile = build_profile_text(mentor)
    text_points = 70 * _text_similarity(mentee_profile, mentor_profile)

    # 2. Year compatibility (helper already returns points).
    year_points = calculate_year_compatibility(mentee.year, mentor.year)

    # 3. Shared background categories: 5 points each, at most 20.
    #    An empty intersection contributes 0 points.
    shared = _special_categories(mentee) & _special_categories(mentor)
    category_points = min(5.0 * len(shared), 20.0)

    total = text_points + year_points + category_points
    return round(float(total), 2)
# ---------------------------------------------------------------------------
# Year compatibility
# ---------------------------------------------------------------------------
[docs]
def calculate_year_compatibility(mentee_year, mentor_year):
    """
    Score the academic-year gap between a mentee and a mentor.

    Both inputs are normalised with ``_parse_year``. If either one parses to
    ``0`` (missing or unparseable) the function returns ``5.0``. Otherwise
    the score is chosen from the gap ``mentor - mentee``:

    +----------------------------+--------+
    | Condition                  | Points |
    +============================+========+
    | Mentee year 1, mentor 3    | 10.0   |
    +----------------------------+--------+
    | Gap of 3 or more years     | 9.0    |
    +----------------------------+--------+
    | Gap of 2 years             | 8.0    |
    +----------------------------+--------+
    | Gap of 1 year              | 6.0    |
    +----------------------------+--------+
    | Same year                  | 3.0    |
    +----------------------------+--------+
    | Mentor behind the mentee   | 1.0    |
    +----------------------------+--------+
    | Either year unknown        | 5.0    |
    +----------------------------+--------+

    :param mentee_year: Academic year of the mentee (integer or string).
    :type mentee_year: int or str
    :param mentor_year: Academic year of the mentor (integer or string).
    :type mentor_year: int or str
    :returns: Year-compatibility sub-score in the range ``[1.0, 10.0]``.
    :rtype: float

    Example::

        >>> calculate_year_compatibility(1, 3)
        10.0
        >>> calculate_year_compatibility(2, 2)
        3.0
    """
    junior = _parse_year(mentee_year)
    senior = _parse_year(mentor_year)

    # 0 is the "unknown" sentinel from _parse_year.
    if 0 in (junior, senior):
        return 5.0

    # The first-year / third-year pairing outranks the generic 2-year gap.
    if (junior, senior) == (1, 3):
        return 10.0

    diff = senior - junior
    if diff < 0:
        return 1.0
    if diff >= 3:
        # Includes the year-1 / year-4 pairing, which scores the same 9.0.
        return 9.0

    points_by_gap = {0: 3.0, 1: 6.0, 2: 8.0}
    return points_by_gap[diff]
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
def _text_similarity(text_a, text_b):
    """
    Compute the cosine similarity between two text strings using sentence embeddings.

    Each string is encoded separately with the model returned by
    :func:`get_embedding_model`, and the cosine similarity of the two
    vectors is clamped into ``[0.0, 1.0]``. Negative similarities become
    ``0.0``, and values that exceed ``1.0`` through floating-point rounding
    become ``1.0``.

    Returns ``0.0`` without calling the model if either input is empty,
    falsy, or contains only whitespace.

    :param text_a: First text string to compare.
    :type text_a: str
    :param text_b: Second text string to compare.
    :type text_b: str
    :returns: Cosine similarity in the range ``[0.0, 1.0]``.
    :rtype: float

    .. note::
        This is an internal helper. Call via :func:`calculate_match_score`
        rather than directly.

    Example::

        >>> _text_similarity('I enjoy chess and hiking', 'I like outdoor sports and board games')
        0.61
    """
    # Nothing meaningful to embed: skip the model call entirely.
    if not text_a or not text_b:
        return 0.0
    if not text_a.strip() or not text_b.strip():
        return 0.0

    model = get_embedding_model()
    emb_a = model.encode([text_a])
    emb_b = model.encode([text_b])

    raw = float(cosine_similarity(emb_a, emb_b)[0][0])
    # Clamp both ends so callers can rely on the documented [0, 1] range.
    return min(1.0, max(0.0, raw))
def _special_categories(user):
"""
Return the set of active background category labels for a user.
Reads the five boolean background flags directly from the user instance
and returns only those whose value is truthy.
:param user: The user whose background flags are being evaluated.
:type user: users.models.User
:returns: Set of label strings for all active background flags.
Returns an empty set if no flags are set.
:rtype: set[str]
.. note::
This is an internal helper. Call via :func:`calculate_match_score`
rather than directly.
Example::
>>> _special_categories(user) # firstgen and transfer set
{'firstgen', 'transfer'}
"""
mapping = {
'international': user.international,
'commuter': user.commuter,
'firstgen': user.firstgen,
'outofstate': user.outofstate,
'transfer': user.transfer,
}
return {label for label, active in mapping.items() if active}
def _parse_year(year_val):
"""
Normalize an academic year value to an integer.
Handles integer, string, and the special string ``'5+'``. Returns ``0``
for any value that cannot be meaningfully parsed (``None``, empty string,
non-numeric text) so that callers can treat ``0`` as "unknown".
:param year_val: Raw year value from a user's profile field.
:type year_val: int or str or None
:returns: Parsed year as an integer, or ``0`` if the value is absent or
unparseable.
:rtype: int
.. note::
This is an internal helper. Call via :func:`calculate_year_compatibility`
rather than directly.
Example::
>>> _parse_year('3')
3
>>> _parse_year('5+')
5
>>> _parse_year(None)
0
>>> _parse_year('freshman')
0
"""
if not year_val:
return 0
s = str(year_val).strip()
if s == '5+':
return 5
try:
return int(s)
except (ValueError, TypeError):
return 0