Source code for arxiv.users.legacy.endorsements

"""
Provide endorsement authorizations for users.

Endorsements are authorization scopes tied to specific classificatory
categories, and are used primarily to determine whether or not a user may
submit a paper with a particular primary or secondary classification.

This module preserves the behavior of the legacy system with respect to
interpreting endorsements and evaluating potential autoendorsement. The
relevant policies can be found on the `arXiv help pages
<https://arxiv.org/help/endorsement>`_.
"""

from typing import List, Dict, Optional, Callable
from collections import Counter
from datetime import datetime

from sqlalchemy.sql.expression import literal

from . import util
from .. import domain
from arxiv import taxonomy
from .models import DBUser, DBEndorsement, DBPaperOwners, DBDocuments, \
    DBDocumentInCategory, DBCategory, DBEndorsementDomain, DBEmailWhitelist, \
    DBEmailBlacklist


# Categories that receive special treatment when evaluating revoked
# auto-endorsements: a category listed here is only blocked by a revoked
# auto-endorsement for that same category, whereas any other category is
# blocked by a revoked auto-endorsement in any category (see
# ``_disqualifying_invalidations``).
GENERAL_CATEGORIES = [
    domain.Category('math', 'GM'),
    domain.Category('physics', 'gen-ph')
]

# Fixed point in time, built from an epoch timestamp via ``util.from_epoch``.
# NOTE(review): not referenced in this section of the module; presumably the
# start of the window used when counting a user's papers (cf. the
# ``start_date`` parameter of ``domain_papers``) -- TODO confirm.
WINDOW_START = util.from_epoch(157783680)


def get_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Collect every category for which a user is endorsed.

    Merges the results of :func:`explicit_endorsements` and
    :func:`implicit_endorsements`, removing duplicates.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user is either
        explicitly or implicitly endorsed. The order of items is not
        significant.

    """
    explicit = set(explicit_endorsements(user))
    implicit = set(implicit_endorsements(user))
    return list(explicit.union(implicit))
def explicit_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Load the categories for which a user holds recorded endorsements.

    These are endorsements (including auto-endorsements) that have been
    explicitly recorded in the database and are flagged as valid.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user is
        explicitly endorsed.

    """
    with util.transaction() as session:
        query = session.query(
            DBEndorsement.archive,
            DBEndorsement.subject_class,
            DBEndorsement.point_value,
        )
        query = query.filter(DBEndorsement.endorsee_id == user.user_id)
        query = query.filter(DBEndorsement.flag_valid == 1)
        rows = query.all()

    # Sum the point values of all valid endorsements per category.
    totals: Counter = Counter()
    for archive, subject_class, point_value in rows:
        totals[domain.Category(archive, subject_class)] += point_value

    # Only categories whose pooled point total is non-zero count as endorsed.
    endorsed: List[domain.Category] = []
    for category, total in totals.items():
        if total:
            endorsed.append(category)
    return endorsed
def implicit_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Determine categories for which a user may be autoendorsed.

    In the classic system, this was determined upon request, when the user
    attempted to submit to a particular category. Because we are separating
    authorization concerns (which includes endorsement) from the submission
    system itself, we want to calculate possible autoendorsement categories
    ahead of time.

    New development of autoendorsement-related functionality should not happen
    here. This function and related code are intended only to preserve the
    business logic already implemented in the classic system.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user may be
        auto-endorsed.

    """
    candidates = [domain.Category.from_compound(compound)
                  for compound in taxonomy.CATEGORIES_ACTIVE]
    policies = category_policies()
    invalidated = invalidated_autoendorsements(user)
    papers = domain_papers(user)
    user_is_academic = is_academic(user)

    endorsed: List[domain.Category] = []
    for category in candidates:
        # Categories without a known policy can never be auto-endorsed.
        if category not in policies:
            continue
        # Revoked auto-endorsements may block further auto-endorsement.
        if _disqualifying_invalidations(category, invalidated):
            continue
        # Any one of the three policies is sufficient.
        if (policies[category]['endorse_all']
                or _endorse_by_email(category, policies, user_is_academic)
                or _endorse_by_papers(category, policies, papers)):
            endorsed.append(category)
    return endorsed
def is_academic(user: domain.User) -> bool:
    """
    Decide from a user's email address whether they are academic.

    The address is matched against the whitelist patterns first, and then
    against the blacklist patterns, both stored in the database. A whitelist
    match takes precedence; an address that matches neither list is treated
    as academic.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    bool

    """
    with util.transaction() as session:

        def _first_match(pattern_model):
            """Return the first row whose pattern matches the user's email."""
            address = literal(user.email)
            return (
                session.query(pattern_model)
                .filter(address.like(pattern_model.pattern))
                .first()
            )

        if _first_match(DBEmailWhitelist):
            return True
        if _first_match(DBEmailBlacklist):
            return False
    return True
def _disqualifying_invalidations(category: domain.Category,
                                 invalidated: List[domain.Category]) -> bool:
    """
    Evaluate whether endorsement invalidations are disqualifying.

    This enforces the policy that invalidated (revoked) auto-endorsements can
    prevent future auto-endorsement.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    invalidated : list
        Categories for which the user has had auto-endorsements invalidated
        (revoked).

    Returns
    -------
    bool

    """
    if category in GENERAL_CATEGORIES:
        # General categories are only blocked by a revocation in that
        # very category.
        return bool(category in invalidated)
    # Every other category is blocked by any revocation at all.
    return bool(invalidated)


def _endorse_by_email(category: domain.Category,
                      policies: Dict[domain.Category, Dict],
                      user_is_academic: bool) -> bool:
    """
    Evaluate whether an auto-endorsement can be issued based on email address.

    This enforces the policy that some categories allow auto-endorsement for
    academic users.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    policies : dict
        Describes auto-endorsement policies for each category (inherited from
        their endorsement domains).
    user_is_academic : bool
        Whether or not the user has been determined to be academic.

    Returns
    -------
    bool

    """
    category_policy = policies.get(category)
    if category_policy is not None and 'endorse_email' in category_policy:
        return category_policy['endorse_email'] and user_is_academic
    # No policy, or no email rule, for this category.
    return False


def _endorse_by_papers(category: domain.Category,
                       policies: Dict[domain.Category, Dict],
                       papers: Dict[str, int]) -> bool:
    """
    Evaluate whether an auto-endorsement can be issued based on prior papers.

    This enforces the policy that some categories allow auto-endorsements for
    users who have published a minimum number of papers in categories that
    share an endorsement domain.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    policies : dict
        Describes auto-endorsement policies for each category (inherited from
        their endorsement domains).
    papers : dict
        The number of papers that the user has published in each endorsement
        domain. Keys are str names of endorsement domains, values are int.

    Returns
    -------
    bool

    """
    # A domain in which the user has no papers counts as zero.
    papers_in_domain = papers.get(policies[category]['domain'], 0)
    required = policies[category]['min_papers']
    return bool(papers_in_domain >= required)
def domain_papers(user: domain.User,
                  start_date: Optional[datetime] = None) -> Dict[str, int]:
    """
    Count the papers that a user owns in each endorsement domain.

    This includes both submitted and claimed papers.

    Parameters
    ----------
    user : :class:`.domain.User`
    start_date : :class:`.datetime` or None
        If provided, will only count papers published after this date.

    Returns
    -------
    dict
        Keys are classification domains (str), values are the number of papers
        in each respective domain (int).

    """
    with util.transaction() as session:
        selection = session.query(
            DBPaperOwners.document_id,
            DBDocuments.document_id,
            DBDocumentInCategory.document_id,
            DBCategory.endorsement_domain
        )
        # Restrict to papers owned by this user, then join
        # owner -> document -> document-in-category -> category.
        owned = selection.filter(DBPaperOwners.user_id == user.user_id)
        joined = (
            owned
            .filter(DBDocuments.document_id == DBPaperOwners.document_id)
            .filter(
                DBDocumentInCategory.document_id == DBDocuments.document_id
            )
            .filter(DBCategory.archive == DBDocumentInCategory.archive)
            .filter(
                DBCategory.subject_class == DBDocumentInCategory.subject_class
            )
        )
        if start_date:
            joined = joined.filter(
                DBDocuments.dated > util.epoch(start_date)
            )
        rows = joined.all()

    # One count per returned row, keyed on the endorsement domain column.
    # NOTE(review): a document with several category rows in the same domain
    # would contribute more than one count -- confirm this is intended.
    per_domain = Counter(e_domain for _, _, _, e_domain in rows)
    return dict(per_domain)
def category_policies() -> Dict[domain.Category, Dict]:
    """
    Load auto-endorsement policies for each category from the database.

    Each category belongs to an endorsement domain, which defines the
    auto-endorsement policies. We retrieve those policies from the perspective
    of the individual category for ease of lookup. Only categories flagged as
    both definitive and active are included.

    Returns
    -------
    dict
        Keys are :class:`.domain.Category` instances. Values are dicts with
        policy details: ``domain`` (name of the endorsement domain),
        ``endorse_all`` (bool), ``endorse_email`` (bool) and ``min_papers``.

    """
    with util.transaction() as session:
        query = session.query(
            DBCategory.archive,
            DBCategory.subject_class,
            DBEndorsementDomain.endorse_all,
            DBEndorsementDomain.endorse_email,
            DBEndorsementDomain.papers_to_endorse,
            DBEndorsementDomain.endorsement_domain
        )
        query = query.filter(DBCategory.definitive == 1)
        query = query.filter(DBCategory.active == 1)
        query = query.filter(
            DBCategory.endorsement_domain
            == DBEndorsementDomain.endorsement_domain
        )
        rows = query.all()

    policies: Dict[domain.Category, Dict] = {}
    for row in rows:
        archive, subject, endorse_all, endorse_email, min_papers, e_domain = row
        policies[domain.Category(archive, subject)] = {
            'domain': e_domain,
            # The database stores these flags as 'y' when enabled.
            'endorse_all': endorse_all == 'y',
            'endorse_email': endorse_email == 'y',
            'min_papers': min_papers
        }
    return policies
def invalidated_autoendorsements(user: domain.User) -> List[domain.Category]:
    """
    Load any invalidated (revoked) auto-endorsements for a user.

    Selects endorsements of type ``'auto'`` for the user that are flagged as
    not valid.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Items are :class:`.domain.Category` for which the user has had past
        auto-endorsements revoked.

    """
    with util.transaction() as session:
        query = session.query(
            DBEndorsement.archive,
            DBEndorsement.subject_class
        )
        query = query.filter(DBEndorsement.endorsee_id == user.user_id)
        query = query.filter(DBEndorsement.flag_valid == 0)
        query = query.filter(DBEndorsement.endorsement_type == 'auto')
        rows = query.all()

    revoked: List[domain.Category] = []
    for archive, subject_class in rows:
        revoked.append(domain.Category(archive, subject_class))
    return revoked