Source code for arxiv.users.legacy.endorsements

"""
Provide endorsement authorizations for users.

Endorsements are authorization scopes tied to specific classificatory
categories, and are used primarily to determine whether or not a user may
submit a paper with a particular primary or secondary classification.

This module preserves the behavior of the legacy system with respect to
interpreting endorsements and evaluating potential autoendorsement. The
relevant policies can be found on the `arXiv help pages
<https://arxiv.org/help/endorsement>`_.
"""

from typing import List, Dict, Optional, Callable
from collections import Counter
from datetime import datetime

from sqlalchemy.sql.expression import literal

from . import util
from .. import domain
from arxiv import taxonomy
from .models import DBUser, DBEndorsement, DBPaperOwners, DBDocuments, \
    DBDocumentInCategory, DBCategory, DBEndorsementDomain, DBEmailWhitelist, \
    DBEmailBlacklist


# Categories that are treated specially when evaluating revoked
# auto-endorsements: for these, only a revocation in the very same category
# is disqualifying (see ``_disqualifying_invalidations``).
GENERAL_CATEGORIES = [
    domain.Category('math', 'GM'),
    domain.Category('physics', 'gen-ph')
]

# NOTE(review): not referenced anywhere in the visible part of this module.
# Presumably the start of a time window (e.g. a ``start_date`` for
# ``domain_papers``), given in seconds since the epoch -- confirm.
WINDOW_START = util.from_epoch(157783680)


def get_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Collect every category for which a user is endorsed.

    The result is the union of the user's explicit endorsements and the
    categories for which the user qualifies for auto-endorsement. Duplicates
    are removed; the order of the returned list is not meaningful.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user is
        either explicitly or implicitly endorsed.

    """
    explicit = set(explicit_endorsements(user))
    implicit = set(implicit_endorsements(user))
    return list(explicit.union(implicit))


def explicit_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Load the categories for which a user holds recorded endorsements.

    Only endorsement rows flagged as valid are considered. The point values
    of all valid rows are summed per category, and a category is reported
    when its total is non-zero.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user is
        explicitly endorsed.
    """
    with util.transaction() as session:
        valid_rows = session.query(
            DBEndorsement.archive,
            DBEndorsement.subject_class,
            DBEndorsement.point_value,
        ).filter(
            DBEndorsement.endorsee_id == user.user_id
        ).filter(
            DBEndorsement.flag_valid == 1
        ).all()

    # Several rows may refer to the same category; pool their points.
    totals: Counter = Counter()
    for row in valid_rows:
        archive, subject_class, point_value = row
        totals[domain.Category(archive, subject_class)] += point_value

    endorsed = []
    for category, total in totals.items():
        if total:
            endorsed.append(category)
    return endorsed


def implicit_endorsements(user: domain.User) -> List[domain.Category]:
    """
    Determine categories for which a user may be autoendorsed.

    In the classic system, this was determined upon request, when the user
    attempted to submit to a particular category. Because authorization
    concerns (which include endorsement) are separated from the submission
    system itself, the possible autoendorsement categories are calculated
    ahead of time here.

    New development of autoendorsement-related functionality should not happen
    here. This function and related code are intended only to preserve the
    business logic already implemented in the classic system.

    A category qualifies when it has a policy, no revoked auto-endorsement
    disqualifies it, and at least one of the following holds: the policy
    endorses everyone, it endorses by e-mail and the user is academic, or the
    user owns enough papers in the category's endorsement domain.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Each item is a :class:`.domain.Category` for which the user may be
        auto-endorsed.
    """
    candidates = [domain.Category.from_compound(compound)
                  for compound in taxonomy.CATEGORIES_ACTIVE]
    policies = category_policies()
    revoked = invalidated_autoendorsements(user)
    paper_counts = domain_papers(user)
    academic = is_academic(user)

    eligible: List[domain.Category] = []
    for category in candidates:
        # Categories without a known policy can never be auto-endorsed.
        if category not in policies:
            continue
        if _disqualifying_invalidations(category, revoked):
            continue
        if (policies[category]['endorse_all']
                or _endorse_by_email(category, policies, academic)
                or _endorse_by_papers(category, policies, paper_counts)):
            eligible.append(category)
    return eligible


def is_academic(user: domain.User) -> bool:
    """
    Decide from a user's e-mail address whether they count as academic.

    The address is matched against the whitelist patterns first; a match
    there means the user is academic. Otherwise the blacklist patterns are
    consulted, and a match there means the user is not academic. An address
    that matches neither list is treated as academic.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    bool
    """
    # Checked in order; the first list with a matching pattern decides.
    checks = (
        (DBEmailWhitelist, True),
        (DBEmailBlacklist, False),
    )
    with util.transaction() as session:
        for pattern_table, verdict in checks:
            match = (
                session.query(pattern_table)
                .filter(literal(user.email).like(pattern_table.pattern))
                .first()
            )
            if match:
                return verdict
    return True


def _disqualifying_invalidations(category: domain.Category,
                                 invalidated: List[domain.Category]) -> bool:
    """
    Decide whether revoked auto-endorsements rule out a new one.

    For a category listed in ``GENERAL_CATEGORIES``, only a revocation in
    that same category is disqualifying. For every other category, any
    revoked auto-endorsement at all is disqualifying.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    invalidated : list
        Categories for which the user has had auto-endorsements invalidated
        (revoked).

    Returns
    -------
    bool
    """
    if category in GENERAL_CATEGORIES:
        return category in invalidated
    return bool(invalidated)


def _endorse_by_email(category: domain.Category,
                      policies: Dict[domain.Category, Dict],
                      user_is_academic: bool) -> bool:
    """
    Evaluate whether an auto-endorsement can be issued based on email address.

    This enforces the policy that some categories allow auto-endorsement for
    academic users.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    policies : dict
        Describes auto-endorsement policies for each category (inherited from
        their endorsement domains).
    user_is_academic : bool
        Whether or not the user has been determined to be academic.

    Returns
    -------
    bool
    """
    policy = policies.get(category)
    if policy is None or 'endorse_email' not in policy:
        return False
    return policy['endorse_email'] and user_is_academic


def _endorse_by_papers(category: domain.Category,
                       policies: Dict[domain.Category, Dict],
                       papers: Dict[str, int]) -> bool:
    """
    Evaluate whether an auto-endorsement can be issued based on prior papers.

    This enforces the policy that some categories allow auto-endorsements for
    users who have published a minimum number of papers in categories that
    share an endoresement domain.

    Parameters
    ----------
    category : :class:`.Category`
        The category for which an auto-endorsement is being considered.
    policies : dict
        Describes auto-endorsement policies for each category (inherited from
        their endorsement domains).
    papers : dict
        The number of papers that the user has published in each endorsement
        domain. Keys are str names of endorsement domains, values are int.

    Returns
    -------
    bool
    """
    N_papers = papers.get(policies[category]['domain'], 0)
    min_papers = policies[category]['min_papers']
    return bool(N_papers >= min_papers)


def domain_papers(user: domain.User,
                  start_date: Optional[datetime] = None) -> Dict[str, int]:
    """
    Count the papers that a user owns in each endorsement domain.

    Ownership rows for the user are matched to documents, to the categories
    those documents are in, and from there to each category's endorsement
    domain. One count is added per matching row.

    Parameters
    ----------
    user : :class:`.domain.User`
    start_date : :class:`.datetime` or None
        If provided, only documents whose ``dated`` value is greater than
        this date (converted with ``util.epoch``) are counted.

    Returns
    -------
    dict
        Keys are classification domains (str), values are the number of papers
        in each respective domain (int).

    """
    with util.transaction() as session:
        # All criteria are ANDed; together they express the joins from
        # ownership -> document -> document category -> category.
        criteria = [
            DBPaperOwners.user_id == user.user_id,
            DBDocuments.document_id == DBPaperOwners.document_id,
            DBDocumentInCategory.document_id == DBDocuments.document_id,
            DBCategory.archive == DBDocumentInCategory.archive,
            DBCategory.subject_class == DBDocumentInCategory.subject_class,
        ]
        if start_date:
            criteria.append(DBDocuments.dated > util.epoch(start_date))
        rows = session.query(
            DBPaperOwners.document_id,
            DBDocuments.document_id,
            DBDocumentInCategory.document_id,
            DBCategory.endorsement_domain
        ).filter(*criteria).all()

    tally: Counter = Counter()
    for row in rows:
        # Only the endorsement domain (last column) is needed for counting.
        *_, endorsement_domain = row
        tally[endorsement_domain] += 1
    return dict(tally)


def category_policies() -> Dict[domain.Category, Dict]:
    """
    Load auto-endorsement policies for each category from the database.

    Each category belongs to an endorsement domain, which defines the
    auto-endorsement policies. The policies are returned keyed by individual
    category for ease of lookup. Only categories flagged as both definitive
    and active are included.

    Returns
    -------
    dict
        Keys are :class:`.domain.Category` instances. Values are dicts with
        policy details: ``'domain'`` (name of the endorsement domain),
        ``'endorse_all'`` and ``'endorse_email'`` (bool, true when the stored
        flag is ``'y'``), and ``'min_papers'`` (the domain's
        ``papers_to_endorse`` value).

    """
    with util.transaction() as session:
        rows = session.query(
            DBCategory.archive,
            DBCategory.subject_class,
            DBEndorsementDomain.endorse_all,
            DBEndorsementDomain.endorse_email,
            DBEndorsementDomain.papers_to_endorse,
            DBEndorsementDomain.endorsement_domain
        ).filter(
            DBCategory.definitive == 1
        ).filter(
            DBCategory.active == 1
        ).filter(
            DBCategory.endorsement_domain
            == DBEndorsementDomain.endorsement_domain
        ).all()

    policies: Dict[domain.Category, Dict] = {}
    for row in rows:
        (archive, subject_class, all_flag, email_flag,
         papers_needed, domain_name) = row
        policies[domain.Category(archive, subject_class)] = {
            'domain': domain_name,
            'endorse_all': all_flag == 'y',
            'endorse_email': email_flag == 'y',
            'min_papers': papers_needed
        }
    return policies


def invalidated_autoendorsements(user: domain.User) -> List[domain.Category]:
    """
    Load the categories of a user's invalidated (revoked) auto-endorsements.

    These are the user's endorsement rows of type ``'auto'`` whose validity
    flag is 0. One item is returned per matching row.

    Parameters
    ----------
    user : :class:`.domain.User`

    Returns
    -------
    list
        Items are :class:`.domain.Category` for which the user has had past
        auto-endorsements revoked.
    """
    with util.transaction() as session:
        revoked_rows = session.query(
            DBEndorsement.archive,
            DBEndorsement.subject_class
        ).filter(
            DBEndorsement.endorsee_id == user.user_id
        ).filter(
            DBEndorsement.flag_valid == 0
        ).filter(
            DBEndorsement.endorsement_type == 'auto'
        ).all()

    revoked: List[domain.Category] = []
    for row in revoked_rows:
        archive, subject_class = row
        revoked.append(domain.Category(archive, subject_class))
    return revoked
Source code for arxiv.users.legacy.endorsements

arXiv AuthN/Z

Navigation

Related Topics