Source code for arxiv.canonical.domain.identifier

"""Provides the concept of an arXiv identifier."""

from typing import Any

from arxiv import identifier

# Mapping from legacy ``neuro-sys`` (and one ``neuro-dev``) identifiers to
# another identifier for the same e-print. ``Identifier.__init__`` looks up
# incoming values here and validates the mapped identifier in their place.
#
# These are somewhat of a mystery, for there is no reference to ``neuro-sys``
# in the codebase. This mapping was generated by searching for "neuro-sys"
# in the comments field, which yields results like:
#
#     "Originally submitted to the neuro-sys archive which was never publicly
#     announced (was 0005002)"
NEURO_SYS_IDENTIFIERS = {
    "neuro-sys/0005002": "q-bio/0309034",
    "neuro-sys/9905003": "q-bio/0309033",  # Comment said 002, but must be 003.
    "neuro-sys/9905002": "q-bio/0309032",
    "neuro-sys/9810001": "q-bio/0309031",
    "neuro-sys/9809002": "q-bio/0309030",
    "neuro-sys/9806001": "q-bio/0309029",
    "neuro-sys/9804003": "q-bio/0309028",
    "neuro-sys/9804002": "q-bio/0309027",
    "neuro-sys/9804001": "q-bio/0309026",
    "neuro-sys/9803001": "q-bio/0309025",
    "neuro-sys/9802001": "q-bio/0309024",
    "neuro-sys/9801001": "q-bio/0309023",
    "neuro-sys/9905001": "cond-mat/9905438",
    "neuro-sys/9809001": "cs/9809125",
    "neuro-dev/9710001": "cond-mat/9710352",
}


class InvalidIdentifier(ValueError):
    """
    Raised when a value cannot be interpreted as an arXiv identifier.

    Subclasses :class:`ValueError`, so handlers written for ``ValueError``
    also catch it.
    """
class Identifier(str):
    """
    An arXiv e-print identifier.

    Supports both old-style (``archive.category/YYMMNNN``) and new-style
    (``YYMM.NNNN`` through 2014, ``YYMM.NNNNN`` from 2015 on) identifiers.

    Ordering comparisons (``<``, ``>``, ``<=``, ``>=``) sort by announcement
    year, then month, then incremental part. Equality is inherited from
    :class:`str` and therefore compares the raw text.
    """

    def __init__(self, value: str) -> None:
        """
        Initialize from a raw str value.

        Sets ``is_old_style`` according to which identifier pattern matches.

        Raises
        ------
        InvalidIdentifier
            If the value matches neither the new-style nor the old-style
            pattern.

        """
        # NOTE: the remapping below only affects validation. The text of a
        # ``str`` instance is fixed before ``__init__`` runs, so the instance
        # still holds the value that was originally passed in.
        if value in NEURO_SYS_IDENTIFIERS:
            value = NEURO_SYS_IDENTIFIERS[value]
        if identifier.STANDARD.match(str(value)):  # pylint: disable=no-member
            self.is_old_style = False
        elif identifier.OLD_STYLE.match(str(value)):  # pylint: disable=no-member
            self.is_old_style = True
        else:
            raise InvalidIdentifier(f'Not a valid arXiv ID: {value}')

    @classmethod
    def from_parts(cls, year: int, month: int, inc: int) -> 'Identifier':
        """
        Generate a new-style identifier from its parts.

        The sequence number is zero-padded to four digits for years before
        2015 and to five digits from 2015 on, which is how new-style arXiv
        identifiers are written.
        """
        prefix = f'{str(year)[-2:]}{str(month).zfill(2)}'
        # A two-digit year is expanded with the same pivot used by ``year``.
        full_year = year
        if year < 100:
            full_year = (1900 if year > 90 else 2000) + year
        width = 5 if full_year >= 2015 else 4
        return cls(f'{prefix}.{str(inc).zfill(width)}')

    @property
    def category_part(self) -> str:
        """
        For old-style identifiers, conveys the primary category.

        Raises :class:`ValueError` for new-style identifiers.
        """
        if not self.is_old_style:
            raise ValueError('New identifiers have no category semantics')
        return self.split('/')[0]

    @property
    def incremental_part(self) -> int:
        """The part of the identifier that is incremental."""
        if self.is_old_style:
            # Old-style numeric part is YYMMNNN; drop the YYMM prefix.
            return int(self.numeric_part[4:])
        return int(self.split('.', 1)[1])

    @property
    def numeric_part(self) -> str:
        """
        The entire numeric component of the identifier.

        For new-style identifiers, this is the entire identifier.
        """
        if self.is_old_style:
            return self.split('/')[1]
        return str(self)

    @property
    def yymm(self) -> str:
        """Numeric part conveying the original announcement year and month."""
        if self.is_old_style:
            return self.split('/', 1)[1][:4]
        return self[:4]

    @property
    def year(self) -> int:
        """Year in which the first version of the e-print was announced."""
        if self.is_old_style:
            yy = int(self.split('/', 1)[1][0:2])
        else:
            yy = int(self[:2])
        # Two-digit years above 90 are read as 19YY; everything else as 20YY.
        if yy > 90:
            return 1900 + yy
        return 2000 + yy

    @property
    def month(self) -> int:
        """Month in which the first version of the e-print was announced."""
        if self.is_old_style:
            return int(self.split('/', 1)[1][2:4])
        return int(self[2:4])

    def __gt__(self, other: Any) -> bool:
        """Return True if this identifier sorts after ``other``."""
        if not isinstance(other, Identifier):
            raise ValueError(f'Cannot compare Identifier to {type(other)}')
        mine = (self.year, self.month)
        theirs = (other.year, other.month)
        if mine != theirs:
            return mine > theirs
        # Same year and month: the incremental part breaks the tie.
        return self.incremental_part > other.incremental_part

    def __lt__(self, other: Any) -> bool:
        """Return True if this identifier sorts before ``other``."""
        if not isinstance(other, Identifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        mine = (self.year, self.month)
        theirs = (other.year, other.month)
        if mine != theirs:
            return mine < theirs
        # Same year and month: the incremental part breaks the tie.
        return self.incremental_part < other.incremental_part

    def __le__(self, other: Any) -> bool:
        """Return True if this identifier sorts before, or equals, ``other``."""
        if not isinstance(other, Identifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        return self < other or self == other

    def __ge__(self, other: Any) -> bool:
        """Return True if this identifier sorts after, or equals, ``other``."""
        if not isinstance(other, Identifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        return self > other or self == other
class VersionedIdentifier(str):
    """
    An arXiv identifier for a specific :class:`.Version`.

    This is an :class:`.Identifier` with a version (``v{N}``) affix. The
    parsed parts are available as ``arxiv_id`` and ``version``.
    """

    def __init__(self, value: str) -> None:
        """
        Initialize with a raw str value.

        Raises
        ------
        ValueError
            If the value has no ``v{N}`` affix, the version is not an
            integer, or the identifier part is not a valid arXiv identifier.

        """
        try:
            # Split on the LAST 'v': old-style archive names can themselves
            # contain a 'v' (e.g. ``solv-int/9901001v1``), so splitting on
            # the first one would cut the archive name in half.
            id_part, version_part = self.rsplit('v', 1)
            self.arxiv_id = Identifier(id_part)
            self.version = int(version_part)
        except ValueError as e:
            raise ValueError(f'Not a valid version identifier: {value}') from e

    @classmethod
    def from_parts(cls, arxiv_id: 'Identifier', version: int) \
            -> 'VersionedIdentifier':
        """Generate a versioned identifier from an identifier and a version."""
        return cls(f'{arxiv_id}v{version}')

    @property
    def category_part(self) -> str:
        """For old-style identifiers, conveys the primary category."""
        return self.arxiv_id.category_part

    @property
    def numeric_part(self) -> str:
        """
        The entire numeric component of the identifier.

        For new-style identifiers, this is the entire identifier.
        """
        return self.arxiv_id.numeric_part

    @property
    def incremental_part(self) -> int:
        """The part of the identifier that is incremental."""
        return self.arxiv_id.incremental_part

    @property
    def is_old_style(self) -> bool:
        """Indicate whether this is an old-style identifier."""
        return self.arxiv_id.is_old_style

    @property
    def year(self) -> int:
        """Year in which the first version of the e-print was announced."""
        return self.arxiv_id.year

    @property
    def yymm(self) -> str:
        """Numeric part conveying the original announcement year and month."""
        return self.arxiv_id.yymm

    @property
    def month(self) -> int:
        """Month in which the first version of the e-print was announced."""
        return self.arxiv_id.month

    # The comparisons below deliberately test ``>`` and ``<`` on ``arxiv_id``
    # rather than comparing ``(arxiv_id, version)`` tuples: tuple comparison
    # would consult ``arxiv_id ==``, which is plain string equality and can
    # disagree with the identifier ordering.

    def __gt__(self, other: Any) -> bool:
        """Return True if this sorts after ``other`` (identifier, then version)."""
        if not isinstance(other, VersionedIdentifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        if self.arxiv_id > other.arxiv_id:
            return True
        if self.arxiv_id < other.arxiv_id:
            return False
        return self.version > other.version

    def __lt__(self, other: Any) -> bool:
        """Return True if this sorts before ``other`` (identifier, then version)."""
        if not isinstance(other, VersionedIdentifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        if self.arxiv_id > other.arxiv_id:
            return False
        if self.arxiv_id < other.arxiv_id:
            return True
        return self.version < other.version

    def __le__(self, other: Any) -> bool:
        """Return True if this sorts before, or equals, ``other``."""
        if not isinstance(other, VersionedIdentifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        return self < other or self == other

    def __ge__(self, other: Any) -> bool:
        """Return True if this sorts after, or equals, ``other``."""
        if not isinstance(other, VersionedIdentifier):
            raise ValueError(f'Cannot compare {self} to {type(other)}')
        return self > other or self == other