Source code for arxiv.canonical.record.core
"""Base classes and core concepts for :mod:`arxiv.canonical.record`."""
import datetime
import os
from abc import ABC
from io import BytesIO
from json import dumps, load
from typing import NamedTuple, List, IO, Iterator, Tuple, Optional, Dict, \
Callable, Iterable, MutableMapping, Mapping, Generic, Type, TypeVar, \
Union, Any
from ..serialize.decoder import CanonicalDecoder
from ..serialize.encoder import CanonicalEncoder
from .. import domain as D
from ..util import GenericMonoDict
# Readable aliases for plain integers used as calendar components.
Year = int
Month = int
# A (year, month) pair, in that order.
YearMonth = Tuple[Year, Month]
class RecordStream(NamedTuple):
    """A single bitstream in the record, together with its metadata."""

    domain: D.CanonicalFile
    """Domain-level object (``D.CanonicalFile``) paired with this stream."""

    content: Optional[IO[bytes]]
    """Raw content of the entry (binary file-like); may be ``None``."""

    content_type: D.ContentType
    """MIME-type of the content."""

    size_bytes: int
    """Size of ``content`` in bytes."""
class RecordEntryMembers(GenericMonoDict[str, 'RecordEntry']):
    """
    A dict whose lookups yield only :class:`.RecordEntry` instances.

    Consistent with ``Mapping[str, RecordEntry]``.
    """

    def __getitem__(self, key: str) -> 'RecordEntry':
        """
        Return the entry stored under ``key``.

        The lookup goes straight to ``dict.__getitem__``, so a missing key
        raises :class:`KeyError`. The retrieved value is asserted to be a
        :class:`.RecordEntry`, which also narrows the type for static
        checkers (note that assertions are skipped when Python runs with
        ``-O``).
        """
        entry = dict.__getitem__(self, key)
        assert isinstance(entry, RecordEntry)
        return entry
# Type of the domain object carried by a ``RecordEntry``; restricted to
# subclasses of ``D.CanonicalBase``.
_EDomain = TypeVar('_EDomain', bound=D.CanonicalBase)
# Stands for "the concrete RecordEntry subclass"; used to annotate ``cls`` in
# ``RecordEntry.from_domain`` so the return type follows the subclass.
_Self = TypeVar('_Self', bound='RecordEntry')
class RecordEntry(Generic[_EDomain]):
    """
    An entry in the canonical record.

    Pairs a :class:`.RecordStream` with the domain representation of the
    entry (i.e. the application-level interpretation of the stream), and
    records the key under which the entry is stored.
    """

    key: D.Key
    """Full key (path) at which the entry is stored."""

    domain: _EDomain
    stream: RecordStream

    def __init__(self, key: D.Key, stream: RecordStream, domain: _EDomain) \
            -> None:
        """Store the key, domain object, and stream on the instance."""
        self.key, self.domain, self.stream = key, domain, stream

    @property
    def name(self) -> str:
        """Final path component of ``key``, minus its last extension."""
        stem, _extension = os.path.splitext(os.path.basename(self.key))
        return stem

    @classmethod
    def from_domain(cls: Type[_Self], d: _EDomain) -> _Self:
        """
        Build a record entry from a domain object.

        Not implemented on the base class: always raises
        :class:`NotImplementedError`; child classes must override it.
        """
        raise NotImplementedError("Must be implemented by child class")

    @classmethod
    def to_domain(cls, stream: RecordStream) -> _EDomain:
        """
        Produce a domain object from a :class:`.RecordStream`.

        Not implemented on the base class: always raises
        :class:`NotImplementedError`; child classes must override it.
        """
        raise NotImplementedError("Must be implemented by child class")
# These TypeVars are used as placeholders in the generic RecordBase class,
# below. To learn more about TypeVars and Generics, see
# https://mypy.readthedocs.io/en/latest/generics.html
# Type of a collection's own name (``RecordBase.name``).
Name = TypeVar('Name')
# Type of the keys in a collection's ``members`` mapping.
MemberName = TypeVar('MemberName')
# Type of the values in ``members``: either a nested collection or an entry.
Member = TypeVar('Member', bound=Union['RecordBase', RecordEntry])
# Type of the object stored as ``RecordBase.domain``.
Domain = TypeVar('Domain')
class RecordBase(Generic[Name, MemberName, Member, Domain]):
    """
    Generic base class for record collections in this module.

    Gives every record collection the same shape (a name, a mapping of
    members, and a domain object) while letting the name, member, and member
    name types differ from one collection subclass to the next.
    """

    def __init__(self, name: Name,
                 members: Mapping[MemberName, Member],
                 domain: Domain) -> None:
        """Register the name, members, and domain of this record instance."""
        self.name, self.members, self.domain = name, members, domain

    @classmethod
    def make_manifest_key(cls, name: Name) -> D.Key:  # pylint: disable=unused-argument
        """
        Generate a full key that can be used to store a manifest.

        This base implementation is only a stub with no real body.
        """
        ...  # pylint: disable=pointless-statement ; this is a stub.