Source code for arxiv.canonical.record.version

import datetime
from typing import Callable, Dict, IO, Iterable, Optional

from .core import RecordBase, RecordEntry, RecordEntryMembers, RecordStream, \
    D, Year, YearMonth
from .file import RecordFile
from .metadata import RecordMetadata


class RecordVersion(RecordBase[D.VersionedIdentifier,
                               str,
                               RecordEntry,
                               D.Version]):
    """
    A collection of serialized components that make up a version record.

    A version record is comprised of (1) a metadata record, (2) a source
    package, containing the original content provided by the submitter, and
    (3) a canonical rendering of the version (e.g. in PDF format).

    The key prefix structure for a version record is:

    ```
    e-prints/<YYYY>/<MM>/<arXiv ID>/v<version>/
    ```

    Where ``YYYY`` is the year and ``MM`` the month during which the first
    version of the e-print was announced.

    Sub-keys are:

    - Metadata record: ``<arXiv ID>v<version>.json``
    - Source package: ``<arXiv ID>v<version>.tar``
    - PDF: ``<arXiv ID>v<version>.render``
    - Manifest: ``<arXiv ID>v<version>.manifest.json``

    """

    @classmethod
    def from_domain(cls, version: D.Version,
                    dereferencer: Callable[[D.URI], IO[bytes]],
                    metadata: Optional[RecordMetadata] = None) \
            -> 'RecordVersion':
        """
        Serialize an :class:`.Version` to an :class:`.RecordVersion`.

        Note that this mutates ``version``: the ``ref`` of its source, of
        each of its formats, and of its render (if any) is replaced by the
        corresponding record key.

        Parameters
        ----------
        version : :class:`.Version`
            The version to serialize. Must have a source and a first
            announcement date.
        dereferencer : callable
            Called with the ``ref`` of each bitstream (source, formats,
            render) to obtain its content.
        metadata : :class:`.RecordMetadata` or None
            Pre-built metadata record. If ``None``, one is generated from
            ``version``.

        Returns
        -------
        :class:`.RecordVersion`

        Raises
        ------
        ValueError
            If ``version`` has no source, or no first announcement date.

        """
        if version.source is None:
            raise ValueError('Source is missing')
        if version.announced_date_first is None:
            raise ValueError('First announcement date not set')

        def as_file(key: D.Key, cf, content: IO[bytes]) -> RecordFile:
            """Wrap one bitstream (``cf``) and its content in a file record."""
            return RecordFile(
                key=key,
                stream=RecordStream(
                    domain=cf,
                    content=content,
                    content_type=cf.content_type,
                    size_bytes=cf.size_bytes,
                ),
                domain=cf
            )

        # Dereference the bitstreams, wherever they happen to live. This
        # happens before any ``ref`` is overwritten below.
        source_content = dereferencer(version.source.ref)
        format_content = {}
        for fmt, cf in version.formats.items():
            format_content[fmt] = dereferencer(cf.ref)

        # Work out where each bitstream will live in the record.
        source_key = RecordVersion.make_key(version.identifier,
                                            version.source.filename)
        format_keys = {}
        for fmt, cf in version.formats.items():
            format_keys[fmt] = RecordVersion.make_key(version.identifier,
                                                      cf.filename)

        source = as_file(source_key, version.source, source_content)
        formats = {}
        for fmt, cf in version.formats.items():
            # Members are keyed by the value of the format, not the format
            # object itself.
            formats[fmt.value] = as_file(format_keys[fmt], cf,
                                         format_content[fmt])

        if version.render:
            render_content = dereferencer(version.render.ref)
            render_key = RecordVersion.make_key(version.identifier,
                                                version.render.filename)
            # NOTE(review): the render ref is rewritten *before* metadata is
            # generated below, whereas the source and format refs are
            # rewritten *after*. Confirm whether this asymmetry is intended.
            version.render.ref = render_key
            formats['render'] = as_file(render_key, version.render,
                                        render_content)

        if metadata is None:
            metadata = RecordMetadata.from_domain(version)

        # From now on we refer to bitstreams with canonical URIs.
        version.source.ref = source_key
        for fmt, cf in version.formats.items():
            cf.ref = format_keys[fmt]

        members = RecordEntryMembers(
            metadata=metadata,
            source=source,
            **formats
        )
        return RecordVersion(version.identifier, members=members,
                             domain=version)

    @classmethod
    def make_key(cls, identifier: D.VersionedIdentifier,
                 filename: Optional[str] = None) -> D.Key:
        """
        Make a key for a member of a version record.

        Parameters
        ----------
        identifier : :class:`.VersionedIdentifier`
            Identifier of the version.
        filename : str or None
            Name of the member file. If ``None``, the key of the metadata
            record (as made by :class:`.RecordMetadata`) is returned.

        Returns
        -------
        :class:`.Key`

        """
        if filename is not None:
            return D.Key(f'{cls.make_prefix(identifier)}/{filename}')
        return RecordMetadata.make_key(identifier)

    @classmethod
    def make_manifest_key(cls, ident: D.VersionedIdentifier) -> D.Key:
        """
        Make a key for the manifest of a version record.

        Parameters
        ----------
        ident : :class:`.VersionedIdentifier`
            Identifier of the version.

        Returns
        -------
        :class:`.Key`

        """
        date_part = f'e-prints/{ident.year}/{str(ident.month).zfill(2)}'
        if not ident.is_old_style:
            return D.Key(f'{date_part}/{ident.arxiv_id}/{ident}.manifest.json')
        # NOTE(review): unlike the new-style key above, this key carries no
        # explicit version component; if ``numeric_part`` excludes the
        # version, all versions of an old-style e-print share one manifest
        # key. Verify against the identifier implementation.
        return D.Key(f'{date_part}/{ident.category_part}/{ident.numeric_part}/{ident.numeric_part}.manifest.json')

    @classmethod
    def make_prefix(cls, ident: D.VersionedIdentifier) -> str:
        """
        Make a key prefix for a version record.

        Parameters
        ----------
        ident : :class:`.VersionedIdentifier`
            Versioned arXiv identifier. Its year and month (and, for
            old-style identifiers, its category and numeric parts) determine
            the prefix.

        Returns
        -------
        str

        """
        date_part = f'e-prints/{ident.year}/{str(ident.month).zfill(2)}'
        if not ident.is_old_style:
            return f'{date_part}/{ident.arxiv_id}/v{ident.version}'
        return (f'{date_part}/{ident.category_part}/{ident.numeric_part}/'
                f'v{ident.version}')

    @property
    def identifier(self) -> D.VersionedIdentifier:
        """Versioned identifier of this record (its name)."""
        return self.name

    @property
    def metadata(self) -> RecordMetadata:
        """JSON document containing canonical e-print metadata."""
        assert 'metadata' in self.members
        record = self.members['metadata']
        assert isinstance(record, RecordMetadata)
        return record

    @property
    def render(self) -> Optional[RecordEntry]:
        """Canonical PDF for the e-print, or ``None`` if there is none."""
        return self.members['render'] if 'render' in self.members else None

    @property
    def formats(self) -> Dict[D.ContentType, RecordEntry]:
        """Members other than metadata, source, and render, by content type."""
        reserved = ('metadata', 'source', 'render')
        by_type: Dict[D.ContentType, RecordEntry] = {}
        for fmt, entry in self.members.items():
            if fmt in reserved:
                continue
            by_type[D.ContentType(fmt)] = entry
        return by_type

    @property
    def source(self) -> RecordEntry:
        """Gzipped tarball containing the e-print source."""
        assert 'source' in self.members
        return self.members['source']

    def instance_to_domain(self) -> D.Version:
        """
        Deserialize an :class:`.RecordVersion` to an :class:`.Version`.

        The version is loaded from the metadata record only.

        Returns
        -------
        :class:`.Version`

        Raises
        ------
        ValueError
            If the deserialized version has no source or no render.

        """
        record = self.metadata
        version = record.to_domain(record.stream)
        # NOTE(review): a render is required here, although ``from_domain``
        # accepts versions without one. Confirm that this is intended.
        if version.source is None or version.render is None:
            raise ValueError('Failed to to_domain source or render metadata')
        return version
class RecordEPrint(RecordBase[D.Identifier,
                              D.VersionedIdentifier,
                              RecordVersion,
                              D.EPrint]):
    """Record for an e-print; declared with :class:`RecordVersion` members."""

    @classmethod
    def make_key(cls, idn: D.Identifier) -> D.Key:
        """
        Make a key prefix for an e-print record.

        Parameters
        ----------
        idn : :class:`.Identifier`
            arXiv identifier. Its year and (zero-padded) month form the
            leading part of the key.

        Returns
        -------
        :class:`.Key`

        """
        key = f'e-prints/{idn.year}/{str(idn.month).zfill(2)}/{idn}'
        return D.Key(key)

    @classmethod
    def make_manifest_key(cls, ident: D.Identifier) -> D.Key:
        """
        Make a key for an e-print manifest.

        The manifest key is the e-print key (see :meth:`make_key`) with
        ``.manifest.json`` appended.

        Returns
        -------
        :class:`.Key`

        """
        manifest_key = f'{cls.make_key(ident)}.manifest.json'
        return D.Key(manifest_key)
class RecordDay(RecordBase[datetime.date,
                           D.Identifier,
                           RecordEPrint,
                           D.EPrintDay]):
    """Record for a single day; declared with :class:`RecordEPrint` members."""

    @classmethod
    def make_manifest_key(cls, date: datetime.date) -> D.Key:
        """
        Make a key for a daily e-print manifest.

        Parameters
        ----------
        date : :class:`datetime.date`
            The day for which to make the key.

        Returns
        -------
        :class:`.Key`
            Of the form ``e-prints/<YYYY>/<MM>/<YYYY>-<MM>-<DD>.manifest.json``.

        """
        pattern = 'e-prints/%Y/%m/%Y-%m-%d.manifest.json'
        return D.Key(date.strftime(pattern))
class RecordMonth(RecordBase[YearMonth,
                             datetime.date,
                             RecordDay,
                             D.EPrintMonth]):
    """Record for a single month; declared with :class:`RecordDay` members."""

    @classmethod
    def make_manifest_key(cls, year_and_month: YearMonth) -> D.Key:
        """
        Make a key for a monthly e-print manifest.

        Parameters
        ----------
        year_and_month : :class:`.YearMonth`
            Unpacked as ``(year, month)``; the month is zero-padded to two
            characters in the key.

        Returns
        -------
        :class:`.Key`

        """
        y, m = year_and_month
        manifest_key = f'e-prints/{y}/{y}-{str(m).zfill(2)}.manifest.json'
        return D.Key(manifest_key)
class RecordYear(RecordBase[Year,
                            YearMonth,
                            RecordMonth,
                            D.EPrintYear]):
    """Record for a single year; declared with :class:`RecordMonth` members."""

    @classmethod
    def make_manifest_key(cls, year: Year) -> D.Key:
        """
        Make a key for a yearly e-print manifest.

        Parameters
        ----------
        year : :class:`.Year`
            The year for which to make the key.

        Returns
        -------
        :class:`.Key`

        """
        manifest_key = f'e-prints/{year}.manifest.json'
        return D.Key(manifest_key)
class RecordEPrints(RecordBase[str,
                               Year,
                               RecordYear,
                               D.AllEPrints]):
    """Record for all e-prints; declared with :class:`RecordYear` members."""

    @classmethod
    def make_manifest_key(cls, _: str) -> D.Key:
        """
        Make a key for the manifest of all e-prints.

        Parameters
        ----------
        _ : str
            Ignored; the key is the same regardless of the name passed.

        Returns
        -------
        :class:`.Key`

        """
        # A plain literal: there is nothing to interpolate, so an f-string
        # prefix would be misleading.
        return D.Key('e-prints.manifest.json')