# Source code for arxiv.canonical.domain.file

"""Provides bitstream-related concepts and logic."""

import os
from datetime import datetime
from typing import Any, Dict, IO, Iterable, NamedTuple, Optional, Union
from urllib.parse import urlparse

from typing_extensions import Protocol

from .base import CanonicalBase
from .content import ContentType


class URI(str):
    """
    A unique identifier for bitstream content.

    Bitstream content may be located in a variety of places prior to
    canonicalization. For example, it may be located on a local filesystem,
    or at a remote location accessible via HTTP.

    A value that starts with ``/`` is treated as a local path and is rewritten
    as a ``file:///`` URI. The components produced by
    :func:`urllib.parse.urlparse` are exposed as the instance attributes
    ``scheme``, ``netloc``, ``path``, ``params``, ``query`` and ``fragment``.
    """

    @staticmethod
    def _from_local_path(value: str) -> str:
        """Rewrite a value with a leading ``/`` as a ``file:///`` URI."""
        if value.startswith('/'):
            return f'file:///{value.lstrip("/")}'
        return value

    def __new__(cls, value: str) -> 'URI':
        """
        Make a new URI.

        The str content of the instance is the normalized form of ``value``.
        """
        # Call the helper through ``URI`` explicitly so that subclasses
        # cannot accidentally change how local paths are normalized.
        normalized = URI._from_local_path(value)
        uri: URI = super(URI, cls).__new__(cls, normalized)  # type: ignore
        return uri

    def __init__(self, value: str) -> None:
        """
        Initialize and parse an URI from a str value.

        Raises
        ------
        ValueError
            If the (normalized) value has no scheme.

        """
        # ``__init__`` receives the caller's original argument, not the
        # normalized str built in ``__new__``, so normalize again here.
        value = URI._from_local_path(value)

        parts = urlparse(value)
        self.scheme = parts.scheme
        if not self.scheme:
            raise ValueError(f'Not a valid URI: {value}')
        self.netloc = parts.netloc
        self.path = parts.path
        self.params = parts.params
        self.query = parts.query
        self.fragment = parts.fragment

    @property
    def is_canonical(self) -> bool:
        """Indicate whether the URI is a key in the canonical record."""
        return self.scheme == 'arxiv'

    @property
    def is_file(self) -> bool:
        """Indicate whether the URI is a path to a local file."""
        return self.scheme == 'file'

    @property
    def is_http_url(self) -> bool:
        """Indicate whether the URI is an HTTP URL."""
        return self.scheme in ('http', 'https')


class Key(URI):
    """
    The unique identifier for a bitstream in the canonical record.

    Any value that does not already start with ``arxiv:///`` has its leading
    slashes stripped and that prefix prepended. In addition to the attributes
    set by :class:`URI`, a key exposes ``filename``: the part of ``path``
    after its last ``/``.
    """

    @staticmethod
    def _with_prefix(value: str) -> str:
        """Return ``value`` with the ``arxiv:///`` prefix, adding it if absent."""
        if value.startswith('arxiv:///'):
            return value
        return f'arxiv:///{value.lstrip("/")}'

    def __new__(cls, value: str) -> 'Key':
        """Make a new key."""
        prefixed = Key._with_prefix(value)
        key: Key = super(Key, cls).__new__(cls, prefixed)  # type: ignore
        return key

    def __init__(self, value: str) -> None:
        """Initialize a key with a str value."""
        # ``__init__`` receives the caller's original argument, so the prefix
        # has to be applied here as well as in ``__new__``.
        super(Key, self).__init__(Key._with_prefix(value))
        # URI paths always use "/" as the separator; splitting on it directly
        # keeps the result independent of the host platform's path rules.
        self.filename = self.path.rpartition('/')[2]


class CanonicalFile(CanonicalBase):
    """Represents a file in the canonical record, e.g. a source package."""

    modified: datetime
    """Last time the file was modified."""

    size_bytes: int
    """Size of the file in bytes."""

    content_type: ContentType
    """The content type of the file."""

    filename: Optional[str]
    """Filename in the canonical record."""

    ref: URI
    """A reference to the location of the content of the file."""

    is_gzipped: bool
    """Whether or not the content at ``ref`` is served in gzipped form."""

    exclude_from_comparison = {'ref', 'is_gzipped'}

    def __init__(self, modified: datetime,
                 size_bytes: int,
                 content_type: ContentType,
                 ref: URI,
                 filename: Optional[str] = None,
                 is_gzipped: bool = False) -> None:
        """Store the file's metadata and the reference to its content."""
        self.modified = modified
        self.size_bytes = size_bytes
        self.content_type = content_type
        self.filename = filename
        self.ref = ref
        self.is_gzipped = is_gzipped

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'CanonicalFile':
        """
        Reconstitute a :class:`.CanonicalFile` from a native dict.

        ``modified``, ``size_bytes``, ``content_type`` and ``ref`` are
        required keys. ``filename`` and ``is_gzipped`` are optional and fall
        back to the same defaults as the constructor (``None`` and ``False``).
        """
        return cls(
            modified=datetime.fromisoformat(data['modified']),  # type: ignore ; pylint: disable=no-member
            size_bytes=data['size_bytes'],
            content_type=ContentType(data['content_type']),
            # ``filename`` is Optional on the class, so tolerate its absence
            # the same way ``is_gzipped`` is tolerated below.
            filename=data.get('filename'),
            ref=URI(data['ref']),
            is_gzipped=data.get('is_gzipped', False)
        )

    @property
    def mime_type(self) -> str:
        """Convenience accessor for the MIME type of the file."""
        return self.content_type.mime_type

    def to_dict(self) -> Dict[str, Any]:
        """
        Generate a native dict from this :class:`.CanonicalFile`.

        ``modified`` is rendered in ISO format and ``content_type`` as its
        ``value``; the remaining fields are passed through unchanged.
        """
        return {
            'modified': self.modified.isoformat(),
            'size_bytes': self.size_bytes,
            'content_type': self.content_type.value,
            'filename': self.filename,
            'ref': self.ref,
            'is_gzipped': self.is_gzipped
        }
# Source code for arxiv.canonical.domain.file
#
# arXiv Canonical Record
#
# Navigation
#
# Related Topics