Source code for arxiv.submission.services.classic.load

"""Supports loading :class:`.Submission` directly from classic data."""

from typing import List, Optional
import copy
from itertools import groupby

from arxiv.license import LICENSES
from arxiv.base import logging

from ... import domain
from . import models
from .patch import patch_withdrawal, patch_jref, patch_cross, patch_hold

logger = logging.getLogger(__name__)
logger.propagate = False


def load(rows: List[models.Submission]) -> Optional[domain.Submission]:
    """
    Build a single :class:`.domain.Submission` from classic database rows.

    Parameters
    ----------
    rows : list
        :class:`.models.Submission` rows from the classic database, all
        belonging to one arXiv e-print/submission group.

    Returns
    -------
    :class:`.domain.Submission` or ``None``
        A copy of the last version that could be loaded, with ``.versions``
        set to the announced versions. ``None`` if no version produced a
        usable representation (e.g. every row is deleted).

    """
    def _version_of(db_row: models.Submission):
        return db_row.version

    def _id_of(db_row: models.Submission):
        return db_row.submission_id

    versions: List[domain.Submission] = []
    submission_id: Optional[str] = None

    # Rows are processed one version at a time, in ascending version order.
    ordered_rows = sorted(rows, key=_version_of)
    logger.debug('Load from rows %s', [r.submission_id for r in ordered_rows])

    for version, grouped in groupby(ordered_rows, key=_version_of):
        # Within a version, order by submission ID: it is a more reliable
        # proxy for creation order than the classic creation timestamp.
        version_rows = sorted(grouped, key=_id_of)
        logger.debug('Version %s: %s', version, version_rows)

        # The ID of the very first row identifies the submission for its
        # whole lifecycle in NG.
        if version == 1:
            submission_id = version_rows[0].submission_id
            logger.debug('Submission ID: %s', submission_id)

        # A single iterator is shared by the two loops below: the first
        # consumes rows up to and including the creation row, the second
        # picks up whatever is left.
        pending = iter(version_rows)

        # Skip over false starts until we reach the row that actually
        # created this version: a 'new' or 'replacement' row that is either
        # a new submission or has not been deleted.
        version_submission: Optional[domain.Submission] = None
        for row in pending:
            if not row.is_new_version():
                continue
            if row.type != row.NEW_SUBMISSION and row.is_deleted():
                continue
            version_submission = to_submission(row, submission_id)
            logger.debug('Got initial state: %s', version_submission)
            break

        if version_submission is None:
            logger.debug('Nothing to work with for this version')
            continue

        # Requests made against the previous version carry over to this one.
        if versions:
            logger.debug('Bring user_requests forward from last version')
            version_submission.user_requests.update(versions[-1].user_requests)

        for row in pending:
            logger.debug('Handle subsequent row: %s', row)
            if row.is_jref() and not row.is_deleted():
                # JREFs are treated as having no approval process, so
                # deleted JREF rows are simply ignored. The patch should
                # update doi, journal_ref and report_num.
                logger.debug('JREF row')
                version_submission = patch_jref(version_submission, row)
            elif row.is_withdrawal():
                # Deleted withdrawal rows are still applied, because NG
                # keeps track of every request. The patch should update
                # reason_for_withdrawal (if applied) and add a
                # WithdrawalRequest to user_requests.
                logger.debug('Withdrawal row')
                version_submission = patch_withdrawal(version_submission, row)
            elif row.is_crosslist():
                # Same reasoning as for withdrawals. The patch should update
                # the secondary classifications (if applied) and add a
                # CrossListClassificationRequest to user_requests.
                logger.debug('Crosslist row')
                version_submission = patch_cross(version_submission, row)

            # Holds must appear as Hold objects on the submission, not only
            # as a status value.
            if version_submission.is_on_hold:
                version_submission = patch_hold(version_submission, row)

        versions.append(version_submission)

    if not versions:
        return None

    latest = copy.deepcopy(versions[-1])
    latest.versions = [ver for ver in versions if ver and ver.is_announced]
    return latest
def to_submission(row: models.Submission,
                  submission_id: Optional[int] = None) -> domain.Submission:
    """
    Generate a representation of submission state from a DB instance.

    Parameters
    ----------
    row : :class:`.models.Submission`
        Database row representing a :class:`.domain.submission.Submission`.
    submission_id : int or None
        If provided, overrides the database value when setting
        :attr:`domain.Submission.submission_id`.

    Returns
    -------
    :class:`.domain.submission.Submission`

    Raises
    ------
    KeyError
        If ``row.license`` is set but is not a key of ``LICENSES``.

    """
    status = status_from_classic(row.status)
    primary = row.primary_classification

    # Fall back to the submitter columns on the row itself when there is no
    # related submitter object.
    if row.submitter is None:
        submitter = domain.User(native_id=row.submitter_id,
                                email=row.submitter_email)
    else:
        submitter = row.get_submitter()

    if submission_id is None:
        submission_id = row.submission_id

    # Named ``submission_license`` so the ``license`` builtin that ``site``
    # installs is not shadowed.
    submission_license: Optional[domain.License] = None
    if row.license:
        label = LICENSES[row.license]['label']
        submission_license = domain.License(uri=row.license, name=label)

    primary_clsn: Optional[domain.Classification] = None
    if primary and primary.category:
        _category = domain.Category(primary.category)
        primary_clsn = domain.Classification(category=_category)
    secondary_clsn = [
        domain.Classification(category=domain.Category(db_cat.category))
        for db_cat in row.categories if not db_cat.is_primary
    ]

    content: Optional[domain.SubmissionContent] = None
    if row.package:
        if row.package.startswith('fm://'):
            # Expected form: ``fm://<identifier>@<checksum>``. ``partition``
            # tolerates a missing ``@<checksum>`` part (yielding an empty
            # checksum, as in the non-``fm://`` case) instead of raising
            # ValueError on unpacking.
            location = row.package.split('://', 1)[1]
            identifier, _, checksum = location.partition('@')
        else:
            identifier = row.package
            checksum = ""
        source_format = domain.SubmissionContent.Format(row.source_format)
        content = domain.SubmissionContent(identifier=identifier,
                                           compressed_size=0,
                                           uncompressed_size=row.source_size,
                                           checksum=checksum,
                                           source_format=source_format)

    submission = domain.Submission(
        submission_id=submission_id,
        creator=submitter,
        owner=submitter,
        status=status,
        created=row.get_created(),
        updated=row.get_updated(),
        source_content=content,
        submitter_is_author=bool(row.is_author),
        submitter_accepts_policy=bool(row.agree_policy),
        submitter_contact_verified=bool(row.userinfo),
        submitter_compiled_preview=not bool(row.must_process),
        submitter_confirmed_preview=bool(row.viewed),
        metadata=domain.SubmissionMetadata(title=row.title,
                                           abstract=row.abstract,
                                           comments=row.comments,
                                           report_num=row.report_num,
                                           doi=row.doi,
                                           msc_class=row.msc_class,
                                           acm_class=row.acm_class,
                                           journal_ref=row.journal_ref),
        license=submission_license,
        primary_classification=primary_clsn,
        secondary_classification=secondary_clsn,
        arxiv_id=row.doc_paper_id,
        version=row.version
    )

    # At most one patch is applied, and a hold takes precedence over the
    # row being a withdrawal or cross-list.
    if row.sticky_status == row.ON_HOLD or row.status == row.ON_HOLD:
        submission = patch_hold(submission, row)
    elif row.is_withdrawal():
        submission = patch_withdrawal(submission, row)
    elif row.is_crosslist():
        submission = patch_cross(submission, row)
    return submission
def status_from_classic(classic_status: str) -> Optional[str]:
    """
    Map a classic status code to a domain submission status.

    Parameters
    ----------
    classic_status : str
        Status code as stored on a classic submission row.

    Returns
    -------
    str or None
        The corresponding domain status, or ``None`` if ``classic_status``
        has no entry in ``STATUS_MAP``.

    """
    return STATUS_MAP.get(classic_status)
# Translation table from classic status codes to Submission domain statuses.
# Several classic codes collapse onto one domain status, so the entries are
# grouped by the domain status they map to.
STATUS_MAP = {
    **dict.fromkeys(
        (models.Submission.NOT_SUBMITTED,),
        domain.Submission.WORKING,
    ),
    **dict.fromkeys(
        (models.Submission.SUBMITTED,
         models.Submission.ON_HOLD),
        domain.Submission.SUBMITTED,
    ),
    **dict.fromkeys(
        (models.Submission.NEXT_PUBLISH_DAY,
         models.Submission.PROCESSING,
         models.Submission.PROCESSING_SUBMISSION,
         models.Submission.NEEDS_EMAIL),
        domain.Submission.SCHEDULED,
    ),
    **dict.fromkeys(
        (models.Submission.ANNOUNCED,
         models.Submission.DELETED_ANNOUNCED),
        domain.Submission.ANNOUNCED,
    ),
    **dict.fromkeys(
        (models.Submission.USER_DELETED,
         models.Submission.DELETED_EXPIRED,
         models.Submission.DELETED_ON_HOLD,
         models.Submission.DELETED_PROCESSING,
         models.Submission.DELETED_REMOVED,
         models.Submission.DELETED_USER_EXPIRED),
        domain.Submission.DELETED,
    ),
    **dict.fromkeys(
        (models.Submission.ERROR_STATE,),
        domain.Submission.ERROR,
    ),
}