Source code for arxiv.submission.services.compiler.compiler

"""
Integration with the compiler service API.

The compiler is responsible for building PDF, DVI, and other goodies from
LaTeX sources. In the submission UI, we specifically want to build a PDF so
that the user can preview their submission. Additionally, we want to show the
submitter the TeX log so that they can identify any potential problems with
their sources.
"""
from typing import Tuple, Optional, List, Union, NamedTuple, Mapping, Any
import json
import io
import re
from enum import Enum
from functools import wraps
from collections import defaultdict
from urllib.parse import urlparse, urlunparse, urlencode

import dateutil.parser

from werkzeug.datastructures import FileStorage
import requests

from arxiv.base import logging
from arxiv.integration.api import status, service

from ...domain.compilation import Compilation, CompilationProduct, \
    CompilationLog


logger = logging.getLogger(__name__)

PDF = Compilation.Format.PDF


[docs]class CompilationFailed(RuntimeError): """The compilation service failed to compile the source package."""
[docs]class Compiler(service.HTTPIntegration): """Encapsulates a connection with the compiler service.""" VERSION = "0.1" """Verison of the compiler service with which we are integrating.""" NAME = "arxiv-compiler" """Name of the compiler service with which we are integrating."""
[docs] class Meta: """Configuration for :class:`Classifier`.""" service_name = "compiler"
[docs] def is_available(self, **kwargs: Any) -> bool: """Check our connection to the compiler service.""" timeout: float = kwargs.get('timeout', 0.2) try: self.get_service_status(timeout=timeout) except Exception as e: logger.error('Encountered error calling compiler: %s', e) return False return True
def _parse_status_response(self, data: dict) -> Compilation: return Compilation( source_id=data['source_id'], checksum=data['checksum'], output_format=Compilation.Format(data['output_format']), status=Compilation.Status(data['status']), reason=Compilation.Reason(data.get('reason', None)), description=data.get('description', None), size_bytes=data.get('size_bytes', 0) ) def _parse_loc(self, headers: Mapping) -> str: return urlparse(headers['Location']).path
[docs] def get_service_status(self, timeout: float = 0.2) -> dict: """Get the status of the compiler service.""" return self.json('get', 'status', timeout=timeout)[0]
[docs] def compile(self, source_id: str, checksum: str, token: str, stamp_label: str, stamp_link: str, compiler: Optional[Compilation.SupportedCompiler] = None, output_format: Compilation.Format = PDF, force: bool = False) -> Compilation: """ Request compilation for an upload workspace. Unless ``force`` is ``True``, the compiler service will only attempt to compile a source ID + checksum + format combo once. If there is already a compilation underway or complete for the parameters in this request, the service will redirect to the corresponding status URI. Hence the data returned by this function may be from the response to the initial POST request, or from the status endpoint after being redirected. Parameters ---------- source_id : int Unique identifier for the upload workspace. checksum : str State up of the upload workspace. token : str The original (encrypted) auth token on the request. Used to perform subrequests to the file management service. stamp_label : str Label to use in PS/PDF stamp/watermark. Form is 'Identifier [Category Date]' Category and Date are optional. By default Date will be added by compiler. stamp_link : str Link (URI) to use in PS/PDF stamp/watermark. compiler : :class:`.Compiler` or None Name of the preferred compiler. output_format : :class:`.Format` Defaults to :attr:`.Format.PDF`. force : bool If True, compilation will be forced even if it has been attempted with these parameters previously. Default is ``False``. Returns ------- :class:`Compilation` The current state of the compilation. """ logger.debug("Requesting compilation for %s @ %s: %s", source_id, checksum, output_format) payload = {'source_id': source_id, 'checksum': checksum, 'stamp_label': stamp_label, 'stamp_link': stamp_link, 'format': output_format.value, 'force': force} endpoint = '/' expected_codes = [status.OK, status.ACCEPTED, status.SEE_OTHER, status.FOUND] data, _, headers = self.json('post', endpoint, token, json=payload, expected_code=expected_codes) return self._parse_status_response(data)
[docs] def get_status(self, source_id: str, checksum: str, token: str, output_format: Compilation.Format = PDF) -> Compilation: """ Get the status of a compilation. Parameters ---------- source_id : int Unique identifier for the upload workspace. checksum : str State up of the upload workspace. output_format : :class:`.Format` Defaults to :attr:`.Format.PDF`. Returns ------- :class:`Compilation` The current state of the compilation. """ endpoint = f'/{source_id}/{checksum}/{output_format.value}' data, _, headers = self.json('get', endpoint, token) return self._parse_status_response(data)
[docs] def compilation_is_complete(self, source_id: str, checksum: str, token: str, output_format: Compilation.Format) -> bool: """Check whether compilation has completed successfully.""" stat = self.get_status(source_id, checksum, token, output_format) if stat.status is Compilation.Status.SUCCEEDED: return True elif stat.status is Compilation.Status.FAILED: raise CompilationFailed('Compilation failed') return False
[docs] def get_product(self, source_id: str, checksum: str, token: str, output_format: Compilation.Format = PDF) \ -> CompilationProduct: """ Get the compilation product for an upload workspace, if it exists. Parameters ---------- source_id : int Unique identifier for the upload workspace. checksum : str State up of the upload workspace. output_format : :class:`.Format` Defaults to :attr:`.Format.PDF`. Returns ------- :class:`CompilationProduct` The compilation product itself. """ endpoint = f'/{source_id}/{checksum}/{output_format.value}/product' response = self.request('get', endpoint, token, stream=True) return CompilationProduct(content_type=output_format.content_type, stream=io.BytesIO(response.content))
[docs] def get_log(self, source_id: str, checksum: str, token: str, output_format: Compilation.Format = PDF) -> CompilationLog: """ Get the compilation log for an upload workspace, if it exists. Parameters ---------- source_id : int Unique identifier for the upload workspace. checksum : str State up of the upload workspace. output_format : :class:`.Format` Defaults to :attr:`.Format.PDF`. Returns ------- :class:`CompilationProduct` The compilation product itself. """ endpoint = f'/{source_id}/{checksum}/{output_format.value}/log' response = self.request('get', endpoint, token, stream=True) return CompilationLog(stream=io.BytesIO(response.content))
[docs]def get_task_id(source_id: str, checksum: str, output_format: Compilation.Format) -> str: """Generate a key for a /checksum/format combination.""" return f"{source_id}/{checksum}/{output_format.value}"
[docs]def split_task_id(task_id: str) -> Tuple[str, str, Compilation.Format]: source_id, checksum, format_value = task_id.split("/") return source_id, checksum, Compilation.Format(format_value)
[docs]class Download(object): """Wrapper around response content.""" def __init__(self, response: requests.Response) -> None: """Initialize with a :class:`requests.Response` object.""" self._response = response
[docs] def read(self, *args, **kwargs) -> bytes: """Read response content.""" return self._response.content