Source code for arxiv.submission.services.classifier.classifier
"""Classifier service integration."""
from typing import Tuple, List, Any, Union, NamedTuple, Optional
from math import exp, log
from functools import wraps
from arxiv.base import logging
from arxiv.taxonomy import Category
from arxiv.integration.api import status, service
logger = logging.getLogger(__name__)
[docs]class Flag(NamedTuple):
"""General-purpose QA flag."""
key: str
value: Union[int, str, dict]
[docs]class Suggestion(NamedTuple):
"""A category suggested by the classifier."""
category: Category
probability: int
[docs]class Counts(NamedTuple):
"""Various counts of paper content."""
chars: int
pages: int
stops: int
words: int
[docs]class Classifier(service.HTTPIntegration):
"""Represents an interface to the classifier service."""
VERSION = '0.0'
SERVICE = 'classic'
ClassifierResponse = Tuple[List[Suggestion], List[Flag], Optional[Counts]]
[docs] def is_available(self, **kwargs: Any) -> bool:
"""Check our connection to the classifier service."""
timeout: float = kwargs.get('timeout', 0.2)
try:
self.classify(b'ruok?', timeout=timeout)
except Exception as e:
logger.error('Encountered error calling classifier: %s', e)
return False
return True
[docs] @classmethod
def probability(cls, logodds: float) -> float:
"""Convert log odds to a probability."""
return exp(logodds)/(1 + exp(logodds))
def _counts(self, data: dict) -> Optional[Counts]:
"""Parse counts from the response data."""
counts: Optional[Counts] = None
if 'counts' in data:
counts = Counts(**data['counts'])
return counts
def _flags(self, data: dict) -> List[Flag]:
"""Parse flags from the response data."""
return [
Flag(key, value) for key, value in data.get('flags', {}).items()
]
def _suggestions(self, data: dict) -> List[Suggestion]:
"""Parse classification suggestions from the response data."""
return [Suggestion(category=Category(datum['category']),
probability=self.probability(datum['logodds']))
for datum in data['classifier']]
[docs] def classify(self, content: bytes, timeout: float = 1.) \
-> ClassifierResponse:
"""
Make a classification request to the classifier service.
Parameters
----------
content : bytes
Raw text content from an e-print.
Returns
-------
list
A list of classifications.
list
A list of QA flags.
:class:`Counts` or None
Feature counts, if provided.
"""
_path = '/classifier/' # TODO: this MUST be configurable.
data, _, _ = self.json('post', _path, data=content, timeout=timeout)
return self._suggestions(data), self._flags(data), self._counts(data)