# Source code for search.controllers.advanced.forms

"""Provides form rendering and validation for the advanced search feature."""

import calendar
import re
from datetime import date, datetime
from typing import Callable, Optional, List, Any

from wtforms import Form, BooleanField, StringField, SelectField, validators, \
    FormField, SelectMultipleField, DateField, ValidationError, FieldList, \
    RadioField

from wtforms.fields import HiddenField
from wtforms import widgets

from arxiv import taxonomy
from search.domain import DateRange, AdvancedQuery
from search.controllers.util import does_not_start_with_wildcard, \
                                    strip_white_space, has_balanced_quotes

class MultiFormatDateField(DateField):
    """
    Extends :class:`.DateField` to support multiple date formats.

    Submitted text is parsed against each entry of ``formats`` in order; the
    first format that matches determines the value. When
    ``default_upper_bound`` is true, a value parsed with a format that lacks a
    month or day directive is moved to the end of the period it names (31
    December, or the last day of the month).
    """

    def __init__(self, label: Optional[str] = None,
                 validators: Optional[List[Callable]] = None,
                 formats: List[str] = ['%Y-%m-%d %H:%M:%S'],
                 default_upper_bound: bool = False,
                 **kwargs: Any) -> None:
        """
        Override to change ``format: str`` to ``formats: List[str]``.

        Parameters
        ----------
        label : str or None
            Passed through to the parent field constructor.
        validators : list of callables or None
            Passed through to the parent field constructor.
        formats : list of str
            ``strptime`` format strings, tried in order when parsing input.
            The first entry is also used to render an existing value.
        default_upper_bound : bool
            If true, partial dates are completed to the end of the year or
            month rather than the start.
        kwargs
            Passed through to the parent field constructor.

        """
        # ``super(DateField, self)`` starts the MRO lookup *after* DateField,
        # so DateField.__init__ itself is skipped.
        # NOTE(review): presumably to bypass DateField's single ``format``
        # argument -- confirm against the installed WTForms version.
        super(DateField, self).__init__(label, validators, **kwargs)
        # Copy so that instances never share (or mutate) the default list.
        self.formats = list(formats)
        self.default_upper_bound = default_upper_bound

    def _value(self) -> str:
        """Return the raw submitted text, else the value in the first format."""
        if self.raw_data:
            return ' '.join(self.raw_data)
        if self.data:
            return self.data.strftime(self.formats[0])
        return ''

    def process_formdata(self, valuelist: List[str]) -> None:
        """
        Try date formats until one sticks, or raise ValueError.

        Sets ``self.data`` to the parsed :class:`datetime.date`. Does nothing
        when ``valuelist`` is empty.

        Raises
        ------
        ValueError
            If no entry in ``self.formats`` matches the submitted text;
            ``self.data`` is set to ``None`` first.

        """
        if not valuelist:
            return

        date_str = ' '.join(valuelist)
        self.data: Optional[date]
        for fmt in self.formats:
            try:
                adj_date = datetime.strptime(date_str, fmt).date()
            except ValueError:
                # This format did not match; fall through to the next one.
                continue

            if self.default_upper_bound:
                if not re.search(r'%[Bbm]', fmt):
                    # when month does not appear in matching format
                    adj_date = adj_date.replace(month=12, day=31)
                elif not re.search(r'%d', fmt):
                    # when day does not appear in matching format
                    last_day = calendar.monthrange(adj_date.year,
                                                   adj_date.month)[1]
                    adj_date = adj_date.replace(day=last_day)
            self.data = adj_date
            return

        self.data = None
        raise ValueError(self.gettext('Not a valid date value'))
class FieldForm(Form):
    """
    Subform for a single query part on a specific field.

    Holds the search term, the boolean operator (AND/OR/NOT, defaulting to
    AND), and the field selected from ``AdvancedQuery.SUPPORTED_FIELDS``.
    """

    # pylint: disable=too-few-public-methods

    term = StringField(
        "Search term...",
        filters=[strip_white_space],
        validators=[does_not_start_with_wildcard, has_balanced_quotes],
    )
    operator = SelectField(
        "Operator",
        # Each operator is both the submitted value and the displayed label.
        choices=[(name, name) for name in ('AND', 'OR', 'NOT')],
        default='AND',
    )
    field = SelectField("Field", choices=AdvancedQuery.SUPPORTED_FIELDS)
class ClassificationForm(Form):
    """
    Subform for selecting a classification to (disjunctively) filter by.

    One checkbox per archive listed in ``ARCHIVES``, a select box for the
    physics archives, and a radio button controlling cross-list handling.
    """

    # pylint: disable=too-few-public-methods

    # TODO: this should not be hard-coded!
    #
    # Map arXiv archives to fields on this form. Ideally we would autogenerate
    # form fields based on the arXiv taxonomy, but this can't easily happen
    # until we replace the classic-style advanced interface with faceted
    # search.
    ARCHIVES = [
        ('cs', 'computer_science'),
        ('econ', 'economics'),
        ('eess', 'eess'),
        ('math', 'mathematics'),
        ('physics', 'physics'),
        ('q-bio', 'q_biology'),
        ('q-fin', 'q_finance'),
        ('stat', 'statistics'),
    ]

    # 'all' first, then every active archive whose group is grp_physics.
    PHYSICS_ARCHIVES = [
        ('all', 'all'),
        *(
            (archive, archive)
            for archive, description in taxonomy.ARCHIVES_ACTIVE.items()
            if description['in_group'] == 'grp_physics'
        ),
    ]

    INCLUDE_CROSS_LIST = 'include'
    EXCLUDE_CROSS_LIST = 'exclude'

    computer_science = BooleanField('Computer Science (cs)')
    economics = BooleanField('Economics (econ)')
    eess = BooleanField('Electrical Engineering and Systems Science (eess)')
    mathematics = BooleanField('Mathematics (math)')
    physics = BooleanField('Physics')
    physics_archives = SelectField(choices=PHYSICS_ARCHIVES, default='all')
    q_biology = BooleanField('Quantitative Biology (q-bio)')
    q_finance = BooleanField('Quantitative Finance (q-fin)')
    statistics = BooleanField('Statistics (stat)')

    include_cross_list = RadioField(
        'Include cross-list',
        choices=[
            (INCLUDE_CROSS_LIST, 'Include cross-listed papers'),
            (EXCLUDE_CROSS_LIST, 'Exclude cross-listed papers'),
        ],
        default=INCLUDE_CROSS_LIST,
    )
def yearInBounds(form: Form, field: DateField) -> None:
    """
    May not be prior to 1991, or more than one year after today.

    Parameters
    ----------
    form : :class:`.Form`
        The form being validated (unused; part of the validator signature).
    field : :class:`.DateField`
        The field whose ``data`` is checked. A value of ``None`` is accepted
        without further checks.

    Raises
    ------
    :class:`.ValidationError`
        If the date falls before 1991-01-01 or after the upper limit.

    """
    if field.data is None:
        return None

    start_of_time = date(year=1991, month=1, day=1)

    today = date.today()
    try:
        upper_limit = today.replace(year=today.year + 1)
    except ValueError:
        # Today is 29 February and next year has no such day.
        upper_limit = today.replace(year=today.year + 1, day=28)

    if field.data < start_of_time or field.data > upper_limit:
        raise ValidationError('Not a valid publication year')
class DateForm(Form):
    """
    Subform with options for limiting results by publication date.

    ``filter_by`` selects the filtering mode; ``year``, ``from_date`` and
    ``to_date`` carry the values for the 'specific_year' and 'date_range'
    modes; ``date_type`` selects which date the filter applies to.
    """

    filter_by = RadioField(
        'Filter by',
        choices=[
            ('all_dates', 'All dates'),
            ('past_12', 'Past 12 months'),
            ('specific_year', 'Specific year'),
            ('date_range', 'Date range')
        ],
        default='all_dates'
    )

    year = DateField(
        'Year',
        format='%Y',
        validators=[validators.Optional(), yearInBounds]
    )
    from_date = MultiFormatDateField(
        'From',
        validators=[validators.Optional(), yearInBounds],
        formats=['%Y-%m-%d', '%Y-%m', '%Y']
    )
    # Partial end dates are completed to the end of the named month or year.
    to_date = MultiFormatDateField(
        'to',
        validators=[validators.Optional(), yearInBounds],
        formats=['%Y-%m-%d', '%Y-%m', '%Y'],
        default_upper_bound=True
    )

    SUBMITTED_ORIGINAL = DateRange.SUBMITTED_ORIGINAL
    SUBMITTED_CURRENT = DateRange.SUBMITTED_CURRENT
    ANNOUNCED = DateRange.ANNOUNCED
    DATE_TYPE_CHOICES = [
        (SUBMITTED_CURRENT, 'Submission date (most recent)'),
        (SUBMITTED_ORIGINAL, 'Submission date (original)'),
        (ANNOUNCED, 'Announcement date'),
    ]

    date_type = RadioField(
        'Apply to',
        choices=DATE_TYPE_CHOICES,
        default=SUBMITTED_CURRENT,
        description="You may filter on either submission"
                    " date or announcement date. Note that announcement"
                    " date supports only year and month granularity."
    )

    def validate_filter_by(self, field: RadioField) -> None:
        """
        Ensure that related fields are filled.

        Parameters
        ----------
        field : :class:`.RadioField`
            The ``filter_by`` field; its ``data`` is the selected mode.

        Raises
        ------
        :class:`.ValidationError`
            If 'specific_year' is selected without a year; if 'date_range' is
            selected with neither a start nor an end date; or if both dates
            are given and the start is not strictly before the end.

        """
        # Read the form data once rather than on every lookup.
        data = self.data

        if field.data == 'specific_year' and not data.get('year'):
            raise ValidationError('Please select a year')
        elif field.data == 'date_range':
            from_date = data.get('from_date')
            to_date = data.get('to_date')
            if not from_date and not to_date:
                raise ValidationError('Must select start and/or end date(s)')
            if from_date and to_date and from_date >= to_date:
                raise ValidationError(
                    'End date must be later than start date'
                )
class AdvancedSearchForm(Form):
    """
    Replacement for the 'classic' advanced search interface.

    Combines one or more term subforms with the classification and date
    subforms, plus paging, ordering, and display options.
    """

    # pylint: disable=too-few-public-methods

    advanced = HiddenField('Advanced', default=1)
    """Used to indicate whether the form should be shown."""

    terms = FieldList(FormField(FieldForm), min_entries=1)
    classification = FormField(ClassificationForm)
    date = FormField(DateForm)

    size = SelectField(
        'results per page',
        default=50,
        # Each page size is both the submitted value and the displayed label.
        choices=[(count, count) for count in ('25', '50', '100', '200')],
    )
    order = SelectField(
        'Sort results by',
        choices=[
            ('-announced_date_first', 'Announcement date (newest first)'),
            ('announced_date_first', 'Announcement date (oldest first)'),
            ('-submitted_date', 'Submission date (newest first)'),
            ('submitted_date', 'Submission date (oldest first)'),
            ('', 'Relevance'),
        ],
        validators=[validators.Optional()],
        default='-announced_date_first',
    )
    include_older_versions = BooleanField('Include older versions of papers')

    HIDE_ABSTRACTS = 'hide'
    SHOW_ABSTRACTS = 'show'
    abstracts = RadioField(
        'Abstracts',
        choices=[
            (SHOW_ABSTRACTS, 'Show abstracts'),
            (HIDE_ABSTRACTS, 'Hide abstracts'),
        ],
        default=SHOW_ABSTRACTS,
    )