Source code for astroquery.splatalogue.core

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""
Module to search Splatalogue.net via splat, modeled loosely on
ftp://ftp.cv.nrao.edu/NRAO-staff/bkent/slap/idl/

:author: Adam Ginsburg <adam.g.ginsburg@gmail.com>
"""
import json
from astropy.table import Table
from astropy import units as u
from ..query import BaseQuery
from ..utils import async_to_sync, prepend_docstr_nosections
from . import conf
from . import load_species_table
from .utils import clean_column_headings

from astropy.utils.decorators import deprecated_renamed_argument

__all__ = ['Splatalogue', 'SplatalogueClass']

# example query of SPLATALOGUE directly:
# https://www.cv.nrao.edu/php/splat/c.php?sid%5B%5D=64&sid%5B%5D=108&calcIn=&data_version=v3.0&from=&to=&frequency_units=MHz&energy_range_from=&energy_range_to=&lill=on&tran=&submit=Search&no_atmospheric=no_atmospheric&no_potential=no_potential&no_probable=no_probable&include_only_nrao=include_only_nrao&displayLovas=displayLovas&displaySLAIM=displaySLAIM&displayJPL=displayJPL&displayCDMS=displayCDMS&displayToyaMA=displayToyaMA&displayOSU=displayOSU&displayRecomb=displayRecomb&displayLisa=displayLisa&displayRFI=displayRFI&ls1=ls1&ls5=ls5&el1=el1

# for backward-compatibility
# (As of March 5, this is incomplete, but is enough to make `minimize_table` work)
colname_mapping_feb2024 = {
    'Species': 'name',
    'Chemical Name': 'chemical_name',
    'Resolved QNs': 'resolved_QNs',
    'Freq-GHz(rest frame,redshifted)': 'orderedFreq',
    'Meas Freq-GHz(rest frame,redshifted)': 'measFreq',
    'Log<sub>10</sub> (A<sub>ij</sub>)': 'aij',
    'E_U (K)': 'upper_state_energy_K',
}



[docs]
@async_to_sync
class SplatalogueClass(BaseQuery):

    SLAP_URL = conf.slap_url
    QUERY_URL = conf.query_url
    TIMEOUT = conf.timeout
    LINES_LIMIT = conf.lines_limit
    versions = ('v1.0', 'v2.0', 'v3.0', 'vall')
    # global constant, not user-configurable
    ALL_LINE_LISTS = ('LovasNIST', 'SLAIM', 'JPL', 'CDMS', 'ToyaMA', 'OSU',
                      'TopModel', 'Recombination', 'RFI')
    VALID_LINE_STRENGTHS = ('CDMSJPL', 'SijMu2', 'Sij', 'Aij', 'LovasAST')
    VALID_ENERGY_LEVELS = {'One': 'EL_cm-1',
                           'Two': 'EL_K',
                           'Three': 'EU_cm-1',
                           'Four': 'EU_K'}
    VALID_ENERGY_TYPES = ('el_cm1', 'eu_cm1', 'eu_k', 'el_k')
    VALID_INTENSITY_TYPES = ('CDMS/JPL (log)', 'Sij-mu2', 'Aij (log)')

    def __init__(self, **kwargs):
        """
        Initialize a Splatalogue query class with default arguments set.
        Frequency specification is required for *every* query, but any
        default keyword arguments (see `query_lines`) can be overridden
        here.
        """
        super().__init__()
        self.data = self._default_kwargs()
        self.set_default_options(**kwargs)


[docs]
    def set_default_options(self, **kwargs):
        """
        Modify the default options.
        See `query_lines`
        """
        self.data.update(json.loads(self._parse_kwargs(**kwargs)['body']))



[docs]
    @deprecated_renamed_argument("restr", "species_regex", since="0.4.7")
    def get_species_ids(self, species_regex=None, *, reflags=0, recache=False):
        """
        Get a dictionary of "species" IDs, where species refers to the molecule
        name, mass, and chemical composition.

        Parameters
        ----------
        species_regex : str
            String to search for among the species names, if specified.
            The string will be compiled into a regular expression using the
            python `re` module.
        reflags : int
            Flags to pass to `re`.
        recache : bool
            Flag whether to refresh the local cache of species IDs

        Examples
        --------
        >>> import re
        >>> import pprint # unfortunate hack required for documentation testing
        >>> rslt = Splatalogue.get_species_ids(species_regex='Formaldehyde')
        >>> pprint.pprint(rslt)
        {'03023 H2CO - Formaldehyde': '194',
         '03106 H213CO - Formaldehyde': '324',
         '03107 HDCO - Formaldehyde': '109',
         '03108 H2C17O - Formaldehyde': '982',
         '03202 H2C18O - Formaldehyde': '155',
         '03203 D2CO - Formaldehyde': '94',
         '03204 HD13CO - Formaldehyde': '1219',
         '03301 D213CO - Formaldehyde': '1220',
         '03315 HDC18O - Formaldehyde': '21141',
         '03410 D2C18O - Formaldehyde': '21140'}
        >>> rslt = Splatalogue.get_species_ids(species_regex='H2CO')
        >>> pprint.pprint(rslt)
        {'03023 H2CO - Formaldehyde': '194',
         '03109 H2COH+ - Hydroxymethylium ion': '224',
         '04406 c-H2COCH2 - Ethylene Oxide': '21',
         '06029 NH2CONH2 - Urea': '21166',
         '07510 H2NCH2COOH - I v=0 - Glycine': '389',
         '07511 H2NCH2COOH - I v=1 - Glycine': '1312',
         '07512 H2NCH2COOH - I v=2 - Glycine': '1313',
         '07513 H2NCH2COOH - II v=0 - Glycine': '262',
         '07514 H2NCH2COOH - II v=1 - Glycine': '1314',
         '07515 H2NCH2COOH - II v=2 - Glycine': '1315',
         '07517 NH2CO2CH3 v=0 - Methyl Carbamate': '1334',
         '07518 NH2CO2CH3 v=1 - Methyl Carbamate': '1335',
         '08902 CH3CHNH2COOH - I - α-Alanine': '1321',
         '08903 CH3CHNH2COOH - II - α-Alanine': '1322'}
        >>> # note the whitespace, preventing H2CO within other
        >>> # more complex molecules
        >>> Splatalogue.get_species_ids(species_regex=' H2CO ')
        {'03023 H2CO - Formaldehyde': '194'}
        >>> Splatalogue.get_species_ids(species_regex=' h2co ', reflags=re.IGNORECASE)
        {'03023 H2CO - Formaldehyde': '194'}

        """
        # loading can be an expensive operation and should not change at
        # runtime: do it lazily
        if not hasattr(self, '_species_ids'):
            self._species_ids = load_species_table.species_lookuptable(recache=recache)

        if species_regex is not None:
            return self._species_ids.find(species_regex, flags=reflags)
        else:
            return self._species_ids


    def _default_kwargs(self):
        kwargs = dict(min_frequency=0 * u.GHz,
                      max_frequency=100 * u.THz,
                      chemical_name='',
                      line_lists=self.ALL_LINE_LISTS,
                      line_strengths=self.VALID_LINE_STRENGTHS,
                      energy_levels=self.VALID_ENERGY_LEVELS.keys(),
                      exclude=('potential', 'atmospheric', 'probable'),
                      version='v3.0',
                      only_NRAO_recommended=None,
                      export=True,
                      export_limit=self.LINES_LIMIT,
                      noHFS=False, displayHFS=False, show_unres_qn=False,
                      show_upper_degeneracy=False, show_molecule_tag=False,
                      show_qn_code=False, show_lovas_labref=False,
                      show_lovas_obsref=False, show_orderedfreq_only=False,
                      show_nrao_recommended=False,)
        return json.loads(self._parse_kwargs(**kwargs)['body'])

    def _parse_kwargs(self, *, min_frequency=None, max_frequency=None,
                      chemical_name=None,
                      chem_re_flags=0, energy_min=None, energy_max=None,
                      energy_type=None, intensity_lower_limit=None,
                      intensity_type=None, transition=None, version=None,
                      exclude=None,
                      only_astronomically_observed=None,
                      only_NRAO_recommended=None,
                      line_lists=None, line_strengths=None, energy_levels=None,
                      export=None, export_limit=None, noHFS=None,
                      displayHFS=None, show_unres_qn=None,
                      show_upper_degeneracy=None, show_molecule_tag=None,
                      show_qn_code=None, show_lovas_labref=None,
                      show_lovas_obsref=None, show_orderedfreq_only=None,
                      show_nrao_recommended=None,
                      parse_chemistry_locally=True,
                      export_start=0,
                      export_stop=250):
        """
        The Splatalogue service returns lines with rest frequencies in the
        range [min_frequency, max_frequency].

        Parameters
        ----------
        min_frequency : `astropy.units`
            Minimum frequency (or any spectral() equivalent)
        max_frequency : `astropy.units`
            Maximum frequency (or any spectral() equivalent)
        chemical_name : str
            Name of the chemical to search for. Treated as a regular
            expression.  An empty set ('', (), [], {}) will match *any*
            species. Examples:

            ``'H2CO'`` - 13 species have H2CO somewhere in their formula.

            ``'Formaldehyde'`` - There are 8 isotopologues of Formaldehyde
                                 (e.g., H213CO).

            ``'formaldehyde'`` - Thioformaldehyde,Cyanoformaldehyde.

            ``'formaldehyde',chem_re_flags=re.I`` - Formaldehyde,thioformaldehyde,
                                                    and Cyanoformaldehyde.

            ``' H2CO '`` - Just 1 species, H2CO. The spaces prevent including
                           others.
        parse_chemistry_locally : bool
            Attempt to determine the species ID #'s locally before sending the
            query?  This will prevent queries that have no matching species.
            It also performs a more flexible regular expression match to the
            species IDs.  See the examples in `get_species_ids`
        chem_re_flags : int
            See the `re` module
        energy_min : `None` or float
            Energy range to include.  See energy_type
        energy_max : `None` or float
            Energy range to include.  See energy_type
        energy_type : ``'el_cm1'``, ``'eu_cm1'``, ``'eu_k'``, ``'el_k'``
            Type of energy to restrict.  L/U for lower/upper state energy,
            cm/K for *inverse* cm, i.e. wavenumber, or K for Kelvin
        intensity_lower_limit : `None` or float
            Lower limit on the intensity.  See intensity_type
        intensity_type : `None` or ``'sij'``, ``'cdms_jpl'``, ``'aij'``
            The type of intensity on which to place a lower limit
        transition : str
            e.g. 1-0
        version : ``'v1.0'``, ``'v2.0'``, ``'v3.0'`` or ``'vall'``
            Data version
        exclude : list
            Types of lines to exclude.  Default is:
            (``'potential'``, ``'atmospheric'``, ``'probable'``)
            Can also exclude ``'known'``.
            To exclude nothing, use 'none', not the python object None, since
            the latter is meant to indicate 'leave as default'
        only_astronomically_observed : bool
            Show only astronomically observed species?
        only_NRAO_recommended : bool
            Show only NRAO recommended species?
        line_lists : list
            Options:
            Lovas, SLAIM, JPL, CDMS, ToyaMA, OSU, Recombination, RFI
        line_strengths : list
            * CDMS/JPL Intensity : ls1
            * Sij : ls3
            * Aij : ls4
            * Lovas/AST : ls5
        energy_levels : list
            * E_lower (cm^-1) : "One"
            * E_lower (K) : "Two"
            * E_upper (cm^-1) : "Three"
            * E_upper (K) : "Four"
        export : bool
            Set up arguments for the export server (as opposed to the HTML
            server)?
        export_limit : int
            Maximum number of lines in output file
        noHFS : bool
            No HFS Display
        displayHFS : bool
            Display HFS Intensity
        show_unres_qn : bool
            Display Unresolved Quantum Numbers
        show_upper_degeneracy : bool
            Display Upper State Degeneracy
        show_molecule_tag : bool
            Display Molecule Tag
        show_qn_code : bool
            Display Quantum Number Code
        show_lovas_labref : bool
            Display Lab Ref
        show_lovas_obsref : bool
            Display Obs Ref
        show_orderedfreq_only : bool
            Display Ordered Frequency ONLY
        show_nrao_recommended : bool
            Display NRAO Recommended Frequencies

        Returns
        -------
        payload : dict
            Dictionary of the parameters to send to the SPLAT page

        """

        payload = {"searchSpecies": "",
                   "speciesSelectBox": [],
                   "dataVersion": "v3.0",
                   "userInputFrequenciesFrom": [],
                   "userInputFrequenciesTo": [],
                   "userInputFrequenciesUnit": "GHz",
                   "frequencyRedshift": 0,
                   "energyFrom": 0,
                   "energyTo": 0,
                   "energyRangeType": "el_cm-1",
                   "lineIntensity": "None",
                   "lineIntensityLowerLimit": 0,
                   "excludeAtmosSpecies": False,
                   "excludePotentialInterstellarSpecies": False,
                   "excludeProbableInterstellarSpecies": False,
                   "excludeKnownASTSpecies": False,
                   "showOnlyAstronomicallyObservedTransitions": False,
                   "showOnlyNRAORecommendedFrequencies": False,
                   "lineListDisplayJPL": True,
                   "lineListDisplayCDMS": True,
                   "lineListDisplayLovasNIST": True,
                   "lineListDisplaySLAIM": True,
                   "lineListDisplayToyaMA": True,
                   "lineListDisplayOSU": True,
                   "lineListDisplayRecombination": True,
                   "lineListDisplayTopModel": True,
                   "lineListDisplayRFI": True,
                   "lineStrengthDisplayCDMSJPL": True,
                   "lineStrengthDisplaySijMu2": False,
                   "lineStrengthDisplaySij": False,
                   "lineStrengthDisplayAij": False,
                   "lineStrengthDisplayLovasAST": True,
                   "energyLevelOne": True,
                   "energyLevelTwo": False,
                   "energyLevelThree": False,
                   "energyLevelFour": False,
                   "displayObservedTransitions": False,
                   "displayG358MaserTransitions": False,
                   "displayObservationReference": False,
                   "displayObservationSource": False,
                   "displayTelescopeLovasNIST": False,
                   "frequencyErrorLimit": False,
                   "displayHFSIntensity": False,
                   "displayUnresolvedQuantumNumbers": False,
                   "displayUpperStateDegeneracy": False,
                   "displayMoleculeTag": False,
                   "displayQuantumNumberCode": False,
                   "displayLabRef": False,
                   "displayOrderedFrequencyOnly": False,
                   "displayNRAORecommendedFrequencies": False,
                   "displayUniqueSpeciesTag": False,
                   "displayUniqueLineIDNumber": False,
                   "exportType": "current",
                   "exportDelimiter": "tab",
                   "exportLimit": "allRecords",
                   "exportStart": 1,
                   "exportStop": 250}

        if min_frequency is not None and max_frequency is not None:
            # allow setting payload without having *ANY* valid frequencies set
            min_frequency = min_frequency.to(u.GHz, u.spectral())
            max_frequency = max_frequency.to(u.GHz, u.spectral())
            if min_frequency > max_frequency:
                min_frequency, max_frequency = max_frequency, min_frequency

            payload['userInputFrequenciesFrom'] = [min_frequency.value]
            payload['userInputFrequenciesTo'] = [max_frequency.value]

        if chemical_name in ('', {}, (), [], set(), None):
            # include all by default, or whatever default was set
            payload['speciesSelectBox'] = (self.data['speciesSelectBox']
                                           if hasattr(self, 'data')
                                           else [])
        elif chemical_name is not None:
            if parse_chemistry_locally:
                species_ids = self.get_species_ids(species_regex=chemical_name, reflags=chem_re_flags)
                if len(species_ids) == 0:
                    raise ValueError("No matching chemical species found.")
                payload['speciesSelectBox'] = list(species_ids.values())
            else:
                payload['searchSpecies'] = chemical_name

        if energy_min is not None:
            payload['energyFrom'] = float(energy_min)
        if energy_max is not None:
            payload['energyTo'] = float(energy_max)
        if energy_type is not None:
            if energy_type not in self.VALID_ENERGY_TYPES:
                raise ValueError(f'energy_type must be one of {self.VALID_ENERGY_TYPES}')
            payload['energyRangeType'] = energy_type

        if intensity_lower_limit is not None:
            if intensity_type is None:
                raise ValueError("If you specify an intensity lower limit, you must also specify its intensity_type.")
            elif intensity_type not in self.VALID_INTENSITY_TYPES:
                raise ValueError(f'intensity_type must be one of {self.VALID_INTENSITY_TYPES}')
            payload['lineIntensity'] = intensity_type
            payload['lineIntensityLowerLimit'] = intensity_lower_limit

        if version in self.versions:
            payload['dataVersion'] = version
        elif version is not None:
            raise ValueError("Invalid version specified.  Allowed versions "
                             "are {vers}".format(vers=str(self.versions)))

        if exclude is not None:
            if 'potential' in exclude:
                payload['excludePotentialInterstellarSpecies'] = True
            if 'atmospheric' in exclude:
                payload['excludeAtmosSpecies'] = True
            if 'probable' in exclude:
                payload['excludeProbableInterstellarSpecies'] = True
            if 'known' in exclude:
                payload['excludeKnownASTSpecies'] = True

        if only_astronomically_observed:
            payload['showOnlyAstronomicallyObservedTransitions'] = True
        if only_NRAO_recommended:
            payload['showOnlyNRAORecommendedFrequencies'] = True

        if line_lists is not None:
            if type(line_lists) not in (tuple, list):
                raise TypeError("Line lists should be a list of linelist "
                                "names.  See Splatalogue.ALL_LINE_LISTS")
            for L in self.ALL_LINE_LISTS:
                kwd = 'lineListDisplay' + L
                payload[kwd] = L in line_lists

        if line_strengths is not None:
            for LS in line_strengths:
                if LS not in self.VALID_LINE_STRENGTHS:
                    raise ValueError(f"Line strengths must be one of {self.VALID_LINE_STRENGTHS}")
                payload['lineStrengthDisplay' + LS] = True

        if energy_levels is not None:
            for EL in energy_levels:
                if EL not in self.VALID_ENERGY_LEVELS:
                    raise ValueError("Energy levels must be a number spelled out, i.e., "
                                     f"one of {self.VALID_ENERGY_LEVELS}")
                payload['energyLevel' + EL] = True

        for b in ("displayHFSIntensity", "displayUnresolvedQuantumNumbers",
                  "displayUpperStateDegeneracy", "displayMoleculeTag",
                  "displayQuantumNumberCode", "displayLabRef",
                  "displayOrderedFrequencyOnly", "displayNRAORecommendedFrequencies",
                  "displayUniqueLineIDNumber", "displayUniqueSpeciesTag"):
            if b in locals() and locals()[b]:
                payload[b] = True

        if export:
            payload['exportDelimiter'] = 'tab'  # or tab or comma
            payload['exportType'] = 'current'
            payload['exportStart'] = export_start
            payload['exportStop'] = export_stop

        if export_limit is not None:
            payload['exportLimit'] = export_limit
        else:
            payload['exportLimit'] = self.LINES_LIMIT

        payload = {'body': json.dumps(payload),
                   'headers': {'normalizedNames': {}, 'lazyUpdate': None}}

        return payload

    def _validate_kwargs(self, *, min_frequency=None, max_frequency=None,
                         **kwargs):
        """
        Check that min_frequency + max_frequency are specified
        """
        if min_frequency is None or max_frequency is None:
            raise ValueError("Must specify min/max frequency")


[docs]
    @prepend_docstr_nosections("\n" + _parse_kwargs.__doc__)
    def query_lines_async(self, min_frequency=None, max_frequency=None, *,
                          cache=True, **kwargs):
        """

        Returns
        -------
        response : `requests.Response`
            The response of the HTTP request.

        """
        # have to chomp this kwd here...
        get_query_payload = kwargs.pop('get_query_payload', False)

        self._validate_kwargs(min_frequency=min_frequency,
                              max_frequency=max_frequency, **kwargs)

        data_payload = self._parse_kwargs(min_frequency=min_frequency,
                                          max_frequency=max_frequency,
                                          **kwargs)
        if hasattr(self, 'data'):
            body = self.data.copy()
        else:
            body = self._default_kwargs()

        body.update(json.loads(
            self._parse_kwargs(min_frequency=min_frequency,
                               max_frequency=max_frequency, **kwargs)['body']))
        data_payload['body'] = json.dumps(body)

        if get_query_payload:
            return data_payload

        response = self._request(method='POST',
                                 url=self.QUERY_URL,
                                 json=data_payload,
                                 timeout=self.TIMEOUT,
                                 cache=cache)

        self.response = response

        return response


    def _parse_result(self, response, *, verbose=False):
        """
        Parse a response into an `~astropy.table.Table`

        Parameters
        ----------
        verbose : bool
            Has no effect; kept for API compatibility
        """

        # these are metadata items not intended to be part of the table
        meta_columns = ['orderFreqColName', 'measFreqColName']
        meta = {}

        jdat = response.json()

        result = Table([x for x in jdat if x['species_id'] is not None])

        for key in meta_columns:
            if key in result.colnames:
                meta[key] = result[key][0]
                del result[key]

        result.meta = meta

        return result


[docs]
    def get_fixed_table(self, *, columns=None):
        """
        Convenience function to get the table with html column names made human
        readable.  It returns only the columns identified with the ``columns``
        keyword.  See the source for the defaults.
        """
        if columns is None:
            columns = ('Species', 'Chemical Name', 'Resolved QNs',
                       'Freq-GHz(rest frame,redshifted)',
                       'Meas Freq-GHz(rest frame,redshifted)',
                       'Log<sub>10</sub> (A<sub>ij</sub>)',
                       'E_U (K)')
        table = clean_column_headings(self.table[columns])
        return table




Splatalogue = SplatalogueClass()
Navigation

Source code for astroquery.splatalogue.core