Source code for astroquery.linelists.cdms.core

# Licensed under a 3-clause BSD style license - see LICENSE.rst
import numpy as np
import requests
import os

from bs4 import BeautifulSoup
import astropy.units as u
from astropy.io import ascii
from astroquery.query import BaseQuery
from astroquery.utils import async_to_sync
# import configurable items declared in __init__.py
from astroquery.linelists.cdms import conf
from astroquery.exceptions import InvalidQueryError, EmptyResponseError

import re
import string

__all__ = ['CDMS', 'CDMSClass']


def data_path(filename):
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    return os.path.join(data_dir, filename)


[docs] @async_to_sync class CDMSClass(BaseQuery): # use the Configuration Items imported from __init__.py URL = conf.server TIMEOUT = conf.timeout
[docs] def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', temperature_for_intensity=300, flags=0, parse_name_locally=False, get_query_payload=False, cache=True): """ Creates an HTTP POST request based on the desired parameters and returns a response. Parameters ---------- min_frequency : `astropy.units.Quantity` or None Minimum frequency (or any spectral() equivalent). ``None`` can be interpreted as zero. max_frequency : `astropy.units.Quantity` or None Maximum frequency (or any spectral() equivalent). ``None`` can be interpreted as infinite. min_strength : int, optional Minimum strength in catalog units, the default is -500 molecule : list, string of regex if parse_name_locally=True, optional Identifiers of the molecules to search for. If this parameter is not provided the search will match any species. Default is 'All'. As a first pass, the molecule will be searched for with a direct string match. If no string match is found, a regular expression match is attempted. Note that if the molecule name regex contains parentheses, they must be escaped. For example, 'H2C(CN)2.*' must be specified as 'H2C\\(CN\\)2.*' (but because of the first-attempt full-string matching, 'H2C(CN)2' will match that molecule successfully). temperature_for_intensity : float The temperature to use when computing the intensity Smu^2. Set to 300 by default for compatibility with JPL and the native catalog format, which defaults to 300. ** If temperature is set to zero, the return value in this column will be the Einstein A value ** flags : int, optional Regular expression flags. Default is set to 0 parse_name_locally : bool, optional When set to True it allows the method to parse through catdir.cat (see `get_species_table`) in order to match the regex inputted in the molecule parameter and request the corresponding tags of the matches instead. Default is set to False get_query_payload : bool, optional When set to `True` the method should return the HTTP request parameters as a dict. Default value is set to False cache : bool Defaults to True. If set overrides global caching behavior. See :ref:`caching documentation <astroquery_cache>`. Returns ------- response : `requests.Response` The HTTP response returned from the service. Examples -------- >>> table = CDMS.query_lines(min_frequency=100*u.GHz, ... max_frequency=110*u.GHz, ... min_strength=-500, ... molecule="018505 H2O+") # doctest: +REMOTE_DATA >>> print(table) # doctest: +SKIP FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu Jl Kl vl F name MHz MHz MHz nm2 1 / cm ----------- ----- ------- --- -------- --- ----- ----- --- --- --- --- --- --- ----------- ---- 103614.4941 2.237 -4.1826 3 202.8941 8 18505 2356 4 1 4 4 0 4 3 2 1 3 0 3 H2O+ 107814.8763 148.6 -5.4438 3 878.1191 12 18505 2356 6 5 1 7 1 6 7 4 4 8 1 7 H2O+ 107822.3481 148.6 -5.3846 3 878.1178 14 18505 2356 6 5 1 7 1 7 7 4 4 8 1 8 H2O+ 107830.1216 148.6 -5.3256 3 878.1164 16 18505 2356 6 5 1 7 1 8 7 4 4 8 1 9 H2O+ """ # first initialize the dictionary of HTTP request parameters payload = dict() if min_frequency is not None and max_frequency is not None: # allow setting payload without having *ANY* valid frequencies set min_frequency = min_frequency.to(u.GHz, u.spectral()) max_frequency = max_frequency.to(u.GHz, u.spectral()) if min_frequency > max_frequency: raise InvalidQueryError("min_frequency must be less than max_frequency") payload['MinNu'] = min_frequency.value payload['MaxNu'] = max_frequency.value payload['UnitNu'] = 'GHz' payload['StrLim'] = min_strength payload['temp'] = temperature_for_intensity payload['logscale'] = 'yes' payload['mol_sort_query'] = 'tag' payload['sort'] = 'frequency' payload['output'] = 'text' payload['but_action'] = 'Submit' # changes interpretation of query self._last_query_temperature = temperature_for_intensity if molecule is not None: if parse_name_locally: self.lookup_ids = build_lookup() luts = self.lookup_ids.find(molecule, flags) if len(luts) == 0: raise InvalidQueryError('No matching species found. Please ' 'refine your search or read the Docs ' 'for pointers on how to search.') payload['Molecules'] = tuple(f"{val:06d} {key}" for key, val in luts.items())[0] else: payload['Molecules'] = molecule payload = list(payload.items()) if get_query_payload: return payload # BaseQuery classes come with a _request method that includes a # built-in caching system response = self._request(method='POST', url=self.URL, data=payload, timeout=self.TIMEOUT, cache=cache) response.raise_for_status() soup = BeautifulSoup(response.text, 'html.parser') ok = False urls = [x.attrs['src'] for x in soup.findAll('frame',)] for url in urls: if 'tab' in url and 'head' not in url: ok = True break if not ok: raise EmptyResponseError("Did not find table in response") baseurl = self.URL.split('cgi-bin')[0] fullurl = f'{baseurl}/{url}' response2 = self._request(method='GET', url=fullurl, timeout=self.TIMEOUT, cache=cache) return response2
def _parse_result(self, response, *, verbose=False): """ Parse a response into an `~astropy.table.Table` The catalog data files are composed of fixed-width card images, with one card image per spectral line. The format of each card image is similar to the JPL version: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) but the formats are somewhat different and are encoded below. The first several entries are the same, but more detail is appended at the end of the line FREQ: Frequency of the line in MHz. ERR: Estimated or experimental error of FREQ in MHz. LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at 300 K. DR: Degrees of freedom in the rotational partition function (0 for atoms, 2 for linear molecules, and 3 for nonlinear molecules). ELO: Lower state energy in cm^{-1} relative to the ground state. GUP: Upper state degeneracy. MOLWT: The first half of the molecular weight tag, which is the mass in atomic mass units (Daltons). TAG: Species tag or molecular identifier. This only includes the last 3 digits of the CDMS tag A negative value of MOLWT flags that the line frequency has been measured in the laboratory. We record this boolean in the 'Lab' column. ERR is the reported experimental error. QNFMT: Identifies the format of the quantum numbers Ju/Ku/vu and Jl/Kl/vl are the upper/lower QNs F: the hyperfine lines name: molecule name The full detailed description is here: https://cdms.astro.uni-koeln.de/classic/predictions/description.html#description """ if 'Zero lines were found' in response.text: raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text starts = {'FREQ': 0, 'ERR': 14, 'LGINT': 25, 'DR': 36, 'ELO': 38, 'GUP': 47, 'MOLWT': 51, 'TAG': 54, 'QNFMT': 58, 'Ju': 61, 'Ku': 63, 'vu': 65, 'F1u': 67, 'F2u': 69, 'F3u': 71, 'Jl': 73, 'Kl': 75, 'vl': 77, 'F1l': 79, 'F2l': 81, 'F3l': 83, 'name': 89} result = ascii.read(text, header_start=None, data_start=0, comment=r'THIS|^\s{12,14}\d{4,6}.*', names=list(starts.keys()), col_starts=list(starts.values()), format='fixed_width', fast_reader=False) result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz result['Lab'] = result['MOLWT'] < 0 result['MOLWT'] = np.abs(result['MOLWT']) result['MOLWT'].unit = u.Da fix_keys = ['GUP'] for suf in 'ul': for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'): qnind = qn+suf fix_keys.append(qnind) for key in fix_keys: if result[key].dtype != int: intcol = np.array(list(map(parse_letternumber, result[key])), dtype=int) result[key] = intcol # if there is a crash at this step, something went wrong with the query # and the _last_query_temperature was not set. This shouldn't ever # happen, but, well, I anticipate it will. if self._last_query_temperature == 0: result.rename_column('LGINT', 'LGAIJ') result['LGAIJ'].unit = u.s**-1 else: result['LGINT'].unit = u.nm**2 * u.MHz result['ELO'].unit = u.cm**(-1) return result
[docs] def get_species_table(self, *, catfile='catdir.cat', use_cached=True, catfile_url=conf.catfile_url): """ A directory of the catalog is found in a file called 'catdir.cat.' The table is derived from https://cdms.astro.uni-koeln.de/classic/entries/partition_function.html Parameters ---------- catfile : str, name of file, default 'catdir.cat' The catalog file, installed locally along with the package Returns ------- Table: `~astropy.table.Table` | tag : The species tag or molecular identifier. | molecule : An ASCII name for the species. | #line : The number of lines in the catalog. | lg(Q(n)) : A seven-element vector containing the base 10 logarithm of the partition function. """ if use_cached: result = ascii.read(data_path(catfile), format='fixed_width', delimiter='|') else: result = retrieve_catfile(catfile_url) meta = {'lg(Q(1000))': 1000.0, 'lg(Q(500))': 500.0, 'lg(Q(300))': 300.0, 'lg(Q(225))': 225.0, 'lg(Q(150))': 150.0, 'lg(Q(75))': 75.0, 'lg(Q(37.5))': 37.5, 'lg(Q(18.75))': 18.75, 'lg(Q(9.375))': 9.375, 'lg(Q(5.000))': 5.0, 'lg(Q(2.725))': 2.725} def tryfloat(x): try: return float(x) except ValueError: return np.nan for key in meta: result[key].meta = {'Temperature (K)': meta[key]} result[key] = np.array([tryfloat(val) for val in result[key]]) result.meta = {'Temperature (K)': [1000., 500., 300., 225., 150., 75., 37.5, 18.75, 9.375, 5., 2.725]} return result
CDMS = CDMSClass() def parse_letternumber(st): """ Parse CDMS's two-letter QNs From the CDMS docs: "Exactly two characters are available for each quantum number. Therefore, half integer quanta are rounded up ! In addition, capital letters are used to indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small types are used to signal corresponding negative quantum numbers." """ asc = string.ascii_lowercase ASC = string.ascii_uppercase newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else str(ASC.index(x)+10) if x in ASC else x for x in st]) return int(newst) class Lookuptable(dict): def find(self, st, flags): """ Search dictionary keys for a regex match to string s Parameters ---------- s : str String to compile as a regular expression Can be entered non-specific for broader results ('H2O' yields 'H2O' but will also yield 'HCCCH2OD') or as the specific desired regular expression for catered results, for example: ('H2O$' yields only 'H2O') flags : int Regular expression flags. Returns ------- The list of values corresponding to the matches """ out = {} for kk, vv in self.items(): # note that the string-match attempt here differs from the jplspec # implementation match = (st in kk) or re.search(st, str(kk), flags=flags) if match: out[kk] = vv return out def build_lookup(): result = CDMS.get_species_table() keys = list(result['molecule'][:]) # convert NAME column to list values = list(result['tag'][:]) # convert TAG column to list dictionary = dict(zip(keys, values)) # make k,v dictionary lookuptable = Lookuptable(dictionary) # apply the class above return lookuptable def retrieve_catfile(url='https://cdms.astro.uni-koeln.de/classic/entries/partition_function.html'): """ Simple retrieve index function """ response = requests.get(url) response.raise_for_status() tbl = ascii.read(response.text, header_start=None, data_start=15, data_end=-5, names=['tag', 'molecule', '#lines', 'lg(Q(1000))', 'lg(Q(500))', 'lg(Q(300))', 'lg(Q(225))', 'lg(Q(150))', 'lg(Q(75))', 'lg(Q(37.5))', 'lg(Q(18.75))', 'lg(Q(9.375))', 'lg(Q(5.000))', 'lg(Q(2.725))'], col_starts=(0, 7, 34, 41, 53, 66, 79, 92, 106, 117, 131, 145, 159, 173), format='fixed_width', delimiter=' ') return tbl