# Source code for sndata.sdss._sako18

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""This module defines the SDSS Sako18 API for photometric data"""

import tarfile
from itertools import product
from typing import List, Union
from urllib.parse import urljoin

import numpy as np
from astropy.table import Column, Table

from ..base_classes import DefaultParser, PhotometricRelease
from ..exceptions import InvalidObjId
from ..utils import downloads, unit_conversion


@np.vectorize
def _construct_band_name(filter_id: int, ccd_id: int) -> str:
    """Return the sncosmo band name given filter and CCD ID

    Args:
        filter_id: Filter index 1 through 5 for 'ugriz'
        ccd_id: Column number 1 through 6

    Args:
        The name of the filter registered with sncosmo
    """

    return f'sdss_sako18_{"ugriz"[filter_id]}{ccd_id}'


def _format_table_to_sncosmo(data_table: Table) -> Table:
    """Convert a Sako18 photometry table to the SNCosmo data model

    Args:
        data_table: A data table returned by ``get_data_for_id``

    Returns:
        A new table with the columns ``time``, ``band``, ``zp``, ``flux``,
        ``fluxerr``, ``zpsys`` and ``flag``. An empty input produces an
        empty table that only defines those column names.
    """

    # Nothing to convert: hand back an empty table with the expected columns
    if not data_table:
        sncosmo_columns = [
            'time', 'band', 'zp', 'flux', 'fluxerr', 'zpsys', 'flag']
        return Table(names=sncosmo_columns)

    num_rows = len(data_table)
    ab_zero_point = 2.5 * np.log10(3631)

    formatted = Table()
    formatted.meta = data_table.meta

    # Columns are added in the order they should appear in the output
    formatted['time'] = data_table['JD']
    formatted['band'] = _construct_band_name(
        data_table['FILT'], data_table['IDCCD'])
    formatted['zp'] = np.full(num_rows, ab_zero_point)

    # Flux values are rescaled by 1E-6
    # NOTE(review): presumably micro-Jansky -> Jansky to match the zero
    # point above - confirm against the data release documentation
    for new_name, source_name in (('flux', 'FLUX'), ('fluxerr', 'FLUXERR')):
        formatted[new_name] = data_table[source_name] * 1E-6

    formatted['zpsys'] = np.full(num_rows, 'ab')
    formatted['flag'] = data_table['FLAG']
    return formatted


class Sako18(PhotometricRelease, DefaultParser):
    """The ``Sako18`` class provides access to the **photometric** data release
    of the Sloan Digital Sky Survey-II (SDSS-II) Supernova Survey conducted
    between 2005 and 2007. Light curves are presented for 10,258 variable and
    transient sources discovered through repeat ugriz imaging of SDSS Stripe
    82, a 300 square degree area along the celestial equator. This data
    release is comprised of all transient sources brighter than r ≃ 22.5 mag
    with no history of variability prior to 2004. (Source: Sako et al. 2018)

    For the spectroscopic data of this data release see the ``sako18spec``
    module.

    Deviations from the standard UI:
        - The ``get_outliers`` method returns a dictionary of observations
          visually flagged by the SDSS team as outliers.

    Cuts on returned data:
        - Data points manually marked as outliers by the SDSS research team
          are not included in returned data tables.
    """

    # General metadata
    survey_name = 'Sloan Digital Sky Survey'
    survey_abbrev = 'SDSS'
    release = 'sako18'
    survey_url = 'https://portal.nersc.gov/project/dessn/SDSS/dataRelease/'
    data_type = 'photometric'
    publications = ('Sako et al. (2018)',)
    ads_url = 'https://ui.adsabs.harvard.edu/abs/2018PASP..130f4002S/abstract'

    # Photometric metadata
    # Effective wavelengths for SDSS filters ugriz in angstroms
    # are available at https://www.sdss.org/instruments/camera/#Filters
    # One band is registered per filter ('ugriz') and CCD column ('1'-'6')
    band_names = tuple(f'sdss_sako18_{b}{c}' for b, c in product('ugriz', '123456'))
    zero_point = tuple(2.5 * np.log10(3631) for _ in band_names)

    def __init__(self):
        """Define local and remote paths of data"""

        super().__init__()

        # Local paths
        self._filter_dir = self._data_dir / 'doi_2010_filters/'  # Transmission filters
        self._table_dir = self._data_dir / 'tables/'  # Tables from the published paper
        self._smp_dir = self._data_dir / 'SMP_Data/'  # SMP data files (photometric light-curves)
        self._snana_dir = self._data_dir / 'SDSS_dataRelease-snana/'  # SNANA files including list of outliers
        self._outlier_path = self._snana_dir / 'SDSS_allCandidates+BOSS/SDSS_allCandidates+BOSS.IGNORE'  # Outlier data

        self._filter_file_names = tuple(f'{b}{c}.dat' for b, c in product('ugriz', '123456'))
        self._table_names = 'master_data.txt', 'Table2.txt', 'Table9.txt', 'Table11.txt', 'Table12.txt'

        # Define urls and file names for remote data
        self._filter_url = 'http://www.ioa.s.u-tokyo.ac.jp/~doi/sdss/'

        self._base_url = 'https://portal.nersc.gov/project/dessn/SDSS/dataRelease/'
        self._smp_url = urljoin(self._base_url, 'SMP_Data.tar.gz')
        self._snana_url = urljoin(self._base_url, 'SDSS_dataRelease-snana.tar.gz')

    def _get_available_tables(self) -> List[Union[int, str]]:
        """Get Ids for available vizier tables published by this data release

        Table ids are derived from the ``*.txt`` files found in the local
        table directory: ``master_data.txt`` maps to ``'master'`` and
        ``Table<N>.txt`` maps to the integer ``N``. Files that follow neither
        naming pattern are ignored.

        Returns:
            A list with ``'master'`` first (if present) followed by the table
            numbers in ascending order
        """

        table_ids = []
        for table_path in self._table_dir.glob('*.txt'):
            stem = table_path.stem
            if stem == 'master_data':
                table_ids.append('master')
                continue

            # Parse the prefix explicitly: ``str.strip`` removes a *set* of
            # characters rather than a prefix/suffix, and any unexpected file
            # name would otherwise yield a string that cannot be sorted
            # together with the integer table numbers.
            table_num = stem[len('Table'):]
            if stem.startswith('Table') and table_num.isdecimal():
                table_ids.append(int(table_num))

        return sorted(table_ids, key=lambda x: 0 if x == 'master' else x)

    def _load_table(self, table_id: Union[int, str]) -> Table:
        """Return a Vizier table published by this data release

        Args:
            table_id: The published table number or table name

        Returns:
            The requested table with the ``CID`` column (and the ``SID``
            column of table 9) cast to strings

        Raises:
            ValueError: If ``table_id`` is not among the available tables
        """

        if table_id not in self.get_available_tables():
            raise ValueError(f'Table {table_id} is not available.')

        if table_id == 'master':
            table = Table.read(self._table_dir / 'master_data.txt', format='ascii')

        else:
            table = Table.read(
                self._table_dir / f'Table{table_id}.txt', format='ascii')

        # Object ids are handled as strings throughout this class
        table['CID'] = Column(table['CID'], dtype=str)
        if table_id == 9:
            table['SID'] = Column(table['SID'], dtype=str)

        return table

    def _get_available_ids(self):
        """Return a list of target object IDs for the current survey

        Returns:
            A sorted list of the ``CID`` values in the master table
        """

        return sorted(self.load_table('master')['CID'])

    def get_outliers(self) -> dict:
        """Return a dictionary of data points marked by SDSS II as outliers

        Only lines of the outlier file starting with ``IGNORE:`` are used.
        The second and third whitespace separated fields of such a line are
        read as the object id and the MJD of the flagged observation.

        Returns:
            A dictionary {<obj_id>: [<MJD of bad data point>, ...], ...}
            where object ids and MJD values are strings as read from file
        """

        out_dict = dict()
        with open(self._outlier_path) as ofile:
            for line in ofile:
                if not line.startswith('IGNORE:'):
                    continue

                # Remaining fields on the line are not needed here
                line_list = line.split()
                cid, mjd = line_list[1], line_list[2]
                out_dict.setdefault(cid, []).append(mjd)

        return out_dict

    # noinspection PyUnusedLocal
    def _get_data_for_id(self, obj_id: str, format_table: bool = True) -> Table:
        """Returns data for a given object ID

        Observations listed by ``get_outliers`` for the given object are
        dropped from the returned table.

        Args:
            obj_id: The ID of the desired object
            format_table: Format for use with ``sncosmo`` (Default: True)

        Returns:
            An astropy table of data for the given ID

        Raises:
            InvalidObjId: If ``obj_id`` is not among the available object ids
        """

        if obj_id not in self.get_available_ids():
            raise InvalidObjId()

        # Read in ascii data table for specified object
        file_path = self._smp_dir / f'SMP_{int(obj_id):06d}.dat'
        data = Table.read(file_path, format='ascii')

        # Rename columns using header data from file. The last comment line
        # holds the column names; columns are read in as col1, col2, ...
        col_names = data.meta['comments'][-1].split()
        for col_num, name in enumerate(col_names, start=1):
            data[f'col{col_num}'].name = name

        data['JD'] = unit_conversion.convert_to_jd(data['MJD'], format='mjd')

        # Add meta data
        master_table = self.load_table('master')
        table_meta_data = master_table[master_table['CID'] == obj_id]
        data.meta['obj_id'] = obj_id
        data.meta['ra'] = table_meta_data['RA'][0]
        data.meta['dec'] = table_meta_data['DEC'][0]
        data.meta['z'] = table_meta_data['zCMB'][0]
        data.meta['z_err'] = table_meta_data['zerrCMB'][0]
        data.meta['dtype'] = 'photometric'
        data.meta['classification'] = table_meta_data['Classification'][0]
        del data.meta['comments']

        outlier_list = self.get_outliers().get(obj_id, [])
        if outlier_list:
            # ``get_outliers`` returns MJDs as strings. Convert them to floats
            # so they are compared numerically with the MJD column instead of
            # relying on numpy's handling of mixed string / float input.
            # NOTE(review): this is an exact equality match - confirm both
            # files quote MJD values with the same precision.
            outlier_mjds = np.array(outlier_list, dtype=float)
            keep_indices = ~np.isin(data['MJD'], outlier_mjds)
            data = data[keep_indices]

        if format_table:
            data = _format_table_to_sncosmo(data)

        return data

    def _download_module_data(self, force=False, timeout: float = 15):
        """Download data for the current survey / data release

        Args:
            force: Re-Download locally available data
            timeout: Seconds before timeout for individual files/archives
        """

        # Photometry
        downloads.download_tar(
            url=self._smp_url,
            out_dir=self._data_dir,
            skip_exists=self._smp_dir,
            mode='r:gz',
            force=force,
            timeout=timeout
        )

        # SNANA files - including files specifying "bad" photometry data points
        downloads.download_tar(
            url=self._snana_url,
            out_dir=self._data_dir,
            skip_exists=self._snana_dir,
            mode='r:gz',
            force=force,
            timeout=timeout
        )

        # Unzip file listing "bad" photometry
        # NOTE(review): ``extractall`` trusts the member paths of a downloaded
        # archive. Consider restricting extraction (e.g. tarfile extraction
        # filters on Python versions that support them).
        outlier_archive = self._snana_dir / 'SDSS_allCandidates+BOSS.tar.gz'
        if outlier_archive.exists():
            with tarfile.open(str(outlier_archive), mode='r:gz') as data:
                data.extractall(str(outlier_archive.parent))

        # Tables from the published paper
        for file_name in self._table_names:
            downloads.download_file(
                url=self._base_url + file_name,
                destination=self._table_dir / file_name,
                force=force,
                timeout=timeout
            )

        # Filter transmission curves
        for file_name in self._filter_file_names:
            downloads.download_file(
                url=self._filter_url + file_name,
                destination=self._filter_dir / file_name,
                force=force,
                timeout=timeout
            )