Source code for desispec.io.emlinefit

"""
desispec.io.emlinefit
=====================

IO routines for desi_emlinefit_afterburner.
"""

import os
from astropy.table import Table
import fitsio
import numpy as np
from desitarget.geomask import match_to
from desitarget.targets import main_cmx_or_sv
from desiutil.dust import ext_odonnell
from desiutil.dust import ebv as dust_ebv
from desiutil.log import get_logger
from desispec.emlinefit import get_rf_em_waves
from .util import checkgzip

[docs]def get_targetids(d, bitnames, log=None):
    """
    Returns the TARGETIDs passing the bitnames for the CMX_TARGET, SV{1,2,3}_DESI_TARGET, or DESI_TARGET mask.

    Args:
        d: structured array, typically a FIBERMAP catalog,
            with TARGETID and the CMX_TARGET, SV{1,2,3}_DESI_TARGET, or DESI_TARGET column.
        bitnames: comma-separated list of target bitnames to fit from the ``*DESI_TARGET`` mask (string)
        log (optional, defaults to get_logger()): Logger object

    Returns:
        targetids: list of TARGETIDs passing the {CMX,*DESI}_TARGET mask (np.array)

    Note:
        * If several bitnames are provided, selects the union of those.
        * Safer if d is already trimmed on unique TARGETIDs (see FIBERMAP format with ``zbest-*.fits`` files)
    """
    # AR log
    if log is None:
        log = get_logger()

    # AR get the *DESI_TARGET column + mask
    keys, masks, survey = main_cmx_or_sv(d)
    dtkey = keys[0]
    mask = masks[0]
    log.info("input catalog is identified as survey={}".format(survey))
    allowed_bitnames = mask.names()

    # AR read + select the targetids
    sel = np.zeros(len(d), dtype=bool)
    for bitname in bitnames.split(","):
        if bitname not in allowed_bitnames:
            msg = "{} not in allowed bitnames for {} ({})".format(
                    bitname, dtkey, allowed_bitnames
            )
            log.error(msg)
            raise ValueError(msg)
        sel |= (d[dtkey] & mask[bitname]) > 0
    targetids = d["TARGETID"][sel]
    log.info("selecting {} targets with {} in {}".format(sel.sum(), bitnames, dtkey))
    return targetids


[docs]def read_emlines_inputs(
    redrock,
    coadd,
    mwext_corr=True,
    rv=3.1,
    bitnames="ALL",
    targetids=None,
    rr_keys="TARGETID,Z,ZWARN,SPECTYPE,DELTACHI2",
    fm_keys="TARGET_RA,TARGET_DEC,OBJTYPE",
    ebvmax=2.,
    log=None,
):
    """
    Read the columns and spectra information (waves, fluxes, ivars) from the redrock and coadd files,
    which will go as input to get_emlines().

    Args:
        redrock: full path to a redrock/zbest file
        coadd: full path to a coadd file (everest-format)
        mwext_corr (optional, defaults to True): correct flux for foreground MW extinction? (boolean)
        rv (optional, defaults to 3.1): value of R_V, used if mwext_corr=True (float)
        bitnames (optional, defaults to "ALL", meaning fitting all fibers): comma-separated list of target bitnames to fit from the ``*DESI_TARGET`` mask (string)
        targetids (optional, defaults to None): list of TARGETIDs to restrict to (int, list, or numpy array)
        rr_keys (optional, defaults to "TARGETID,Z,ZWARN,SPECTYPE,DELTACHI2"): comma-separated list of columns from REDSHIFTS to propagate (string)
        fm_keys (optional, defaults to "TARGET_RA,TARGET_DEC,OBJTYPE"): comma-separated list of columns from FIBERMAP to propagate (string)
        ebvmax (optional, defaults to 2): spectra with ebv >= ebvmax will be masked (ivar=0 for all pixels) (float)
        log (optional, defaults to get_logger()): Logger object

    Returns:
        rr: structured array with the desired redrock columns for the desired TARGETIDs
        fm: structured array with the desired fibermap columns for the desired TARGETIDs
        waves: wavelengths (numpy array of shape (Nwave))
        fluxes: Galactic-extinction-corrected fluxes (numpy array of shape (Nspec, Nwave))
        ivars: inverse variances (numpy array of shape (Nspec, Nwave))

    Note:
        * We add TARGETID and Z to rr_keys if TARGETID not present in rr_keys nor in fm_keys.
        * If keys in rr_keys or fm_keys are not present in the redrock, those will be ignored.
        * If both bitnames and targetids are provided, we take the overlap of the two.
        * Mar. 2024: addition of ebvmax=2. argument, to avoid too long computing time
            on some backup tiles (https://github.com/desihub/desispec/issues/2186)
    """
    # AR log
    if log is None:
        log = get_logger()

    redrock = checkgzip(redrock)
    coadd = checkgzip(coadd)

    # AR targetids to np.array()
    if targetids is not None:
        if isinstance(targetids, list):
            targetids = np.array(targetids)
            log.info("convert targetids from list to np.array()")
        if isinstance(targetids, int):
            targetids = np.array([targetids])
            log.info("convert targetids from int to np.array()")

    # AR sanity checks
    for fn in [redrock, coadd]:
        if not os.path.isfile(fn):
            msg = "no {} file".format(fn)
            log.error(msg)
            raise FileNotFoundError(msg)
    keys = [key for key in rr_keys.split(",") if key in fm_keys.split(",")]
    if len(keys) > 0:
        msg = "the following columns are both present in rr_keys and fm_keys: {}".format(",".join(keys))
        log.error(msg)
        raise RuntimeError(msg)
    # AR grab TARGETID from the REDSHIFTS/ZBEST extension
    if "TARGETID" in fm_keys.split(","):
        log.info("removing TARGETID from fm_keys")
        fm_keys = ",".join([key for key in fm_keys.split(",") if key != "TARGETID"])
    for key in ["TARGETID", "Z"]:
        if key not in rr_keys.split(","):
            log.info("adding {} to rr_keys".format(key))
            rr_keys = "{},{}".format(key, rr_keys)

    # AR redrock: reading the correct extension
    with fitsio.FITS(redrock) as h:
        extnames = [h[i].get_extname() for i in range(len(h))]
        if "REDSHIFTS" in extnames:
            rr_extname = "REDSHIFTS"
        elif "ZBEST" in extnames:
            rr_extname = "ZBEST"
        else:
            msg = "{} has neither REDSHIFTS or ZBEST extension".format(redrock)
            log.error(msg)
            raise RuntimeError(msg)
        rr = Table(h[rr_extname].read())

    # AR coadd: reading fibermap + waves/fluxes/ivars
    # AR    fibermap has 500 rows (even in pre-everest)
    co = {}
    with fitsio.FITS(coadd) as h:
        fm = Table(h["FIBERMAP"].read())
        # AR available cameras
        cameras = []
        extnames = [h[i].get_extname() for i in range(len(h))]
        for camera in ["B", "R", "Z"]:
            if "{}_FLUX".format(camera) in extnames:
                cameras.append(camera)
        for camera in cameras:
            for key in [
                "{}_WAVELENGTH".format(camera),
                "{}_FLUX".format(camera),
                "{}_IVAR".format(camera),
            ]:
                co[key] = h[key].read()

    # AR selecting targetids
    if bitnames == "ALL":
        bit_targetids = rr["TARGETID"]
    else:
        bit_targetids = get_targetids(fm, bitnames, log=log)
    if targetids is None:
        targetids = bit_targetids
    else:
        targetids = targetids[np.in1d(targetids, bit_targetids)]
    nspec = len(targetids)
    log.info("Dealing with {} spectra".format(nspec))

    # AR targetids: cutting on redrock
    ii_rr = match_to(rr["TARGETID"], targetids)
    rr = rr[ii_rr]
    if len(rr) != nspec:
        msg = "{} TARGETIDs are not in {}".format(nspec - len(rr), redrock)
        log.error(msg)
        raise RuntimeError(msg)
    # AR targetids: cutting on fibermap + fluxes/ivars
    ii_co = match_to(fm["TARGETID"], targetids)
    if ii_co.size != nspec:
        msg = "{} TARGETIDs are not in {}".format(nspec - ii_co.size, coadd)
        log.error(msg)
        raise RuntimeError(msg)
    fm = fm[ii_co]
    for camera in cameras:
        for key in ["{}_FLUX".format(camera), "{}_IVAR".format(camera)]:
            co[key] = co[key][ii_co, :]
    # AR targetids: sanity check
    msg = None
    if len(rr) != len(fm):
        msg = "issue when selecting on targetids: len(rr) = {} != len(fm) = {}".format(
            len(rr), len(fm),
        )
    else:
        tmpn = (rr["TARGETID"] != fm["TARGETID"]).sum()
        if tmpn > 0:
            msg = "issue when selecting on targetids: {} mismatches between rr and fm".format(tmpn)
    if msg is not None:
        log.error(msg)
        raise RuntimeError(msg)

    # AR EBV
    if mwext_corr:
        ebvs = dust_ebv(fm["TARGET_RA"], fm["TARGET_DEC"])

    # AR rr_keys: restrict to existing ones
    rmv_rr_keys = [key for key in rr_keys.split(",") if key not in rr.dtype.names]
    if len(rmv_rr_keys) > 0:
        log.info("{} removed from rr_keys, as not present in {}".format(",".join(rmv_rr_keys), rr_extname))
    rr_keys = [key for key in rr_keys.split(",") if key not in rmv_rr_keys]
    rr = rr[rr_keys]
    # AR fm_keys: restrict to existing ones
    rmv_fm_keys = [key for key in fm_keys.split(",") if key not in fm.dtype.names]
    if len(rmv_fm_keys) > 0:
        log.info("{} removed from fm_keys, as not present in FIBERMAP".format(",".join(rmv_fm_keys)))
    fm_keys = [key for key in fm_keys.split(",") if key not in rmv_fm_keys]
    fm = fm[fm_keys]

    # AR b : 3600,5800  , r : 5760,7620 , z : 7520,9824
    # AR we keep all data points (i.e. including overlaps)
    # AR note that waves is not ordered, as a consequence
    waves = np.zeros(0)
    fluxes = np.zeros(0).reshape(nspec, 0)
    ivars = np.zeros(0).reshape(nspec, 0)
    for camera in cameras:
        tmpw = co["{}_WAVELENGTH".format(camera)]
        tmpfl = co["{}_FLUX".format(camera)]
        tmpiv = co["{}_IVAR".format(camera)]
        if mwext_corr:
            # AR TBD: use a smarter way with no loop on tids...
            # AR TBD: but as 500 rows at most, ~ok
            tmpexts = ext_odonnell(tmpw, Rv=rv)
            for i in range(nspec):
                tmp_mw_trans = 10 ** (-0.4 * ebvs[i] * rv * tmpexts)
                tmpfl[i, :] /= tmp_mw_trans
                tmpiv[i, :] *= tmp_mw_trans ** 2
        waves = np.append(waves, tmpw)
        fluxes = np.append(fluxes, tmpfl, axis=1)
        ivars = np.append(ivars, tmpiv, axis=1)

    # AR set ivar=0 for ebv >= ebvmax spectra
    sel = ebvs >= ebvmax
    ivars[sel, :] = 0
    for tid, ebv in zip(rr["TARGETID"][sel], ebvs[sel]):
        log.warning(
            "set ivar=0 for all pixels of TARGETID={} (EBV={:.2f} >= {})".format(
                tid, ebv, ebvmax,
            )
        )
    log.info(
        "we set ivar=0 for {}/{} spectra with EBV >= {}".format(
            sel.sum(), nspec, ebvmax
        )
    )

    return rr, fm, waves, fluxes, ivars


[docs]def write_emlines(
    output,
    emdict,
    rr=None,
    fm=None,
    redrock=None,
    coadd=None,
    rf_fit_hw=None,
    min_rf_fit_hw=None,
    rf_cont_w=None,
    rv=None,
    log=None,
):
    """
    Writes the emission line fitting to a file.

    Args:
        output: output fits file (string)
        emdict: dictionary output by get_emlines()
        rr (optional, defaults to None): redrock REDSHIFTS/ZBEST extension
        fm (optional, defaults to None): redrock FIBERMAP extension
        redrock (optional, defaults to None): used redrock/zbest file
        coadd (optional, defaults to None): used coadd file (everest-format)
        rf_fit_hw (optional, defaults to None): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float)
        min_rf_fit_hw (optional, defaults to None): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float)
        rf_cont_w (optional, defaults to None): *rest-frame* wavelength extent (in A) to fit the continuum (float)
        rv (optional, defaults to None): value of R_V (float)
        log (optional, defaults to get_logger()): Logger object
    """
    # AR log
    if log is None:
        log = get_logger()

    # AR fitted emission lines
    emnames = list(emdict.keys())

    # AR number of fitted spectra
    nspec = emdict[emnames[0]]["FLUX"].size

    # AR data model
    dtype = []
    if rr is not None:
        dtype += [(key, rr.dtype[key]) for key in rr.dtype.names]
    if fm is not None:
        dtype += [(key, fm.dtype[key]) for key in fm.dtype.names]
    for emname in emnames:
        for key in list(emdict[emname].keys()):
            if key not in ["waves", "fluxes", "ivars", "models"]:
                if key == "NDOF":
                    dtype.append(("{}_{}".format(emname, key), ">i4"))
                else:
                    dtype.append(("{}_{}".format(emname, key), ">f4"))

    # AR store in table
    d = np.zeros(nspec, dtype=dtype)

    if rr is not None:
        for key in rr.dtype.names:
            d[key] = rr[key]
    if fm is not None:
        for key in fm.dtype.names:
            d[key] = fm[key]
    for emname in emnames:
        for key in list(emdict[emname].keys()):
            if key not in ["waves", "fluxes", "ivars", "models"]:
                d["{}_{}".format(emname, key)] = emdict[emname][key]

    # AR header
    hdr = fitsio.FITSHDR()
    if redrock is not None:
        hdr["RRFN"] = redrock
    if coadd is not None:
        hdr["COADDFN"] = coadd
    if rf_fit_hw is not None:
        hdr["RFHW"] = rf_fit_hw
    if min_rf_fit_hw is not None:
        hdr["MINRFHW"] = min_rf_fit_hw
    if rf_cont_w is not None:
        hdr["RFCONTW"] = rf_cont_w
    if rv is not None:
        hdr["RV"] = rv
    hdr["EMNAMES"] = emnames
    for i_emname, emname in enumerate(emnames):
        hdr["RFWAVE{:02d}".format(i_emname)] = ",".join(get_rf_em_waves(emname).astype(str))

    # AR write to fits
    if os.path.isfile(output):
        log.info("Removing existing {}".format(output))
        os.remove(output)
    fd = fitsio.FITS(output, "rw")
    fd.write(d, extname="EMLINEFIT", header=hdr)
    fd.close()


[docs]def plot_emlines(
    outpdf,
    zspecs,
    emdict,
    emnames=["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"],
    targetids=None,
    objtypes=None,
    spectypes=None,
    deltachi2s=None,
    ylim=(None, None),
    nrow=10,
    rowsort_byz=True,
):
    """
    Plot the fitted data from get_emlines().

    Args:
        outpdf: output pdf filename (string)
        zspecs: list of redrock redshifts (numpy array)
        emdict: a dictionary with various quantities, output by get_lines().
        emnames (optional, defaults to ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]): list of plotted lines (list of string)
        targetids (optional, defaults to None): list of TARGETIDs (numpy array)
        objtypes (optional, defaults to None): list of fibermap OBJTYPEs (numpy array)
        spectypes (optional, defaults to None): list of redrock SPECTYPEs (numpy array)
        deltachi2s (optional, defaults to None): list of redrock DELTACHI2s (numpy array)
        ylim (optional, defaults to (None, None)): ylim for plotting (float doublet)
        nrow (optional, defaults to 10): number of rows, i.e. galaxy, per pdf page (int)
        rowsort_byz (optional, defaults to True): the spectra are displayed by increasing redshifts (boolean)

    Note:
        * For `emdict`:

            - Each emission line has its own subdictionary.
            - Each emission line dictionary should contain these keys:

                * CONT : continuum in ``1e-17 * erg/cm2/s/A``
                * FLUX, FLUX_IVAR: flux in ``1e-17 * erg/cm2/s/A``
                * waves, fluxes, ivars, models: data used for fitting + fitted model

        * If some emname is not present in emdict, it will be discarded.
        * Each spectra will be one row of plot, each emission line corresponds to a column.
        * If rowsort_byz=False, then the spectra will be displayed by order of appearance.
    """
    # AR plot imports
    from matplotlib.backends.backend_pdf import PdfPages
    import matplotlib.pyplot as plt
    from matplotlib import gridspec

    # AR emnames present in emdict
    emnames = [emname for emname in emnames if emname in emdict]

    # AR plotting by increasing redshift?
    if rowsort_byz:
        ii = zspecs.argsort()
    else:
        ii = np.arange(zspecs.size, dtype=int)

    # AR start plot
    with PdfPages(outpdf) as pdf:
        for ix, i in enumerate(ii):
            if ix % nrow == 0:
                fig = plt.figure(figsize=(25, 15))
                gs = gridspec.GridSpec(nrow, len(emnames), wspace=0.1, hspace=0.1)
            for i_emname, emname in enumerate(emnames):
                ax = plt.subplot(gs[ix % nrow, i_emname])
                w = emdict[emname]["waves"][i]
                d = emdict[emname]["fluxes"][i]
                iv = emdict[emname]["ivars"][i]
                m = emdict[emname]["models"][i]
                # # AR case of camera overlap
                jj = w.argsort()
                w, d, iv, m = w[jj], d[jj], iv[jj], m[jj]
                jj = np.where(w[1:] - w[:-1] > 10)[0]
                for j in jj:
                    d[j], iv[j], m[j] = np.nan, np.nan, np.nan
                obs_em_waves = (1. + zspecs[i]) * get_rf_em_waves(emname)
                ax.plot(w, d, c="k", lw=0.5, alpha=1.0, label="data")
                ax.fill_between(w, d - iv ** (-0.5), d + iv ** (-0.5), color="k", alpha=0.3)
                ax.plot(w, m, c="r", lw=0.5, alpha=1.0, label="model")
                ax.axhline(emdict[emname]["CONT"][i], c="r", lw=0.5, ls="--", alpha=1.0)
                for obs_em_wave in obs_em_waves:
                    ax.axvline(obs_em_wave, c="r", lw=0.5, ls="--", alpha=1.0)
                fs = 8
                if targetids is not None:
                    ax.text(0.05, 0.85, "{}".format(targetids[i]), fontsize=fs, transform=ax.transAxes)
                ax.text(0.05, 0.75, "Z={:.3f}".format(zspecs[i]), fontsize=fs, transform=ax.transAxes)
                ax.text(0.05, 0.05, "SNR({})={:.1f}".format(emname, emdict[emname]["FLUX"][i] * emdict[emname]["FLUX_IVAR"][i] ** 0.5), fontsize=fs, transform=ax.transAxes)
                if objtypes is not None:
                    ax.text(0.95, 0.85, "OBJTYPE={}".format(objtypes[i]), fontsize=fs, ha="right", transform=ax.transAxes)
                if spectypes is not None:
                    ax.text(0.95, 0.75, "SPECTYPE={}".format(spectypes[i]), fontsize=fs, ha="right", transform=ax.transAxes)
                if deltachi2s is not None:
                    ax.text(0.95, 0.05, "log10(DELTACHI2)={:.1f}".format(np.log10(deltachi2s[i])), fontsize=fs, ha="right", transform=ax.transAxes)
                ax.grid(True)
                ax.set_xticklabels([])
                ax.set_yticklabels([])
                ax.set_ylim(ylim)
            if (ix % nrow == nrow - 1) | (i == ii[-1]):
                pdf.savefig(fig, bbox_inches="tight")
                plt.close()