Source code for desispec.emlinefit

"""
desispec.emlinefit
==================

Routines for desi_emlinefit_afterburner.
"""

import numpy as np
from scipy.optimize import curve_fit
from scipy.special import erf
from desiutil.log import get_logger

allowed_emnames = ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]



[docs]
def get_rf_em_waves(emname):
    """
    Returns the rest-frame, vacuum, wavelengths.

    Args:
        emname, from: "OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA" (string)

    Returns:
        rf_em_waves: rest-frame wavelength(s) (np array)

    Notes:
        For OII and OIII returns a two-elements array; one-element array otherwise.
        https://github.com/desihub/fastspecfit/blob/60393296e0cc466858f70a5d021d02693eff9375/py/fastspecfit/data/emlines.ecsv
    """
    if emname == "OII":
        rf_em_waves = np.array([3727.092, 3729.874])
    if emname == "OIII":
        rf_em_waves = np.array([4960.295, 5008.239])
    if emname == "HALPHA":
        rf_em_waves = np.array([6564.613])
    if emname == "HBETA":
        rf_em_waves = np.array([4862.683])
    if emname == "HGAMMA":
        rf_em_waves = np.array([4341.684])
    if emname == "HDELTA":
        rf_em_waves = np.array([4102.892])
    return rf_em_waves




[docs]
def emlines_gaussfit(
    emname,
    zspec,
    waves,
    fluxes,
    ivars,
    rf_fit_hw=40,
    min_rf_fit_hw=20,
    rf_cont_w=200,
    p0_sigma=3.5,
    p0_flux=10,
    p0_share=0.5,
    min_sigma=0.34,  # DESI resolution >= 0.8 Angstrom in FWHM, or 0.34 in sigma
    max_sigma=10.,
    min_flux=-1e9,
    max_flux=1e9,
    min_share=0.26,  # I(3729) / I(3726) > 0.35
    max_share=0.6,  # I(3729) / I(3726) < 1.5
    log=None,
):
    """
    Fits the [OII] doublet line profile with 2 related Gaussians.

    Args:
        emname: "OII" or "OIII" or "HALPHA", "HBETA", "HGAMMA", "HDELTA" (string)
        zspec: redshift (float)
        waves: wavelength in Angstroms (numpy array)
        fluxes: flux observed in the broad band (in erg/s/cm2/A) (numpy array)
        ivars: inverse variance on the flux (numpy array)
        rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float)
        min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float)
        rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float)
        p0_sigma (optional, defaults to 3.5): initial guess on the line width in A (float)
        p0_flux (optional, defaults to 10): initial guess on the line flux in 1e-17 * erg/cm2/s (float)
        p0_share (optional, defaults to 0.5): initial guess on the share between the two [OII] lines (float)
        min_sigma (optional, defaults to 0.34): minimum allowed value for the line width in A; corresponds to DESI resolution >= 0.8 A in FWHM (float)
        max_sigma (optional, defaults to 10.): maximum allowed value for the line width in A (float)
        min_flux (optional, defaults to -1e9): minimum allowed value for the flux in 1e-17 * erg/cm2/s (float)
        max_flux (optional, defaults to 1e9): maximum allowed value for the flux in 1e-17 * erg/cm2/s (float)
        min_share (optional, defaults to 0.26): minimum allowed value for the share, where share=r/(1+r) and r=I(3729)/I(3726); 0.26 corresponds to I(3729)/I(3726) >= 0.351 (float)
        max_share (optional, defaults to 0.6): maximum allowed value for the share, where share=r/(1+r) and r=I(3729)/I(3726); 0.6 corresponds to I(3729)/I(3726) <= 1.5 (float)
        log (optional, defaults to get_logger()): Logger object

    Returns:
        emdict: a dictionary with various quantities, noticely "FLUX" and "FLUX_IVAR" (dictionary of floats)
        list of all keys::

                CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom
                CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A
                FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s
                SIGMA, SIGMA_IVAR: line width in A (observed frame)
                SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets
                EW, EW_IVAR: rest-frame equivalent width (in A)
                waves: wavelength values (in A) used for the fitting (numpy array of floats)
                fluxes: flux values (in 1e-17 * erg/cm2/s/A) used for the fitting (numpy array of floats)
                ivars: ivar values used for the fitting (numpy array of floats)
                models: model flux values (in 1e-17 * erg/cm2/s/A) from the fit (numpy array of floats)

        succeed: did the fit succeed? (boolean)

    Note:
        * Adapted/simplified from elgredshiftflag from J. Comparat (used for eBOSS/ELG):
          https://svn.sdss.org/repo/eboss/elgredshiftflag/
        * Returns np.nan in emdict (and NDOF=-99) if not enough pixels to fit or if fit fails.
        * Default settings designed for ELGs (e.g. max_sigma); need to re-assess if run on other targets.
        * For "OII", let the doublet line ratio free during the fit.
        * For "OIII", fits the 4960 and 5007 lines with a fixed line ratio.
        * For the Balmer lines, SHARE is not fitted and set to np.nan.
    """
    # AR log
    if log is None:
        log = get_logger()
    # AR allowed arguments
    if emname not in allowed_emnames:
        msg = "{} not in {}".format(emname, allowed_emnames)
        log.error(msg)
        raise ValueError(msg)
    # AR Line models - integrated flux per pixel
    def gauss_nocont(ws, sigma, F0, w0):
        """
        Compute integrated Gaussian flux in each wavelength pixel.

        Args:
            ws: wavelength array (pixel centers)
            sigma: Gaussian width
            F0: total integrated flux
            w0: Gaussian center wavelength

        Returns:
            Average flux density in each pixel (integrated flux / pixel width)
        """
        # Check for minimum number of pixels
        if len(ws) < 2:
            raise ValueError("gauss_nocont requires at least 2 wavelength pixels")

        # Calculate pixel edges (boundaries between pixels)
        edges = np.zeros(len(ws) + 1)

        # Interior edges are midpoints
        edges[1:-1] = 0.5 * (ws[:-1] + ws[1:])

        # Extrapolate for first and last edges
        edges[0] = ws[0] - 0.5 * (ws[1] - ws[0])
        edges[-1] = ws[-1] + 0.5 * (ws[-1] - ws[-2])

        # Compute integrated flux using error function
        # Integral of Gaussian from a to b is:
        # F0 * (erf((b - w0)/(sqrt(2)*sigma)) - erf((a - w0)/(sqrt(2)*sigma))) / 2
        sqrt2_sigma = np.sqrt(2.0) * sigma
        erf_upper = erf((edges[1:] - w0) / sqrt2_sigma)
        erf_lower = erf((edges[:-1] - w0) / sqrt2_sigma)

        integrated_flux = F0 * (erf_upper - erf_lower) / 2.0

        # Divide by pixel width to get average flux density
        pixel_widths = edges[1:] - edges[:-1]
        flux_density = integrated_flux / pixel_widths

        return flux_density
    # AR vacuum rest-frame wavelength(s)
    rf_em_waves = get_rf_em_waves(emname)
    if emname == "OII":
        cont_choice = "left"
        min_n_lines = 2
    if emname == "OIII":
        cont_choice = "center"
        min_n_lines = 2
    if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]:
        cont_choice = "center"
        min_n_lines = 1
    # AR expected position of the peak of the line in the observed frame (redshifted). 2 positions given.
    obs_em_waves = (1. + zspec) * rf_em_waves
    # AR *observed-frame* wavelength extents
    obs_fit_hw = (1. + zspec) * rf_fit_hw
    min_obs_fit_hw = (1. + zspec) * min_rf_fit_hw
    obs_cont_w = (1. + zspec) * rf_cont_w
    # AR initializing
    # AR "waves", "fluxes", "ivars", "models" are dealt separately
    emdict = {}
    keys = [
        "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR",
        "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR",
        "EW", "EW_IVAR",
        "CHI2", "NDOF",
    ]
    for key in keys:
        if key == "NDOF":
            emdict[key] = -99
        else:
            emdict[key] = np.nan
    # AR picking wavelengths
    keep_line = np.zeros(len(waves), dtype=bool)
    keep_cont = np.zeros(len(waves), dtype=bool)
    n_cov_lines = 0
    for obs_em_wave in obs_em_waves:
        # AR used for line fitting
        keep_line |= (waves > obs_em_wave - obs_fit_hw) & (waves < obs_em_wave + obs_fit_hw)
        # AR used for continuum
        if cont_choice == "left":
            keep_cont |= (waves > obs_em_wave - obs_cont_w) & (waves < obs_em_wave)
        if cont_choice == "center":
            keep_cont |= (waves > obs_em_wave - obs_cont_w / 2.) & (waves < obs_em_wave + obs_cont_w / 2.)
        if cont_choice == "right":
            keep_cont |= (waves > obs_em_wave) & (waves < obs_em_wave + obs_cont_w)
        # AR has the line minimal coverage?
        n_cov_lines += int((waves.min() < obs_em_wave - min_obs_fit_hw) & (waves.max() > obs_em_wave + min_obs_fit_hw))
    # AR excluding for the continuum estimation wavelengths used for line(s) fiting
    keep_cont &= ~keep_line
    # AR discarding flux=nan and ivars == 0 pixels
    keep_line &= (np.isfinite(fluxes)) & (ivars > 0)
    keep_cont &= (np.isfinite(fluxes)) & (ivars > 0)
    # AR
    models = np.nan+np.zeros(keep_line.sum())
    # AR enough pixels to fit?
    succeed = False
    if (
        (keep_line.sum() >= 3) &
        (keep_cont.sum() >= 3) &
        (n_cov_lines >= min_n_lines)
    ):
        # AR continuum flux, ivar
        emdict["CONT"] = np.median(fluxes[keep_cont])
        emdict["CONT_IVAR"] = np.median(ivars[keep_cont])
        #
        # AR OII
        # AR fitting a doublet with line ratio in the fitting
        # AR sh = f3729 / (f3727 + f3729)
        if emname == "OII":
            myfunc = lambda ws, sigma, F0, sh: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1])
            p0 = np.array([p0_sigma, p0_flux, p0_share])
            bounds = ((min_sigma, min_flux, min_share), (max_sigma, max_flux, max_share))
        # AR OIII
        # AR fitting two lines with a fixed line ratio
        # AR f5007 = 2.9 * f4960
        # AR sh = f5007 / (f4960 + f5007)
        # AR sh = 2.9 / (1 + 2.9) = 0.744
        # AR using same F0 and sigma for both lines
        if emname == "OIII":
            sh = 0.744
            myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1])
            p0 = np.array([p0_sigma, p0_flux])
            bounds = ((min_sigma, min_flux), (max_sigma, max_flux))
        # AR Balmer lines
        # AR fitting emission line only (no absorption)
        if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]:
            myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, F0, obs_em_waves[0])
            p0 = np.array([p0_sigma, p0_flux])
            bounds = ((min_sigma, min_flux), (max_sigma, max_flux))
        # AR flux at observed wavelength(s)
        obs_em_fluxes = np.array([fluxes[np.searchsorted(waves, obs_em_wave)] for obs_em_wave in obs_em_waves])
        # AR is the flux above continuum for at least one line?
        # SB don't require positive flux before attempting fit
        if True or obs_em_fluxes.max() > emdict["CONT"]:
            # AR maxfev and gtol set by JC; seems to work; not touching those...
            popt, pcov = curve_fit(
                myfunc,
                waves[keep_line],
                fluxes[keep_line],
                p0=p0,
                sigma=1. / np.sqrt(ivars[keep_line]),
                maxfev=10000000,
                gtol=1.49012e-8,
                bounds=bounds,
            )
            # AR fit succeeded?
            # AR - pcov.__class__ criterion => dates from JC, not sure how relevant.. keeping it
            # AR - then we decide that the fit is no good if:
            # AR   - var = 0,
            # AR   - or var(flux) = 0
            # AR   - or flux closer than 1% to the allowed boundaries
            if pcov.__class__ == np.ndarray:
                diag = np.diag(pcov)
                if (diag.sum() > 0) & (diag[1] > 0) & (popt[1] > 1.01 * min_flux) & (popt[1] < 0.99 * max_flux):
                    succeed = True
                    if emname == "OII":
                        models = myfunc(waves[keep_line], popt[0], popt[1], popt[2])
                        emdict["SHARE"] = popt[2]
                        emdict["SHARE_IVAR"] = diag[2] ** -1
                    if emname == "OIII":
                        models = myfunc(waves[keep_line], popt[0], popt[1])
                        emdict["SHARE"] = sh
                        emdict["SHARE_IVAR"] = np.inf
                    if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]:
                        models = myfunc(waves[keep_line], popt[0], popt[1])
                    emdict["NDOF"] = keep_line.sum() - len(p0)
                    emdict["CHI2"] = np.sum(np.abs(models - fluxes[keep_line]) ** 2. * ivars[keep_line])
                    emdict["CHI2"] /= emdict["NDOF"] # AR we define CHI2 as the reduced chi2, as in fastspecfit
                    emdict["SIGMA"] = popt[0]
                    emdict["SIGMA_IVAR"] = diag[0] ** -1
                    emdict["FLUX"] = popt[1]
                    emdict["FLUX_IVAR"] = diag[1] ** -1
                    # AR rest-frame equivalent width
                    factor = 1 / ((1 + zspec) * emdict["CONT"])
                    emdict["EW"] = emdict["FLUX"] * factor
                    emdict["EW_IVAR"] = emdict["FLUX_IVAR"] / factor ** 2
    # AR fitted waves / fluxes / ivars / models
    emdict["waves"] = waves[keep_line]
    emdict["fluxes"] = fluxes[keep_line]
    emdict["ivars"] = ivars[keep_line]
    emdict["models"] = models
    #
    return emdict, succeed




[docs]
def get_emlines(
    zspecs,
    waves,
    fluxes,
    ivars,
    emnames=["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"],
    rf_fit_hw=40,
    min_rf_fit_hw=20,
    rf_cont_w=200,
    log=None,
):
    """
    Get Gaussian-fitted emission line fluxes.

    Args:
        zspecs: redshifts (numpy array of shape (Nspec))
        waves: wavelengths (numpy array of shape (Nwave))
        fluxes: fluxes (numpy array of shape (Nspec, Nwave))
        ivars: inverse variances (numpy array of shape (Nspec, Nwave))
        emnames (optional, defaults to ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]): list of lines to fit (list of string)
        rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float)
        min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float)
        rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float)
        log (optional, defaults to get_logger()): Logger object

    Returns:
        emdict: a dictionary with a subdictionary for each emname in emnames, with:,
            CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom
            CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A
            FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s
            SIGMA, SIGMA_IVAR: line width in A (observed frame)
            SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets
            EW, EW_IVAR: rest-frame equivalent width in A
            waves, fluxes, ivars, models: data used for fitting + fitted model
    """
    # AR log
    if log is None:
        log = get_logger()

    # AR number of spectra
    nspec = len(zspecs)

    # AR assert consistency
    if (
        (fluxes.shape[0] != nspec) |
        (fluxes.shape[1] != waves.shape[0]) |
        (ivars.shape[0] != nspec) |
        (ivars.shape[1] != waves.shape[0])
    ):
        log.error(
            "Shape inconsistencies for inputs: zspecs={}, waves={}, fluxes={}, ivars={}".format(
                zspecs.shape, waves.shape, fluxes.shape, ivars.shape,
            )
        )
        raise RuntimeError()

    # AR dictionary to store results
    # AR keys returned by emlines_gaussfit
    emkeys = [
        "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR",
        "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR",
        "EW", "EW_IVAR",
        "CHI2", "NDOF",
        "waves", "fluxes", "ivars", "models",
    ]
    emdict = {}

    # AR prepare columns
    for emname in emnames:
        emdict[emname] = {}
        for key in emkeys:
            if key == "NDOF":
                emdict[emname][key] = -99 + np.zeros(nspec, dtype=int)
            elif key in ["waves", "fluxes", "ivars", "models"]:
                emdict[emname][key] = np.zeros(nspec, dtype=object)
            else:
                emdict[emname][key] = np.nan + np.zeros(nspec)

    # AR fit Gaussians
    for i_emname, emname in enumerate(emnames):
        for i in range(nspec):
            tmpemdict, _ = emlines_gaussfit(
                emname,
                zspecs[i],
                waves,
                fluxes[i, :],
                ivars[i, :],
                rf_fit_hw=rf_fit_hw,
                min_rf_fit_hw=min_rf_fit_hw,
                rf_cont_w=rf_cont_w,
                log=log,
            )
            for key in emkeys:
                emdict[emname][key][i] = tmpemdict[key]
    return emdict