Source code for desispec.emlinefit

"""
desispec.emlinefit
==================

Routines for desi_emlinefit_afterburner.
"""

import numpy as np
from scipy.optimize import curve_fit
from desiutil.log import get_logger

allowed_emnames = ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]


[docs]def get_rf_em_waves(emname): """ Returns the rest-frame, vacuum, wavelengths. Args: emname, from: "OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA" (string) Returns: rf_em_waves: rest-frame wavelength(s) (np array) Notes: For OII and OIII returns a two-elements array; one-element array otherwise. https://github.com/desihub/fastspecfit/blob/60393296e0cc466858f70a5d021d02693eff9375/py/fastspecfit/data/emlines.ecsv """ if emname == "OII": rf_em_waves = np.array([3727.092, 3729.874]) if emname == "OIII": rf_em_waves = np.array([4960.295, 5008.239]) if emname == "HALPHA": rf_em_waves = np.array([6564.613]) if emname == "HBETA": rf_em_waves = np.array([4862.683]) if emname == "HGAMMA": rf_em_waves = np.array([4341.684]) if emname == "HDELTA": rf_em_waves = np.array([4102.892]) return rf_em_waves
[docs]def emlines_gaussfit( emname, zspec, waves, fluxes, ivars, rf_fit_hw=40, min_rf_fit_hw=20, rf_cont_w=200, p0_sigma=2.5, p0_flux=10, p0_share=0.58, min_sigma=1e-5, max_sigma=10., min_flux=-1e9, max_flux=1e9, min_share=1e-1, max_share=1, log=None, ): """ Fits the [OII] doublet line profile with 2 related Gaussians. Args: emname: "OII" or "OIII" or "HALPHA", "HBETA", "HGAMMA", "HDELTA" (string) zspec: redshift (float) waves: wavelength in Angstroms (numpy array) fluxes: flux observed in the broad band (in erg/s/cm2/A) (numpy array) ivars: inverse variance on the flux (numpy array) rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float) min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float) rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float) p0_sigma (optional, defaults to 2.5): initial guess on the line width in A (float) p0_flux (optional, defaults to 0.1): initial guess on the line flux in 1e-17 * erg/cm2/s (float) p0_share (optional, defaults to 0.58): initial guess on the share between the two [OII] lines (float) min_sigma (optional, defaults to 1e-5): minimum allowed value for the line width in A (float) max_sigma (optional, defaults to 10.): maximum allowed value for the line width in A (float) min_flux (optional, defaults to 1e-5): minimum allowed value for the flux in 1e-17 * erg/cm2/s (float) max_flux (optional, defaults to 1e9): maximum allowed value for the flux in 1e-17 * erg/cm2/s (float) min_share (optional, defaults to 1e-1): minimum allowed value for the share (float) max_share (optional, defaults to 1): maximum allowed value for the share (float) log (optional, defaults to get_logger()): Logger object Returns: emdict: a dictionary with various quantities, noticely "FLUX" and "FLUX_IVAR" (dictionary of floats) list of all keys:: CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s SIGMA, SIGMA_IVAR: line width in A (observed frame) SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets EW, EW_IVAR: rest-frame equivalent width (in A) waves: wavelength values (in A) used for the fitting (numpy array of floats) fluxes: flux values (in 1e-17 * erg/cm2/s/A) used for the fitting (numpy array of floats) ivars: ivar values used for the fitting (numpy array of floats) models: model flux values (in 1e-17 * erg/cm2/s/A) from the fit (numpy array of floats) succeed: did the fit succeed? (boolean) Note: * Adapted/simplified from elgredshiftflag from J. Comparat (used for eBOSS/ELG): https://svn.sdss.org/repo/eboss/elgredshiftflag/ * Returns np.nan in emdict (and NDOF=-99) if not enough pixels to fit or if fit fails. * Default settings designed for ELGs (e.g. max_sigma); need to re-assess if run on other targets. * For "OII", let the doublet line ratio free during the fit. * For "OIII", fits the 4960 and 5007 lines with a fixed line ratio. * For the Balmer lines, SHARE is not fitted and set to np.nan. """ # AR log if log is None: log = get_logger() # AR allowed arguments if emname not in allowed_emnames: msg = "{} not in {}".format(emname, allowed_emnames) log.error(msg) raise ValueError(msg) # AR Line models gauss_nocont = lambda ws, sigma, F0, w0: F0 * (np.e ** (- (ws - w0) ** 2. / (2. * sigma ** 2.))) / (sigma * (2. * np.pi) ** 0.5) # AR vacuum rest-frame wavelength(s) rf_em_waves = get_rf_em_waves(emname) if emname == "OII": cont_choice = "left" min_n_lines = 2 if emname == "OIII": cont_choice = "center" min_n_lines = 2 if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: cont_choice = "center" min_n_lines = 1 # AR expected position of the peak of the line in the observed frame (redshifted). 2 positions given. obs_em_waves = (1. + zspec) * rf_em_waves # AR *observed-frame* wavelength extents obs_fit_hw = (1. + zspec) * rf_fit_hw min_obs_fit_hw = (1. + zspec) * min_rf_fit_hw obs_cont_w = (1. + zspec) * rf_cont_w # AR initializing # AR "waves", "fluxes", "ivars", "models" are dealt separately emdict = {} keys = [ "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR", "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR", "EW", "EW_IVAR", "CHI2", "NDOF", ] for key in keys: if key == "NDOF": emdict[key] = -99 else: emdict[key] = np.nan # AR picking wavelengths keep_line = np.zeros(len(waves), dtype=bool) keep_cont = np.zeros(len(waves), dtype=bool) n_cov_lines = 0 for obs_em_wave in obs_em_waves: # AR used for line fitting keep_line |= (waves > obs_em_wave - obs_fit_hw) & (waves < obs_em_wave + obs_fit_hw) # AR used for continuum if cont_choice == "left": keep_cont |= (waves > obs_em_wave - obs_cont_w) & (waves < obs_em_wave) if cont_choice == "center": keep_cont |= (waves > obs_em_wave - obs_cont_w / 2.) & (waves < obs_em_wave + obs_cont_w / 2.) if cont_choice == "right": keep_cont |= (waves > obs_em_wave) & (waves < obs_em_wave + obs_cont_w) # AR has the line minimal coverage? n_cov_lines += int((waves.min() < obs_em_wave - min_obs_fit_hw) & (waves.max() > obs_em_wave + min_obs_fit_hw)) # AR excluding for the continuum estimation wavelengths used for line(s) fiting keep_cont &= ~keep_line # AR discarding flux=nan and ivars == 0 pixels keep_line &= (np.isfinite(fluxes)) & (ivars > 0) keep_cont &= (np.isfinite(fluxes)) & (ivars > 0) # AR models = np.nan+np.zeros(keep_line.sum()) # AR enough pixels to fit? succeed = False if ( (keep_line.sum() >= 3) & (keep_cont.sum() >= 3) & (n_cov_lines >= min_n_lines) ): # AR continuum flux, ivar emdict["CONT"] = np.median(fluxes[keep_cont]) emdict["CONT_IVAR"] = np.median(ivars[keep_cont]) # # AR OII # AR fitting a doublet with line ratio in the fitting # AR sh = f3729 / (f3727 + f3729) if emname == "OII": myfunc = lambda ws, sigma, F0, sh: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1]) p0 = np.array([p0_sigma, p0_flux, p0_share]) bounds = ((min_sigma, min_flux, min_share), (max_sigma, max_flux, max_share)) # AR OIII # AR fitting two lines with a fixed line ratio # AR f5007 = 2.9 * f4960 # AR sh = f5007 / (f4960 + f5007) # AR sh = 2.9 / (1 + 2.9) = 0.744 # AR using same F0 and sigma for both lines if emname == "OIII": sh = 0.744 myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1]) p0 = np.array([p0_sigma, p0_flux]) bounds = ((min_sigma, min_flux), (max_sigma, max_flux)) # AR Balmer lines # AR fitting emission line only (no absorption) if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, F0, obs_em_waves[0]) p0 = np.array([p0_sigma, p0_flux]) bounds = ((min_sigma, min_flux), (max_sigma, max_flux)) # AR flux at observed wavelength(s) obs_em_fluxes = np.array([fluxes[np.searchsorted(waves, obs_em_wave)] for obs_em_wave in obs_em_waves]) # AR is the flux above continuum for at least one line? # SB don't require positive flux before attempting fit if True or obs_em_fluxes.max() > emdict["CONT"]: # AR maxfev and gtol set by JC; seems to work; not touching those... popt, pcov = curve_fit( myfunc, waves[keep_line], fluxes[keep_line], p0=p0, sigma=1. / np.sqrt(ivars[keep_line]), maxfev=10000000, gtol=1.49012e-8, bounds=bounds, ) # AR fit succeeded? # AR - pcov.__class__ criterion => dates from JC, not sure how relevant.. keeping it # AR - then we decide that the fit is no good if: # AR - var = 0, # AR - or var(flux) = 0 # AR - or flux closer than 1% to the allowed boundaries if pcov.__class__ == np.ndarray: diag = np.diag(pcov) if (diag.sum() > 0) & (diag[1] > 0) & (popt[1] > 1.01 * min_flux) & (popt[1] < 0.99 * max_flux): succeed = True if emname == "OII": models = myfunc(waves[keep_line], popt[0], popt[1], popt[2]) emdict["SHARE"] = popt[2] emdict["SHARE_IVAR"] = diag[2] ** -1 if emname == "OIII": models = myfunc(waves[keep_line], popt[0], popt[1]) emdict["SHARE"] = sh emdict["SHARE_IVAR"] = np.inf if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: models = myfunc(waves[keep_line], popt[0], popt[1]) emdict["NDOF"] = keep_line.sum() - len(p0) emdict["CHI2"] = np.sum(np.abs(models - fluxes[keep_line]) ** 2. / ivars[keep_line] ** 2.) emdict["CHI2"] /= emdict["NDOF"] # AR we define CHI2 as the reduced chi2, as in fastspecfit emdict["SIGMA"] = popt[0] emdict["SIGMA_IVAR"] = diag[0] ** -1 emdict["FLUX"] = popt[1] emdict["FLUX_IVAR"] = diag[1] ** -1 # AR rest-frame equivalent width factor = 1 / ((1 + zspec) * emdict["CONT"]) emdict["EW"] = emdict["FLUX"] * factor emdict["EW_IVAR"] = emdict["FLUX_IVAR"] / factor ** 2 # AR fitted waves / fluxes / ivars / models emdict["waves"] = waves[keep_line] emdict["fluxes"] = fluxes[keep_line] emdict["ivars"] = ivars[keep_line] emdict["models"] = models # return emdict, succeed
[docs]def get_emlines( zspecs, waves, fluxes, ivars, emnames=["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"], rf_fit_hw=40, min_rf_fit_hw=20, rf_cont_w=200, log=None, ): """ Get Gaussian-fitted emission line fluxes. Args: zspecs: redshifts (numpy array of shape (Nspec)) waves: wavelengths (numpy array of shape (Nwave)) fluxes: fluxes (numpy array of shape (Nspec, Nwave)) ivars: inverse variances (numpy array of shape (Nspec, Nwave)) emnames (optional, defaults to ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]): list of lines to fit (list of string) rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float) min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float) rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float) log (optional, defaults to get_logger()): Logger object Returns: emdict: a dictionary with a subdictionary for each emname in emnames, with:, CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s SIGMA, SIGMA_IVAR: line width in A (observed frame) SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets EW, EW_IVAR: rest-frame equivalent width in A waves, fluxes, ivars, models: data used for fitting + fitted model """ # AR log if log is None: log = get_logger() # AR number of spectra nspec = len(zspecs) # AR assert consistency if ( (fluxes.shape[0] != nspec) | (fluxes.shape[1] != waves.shape[0]) | (ivars.shape[0] != nspec) | (ivars.shape[1] != waves.shape[0]) ): log.error( "Shape inconsistencies for inputs: zspecs={}, waves={}, fluxes={}, ivars={}".format( zspecs.shape, waves.shape, fluxes.shape, ivars.shape, ) ) raise RuntimeError() # AR dictionary to store results # AR keys returned by emlines_gaussfit emkeys = [ "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR", "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR", "EW", "EW_IVAR", "CHI2", "NDOF", "waves", "fluxes", "ivars", "models", ] emdict = {} # AR prepare columns for emname in emnames: emdict[emname] = {} for key in emkeys: if key == "NDOF": emdict[emname][key] = -99 + np.zeros(nspec, dtype=int) elif key in ["waves", "fluxes", "ivars", "models"]: emdict[emname][key] = np.zeros(nspec, dtype=object) else: emdict[emname][key] = np.nan + np.zeros(nspec) # AR fit Gaussians for i_emname, emname in enumerate(emnames): for i in range(nspec): tmpemdict, _ = emlines_gaussfit( emname, zspecs[i], waves, fluxes[i, :], ivars[i, :], rf_fit_hw=rf_fit_hw, min_rf_fit_hw=min_rf_fit_hw, rf_cont_w=rf_cont_w, log=log, ) for key in emkeys: emdict[emname][key][i] = tmpemdict[key] return emdict