Source code for desispec.emlinefit

"""
desispec.emlinefit
==================

Routines for desi_emlinefit_afterburner.
"""

import numpy as np
from scipy.optimize import curve_fit
from scipy.special import erf
from desiutil.log import get_logger

allowed_emnames = ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]


[docs] def get_rf_em_waves(emname): """ Returns the rest-frame, vacuum, wavelengths. Args: emname, from: "OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA" (string) Returns: rf_em_waves: rest-frame wavelength(s) (np array) Notes: For OII and OIII returns a two-elements array; one-element array otherwise. https://github.com/desihub/fastspecfit/blob/60393296e0cc466858f70a5d021d02693eff9375/py/fastspecfit/data/emlines.ecsv """ if emname == "OII": rf_em_waves = np.array([3727.092, 3729.874]) if emname == "OIII": rf_em_waves = np.array([4960.295, 5008.239]) if emname == "HALPHA": rf_em_waves = np.array([6564.613]) if emname == "HBETA": rf_em_waves = np.array([4862.683]) if emname == "HGAMMA": rf_em_waves = np.array([4341.684]) if emname == "HDELTA": rf_em_waves = np.array([4102.892]) return rf_em_waves
[docs] def emlines_gaussfit( emname, zspec, waves, fluxes, ivars, rf_fit_hw=40, min_rf_fit_hw=20, rf_cont_w=200, p0_sigma=3.5, p0_flux=10, p0_share=0.5, min_sigma=0.34, # DESI resolution >= 0.8 Angstrom in FWHM, or 0.34 in sigma max_sigma=10., min_flux=-1e9, max_flux=1e9, min_share=0.26, # I(3729) / I(3726) > 0.35 max_share=0.6, # I(3729) / I(3726) < 1.5 log=None, ): """ Fits the [OII] doublet line profile with 2 related Gaussians. Args: emname: "OII" or "OIII" or "HALPHA", "HBETA", "HGAMMA", "HDELTA" (string) zspec: redshift (float) waves: wavelength in Angstroms (numpy array) fluxes: flux observed in the broad band (in erg/s/cm2/A) (numpy array) ivars: inverse variance on the flux (numpy array) rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float) min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float) rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float) p0_sigma (optional, defaults to 3.5): initial guess on the line width in A (float) p0_flux (optional, defaults to 10): initial guess on the line flux in 1e-17 * erg/cm2/s (float) p0_share (optional, defaults to 0.5): initial guess on the share between the two [OII] lines (float) min_sigma (optional, defaults to 0.34): minimum allowed value for the line width in A; corresponds to DESI resolution >= 0.8 A in FWHM (float) max_sigma (optional, defaults to 10.): maximum allowed value for the line width in A (float) min_flux (optional, defaults to -1e9): minimum allowed value for the flux in 1e-17 * erg/cm2/s (float) max_flux (optional, defaults to 1e9): maximum allowed value for the flux in 1e-17 * erg/cm2/s (float) min_share (optional, defaults to 0.26): minimum allowed value for the share, where share=r/(1+r) and r=I(3729)/I(3726); 0.26 corresponds to I(3729)/I(3726) >= 0.351 (float) max_share (optional, defaults to 0.6): maximum allowed value for the share, where share=r/(1+r) and r=I(3729)/I(3726); 0.6 corresponds to I(3729)/I(3726) <= 1.5 (float) log (optional, defaults to get_logger()): Logger object Returns: emdict: a dictionary with various quantities, noticely "FLUX" and "FLUX_IVAR" (dictionary of floats) list of all keys:: CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s SIGMA, SIGMA_IVAR: line width in A (observed frame) SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets EW, EW_IVAR: rest-frame equivalent width (in A) waves: wavelength values (in A) used for the fitting (numpy array of floats) fluxes: flux values (in 1e-17 * erg/cm2/s/A) used for the fitting (numpy array of floats) ivars: ivar values used for the fitting (numpy array of floats) models: model flux values (in 1e-17 * erg/cm2/s/A) from the fit (numpy array of floats) succeed: did the fit succeed? (boolean) Note: * Adapted/simplified from elgredshiftflag from J. Comparat (used for eBOSS/ELG): https://svn.sdss.org/repo/eboss/elgredshiftflag/ * Returns np.nan in emdict (and NDOF=-99) if not enough pixels to fit or if fit fails. * Default settings designed for ELGs (e.g. max_sigma); need to re-assess if run on other targets. * For "OII", let the doublet line ratio free during the fit. * For "OIII", fits the 4960 and 5007 lines with a fixed line ratio. * For the Balmer lines, SHARE is not fitted and set to np.nan. """ # AR log if log is None: log = get_logger() # AR allowed arguments if emname not in allowed_emnames: msg = "{} not in {}".format(emname, allowed_emnames) log.error(msg) raise ValueError(msg) # AR Line models - integrated flux per pixel def gauss_nocont(ws, sigma, F0, w0): """ Compute integrated Gaussian flux in each wavelength pixel. Args: ws: wavelength array (pixel centers) sigma: Gaussian width F0: total integrated flux w0: Gaussian center wavelength Returns: Average flux density in each pixel (integrated flux / pixel width) """ # Check for minimum number of pixels if len(ws) < 2: raise ValueError("gauss_nocont requires at least 2 wavelength pixels") # Calculate pixel edges (boundaries between pixels) edges = np.zeros(len(ws) + 1) # Interior edges are midpoints edges[1:-1] = 0.5 * (ws[:-1] + ws[1:]) # Extrapolate for first and last edges edges[0] = ws[0] - 0.5 * (ws[1] - ws[0]) edges[-1] = ws[-1] + 0.5 * (ws[-1] - ws[-2]) # Compute integrated flux using error function # Integral of Gaussian from a to b is: # F0 * (erf((b - w0)/(sqrt(2)*sigma)) - erf((a - w0)/(sqrt(2)*sigma))) / 2 sqrt2_sigma = np.sqrt(2.0) * sigma erf_upper = erf((edges[1:] - w0) / sqrt2_sigma) erf_lower = erf((edges[:-1] - w0) / sqrt2_sigma) integrated_flux = F0 * (erf_upper - erf_lower) / 2.0 # Divide by pixel width to get average flux density pixel_widths = edges[1:] - edges[:-1] flux_density = integrated_flux / pixel_widths return flux_density # AR vacuum rest-frame wavelength(s) rf_em_waves = get_rf_em_waves(emname) if emname == "OII": cont_choice = "left" min_n_lines = 2 if emname == "OIII": cont_choice = "center" min_n_lines = 2 if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: cont_choice = "center" min_n_lines = 1 # AR expected position of the peak of the line in the observed frame (redshifted). 2 positions given. obs_em_waves = (1. + zspec) * rf_em_waves # AR *observed-frame* wavelength extents obs_fit_hw = (1. + zspec) * rf_fit_hw min_obs_fit_hw = (1. + zspec) * min_rf_fit_hw obs_cont_w = (1. + zspec) * rf_cont_w # AR initializing # AR "waves", "fluxes", "ivars", "models" are dealt separately emdict = {} keys = [ "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR", "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR", "EW", "EW_IVAR", "CHI2", "NDOF", ] for key in keys: if key == "NDOF": emdict[key] = -99 else: emdict[key] = np.nan # AR picking wavelengths keep_line = np.zeros(len(waves), dtype=bool) keep_cont = np.zeros(len(waves), dtype=bool) n_cov_lines = 0 for obs_em_wave in obs_em_waves: # AR used for line fitting keep_line |= (waves > obs_em_wave - obs_fit_hw) & (waves < obs_em_wave + obs_fit_hw) # AR used for continuum if cont_choice == "left": keep_cont |= (waves > obs_em_wave - obs_cont_w) & (waves < obs_em_wave) if cont_choice == "center": keep_cont |= (waves > obs_em_wave - obs_cont_w / 2.) & (waves < obs_em_wave + obs_cont_w / 2.) if cont_choice == "right": keep_cont |= (waves > obs_em_wave) & (waves < obs_em_wave + obs_cont_w) # AR has the line minimal coverage? n_cov_lines += int((waves.min() < obs_em_wave - min_obs_fit_hw) & (waves.max() > obs_em_wave + min_obs_fit_hw)) # AR excluding for the continuum estimation wavelengths used for line(s) fiting keep_cont &= ~keep_line # AR discarding flux=nan and ivars == 0 pixels keep_line &= (np.isfinite(fluxes)) & (ivars > 0) keep_cont &= (np.isfinite(fluxes)) & (ivars > 0) # AR models = np.nan+np.zeros(keep_line.sum()) # AR enough pixels to fit? succeed = False if ( (keep_line.sum() >= 3) & (keep_cont.sum() >= 3) & (n_cov_lines >= min_n_lines) ): # AR continuum flux, ivar emdict["CONT"] = np.median(fluxes[keep_cont]) emdict["CONT_IVAR"] = np.median(ivars[keep_cont]) # # AR OII # AR fitting a doublet with line ratio in the fitting # AR sh = f3729 / (f3727 + f3729) if emname == "OII": myfunc = lambda ws, sigma, F0, sh: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1]) p0 = np.array([p0_sigma, p0_flux, p0_share]) bounds = ((min_sigma, min_flux, min_share), (max_sigma, max_flux, max_share)) # AR OIII # AR fitting two lines with a fixed line ratio # AR f5007 = 2.9 * f4960 # AR sh = f5007 / (f4960 + f5007) # AR sh = 2.9 / (1 + 2.9) = 0.744 # AR using same F0 and sigma for both lines if emname == "OIII": sh = 0.744 myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, (1-sh) * F0, obs_em_waves[0]) + gauss_nocont(ws, sigma, sh * F0, obs_em_waves[1]) p0 = np.array([p0_sigma, p0_flux]) bounds = ((min_sigma, min_flux), (max_sigma, max_flux)) # AR Balmer lines # AR fitting emission line only (no absorption) if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: myfunc = lambda ws, sigma, F0: emdict["CONT"] + gauss_nocont(ws, sigma, F0, obs_em_waves[0]) p0 = np.array([p0_sigma, p0_flux]) bounds = ((min_sigma, min_flux), (max_sigma, max_flux)) # AR flux at observed wavelength(s) obs_em_fluxes = np.array([fluxes[np.searchsorted(waves, obs_em_wave)] for obs_em_wave in obs_em_waves]) # AR is the flux above continuum for at least one line? # SB don't require positive flux before attempting fit if True or obs_em_fluxes.max() > emdict["CONT"]: # AR maxfev and gtol set by JC; seems to work; not touching those... popt, pcov = curve_fit( myfunc, waves[keep_line], fluxes[keep_line], p0=p0, sigma=1. / np.sqrt(ivars[keep_line]), maxfev=10000000, gtol=1.49012e-8, bounds=bounds, ) # AR fit succeeded? # AR - pcov.__class__ criterion => dates from JC, not sure how relevant.. keeping it # AR - then we decide that the fit is no good if: # AR - var = 0, # AR - or var(flux) = 0 # AR - or flux closer than 1% to the allowed boundaries if pcov.__class__ == np.ndarray: diag = np.diag(pcov) if (diag.sum() > 0) & (diag[1] > 0) & (popt[1] > 1.01 * min_flux) & (popt[1] < 0.99 * max_flux): succeed = True if emname == "OII": models = myfunc(waves[keep_line], popt[0], popt[1], popt[2]) emdict["SHARE"] = popt[2] emdict["SHARE_IVAR"] = diag[2] ** -1 if emname == "OIII": models = myfunc(waves[keep_line], popt[0], popt[1]) emdict["SHARE"] = sh emdict["SHARE_IVAR"] = np.inf if emname in ["HALPHA", "HBETA", "HGAMMA", "HDELTA"]: models = myfunc(waves[keep_line], popt[0], popt[1]) emdict["NDOF"] = keep_line.sum() - len(p0) emdict["CHI2"] = np.sum(np.abs(models - fluxes[keep_line]) ** 2. * ivars[keep_line]) emdict["CHI2"] /= emdict["NDOF"] # AR we define CHI2 as the reduced chi2, as in fastspecfit emdict["SIGMA"] = popt[0] emdict["SIGMA_IVAR"] = diag[0] ** -1 emdict["FLUX"] = popt[1] emdict["FLUX_IVAR"] = diag[1] ** -1 # AR rest-frame equivalent width factor = 1 / ((1 + zspec) * emdict["CONT"]) emdict["EW"] = emdict["FLUX"] * factor emdict["EW_IVAR"] = emdict["FLUX_IVAR"] / factor ** 2 # AR fitted waves / fluxes / ivars / models emdict["waves"] = waves[keep_line] emdict["fluxes"] = fluxes[keep_line] emdict["ivars"] = ivars[keep_line] emdict["models"] = models # return emdict, succeed
[docs] def get_emlines( zspecs, waves, fluxes, ivars, emnames=["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"], rf_fit_hw=40, min_rf_fit_hw=20, rf_cont_w=200, log=None, ): """ Get Gaussian-fitted emission line fluxes. Args: zspecs: redshifts (numpy array of shape (Nspec)) waves: wavelengths (numpy array of shape (Nwave)) fluxes: fluxes (numpy array of shape (Nspec, Nwave)) ivars: inverse variances (numpy array of shape (Nspec, Nwave)) emnames (optional, defaults to ["OII", "HDELTA", "HGAMMA", "HBETA", "OIII", "HALPHA"]): list of lines to fit (list of string) rf_fit_hw (optional, defaults to 40): *rest-frame* wavelength width (in A) used for fitting on each side of the line (float) min_rf_fit_hw (optional, defaults to 20): minimum requested *rest-frame* width (in A) on each side of the line to consider the fitting (float) rf_cont_w (optional, defaults to 200): *rest-frame* wavelength extent (in A) to fit the continuum (float) log (optional, defaults to get_logger()): Logger object Returns: emdict: a dictionary with a subdictionary for each emname in emnames, with:, CHI2, NDOF: *reduced* chi2 and nb of degrees of freedom CONT, CONT_IVAR: continuum in 1e-17 * erg/cm2/s/A FLUX, FLUX_IVAR: flux in 1e-17 * erg/cm2/s SIGMA, SIGMA_IVAR: line width in A (observed frame) SHARE, SHARE_IVAR: f1/(f0+f1) for OII and OIII doublets EW, EW_IVAR: rest-frame equivalent width in A waves, fluxes, ivars, models: data used for fitting + fitted model """ # AR log if log is None: log = get_logger() # AR number of spectra nspec = len(zspecs) # AR assert consistency if ( (fluxes.shape[0] != nspec) | (fluxes.shape[1] != waves.shape[0]) | (ivars.shape[0] != nspec) | (ivars.shape[1] != waves.shape[0]) ): log.error( "Shape inconsistencies for inputs: zspecs={}, waves={}, fluxes={}, ivars={}".format( zspecs.shape, waves.shape, fluxes.shape, ivars.shape, ) ) raise RuntimeError() # AR dictionary to store results # AR keys returned by emlines_gaussfit emkeys = [ "FLUX", "FLUX_IVAR", "SIGMA", "SIGMA_IVAR", "CONT", "CONT_IVAR", "SHARE", "SHARE_IVAR", "EW", "EW_IVAR", "CHI2", "NDOF", "waves", "fluxes", "ivars", "models", ] emdict = {} # AR prepare columns for emname in emnames: emdict[emname] = {} for key in emkeys: if key == "NDOF": emdict[emname][key] = -99 + np.zeros(nspec, dtype=int) elif key in ["waves", "fluxes", "ivars", "models"]: emdict[emname][key] = np.zeros(nspec, dtype=object) else: emdict[emname][key] = np.nan + np.zeros(nspec) # AR fit Gaussians for i_emname, emname in enumerate(emnames): for i in range(nspec): tmpemdict, _ = emlines_gaussfit( emname, zspecs[i], waves, fluxes[i, :], ivars[i, :], rf_fit_hw=rf_fit_hw, min_rf_fit_hw=min_rf_fit_hw, rf_cont_w=rf_cont_w, log=log, ) for key in emkeys: emdict[emname][key][i] = tmpemdict[key] return emdict