# Source code for desispec.sky (module source as rendered by Sphinx)
"""
desispec.sky
============
Utility functions to compute a sky model and subtract it.
"""
import os
import numpy as np
from collections import OrderedDict
from desispec.resolution import Resolution
from desispec.linalg import cholesky_solve
from desispec.linalg import cholesky_invert
from desispec.linalg import spline_fit
from desiutil.log import get_logger
from desispec import util
from desiutil import stats as dustat
import scipy,scipy.sparse,scipy.stats,scipy.ndimage
from scipy.signal import fftconvolve
import sys
from desispec.fiberbitmasking import get_fiberbitmasked_frame_arrays, get_fiberbitmasked_frame
import scipy.ndimage
from desispec.maskbits import specmask
from desispec.preproc import get_amp_ids,parse_sec_keyword
from desispec.io import findfile,read_xytraceset
from desispec.calibfinder import CalibFinder
from desispec.preproc import get_amp_ids
def compute_sky(frame, nsig_clipping=4.,max_iterations=100,model_ivar=False,add_variance=True,angular_variation_deg=0,chromatic_variation_deg=0,
                adjust_wavelength=False,adjust_lsf=False,
                only_use_skyfibers_for_adjustments=True,pcacorr=None,fit_offsets=False,fiberflat=None) :
    """Compute a sky model.

    Dispatches to the appropriate sky fitter depending on the requested
    angular variation of the sky across the focal plane, then attaches
    per-fiber throughput corrections to the returned model.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
            - wave : 1D wavelength grid in Angstroms
            - flux : 2D flux[nspec, nwave] density
            - ivar : 2D inverse variance of flux
            - mask : 2D inverse mask flux (0=good)
            - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : Degree of polynomial for sky flux variation with focal plane coordinates (default=0, i.e. no correction, a uniform sky)
        chromatic_variation_deg : Wavelength degree for the chromatic x angular terms. If negative, use as many 2D polynomials of x and y as wavelength entries.
        adjust_wavelength : adjust the wavelength of the sky model on sky lines to improve the sky subtraction
        adjust_lsf : adjust the LSF width of the sky model on sky lines to improve the sky subtraction
        only_use_skyfibers_for_adjustments: interpolate adjustments using sky fibers only
        pcacorr : SkyCorrPCA object to interpolate the wavelength or LSF adjustment from sky fibers to all fibers
        fit_offsets : fit offsets for regions defined in calib
        fiberflat : desispec.FiberFlat object used for the fit of offsets

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask
    """
    if angular_variation_deg != 0 :
        # the calibration adjustments and the CCD offset fit are only
        # implemented for the uniform sky model
        if adjust_wavelength :
            raise RuntimeError("combination of wavelength calibration adjustment and angular variations not yet implemented")
        if adjust_lsf :
            raise RuntimeError("combination of lsf calibration adjustment and angular variations not yet implemented")
        if fit_offsets :
            raise RuntimeError("combination of fit_offsets and angular variations not yet implemented")
        if chromatic_variation_deg < 0 :
            # one independent 2D (x,y) polynomial per wavelength bin
            skymodel = compute_non_uniform_sky(frame, nsig_clipping=nsig_clipping, max_iterations=max_iterations,
                                               model_ivar=model_ivar, add_variance=add_variance,
                                               angular_variation_deg=angular_variation_deg)
        else :
            # smooth chromatic x angular polynomial modulation of the sky
            skymodel = compute_polynomial_times_sky(frame, nsig_clipping=nsig_clipping, max_iterations=max_iterations,
                                                    model_ivar=model_ivar, add_variance=add_variance,
                                                    angular_variation_deg=angular_variation_deg,
                                                    chromatic_variation_deg=chromatic_variation_deg)
    else :
        # uniform sky across the focal plane (the standard case)
        skymodel = compute_uniform_sky(frame, nsig_clipping=nsig_clipping, max_iterations=max_iterations,
                                       model_ivar=model_ivar, add_variance=add_variance,
                                       adjust_wavelength=adjust_wavelength, adjust_lsf=adjust_lsf,
                                       only_use_skyfibers_for_adjustments=only_use_skyfibers_for_adjustments,
                                       pcacorr=pcacorr, fit_offsets=fit_offsets, fiberflat=fiberflat)

    # attach the per-fiber throughput corrections to the model before returning
    skymodel.throughput_corrections = calculate_throughput_corrections(frame,skymodel)

    return skymodel
def _model_variance(frame,cskyflux,cskyivar,skyfibers) :
    """Increase the sky model variance near sky lines so that chi2/ndf ~= 1.

    Compares the sky-subtraction residuals of the sky fibers to the current
    sky model uncertainty, and around each sky line peak scans for an extra
    wavelength-calibration error term (sigma_wave, in Angstroms) that brings
    the median reduced chi2 of the sky fibers down to one.

    Args:
        frame : Frame object with wave, flux and ivar attributes
        cskyflux : 2D array [nspec, nwave], convolved sky model for all fibers
        cskyivar : 2D array [nspec, nwave], statistical inverse variance of cskyflux
        skyfibers : 1D array of sky fiber indices in the frame

    Returns:
        2D array [nspec, nwave] : modified inverse variance of the sky model
        (zero wherever cskyivar is zero)
    """
    log = get_logger()

    # combined inverse variance of the sky-subtracted sky fiber residuals
    tivar = util.combine_ivar(frame.ivar[skyfibers], cskyivar[skyfibers])

    # the chi2 at a given wavelength can be large because on a cosmic
    # and not a psf error or sky non uniformity
    # so we need to consider only waves for which
    # a reasonable sky model error can be computed

    # mean sky and its derivative with respect to wavelength
    msky = np.mean(cskyflux,axis=0)
    dskydw = np.zeros(msky.shape)
    dskydw[1:-1]=(msky[2:]-msky[:-2])/(frame.wave[2:]-frame.wave[:-2])
    dskydw = np.abs(dskydw)

    # now we consider a worst possible sky model error (20% error on flat, 0.5A )
    max_possible_var = 1./(tivar+(tivar==0)) + (0.2*msky)**2 + (0.5*dskydw)**2

    # exclude residuals inconsistent with this max possible variance (at 3 sigma)
    bad = (frame.flux[skyfibers]-cskyflux[skyfibers])**2 > 3**2*max_possible_var
    tivar[bad]=0
    ndata = np.sum(tivar>0,axis=0)

    # reduced chi2 of the sky fiber residuals at each wavelength
    ok=np.where(ndata>1)[0]
    chi2 = np.zeros(frame.wave.size)
    chi2[ok] = np.sum(tivar*(frame.flux[skyfibers]-cskyflux[skyfibers])**2,axis=0)[ok]/(ndata[ok]-1)
    chi2[ndata<=1] = 1. # default

    # now we are going to evaluate a sky model error based on this chi2,
    # but only around sky flux peaks (>0.1*max)
    tmp = (msky[1:-1]>msky[2:])*(msky[1:-1]>msky[:-2])*(msky[1:-1]>0.1*np.max(msky))
    peaks = np.where(tmp)[0]+1

    dpix = 2 #eval error range
    dpix2 = 3 # scale error range (larger)

    input_skyvar = 1./(cskyivar+(cskyivar==0))
    skyvar = input_skyvar + 0.

    # loop on peaks
    for peak in peaks :
        b=peak-dpix
        e=peak+dpix+1
        b2=peak-dpix2
        e2=peak+dpix2+1

        # sky model variance = sigma_flat * msky + sigma_wave * dmskydw
        sigma_flat=0.005 # the fiber flat error is already included in the flux ivar, but empirical evidence we need an extra term
        res2=(frame.flux[skyfibers,b:e]-cskyflux[skyfibers,b:e])**2
        var=1./(tivar[:,b:e]+(tivar[:,b:e]==0))
        # grid of trial wavelength-error values (in A) to scan
        sigma_wave = np.arange(0.005, 2, 0.005)
        #- pivar has shape (nskyfibers, npix, nsigma_wave)
        pivar = (tivar[:, b:e, np.newaxis]>0)/((var+(sigma_flat*msky[b:e])**2)[..., np.newaxis] + ((sigma_wave[np.newaxis,:]*dskydw[b:e, np.newaxis])**2)[np.newaxis, ...])
        #- chi2_of_sky_fibers has shape (nskyfibers, nsigma_wave)
        #- NOTE(review): a sky fiber fully masked over [b:e) makes the per-fiber
        #- denominator zero (nan in that row); the median below usually absorbs it -- confirm
        chi2_of_sky_fibers = np.sum(pivar*res2[..., np.newaxis],axis=1)/np.sum(tivar[:,b:e]>0,axis=1)[:, np.newaxis]
        #- normalization from median to mean for chi2 with 3 d.o.f.
        norm = 0.7888
        #- median_chi2 has shape (nsigma_wave,)
        median_chi2 = np.median(chi2_of_sky_fibers, axis=0)/norm
        if np.any(median_chi2 <= 1):
            #- first sigma_wave with median_chi2 <= 1 is the peak
            sigma_wave_peak = sigma_wave[np.where(median_chi2 <= 1)[0][0]]
        else :
            # no trial value reaches chi2<=1 : cap at the end of the scanned range
            sigma_wave_peak = 2.
        log.info("peak at {}A : sigma_wave={}".format(int(frame.wave[peak]),sigma_wave_peak))
        # apply the inflated variance over the (wider) scale range around the peak
        skyvar[:,b2:e2] = input_skyvar[:,b2:e2] + (sigma_flat*msky[b2:e2])**2 + (sigma_wave_peak*dskydw[b2:e2])**2

    # return the modified inverse variance, zero where the input ivar was zero
    return (cskyivar>0)/(skyvar+(skyvar==0))
def compute_uniform_sky(frame, nsig_clipping=4.,max_iterations=100,model_ivar=False,add_variance=True,\
                        adjust_wavelength=True,adjust_lsf=True,only_use_skyfibers_for_adjustments = True, pcacorr=None, fit_offsets=False,fiberflat=None) :
    """Compute a sky model assumed identical for all fibers.

    Model: Sky[fiber,i] = R[fiber,i,j] Flux[j], i.e. a single deconvolved sky
    spectrum Flux[j] reconvolved with each fiber's resolution matrix.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
            - wave : 1D wavelength grid in Angstroms
            - flux : 2D flux[nspec, nwave] density
            - ivar : 2D inverse variance of flux
            - mask : 2D inverse mask flux (0=good)
            - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        adjust_wavelength : adjust the wavelength of the sky model on sky lines to improve the sky subtraction
        adjust_lsf : adjust the LSF width of the sky model on sky lines to improve the sky subtraction
        only_use_skyfibers_for_adjustments : interpolate adjustments using sky fibers only
        pcacorr : SkyCorrPCA object to interpolate the wavelength or LSF adjustment from sky fibers to all fibers
        fit_offsets : fit offsets for regions defined in calib
        fiberflat : desispec.FiberFlat object used for the fit of offsets

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask
    """
    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500 #- indices, not fiber numbers

    #- Hack: test tile 81097 (observed 20210430/00086750) had set
    #- FIBERSTATUS bit UNASSIGNED for sky targets on stuck positioners.
    #- Undo that.
    if (frame.meta is not None) and ('TILEID' in frame.meta) and (frame.meta['TILEID'] == 81097):
        log.info('Unsetting FIBERSTATUS UNASSIGNED for tileid 81097 sky fibers')
        frame.fibermap['FIBERSTATUS'][skyfibers] &= ~1

    nwave=frame.nwave

    # ivar with bad fiber / bad pixel bits already folded in
    current_ivar = get_fiberbitmasked_frame_arrays(frame,bitmask='sky',ivar_framemask=True,return_mask=False)

    # checking ivar because some sky fibers have been disabled
    bad=(np.sum(current_ivar[skyfibers]>0,axis=1)==0)
    good=~bad
    if np.any(bad) :
        log.warning("{} sky fibers discarded (because ivar=0 or bad FIBERSTATUS), only {} left.".format(np.sum(bad),np.sum(good)))
        skyfibers = skyfibers[good]
    if np.sum(good)==0 :
        message = "no valid sky fibers"
        log.error(message)
        raise RuntimeError(message)

    nfibers=len(skyfibers)
    current_ivar = current_ivar[skyfibers]
    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar=None
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            # smooth ivar model: fiber normalization times median wavelength profile
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    # sky fibers flagged as outliers during the fit (for the output mask)
    bad_skyfibers = []

    #max_iterations=2 ; log.warning("DEBUGGING LIMITING NUMBER OF ITERATIONS")

    # iterative weighted least-squares fit of the deconvolved sky,
    # with sigma clipping of outliers between iterations
    nout_tot=0
    for iteration in range(max_iterations) :

        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i = 1/2 d(chi2)/di
        # B_i = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        # the model is model[fiber]=R[fiber]*sky
        # and the parameters are the unconvolved sky flux at the wavelength i
        # so, d(model)/di[fiber,w] = R[fiber][w,i]
        # this gives
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w,i] R[fiber][w,j]
        # A = sum_fiber ( diag(sqrt(ivar))*R[fiber] ) ( diag(sqrt(ivar))* R[fiber] )^t
        # A = sum_fiber sqrtwR[fiber] sqrtwR[fiber]^t
        # and
        # B = sum_fiber sum_wave_w ivar[fiber,w] R[fiber][w] * flux[fiber,w]
        # B = sum_fiber sum_wave_w sqrt(ivar)[fiber,w]*flux[fiber,w] sqrtwR[fiber,wave]

        #A=scipy.sparse.lil_matrix((nwave,nwave)).tocsr()
        A=scipy.sparse.csr_matrix((nwave,nwave))
        B=np.zeros((nwave))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.dia_matrix((nwave,nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]

            A += sqrtwR.T*sqrtwR
            B += sqrtwR.T*sqrtwflux[fiber]

        A = A.toarray()

        log.info("iter %d solving"%iteration)
        # restrict to the wavelengths with non-zero diagonal so that the
        # reduced system is positive definite
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        deconvolved_sky = B*0
        try:
            deconvolved_sky[w]=cholesky_solve(A_pos_def,B[w])
        except:
            # NOTE(review): bare except; consider catching the specific
            # linear-algebra error raised by cholesky_solve
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            deconvolved_sky[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

        log.info("iter %d compute chi2"%iteration)

        medflux=np.zeros(nfibers)
        for fiber in range(nfibers) :
            # the parameters are directly the unconvolve sky flux
            # so we simply have to reconvolve it
            fiber_convolved_sky_flux = Rsky[fiber].dot(deconvolved_sky)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2
            ok=(current_ivar[fiber]>0)
            if np.sum(ok)>0 :
                medflux[fiber] = np.median((flux[fiber]-fiber_convolved_sky_flux)[ok])

        log.info("rejecting")

        # whole fiber with excess flux
        if np.sum(medflux!=0) > 2 : # at least 3 valid sky fibers
            rms_from_nmad = 1.48*np.median(np.abs(medflux[medflux!=0]))
            # discard fibers that are 7 sigma away
            badfibers=np.where(np.abs(medflux)>7*rms_from_nmad)[0]
            for fiber in badfibers :
                log.warning("discarding fiber {} with median flux = {:.2f} > 7*{:.2f}".format(skyfibers[fiber],medflux[fiber],rms_from_nmad))
                current_ivar[fiber]=0
                sqrtw[fiber]=0
                sqrtwflux[fiber]=0
                # set a mask bit here
                bad_skyfibers.append(skyfibers[fiber])

        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        # converged: no new outliers rejected in this iteration
        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # we know have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    log.info("compute the parameter covariance")
    # we may have to use a different method to compute this
    # covariance
    try :
        parameter_covar=cholesky_invert(A)
        # the above is too slow
        # maybe invert per block, sandwich by R
    except np.linalg.linalg.LinAlgError :
        # NOTE(review): np.linalg.linalg is a private alias removed in NumPy 2.0;
        # this except clause will itself fail on recent numpy -- confirm version pin
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")

    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar=Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var=np.diagonal(convolved_sky_covar)

    # inverse
    convolved_sky_ivar=(convolved_sky_var>0)/(convolved_sky_var+(convolved_sky_var==0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    for i in range(frame.nspec):
        cskyflux[i] = frame.R[i].dot(deconvolved_sky)

    # See if we can improve the sky model by readjusting the wavelength and/or the width of sky lines
    if adjust_wavelength or adjust_lsf :
        log.info("adjust the wavelength of sky spectrum on sky lines to improve sky subtraction ...")
        if adjust_wavelength :
            # compute derivative of sky w.r.t. wavelength
            dskydwave = np.gradient(cskyflux,axis=1)/np.gradient(frame.wave)
        else :
            dskydwave = None
        if adjust_lsf :
            # compute derivative of sky w.r.t. lsf width
            dwave = np.mean(np.gradient(frame.wave))
            dsigma_A = 0.3 #A
            dsigma_bin = dsigma_A/dwave # consider this extra width for the PSF (sigma' = sqrt(sigma**2+dsigma**2))
            hw=int(4*dsigma_bin)+1
            x=np.arange(-hw,hw+1)
            k=np.zeros((3,x.size)) # a Gaussian kernel
            k[1]=np.exp(-x**2/dsigma_bin**2/2.)
            k/=np.sum(k)
            tmp = fftconvolve(cskyflux,k,mode="same")
            dskydlsf = (tmp-cskyflux)/dsigma_A # variation of line shape with width
        else :
            dskydlsf = None

        # detect peaks in mean sky spectrum
        # peaks = local maximum larger than 10% of max peak
        meansky = np.mean(cskyflux,axis=0)
        tmp = (meansky[1:-1]>meansky[2:])*(meansky[1:-1]>meansky[:-2])*(meansky[1:-1]>0.1*np.max(meansky))
        peaks = np.where(tmp)[0]+1
        # remove edges
        peaks = peaks[(peaks>10)&(peaks<meansky.size-10)]
        peak_wave=frame.wave[peaks]
        log.info("Number of peaks: {}".format(peaks.size))
        if peaks.size < 10 :
            log.info("Wavelength of peaks: {}".format(peak_wave))

        # define area around each sky line to adjust
        dwave = np.mean(np.gradient(frame.wave))
        dpix = int(3//dwave)+1

        # number of parameters to fit for each peak: delta_wave , delta_lsf , scale of sky , a background (to absorb source signal)
        nparam = 2
        if adjust_wavelength : nparam += 1
        if adjust_lsf : nparam += 1

        AA=np.zeros((nparam,nparam))
        BB=np.zeros((nparam))

        # temporary arrays with best fit parameters on peaks
        # for each fiber, with errors and chi2/ndf
        peak_scale=np.zeros((frame.nspec,peaks.size))
        peak_scale_err=np.zeros((frame.nspec,peaks.size))
        peak_dw=np.zeros((frame.nspec,peaks.size))
        peak_dw_err=np.zeros((frame.nspec,peaks.size))
        peak_dlsf=np.zeros((frame.nspec,peaks.size))
        peak_dlsf_err=np.zeros((frame.nspec,peaks.size))
        peak_chi2pdf=np.zeros((frame.nspec,peaks.size))

        # interpolated values across peaks, after selection
        # based on precision and chi2
        interpolated_sky_dwave=np.zeros(frame.flux.shape)
        interpolated_sky_dlsf=np.zeros(frame.flux.shape)

        # loop on fibers and then on sky spectrum peaks
        if only_use_skyfibers_for_adjustments :
            fibers_in_fit = skyfibers
        else :
            fibers_in_fit = np.arange(frame.nspec)
        # restrict to fibers with ivar!=0
        ok = np.sum(frame.ivar[fibers_in_fit],axis=1)>0
        fibers_in_fit = fibers_in_fit[ok]

        # loop on sky spectrum peaks, compute for all fibers simultaneously
        for j,peak in enumerate(peaks) :
            b = peak-dpix
            e = peak+dpix+1
            npix = e - b
            # fancy indexing: these are copies, not views of frame arrays
            flux = frame.flux[fibers_in_fit][:,b:e]
            ivar = frame.ivar[fibers_in_fit][:,b:e]
            if b < 0 or e > frame.flux.shape[1] :
                log.warning("skip peak on edge of spectrum with b={} e={}".format(b,e))
                continue
            # design matrix: constant background, sky scale, and optional derivatives
            M = np.zeros((fibers_in_fit.size, nparam, npix))
            index = 0
            M[:, index] = np.ones(npix); index += 1
            M[:, index] = cskyflux[fibers_in_fit][:, b:e]; index += 1
            if adjust_wavelength : M[:, index] = dskydwave[fibers_in_fit][:, b:e]; index += 1
            if adjust_lsf : M[:, index] = dskydlsf[fibers_in_fit][:, b:e]; index += 1
            # Solve (M * W * M.T) X = (M * W * flux)
            BB = np.einsum('ijk,ik->ij', M, ivar*flux)
            AA = np.einsum('ijk,ik,ilk->ijl', M, ivar, M)
            # solve linear system
            #- TODO: replace with X = np.linalg.solve(AA, BB) ?
            try:
                AAi=np.linalg.inv(AA)
            except np.linalg.LinAlgError as e:
                # NOTE(review): 'e' shadows the slice-end variable above;
                # harmless here since 'e' is recomputed at the top of each iteration
                log.warning(str(e))
                continue
            # save best fit parameter and errors
            X = np.einsum('ijk,ik->ij', AAi, BB)
            X_err = np.sqrt(AAi*(AAi>0))
            index = 1
            peak_scale[fibers_in_fit,j] = X[:, index]
            peak_scale_err[fibers_in_fit,j] = X_err[:, index, index]
            index += 1
            if adjust_wavelength:
                peak_dw[fibers_in_fit, j] = X[:, index]
                peak_dw_err[fibers_in_fit, j] = X_err[:, index, index]
                index += 1
            if adjust_lsf:
                peak_dlsf[fibers_in_fit, j] = X[:, index]
                peak_dlsf_err[fibers_in_fit, j] = X_err[:, index, index]
                index += 1
            # 'residuals' aliases the local copy 'flux', mutated in place below
            residuals = flux
            for index in range(nparam) :
                #for index in range(3) : # needed for compatibility with master (but this was a bug)
                residuals -= X[:,index][:, np.newaxis]*M[:,index]
            variance = 1.0/(ivar+(ivar==0)) + (0.05*M[:,1])**2
            peak_chi2pdf[fibers_in_fit, j] = np.sum((ivar>0)/variance*(residuals)**2, axis=1)/(npix-nparam)

        for i in fibers_in_fit :
            # for each fiber, select valid peaks and interpolate
            ok=(peak_chi2pdf[i]<2)
            if adjust_wavelength :
                ok &= (peak_dw_err[i]>0.)&(peak_dw_err[i]<0.1) # error on wavelength shift
            if adjust_lsf :
                ok &= (peak_dlsf_err[i]>0.)&(peak_dlsf_err[i]<0.3) # error on line width (quadratic, so 0.3 mean a change of width of 0.3**2/2~5%)
            # piece-wise linear interpolate across the whole spectrum between the sky line peaks
            # this interpolation will be used to alter the whole sky spectrum
            if np.sum(ok)>0 :
                if adjust_wavelength :
                    interpolated_sky_dwave[i]=np.interp(frame.wave,peak_wave[ok],peak_dw[i,ok])
                if adjust_lsf :
                    interpolated_sky_dlsf[i]=np.interp(frame.wave,peak_wave[ok],peak_dlsf[i,ok])
                line=""
                if adjust_wavelength :
                    line += " dlambda mean={:4.3f} rms={:4.3f} A".format(np.mean(interpolated_sky_dwave[i]),np.std(interpolated_sky_dwave[i]))
                if adjust_lsf :
                    line += " dlsf mean={:4.3f} rms={:4.3f} A".format(np.mean(interpolated_sky_dlsf[i]),np.std(interpolated_sky_dlsf[i]))
                log.info(line)

        # we ignore the interpolated_sky_scale which is too sensitive
        # to CCD defects or cosmic rays
        if pcacorr is None :
            if only_use_skyfibers_for_adjustments :
                goodfibers=fibers_in_fit
            else : # keep all except bright objects and interpolate over them
                mflux=np.median(frame.flux,axis=1)
                mmflux=np.median(mflux)
                rms=1.48*np.median(np.abs(mflux-mmflux))
                selection=(mflux<mmflux+2*rms)
                # at least 80% of good pixels
                ngood=np.sum((frame.ivar>0)*(frame.mask==0),axis=1)
                selection &= (ngood>0.8*frame.flux.shape[1])
                # NOTE(review): 'selection' (including the ngood cut) is computed but
                # unused; goodfibers re-applies only the flux cut -- confirm intent
                goodfibers=np.where(mflux<mmflux+2*rms)[0]
                # NOTE(review): extra positional argument without a placeholder in
                # the message; likely meant a format/%s -- confirm
                log.info("number of good fibers=",goodfibers.size)
            allfibers=np.arange(frame.nspec)
            # the actual median filtering
            if adjust_wavelength :
                for j in range(interpolated_sky_dwave.shape[1]) :
                    interpolated_sky_dwave[:,j] = np.interp(np.arange(interpolated_sky_dwave.shape[0]),goodfibers,interpolated_sky_dwave[goodfibers,j])
                cskyflux += interpolated_sky_dwave*dskydwave
            if adjust_lsf : # simple interpolation over fibers
                for j in range(interpolated_sky_dlsf.shape[1]) :
                    interpolated_sky_dlsf[:,j] = np.interp(np.arange(interpolated_sky_dlsf.shape[0]),goodfibers,interpolated_sky_dlsf[goodfibers,j])
                cskyflux += interpolated_sky_dlsf*dskydlsf
        else :
            def fit_and_interpolate(delta,skyfibers,mean,components,label="") :
                # fit a linear combination of the PCA mean and eigenvectors on the
                # sky fiber measurements, and return it evaluated for all fibers
                mean_and_components = np.zeros((components.shape[0]+1,
                                                components.shape[1],
                                                components.shape[2]))
                mean_and_components[0] = mean
                mean_and_components[1:] = components
                ncomp=mean_and_components.shape[0]
                log.info("Will fit a linear combination on {} components for {}".format(ncomp,label))
                AA=np.zeros((ncomp,ncomp))
                BB=np.zeros(ncomp)
                for i in range(ncomp) :
                    BB[i] = np.sum(delta[skyfibers]*mean_and_components[i][skyfibers])
                    for j in range(i,ncomp) :
                        AA[i,j] = np.sum(mean_and_components[i][skyfibers]*mean_and_components[j][skyfibers])
                        if j!=i :
                            AA[j,i]=AA[i,j]
                AAi=np.linalg.inv(AA)
                X=AAi.dot(BB)
                log.info("Best fit linear coefficients for {} = {}".format(label,list(X)))
                result = np.zeros_like(delta)
                for i in range(ncomp) :
                    result += X[i]*mean_and_components[i]
                return result

            # we are going to fit a linear combination of the PCA coefficients only on the sky fibers
            # and then apply the linear combination to all fibers
            log.info("Use PCA skycorr")
            if adjust_wavelength :
                correction = fit_and_interpolate(interpolated_sky_dwave,skyfibers,\
                                                 pcacorr.dwave_mean,pcacorr.dwave_eigenvectors,label="wavelength")
                cskyflux += correction*dskydwave
            if adjust_lsf :
                correction = fit_and_interpolate(interpolated_sky_dlsf,skyfibers,\
                                                 pcacorr.dlsf_mean,pcacorr.dlsf_eigenvectors,label="LSF")
                cskyflux += correction*dskydlsf

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()

    # set sky flux and ivar to zero to poorly constrained regions
    # and add margins to avoid expolation issues with the resolution matrix
    wmask = (np.diagonal(A)<=0).astype(float)
    # empirically, need to account for the full width of the resolution band
    # (realized here by applying twice the resolution)
    wmask = Rmean.dot(Rmean.dot(wmask))
    bad = np.where(wmask!=0)[0]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    # minimum number of fibers at each wavelength
    min_number_of_fibers = min(10,max(1,skyfibers.size//2))
    fibers_with_signal=np.sum(current_ivar>0,axis=0)
    bad = (fibers_with_signal<min_number_of_fibers)
    # increase by 1 pixel
    bad[1:-1] |= bad[2:]
    bad[1:-1] |= bad[:-2]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    if fit_offsets :
        # collect CCD sectors with known electronic offsets from the calibration
        cfinder = CalibFinder([frame.meta])
        amps = get_amp_ids(frame.meta)
        sectors=[]
        for amp in amps :
            key="OFFCOLS"+amp
            sec = parse_sec_keyword(frame.meta['CCDSEC'+amp])
            yb=sec[0].start
            ye=sec[0].stop
            if cfinder.haskey(key) :
                val=cfinder.value(key)
                for tmp1 in val.split(",") :
                    tmp2=tmp1.split(":")
                    if len(tmp2)!=2 :
                        mess="cannot decode {}={}".format(key,val)
                        log.error(mess)
                        raise KeyError(mess)
                    xb=max(sec[1].start,int(tmp2[0]))
                    xe=min(sec[1].stop,int(tmp2[1]))
                    sector = [yb,ye,xb,xe]
                    sectors.append( sector )
                    log.info("Adding CCD sector in amp {} with offset: {}".format(amp,sector))
        if len(sectors)>0 :
            # fit as many parameters as twice the number of sectors
            # (one offset for fibers/wavelength in the sector and one for the others to compensate)
            # need coordinates of fiber traces
            psf_filename = findfile('psf',frame.meta["NIGHT"],frame.meta["EXPID"],frame.meta["CAMERA"])
            if not os.path.isfile(psf_filename) :
                log.error("No PSF file "+psf_filename)
                raise IOError("No PSF file "+psf_filename)
            log.info("Using PSF {}".format(psf_filename))
            tset = read_xytraceset(psf_filename)
            # CCD (x,y) coordinates of each fiber trace at each wavelength
            tmp_fibers = np.arange(frame.nspec)
            tmp_x = np.zeros(frame.flux.shape,dtype=float)
            tmp_y = np.zeros(frame.flux.shape,dtype=float)
            for fiber in tmp_fibers :
                tmp_x[fiber] = tset.x_vs_wave(fiber=fiber,wavelength=frame.wave)
                tmp_y[fiber] = tset.y_vs_wave(fiber=fiber,wavelength=frame.wave)
            if fiberflat is not None :
                flat=fiberflat.fiberflat
                log.info("Use fiberflat when fitting for offsets")
            else :
                flat=np.ones(frame.flux.shape)
            bkg = np.zeros(cskyflux.shape)
            tmp_skyflux = cskyflux.copy()
            # we fit one sector at a time
            for sector in sectors :
                # mask for sky fibers only
                mask_in = (tmp_y[skyfibers]>=sector[0])&(tmp_y[skyfibers]<sector[1])&(tmp_x[skyfibers]>=sector[2])&(tmp_x[skyfibers]<sector[3])
                mask_out = (tmp_y[skyfibers]>=sector[0])&(tmp_y[skyfibers]<sector[1])&(~mask_in) # same y range
                sw = np.sum(current_ivar[mask_in])
                if sw>0 :
                    offset_in = np.sum(current_ivar[mask_in]*(frame.flux[skyfibers][mask_in]-tmp_skyflux[skyfibers][mask_in])*flat[skyfibers][mask_in])/sw
                else :
                    offset_in = 0.
                sw = np.sum(current_ivar[mask_out])
                if sw>0 :
                    offset_out = np.sum(current_ivar[mask_out]*(frame.flux[skyfibers][mask_out]-tmp_skyflux[skyfibers][mask_out])*flat[skyfibers][mask_out])/sw
                else :
                    offset_out = 0.
                log.info("sector {} offset in = {:.2f} out = {:.2f}".format(sector,offset_in,offset_out))
                # mask for all fibers now
                mask_in = (tmp_y>=sector[0])&(tmp_y<sector[1])&(tmp_x>=sector[2])&(tmp_x<sector[3])
                mask_out = (tmp_y>=sector[0])&(tmp_y<sector[1])&(~mask_in) # same y range
                # save diff of terms in bkg in mask
                bkg[mask_in] += (offset_in-offset_out)
                # apply two terms to tmp sky model while fitting the other terms
                tmp_skyflux[mask_in] += offset_in
                tmp_skyflux[mask_out] += offset_out
            bkg /= flat # flatfield the background
            # now we are going to temporarily remove the bkg in the frame.flux data, refit the sky, and then finally add back the bkg to the sky model
            frame.flux -= bkg
            # refit sky (without fit_offsets!) (costly but the most reliable way to account for the fitted background)
            skymodel = compute_uniform_sky(frame,nsig_clipping=nsig_clipping,max_iterations=max_iterations,
                                           model_ivar=model_ivar,add_variance=add_variance,
                                           adjust_wavelength=adjust_wavelength,adjust_lsf=adjust_lsf,
                                           only_use_skyfibers_for_adjustments=only_use_skyfibers_for_adjustments,pcacorr=pcacorr,
                                           fit_offsets=False,fiberflat=None)
            # add back the background and return
            frame.flux += bkg # to the frame flux in case we use it afterwards
            skymodel.flux += bkg # to the sky model
            return skymodel

    mask = (modified_cskyivar==0).astype(np.uint32)
    # add mask bits for bad sky fibers
    bad_skyfibers = np.unique(bad_skyfibers)
    if bad_skyfibers.size > 0 :
        mask[bad_skyfibers] |= specmask.mask("BADSKY")

    skymodel = SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                        nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
    if adjust_wavelength :
        skymodel.dwave = interpolated_sky_dwave
    if adjust_lsf :
        skymodel.dlsf = interpolated_sky_dlsf
    return skymodel
def compute_polynomial_times_sky(frame, nsig_clipping=4.,max_iterations=30,model_ivar=False,add_variance=True,angular_variation_deg=1,chromatic_variation_deg=1) :
    """Compute a sky model of the form

        Sky[fiber,i] = R[fiber,i,j] Polynomial(x[fiber],y[fiber],wavelength[j]) Flux[j]

    i.e. a single deconvolved sky spectrum modulated by a low-order polynomial
    of the fiber focal-plane coordinates and wavelength, convolved with each
    fiber's resolution.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
            - wave : 1D wavelength grid in Angstroms
            - flux : 2D flux[nspec, nwave] density
            - ivar : 2D inverse variance of flux
            - mask : 2D inverse mask flux (0=good)
            - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : int, degree of the polynomial in the focal plane coordinates (x,y)
        chromatic_variation_deg : int, degree of the polynomial in reduced wavelength

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    # ivar with fiber bitmask applied (flagged fibers get ivar=0)
    current_ivar = get_fiberbitmasked_frame_arrays(frame,bitmask='sky',ivar_framemask=True,return_mask=False)
    current_ivar = current_ivar[skyfibers]

    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    input_ivar=None
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        # model the ivar as a separable product of a per-fiber scale and the
        # median ivar vs wavelength, so the weights are decorrelated from the
        # per-pixel flux fluctuations
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    # need focal plane coordinates
    x = frame.fibermap["FIBERASSIGN_X"]
    y = frame.fibermap["FIBERASSIGN_Y"]

    # normalize for numerical stability
    xm = np.mean(x)
    ym = np.mean(y)
    xs = np.std(x)
    ys = np.std(y)
    if xs==0 : xs = 1
    if ys==0 : ys = 1
    x = (x-xm)/xs
    y = (y-ym)/ys
    # reduced wavelength mapped to [-1,1]
    w = (frame.wave-frame.wave[0])/(frame.wave[-1]-frame.wave[0])*2.-1

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={} and {}".format(angular_variation_deg,chromatic_variation_deg))
    monomials=[]
    for dx in range(angular_variation_deg+1) :
        for dy in range(angular_variation_deg+1-dx) :
            xypol = (x**dx)*(y**dy)
            for dw in range(chromatic_variation_deg+1) :
                wpol=w**dw
                monomials.append(np.outer(xypol,wpol))

    ncoef=len(monomials)
    coef=np.zeros((ncoef))

    allfibers_monomials=np.array(monomials)
    log.debug("shape of allfibers_monomials = {}".format(allfibers_monomials.shape))

    skyfibers_monomials = allfibers_monomials[:,skyfibers,:]
    log.debug("shape of skyfibers_monomials = {}".format(skyfibers_monomials.shape))

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    # polynomial values per sky fiber; start with a flat polynomial = 1
    Pol = np.ones(flux.shape,dtype=float)
    coef[0] = 1.

    nout_tot=0
    previous_chi2=-10.
    for iteration in range(max_iterations) :
        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i = 1/2 d(chi2)/di
        # B_i = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        # the model is model[fiber]=R[fiber]*Pol(x,y,wave)*sky
        # the parameters are the unconvolved sky flux at the wavelength i
        # and the polynomial coefficients
        A=scipy.sparse.csr_matrix((nwave,nwave),dtype=float)
        B=np.zeros((nwave),dtype=float)
        D=scipy.sparse.dia_matrix((nwave,nwave))
        D2=scipy.sparse.dia_matrix((nwave,nwave))

        Pol /= coef[0] # force constant term to 1.

        # solving for the deconvolved mean sky spectrum
        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber (1st fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            D2.setdiag(Pol[fiber])
            sqrtwRP = D.dot(Rsky[fiber]).dot(D2) # each row r of R is multiplied by sqrtw[r]
            A += sqrtwRP.T*sqrtwRP
            B += sqrtwRP.T*sqrtwflux[fiber]
        A = A.toarray()

        log.info("iter %d solving"%iteration)
        # solve only for wavelengths with signal.
        # NOTE: this reuses (shadows) the reduced-wavelength variable `w`
        # defined above, which is no longer needed at this point.
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

        # parameters = the deconvolved mean sky spectrum

        # now evaluate the polynomial coefficients, keeping the sky spectrum fixed
        Ap=np.zeros((ncoef,ncoef),dtype=float)
        Bp=np.zeros((ncoef),dtype=float)
        D2.setdiag(parameters)
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber (2nd fit) %d/%d"%(iteration,fiber,nfibers))
            D.setdiag(sqrtw[fiber])
            sqrtwRSM = D.dot(Rsky[fiber]).dot(D2).dot(skyfibers_monomials[:,fiber,:].T)
            Ap += sqrtwRSM.T.dot(sqrtwRSM)
            Bp += sqrtwRSM.T.dot(sqrtwflux[fiber])

        # Add huge prior on zeroth angular order terms to converge faster
        # (because those terms are degenerate with the mean deconvolved spectrum)
        weight=1e24
        Ap[0,0] += weight
        Bp[0] += weight # force 0th term to 1
        for i in range(1,chromatic_variation_deg+1) :
            Ap[i,i] += weight # force other wavelength terms to 0

        coef=cholesky_solve(Ap,Bp)
        log.info("pol coef = {}".format(coef))

        # recompute the polynomial values
        Pol = skyfibers_monomials.T.dot(coef).T

        # chi2 and outlier rejection
        log.info("iter %d compute chi2"%iteration)
        for fiber in range(nfibers) :
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-Rsky[fiber].dot(Pol[fiber]*parameters))**2

        log.info("rejecting")
        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%g ndf=%d chi2pdf=%f delta=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,abs(sum_chi2-previous_chi2),nout_iter))

        # converged: nothing rejected and chi2 stable
        if nout_iter == 0 and abs(sum_chi2-previous_chi2)<0.2 :
            break
        previous_chi2 = sum_chi2+0.

    log.info("nout tot=%d"%nout_tot)

    # we know have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # we ignore here the fact that we have fit a angular variation,
    # so the sky model uncertainties are inaccurate

    log.info("compute the parameter covariance")
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")
    # The parameters are directly the unconvolved sky
    # First convolve with average resolution :
    convolved_sky_covar=Rmean.dot(parameter_covar).dot(Rmean.T.todense())

    # and keep only the diagonal
    convolved_sky_var=np.diagonal(convolved_sky_covar)

    # inverse (guarded against division by zero)
    convolved_sky_ivar=(convolved_sky_var>0)/(convolved_sky_var+(convolved_sky_var==0))

    # and simply consider it's the same for all spectra
    cskyivar = np.tile(convolved_sky_ivar, frame.nspec).reshape(frame.nspec, nwave)

    # The sky model for each fiber (simple convolution with resolution of each fiber)
    cskyflux = np.zeros(frame.flux.shape)
    # evaluate the polynomial for all fibers of the frame, not only sky fibers
    Pol = allfibers_monomials.T.dot(coef).T
    for fiber in range(frame.nspec):
        cskyflux[fiber] = frame.R[fiber].dot(Pol[fiber]*parameters)

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()

    # set sky flux and ivar to zero to poorly constrained regions
    # and add margins to avoid expolation issues with the resolution matrix
    wmask = (np.diagonal(A)<=0).astype(float)
    # empirically, need to account for the full width of the resolution band
    # (realized here by applying twice the resolution)
    wmask = Rmean.dot(Rmean.dot(wmask))
    bad = np.where(wmask!=0)[0]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    # minimum number of fibers at each wavelength
    min_number_of_fibers = min(10,max(1,skyfibers.size//2))
    fibers_with_signal=np.sum(current_ivar>0,axis=0)
    bad = (fibers_with_signal<min_number_of_fibers)
    # increase by 1 pixel
    bad[1:-1] |= bad[2:]
    bad[1:-1] |= bad[:-2]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    # need to do better here
    mask = (modified_cskyivar==0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
def compute_non_uniform_sky(frame, nsig_clipping=4.,max_iterations=10,model_ivar=False,add_variance=True,angular_variation_deg=1) :
    """Compute a sky model of the form

        Sky[fiber,i] = R[fiber,i,j] ( Flux_0[j] + x[fiber]*Flux_x[j] + y[fiber]*Flux_y[j] + ... )

    i.e. one unconvolved sky spectrum per polynomial coefficient of the fiber
    focal-plane coordinates, all fit simultaneously.

    Input flux are expected to be flatfielded!
    We don't check this in this routine.

    Args:
        frame : Frame object, which includes attributes
            - wave : 1D wavelength grid in Angstroms
            - flux : 2D flux[nspec, nwave] density
            - ivar : 2D inverse variance of flux
            - mask : 2D inverse mask flux (0=good)
            - resolution_data : 3D[nspec, ndiag, nwave] (only sky fibers)
        nsig_clipping : [optional] sigma clipping value for outlier rejection

    Optional:
        max_iterations : int , number of iterations
        model_ivar : replace ivar by a model to avoid bias due to correlated flux and ivar. this has a negligible effect on sims.
        add_variance : evaluate calibration error and add this to the sky model variance
        angular_variation_deg : degree of 2D polynomial correction as a function of fiber focal plane coordinates (default=1). One set of coefficients per wavelength

    Returns:
        SkyModel object with attributes wave, flux, ivar, mask
    """

    log=get_logger()
    log.info("starting")

    # Grab sky fibers on this frame
    skyfibers = np.where(frame.fibermap['OBJTYPE'] == 'SKY')[0]
    assert np.max(skyfibers) < 500  #- indices, not fiber numbers

    nwave=frame.nwave
    nfibers=len(skyfibers)

    # ivar with fiber bitmask and per-pixel mask applied
    current_ivar = get_fiberbitmasked_frame_arrays(frame,bitmask='sky',ivar_framemask=True,return_mask=False)
    current_ivar *= (frame.mask==0)
    current_ivar = current_ivar[skyfibers]

    flux = frame.flux[skyfibers]
    Rsky = frame.R[skyfibers]

    # need focal plane coordinates of fibers
    x = frame.fibermap["FIBERASSIGN_X"][skyfibers]
    y = frame.fibermap["FIBERASSIGN_Y"][skyfibers]

    # normalize for numerical stability
    # (statistics are taken over ALL fibers of the frame so the same
    # normalization can be reused below when evaluating the model per fiber)
    xm = np.mean(frame.fibermap["FIBERASSIGN_X"])
    ym = np.mean(frame.fibermap["FIBERASSIGN_Y"])
    xs = np.std(frame.fibermap["FIBERASSIGN_X"])
    ys = np.std(frame.fibermap["FIBERASSIGN_Y"])
    if xs==0 : xs = 1
    if ys==0 : ys = 1
    x = (x-xm)/xs
    y = (y-ym)/ys

    # precompute the monomials for the sky fibers
    log.debug("compute monomials for deg={}".format(angular_variation_deg))
    monomials=[]
    for dx in range(angular_variation_deg+1) :
        for dy in range(angular_variation_deg+1-dx) :
            monomials.append((x**dx)*(y**dy))
    ncoef=len(monomials)
    monomials=np.array(monomials)

    input_ivar=None
    if model_ivar :
        log.info("use a model of the inverse variance to remove bias due to correlated ivar and flux")
        # model the ivar as a separable product of a per-fiber scale and the
        # median ivar vs wavelength, to decorrelate weights from flux
        input_ivar=current_ivar.copy()
        median_ivar_vs_wave = np.median(current_ivar,axis=0)
        median_ivar_vs_fiber = np.median(current_ivar,axis=1)
        median_median_ivar = np.median(median_ivar_vs_fiber)
        for f in range(current_ivar.shape[0]) :
            threshold=0.01
            current_ivar[f] = median_ivar_vs_fiber[f]/median_median_ivar * median_ivar_vs_wave
            # keep input ivar for very low weights
            ii=(input_ivar[f]<=(threshold*median_ivar_vs_wave))
            #log.info("fiber {} keep {}/{} original ivars".format(f,np.sum(ii),current_ivar.shape[1]))
            current_ivar[f][ii] = input_ivar[f][ii]

    sqrtw=np.sqrt(current_ivar)
    sqrtwflux=sqrtw*flux

    chi2=np.zeros(flux.shape)

    nout_tot=0
    for iteration in range(max_iterations) :
        # the matrix A is 1/2 of the second derivative of the chi2 with respect to the parameters
        # A_ij = 1/2 d2(chi2)/di/dj
        # A_ij = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * d(model)/dj[fiber,w]
        # the vector B is 1/2 of the first derivative of the chi2 with respect to the parameters
        # B_i = 1/2 d(chi2)/di
        # B_i = sum_fiber sum_wave_w ivar[fiber,w] d(model)/di[fiber,w] * (flux[fiber,w]-model[fiber,w])
        # with x_fiber,y_fiber the fiber coordinates in the focal plane (or sky)
        # the unconvolved sky flux at wavelength i is a polynomial of x_fiber,y_fiber
        # sky(fiber,i) = pol(x_fiber,y_fiber,p) = sum_p a_ip * x_fiber**degx(p) y_fiber**degy(p)
        # sky(fiber,i) = sum_p monom[fiber,p] * a_ip
        # the convolved sky flux at wavelength w is
        # model[fiber,w] = sum_i R[fiber][w,i] sum_p monom[fiber,p] * a_ip
        # model[fiber,w] = sum_p monom[fiber,p] R[fiber][w,i] a_ip
        #
        # so, the matrix A is composed of blocks (p,k) corresponding to polynomial coefficient indices where
        # A[pk] = sum_fiber monom[fiber,p]*monom[fiber,k] sqrtwR[fiber] sqrtwR[fiber]^t
        # similarily
        # B[p] = sum_fiber monom[fiber,p] * sum_wave_w (sqrt(ivar)[fiber,w]*flux[fiber,w]) sqrtwR[fiber,wave]
        A=np.zeros((nwave*ncoef,nwave*ncoef))
        B=np.zeros((nwave*ncoef))

        # diagonal sparse matrix with content = sqrt(ivar)*flat of a given fiber
        SD=scipy.sparse.lil_matrix((nwave,nwave))

        # loop on fiber to handle resolution
        for fiber in range(nfibers) :
            if fiber%10==0 :
                log.info("iter %d sky fiber %d/%d"%(iteration,fiber,nfibers))
            R = Rsky[fiber]

            # diagonal sparse matrix with content = sqrt(ivar)
            SD.setdiag(sqrtw[fiber])

            sqrtwR = SD*R # each row r of R is multiplied by sqrtw[r]
            #wRtR=(sqrtwR.T*sqrtwR).tocsr()
            wRtR=(sqrtwR.T*sqrtwR).todense()
            wRtF=sqrtwR.T*sqrtwflux[fiber]
            # loop on polynomial coefficients (double loop for A)
            # fill only blocks of A and B
            for p in range(ncoef) :
                for k in range(ncoef) :
                    A[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave] += monomials[p,fiber]*monomials[k,fiber]*wRtR
                B[p*nwave:(p+1)*nwave] += monomials[p,fiber]*wRtF

        log.info("iter %d solving"%iteration)
        # restrict the solve to rows/columns with signal
        w = A.diagonal()>0
        A_pos_def = A[w,:]
        A_pos_def = A_pos_def[:,w]
        parameters = B*0
        try:
            parameters[w]=cholesky_solve(A_pos_def,B[w])
        except:
            log.info("cholesky failed, trying svd in iteration {}".format(iteration))
            parameters[w]=np.linalg.lstsq(A_pos_def,B[w])[0]

        log.info("iter %d compute chi2"%iteration)
        for fiber in range(nfibers) :
            # loop on polynomial indices
            unconvolved_fiber_sky_flux = np.zeros(nwave)
            for p in range(ncoef) :
                unconvolved_fiber_sky_flux += monomials[p,fiber]*parameters[p*nwave:(p+1)*nwave]
            # then convolve
            fiber_convolved_sky_flux = Rsky[fiber].dot(unconvolved_fiber_sky_flux)
            chi2[fiber]=current_ivar[fiber]*(flux[fiber]-fiber_convolved_sky_flux)**2

        log.info("rejecting")
        nout_iter=0
        if iteration<1 :
            # only remove worst outlier per wave
            # apply rejection iteratively, only one entry per wave among fibers
            # find waves with outlier (fastest way)
            nout_per_wave=np.sum(chi2>nsig_clipping**2,axis=0)
            selection=np.where(nout_per_wave>0)[0]
            for i in selection :
                worst_entry=np.argmax(chi2[:,i])
                current_ivar[worst_entry,i]=0
                sqrtw[worst_entry,i]=0
                sqrtwflux[worst_entry,i]=0
                nout_iter += 1
        else :
            # remove all of them at once
            bad=(chi2>nsig_clipping**2)
            current_ivar *= (bad==0)
            sqrtw *= (bad==0)
            sqrtwflux *= (bad==0)
            nout_iter += np.sum(bad)

        nout_tot += nout_iter

        sum_chi2=float(np.sum(chi2))
        ndf=int(np.sum(chi2>0)-nwave)
        chi2pdf=0.
        if ndf>0 :
            chi2pdf=sum_chi2/ndf
        log.info("iter #%d chi2=%f ndf=%d chi2pdf=%f nout=%d"%(iteration,sum_chi2,ndf,chi2pdf,nout_iter))

        if nout_iter == 0 :
            break

    log.info("nout tot=%d"%nout_tot)

    # we know have to compute the sky model for all fibers
    # and propagate the uncertainties

    # no need to restore the original ivar to compute the model errors when modeling ivar
    # the sky inverse variances are very similar

    # is there a different method to compute this ?
    log.info("compute covariance")
    try :
        parameter_covar=cholesky_invert(A)
    except np.linalg.linalg.LinAlgError :
        log.warning("cholesky_solve_and_invert failed, switching to np.linalg.lstsq and np.linalg.pinv")
        parameter_covar = np.linalg.pinv(A)

    log.info("compute mean resolution")
    # we make an approximation for the variance to save CPU time
    # we use the average resolution of all fibers in the frame:
    mean_res_data=np.mean(frame.resolution_data,axis=0)
    Rmean = Resolution(mean_res_data)

    log.info("compute convolved sky and ivar")
    cskyflux = np.zeros(frame.flux.shape)
    cskyivar = np.zeros(frame.flux.shape)

    log.info("compute convolved parameter covariance")
    # The covariance of the parameters is composed of ncoef*ncoef blocks each of size nwave*nwave
    # A block (p,k) is the covariance of the unconvolved spectra p and k , corresponding to the polynomial indices p and k
    # We first sandwich each block with the average resolution.
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_covar[p,k] = np.diagonal(Rmean.dot(parameter_covar[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave]).dot(Rmean.T.todense()))

    # dead experimental code kept for reference (string literal, never executed)
    '''
    import astropy.io.fits as pyfits
    pyfits.writeto("convolved_parameter_covar.fits",convolved_parameter_covar,overwrite=True)
    # other approach
    log.info("dense Rmean...")
    Rmean=Rmean.todense()
    log.info("invert Rinv...")
    Rinv=np.linalg.inv(Rmean)
    # check this
    print("0?",np.max(np.abs(Rinv.dot(Rmean)-np.eye(Rmean.shape[0])))/np.max(np.abs(Rmean)))
    convolved_parameter_ivar=np.zeros((ncoef,ncoef,nwave))
    for p in range(ncoef) :
        for k in range(ncoef) :
            convolved_parameter_ivar[p,k] = np.diagonal(Rinv.T.dot(A[p*nwave:(p+1)*nwave,k*nwave:(k+1)*nwave]).dot(Rinv))
    # solve for each wave separately
    convolved_parameter_covar=np.zeros((ncoef,ncoef,nwave))
    for i in range(nwave) :
        print("inverting ivar of wave %d/%d"%(i,nwave))
        convolved_parameter_covar[:,:,i] = cholesky_invert(convolved_parameter_ivar[:,:,i])
    pyfits.writeto("convolved_parameter_covar_bis.fits",convolved_parameter_covar,overwrite=True)
    import sys
    sys.exit(12)
    '''

    # Now we compute the sky model variance for each fiber individually
    # accounting for its focal plane coordinates
    # so that a target fiber distant for a sky fiber will naturally have a larger
    # sky model variance
    log.info("compute sky and variance per fiber")
    for i in range(frame.nspec):
        # compute monomials for this fiber, with the same normalization as the fit
        M = []
        xi=(frame.fibermap["FIBERASSIGN_X"][i]-xm)/xs
        yi=(frame.fibermap["FIBERASSIGN_Y"][i]-ym)/ys
        for dx in range(angular_variation_deg+1) :
            for dy in range(angular_variation_deg+1-dx) :
                M.append((xi**dx)*(yi**dy))
        M = np.array(M)

        unconvolved_fiber_sky_flux=np.zeros(nwave)
        convolved_fiber_skyvar=np.zeros(nwave)
        for p in range(ncoef) :
            unconvolved_fiber_sky_flux += M[p]*parameters[p*nwave:(p+1)*nwave]
            for k in range(ncoef) :
                convolved_fiber_skyvar += M[p]*M[k]*convolved_parameter_covar[p,k]

        # convolve sky model with this fiber's resolution
        cskyflux[i] = frame.R[i].dot(unconvolved_fiber_sky_flux)

        # save inverse of variance (guarded against division by zero)
        cskyivar[i] = (convolved_fiber_skyvar>0)/(convolved_fiber_skyvar+(convolved_fiber_skyvar==0))

    # look at chi2 per wavelength and increase sky variance to reach chi2/ndf=1
    if skyfibers.size > 1 and add_variance :
        modified_cskyivar = _model_variance(frame,cskyflux,cskyivar,skyfibers)
    else :
        modified_cskyivar = cskyivar.copy()

    # set sky flux and ivar to zero to poorly constrained regions
    # and add margins to avoid expolation issues with the resolution matrix
    # (only the first nwave x nwave block, i.e. the constant term, is inspected)
    wmask = (np.diagonal(A[:nwave,:nwave])<=0).astype(float)
    # empirically, need to account for the full width of the resolution band
    # (realized here by applying twice the resolution)
    wmask = Rmean.dot(Rmean.dot(wmask))
    bad = np.where(wmask!=0)[0]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    # minimum number of fibers at each wavelength
    min_number_of_fibers = min(10,max(1,skyfibers.size//2))
    fibers_with_signal=np.sum(current_ivar>0,axis=0)
    bad = (fibers_with_signal<min_number_of_fibers)
    # increase by 1 pixel
    bad[1:-1] |= bad[2:]
    bad[1:-1] |= bad[:-2]
    cskyflux[:,bad]=0.
    modified_cskyivar[:,bad]=0.

    # need to do better here
    mask = (modified_cskyivar==0).astype(np.uint32)

    return SkyModel(frame.wave.copy(), cskyflux, modified_cskyivar, mask,
                    nrej=nout_tot, stat_ivar = cskyivar) # keep a record of the statistical ivar for QA
class SkyModel(object):
    def __init__(self, wave, flux, ivar, mask, header=None, nrej=0, stat_ivar=None, throughput_corrections=None):
        """Container for a 2D sky model and its per-pixel uncertainties.

        Args:
            wave : 1D[nwave] wavelength in Angstroms
            flux : 2D[nspec, nwave] sky model to subtract
            ivar : 2D[nspec, nwave] inverse variance of the sky model
            mask : 2D[nspec, nwave] 0=ok or >0 if problems; 32-bit
            header : (optional) header from FITS file HDU0
            nrej : (optional) Number of rejected pixels in fit
            stat_ivar : (optional) 2D[nspec, nwave] purely statistical inverse variance
            throughput_corrections : 1D (optional) Multiplicative throughput corrections for each fiber

        All input arguments become attributes.
        """
        # validate shapes before deriving dimensions from them
        assert wave.ndim == 1
        assert flux.ndim == 2
        assert ivar.shape == flux.shape
        assert mask.shape == flux.shape

        nspec, nwave = flux.shape
        self.nspec = nspec
        self.nwave = nwave
        self.wave = wave
        self.flux = flux
        self.ivar = ivar
        self.mask = util.mask32(mask)  # coerce mask to 32-bit convention
        self.header = header
        self.nrej = nrej
        self.stat_ivar = stat_ivar
        self.throughput_corrections = throughput_corrections
        # optional per-fiber corrections, filled in later by other routines
        self.dwave = None  # wavelength corrections
        self.dlsf = None   # LSF corrections
def subtract_sky(frame, skymodel, apply_throughput_correction = True, zero_ivar=True) :
    """Subtract skymodel from frame, altering frame.flux, .ivar, and .mask

    Args:
        frame : desispec.Frame object
        skymodel : desispec.SkyModel object

    Option:
        apply_throughput_correction : if True, fit for an achromatic throughput correction.
            This is to absorb variations of Focal Ratio Degradation with fiber flexure.
        zero_ivar : if True , set ivar=0 for masked pixels
    """
    assert frame.nspec == skymodel.nspec
    assert frame.nwave == skymodel.nwave

    log = get_logger()
    log.info("starting with apply_throughput_correction = {} and zero_ivar = {}".format(apply_throughput_correction, zero_ivar))

    # Set fibermask flagged spectra to have 0 flux and variance
    frame = get_fiberbitmasked_frame(frame, bitmask='sky', ivar_framemask=zero_ivar)

    # frame and sky model must share the same wavelength grid; die if not
    same_grid = np.allclose(frame.wave, skymodel.wave)
    if not same_grid:
        message = "frame and sky not on same wavelength grid"
        log.error(message)
        raise ValueError(message)

    if apply_throughput_correction and skymodel.throughput_corrections is not None :
        # scale each fiber of the sky model by its achromatic throughput
        # correction before subtracting
        nfibers = frame.flux.shape[0]
        for spec in range(nfibers) :
            # apply this correction to the sky model even if we have not fit it (default can be 1 or 0)
            skymodel.flux[spec] *= skymodel.throughput_corrections[spec]

    frame.flux -= skymodel.flux
    frame.ivar = util.combine_ivar(frame.ivar, skymodel.ivar)
    frame.mask |= skymodel.mask

    log.info("done")
def calculate_throughput_corrections(frame,skymodel):
    """Calculate the throughput corrections for each fiber based on the skymodel.

    Args:
        frame (Frame object): frame containing the data that may need to be corrected
        skymodel (SkyModel object): skymodel object that contains the information about the sky for the given exposure/frame

    Output:
        corrections (1D array): 1D array where the index corresponds to the fiber % 500 and the values are the multiplicative corrections that would
                                be applied to the fluxes in frame.flux to correct them based on the input skymodel
    """
    # need to fit for a multiplicative factor of the sky model
    # before subtraction
    # we are going to use a set of bright sky lines,
    # and fit a multiplicative factor + background around
    # each of them individually, and then combine the results
    # with outlier rejection in case a source emission line
    # coincides with one of the sky lines.

    # it's more robust to have a hardcoded set of sky lines here
    # these are all the sky lines with a flux >5% of the max flux
    # except in b where we add an extra weaker line at 5199.4A
    skyline=np.array([5199.4,5578.4,5656.4,5891.4,5897.4,6302.4,6308.4,6365.4,6500.4,6546.4,\
                      6555.4,6618.4,6663.4,6679.4,6690.4,6765.4,6831.4,6836.4,6865.4,6925.4,\
                      6951.4,6980.4,7242.4,7247.4,7278.4,7286.4,7305.4,7318.4,7331.4,7343.4,\
                      7360.4,7371.4,7394.4,7404.4,7440.4,7526.4,7714.4,7719.4,7752.4,7762.4,\
                      7782.4,7796.4,7810.4,7823.4,7843.4,7855.4,7862.4,7873.4,7881.4,7892.4,\
                      7915.4,7923.4,7933.4,7951.4,7966.4,7982.4,7995.4,8016.4,8028.4,8064.4,\
                      8280.4,8284.4,8290.4,8298.4,8301.4,8313.4,8346.4,8355.4,8367.4,8384.4,\
                      8401.4,8417.4,8432.4,8454.4,8467.4,8495.4,8507.4,8627.4,8630.4,8634.4,\
                      8638.4,8652.4,8657.4,8662.4,8667.4,8672.4,8677.4,8683.4,8763.4,8770.4,\
                      8780.4,8793.4,8829.4,8835.4,8838.4,8852.4,8870.4,8888.4,8905.4,8922.4,\
                      8945.4,8960.4,8990.4,9003.4,9040.4,9052.4,9105.4,9227.4,9309.4,9315.4,\
                      9320.4,9326.4,9340.4,9378.4,9389.4,9404.4,9422.4,9442.4,9461.4,9479.4,\
                      9505.4,9521.4,9555.4,9570.4,9610.4,9623.4,9671.4,9684.4,9693.4,9702.4,\
                      9714.4,9722.4,9740.4,9748.4,9793.4,9802.4,9814.4,9820.4])

    # half width of wavelength region around each sky line
    # larger values give a better statistical precision
    # but also a larger sensitivity to source features
    # best solution on one dark night exposure obtained with
    # a half width of 4A.
    hw=4 #A

    # combined weights used for the fits.
    # BUGFIX: the previous code did "tivar=frame.ivar" followed by in-place
    # "*=", which aliased and silently modified the caller's frame.ivar
    # (zeroing masked pixels in the input frame as a side effect).
    # Work on an explicit copy so this routine does not mutate its inputs;
    # the computed tivar values are unchanged.
    tivar=frame.ivar.copy()
    if frame.mask is not None :
        tivar *= (frame.mask==0)
    tivar *= (skymodel.ivar>0)

    # we precompute the quantities needed to fit each sky line + continuum
    # the sky "line profile" is the actual sky model
    # and we consider an additive constant
    sw,swf,sws,sws2,swsf=[],[],[],[],[]
    for line in skyline :
        if line<=frame.wave[0] or line>=frame.wave[-1] : continue
        ii=np.where((frame.wave>=line-hw)&(frame.wave<=line+hw))[0]
        if ii.size<2 : continue
        # weighted sums over the window, one value per fiber
        sw.append(np.sum(tivar[:,ii],axis=1))
        swf.append(np.sum(tivar[:,ii]*frame.flux[:,ii],axis=1))
        swsf.append(np.sum(tivar[:,ii]*frame.flux[:,ii]*skymodel.flux[:,ii],axis=1))
        sws.append(np.sum(tivar[:,ii]*skymodel.flux[:,ii],axis=1))
        sws2.append(np.sum(tivar[:,ii]*skymodel.flux[:,ii]**2,axis=1))

    log=get_logger()
    nlines=len(sw)

    corrections = np.ones(frame.flux.shape[0]).astype('f8')
    for fiber in range(frame.flux.shape[0]) :
        # we solve the 2x2 linear system for each fiber and sky line
        # (model: flux = cst + scale * sky) and save the results for each fiber
        coef=[] # list of scale values
        var=[] # list of variance on scale values
        for line in range(nlines) :
            if sw[line][fiber]<=0 : continue
            A=np.array([[sw[line][fiber],sws[line][fiber]],[sws[line][fiber],sws2[line][fiber]]])
            B=np.array([swf[line][fiber],swsf[line][fiber]])
            try :
                Ai=np.linalg.inv(A)
                X=Ai.dot(B)
                coef.append(X[1]) # the scale coef (marginalized over cst background)
                var.append(Ai[1,1])
            except :
                # singular 2x2 system for this line; just skip it
                pass

        if len(coef)==0 :
            log.warning("cannot corr. throughput. for fiber %d"%fiber)
            continue
        coef=np.array(coef)
        var=np.array(var)
        # add a systematic error floor to the statistical variances
        # (a larger floor is used when flagging outliers)
        ivar=(var>0)/(var+(var==0)+0.005**2)
        ivar_for_outliers=(var>0)/(var+(var==0)+0.02**2)

        # loop for outlier rejection
        failed=False
        for loop in range(50) :
            a=np.sum(ivar)
            if a <= 0 :
                log.warning("cannot corr. throughput. ivar=0 everywhere on sky lines for fiber %d"%fiber)
                failed=True
                break
            mcoef=np.sum(ivar*coef)/a
            mcoeferr=1/np.sqrt(a)
            nsig=3.
            chi2=ivar_for_outliers*(coef-mcoef)**2
            worst=np.argmax(chi2)
            if chi2[worst]>nsig**2*np.median(chi2[chi2>0]) : # with rough scaling of errors
                #log.debug("discard a bad measurement for fiber %d"%(fiber))
                ivar[worst]=0
                ivar_for_outliers[worst]=0
            else :
                break
        if failed :
            continue

        log.info("fiber #%03d throughput corr = %5.4f +- %5.4f (mean fiber flux=%f)"%(fiber,mcoef,mcoeferr,np.median(frame.flux[fiber])))
        if mcoeferr>0.1 :
            log.warning("throughput corr error = %5.4f > 0.1 is too large for fiber %d, do not apply correction"%(mcoeferr,fiber))
        else :
            corrections[fiber] = mcoef

    return corrections
def qa_skysub(param, frame, skymodel, quick_look=False):
    """Calculate QA on SkySubtraction

    Note: Pixels rejected in generating the SkyModel (as above), are
    not rejected in the stats calculated here. Would need to carry
    along current_ivar to do so.

    Args:
        param : dict of QA parameters : see qa_frame.init_skysub for example
        frame : desispec.Frame object; Should have been flat fielded
        skymodel : desispec.SkyModel object
        quick_look : bool, optional
            If True, do QuickLook specific QA (or avoid some)

    Returns:
        qadict: dict of QA outputs
            Need to record simple Python objects for yaml (str, float, int)
    """
    from desispec.qa import qalib
    import copy

    log = get_logger()

    #- QAs
    #- first subtract sky to get the sky subtracted frame. This is only for QA.
    #- Pipeline does it separately.
    #- deep-copy so the input frame propagates unaffected downstream
    skysub_frame = copy.deepcopy(frame)
    #- Note: sky subtraction here is only to get residuals; in the pipeline
    #- it is done in the fluxcalib stage
    subtract_sky(skysub_frame, skymodel)

    # Sky residuals first
    qadict = qalib.sky_resid(param, skysub_frame, skymodel, quick_look=quick_look)

    if quick_look:
        # The following can be a *large* dict
        qadict.update(qalib.SignalVsNoise(skysub_frame, param))
    else:
        # Sky continuum is measured after flat fielding in QuickLook,
        # so it is only computed in the offline path
        channel = frame.meta['CAMERA'][0]
        wrange1, wrange2 = param[channel.upper() + '_CONT']
        skyfiber, contfiberlow, contfiberhigh, meancontfiber, skycont = \
            qalib.sky_continuum(frame, wrange1, wrange2)
        qadict["SKYFIBERID"] = skyfiber.tolist()
        qadict["SKYCONT"] = skycont
        qadict["SKYCONT_FIBER"] = meancontfiber

    return qadict