Source code for desispec.pixgroup

"""
desispec.pixgroup
=================

Tools to regroup spectra in individual exposures by healpix on the sky.
"""

import glob, os, sys, time, json
from collections import Counter, OrderedDict
import gzip, shutil

import numpy as np

import fitsio
from astropy.io import fits
from astropy.table import Table, vstack

from desimodel.footprint import radec2pix
from desiutil.log import get_logger
import desiutil.depend
import desiutil.healpix

from . import io
from .io.util import get_tempfilename, addkeys
from .util import convert_to_pandas
from .maskbits import specmask
from .tsnr import calc_tsnr2_cframe


[docs]
def fibermap2tilepix(fibermap, nside=64):
    """
    Maps fibermap to which healpix are covered by which petals

    Args:
        fibermap: table with columns TARGET_RA, TARGET_DEC, PETAL_LOC

    Options:
        nside (int): nested healpix nside (must be power of 2)

    Returns dict petalpix[petal] = list(healpix covered by that petal)
    """
    tilepix = dict()
    ra = fibermap['TARGET_RA']
    dec = fibermap['TARGET_DEC']
    ok = ~np.isnan(ra) & ~np.isnan(dec)
    for petal in range(10):
        ii = (fibermap['PETAL_LOC'] == petal) & ok
        healpix = np.unique(radec2pix(nside, ra[ii], dec[ii]))
        tilepix[petal] = [int(p) for p in healpix]

    return tilepix



[docs]
def get_exp2uniqpix_map(zcat, frames, nmax=5000, nside_max=None):
    """
    Maps exposures to unique healpix pixels using zcat and frames

    Parameters
    ----------
    zcat : table
        Table with columns TARGETID, TILEID, PETAL_LOC, TARGET_RA, TARGET_DEC,
        e.g. from ztile file.
    frames : table
        Table with columns NIGHT, EXPID, TILEID, CAMERA, e.g. from
        exposures-SPECPROD.fits FRAMES HDU.
    nmax : int, optional
        Max targets per adaptive healpix pixel (passed to partition_radec).
    nside_max : int, optional
        NSIDE for the hpix_ntargets output table; must be a positive power of 2
        and >= the max NSIDE used internally by partition_radec. Defaults to the
        max NSIDE found in the adaptive pixelization.

    Returns
    -------
    exppix : astropy.table.Table
        Table with columns NIGHT, EXPID, TILEID, SPECTRO, UNIQPIX, NSIDE,
        HEALPIX, NTARGETS. SPECTRO is the petal number, renamed from PETAL_LOC
        for historical compatibility. NTARGETS is the number of unique targets
        per TILEID, SPECTRO, UNIQPIX combination.
    upix_ntargets : astropy.table.Table
        Table with columns UNIQPIX, NTARGETS counting the number of unique
        targets per UNIQPIX across all tiles and petals.
    hpix_ntargets : astropy.table.Table
        Table with one row per healpix pixel at nside_max, with columns
        HEALPIX, NSIDE, UNIQPIX, NTARGETS. UNIQPIX is the adaptive pixel
        covering each fine healpix (-1 if not covered by any UNIQPIX with
        targets). NTARGETS is the number of unique targets in that fine
        healpix (0 if none).
    """
    log = get_logger()

    #- Implementation note: this uses pandas.DataFrame internally because it is ~10x faster than
    #- astropy.table.Table for the merge/join operations on 10s of millions of rows in a zcatalog,
    #- reducing the runtime from minutes to seconds.  The return value is converted back to
    #- to Table for consistency with the rest of the codebase which generally doesn't use pandas.

    #- Trim zcat to Pandas DataFrame with just the columns we need
    log.info(f'Converting zcat ({len(zcat)} rows) and frames ({len(frames)} rows) to pandas DataFrames')
    zcat = convert_to_pandas(zcat, ['TARGETID', 'TILEID', 'PETAL_LOC', 'TARGET_RA', 'TARGET_DEC'])
    frames = convert_to_pandas(frames, ['NIGHT', 'EXPID', 'TILEID', 'CAMERA'])

    #- Make a table with one row per unique TARGETID plus RA,DEC to calculate UNIQPIX
    log.info('Trimming zcat to unique TARGETID, RA, DEC')
    targets = zcat[['TARGETID', 'TARGET_RA', 'TARGET_DEC']].drop_duplicates(subset='TARGETID')
    log.info(f'Calculating unique pixels from {len(targets)} unique targets')
    targets['UNIQPIX'] = desiutil.healpix.partition_radec(targets['TARGET_RA'], targets['TARGET_DEC'], nmax=nmax)

    #- Count unique targets per UNIQPIX
    upix_ntargets = targets.groupby('UNIQPIX').size().reset_index(name='NTARGETS')

    #- join zcat with targets to get UNIQPIX for each row in zcat
    log.info('Adding UNIQPIX to zcat')
    zcat = zcat.merge(targets[['TARGETID', 'UNIQPIX']], on='TARGETID', how='inner')

    #- DataFrame-fu: Trim zcat to one row per TILEID, PETAL_LOC, UNIQPIX while
    #- adding NTARGETS column counting how many targets in each TILEID, PETAL_LOC, UNIQPIX
    log.info('Trimming zcat to one row per TILEID, PETAL_LOC, UNIQPIX')
    columns = ['TILEID', 'PETAL_LOC', 'UNIQPIX']
    upix_tiles = zcat.groupby(columns).size().reset_index(name='NTARGETS')

    #- Add PETAL_LOC to frames based on CAMERA
    log.info('Converting frames CAMERA to PETAL_LOC')
    frames['PETAL_LOC'] = frames['CAMERA'].str[-1].astype(int)  # extract last character of CAMERA and convert to int

    #- Drop camera; just use PETAL_LOC
    frames = frames[['NIGHT', 'EXPID', 'TILEID', 'PETAL_LOC']].drop_duplicates(subset=['EXPID', 'PETAL_LOC'])

    #- Count input spectra per UNIQPIX, since each TARGETID can have multiple EXPID

    #- 1. Count unique EXPIDs per TILEID (frames is already deduped by EXPID, PETAL_LOC)
    nexp_per_tile = frames.groupby('TILEID')['EXPID'].nunique()

    #- 2. One row per (TARGETID, TILEID) — avoid double-counting if PETAL_LOC had duplicates
    zcat_tt = zcat[['TARGETID', 'TILEID', 'UNIQPIX']].drop_duplicates(subset=['TARGETID', 'TILEID'])
    zcat_tt = zcat_tt.assign(NEXP=zcat_tt['TILEID'].map(nexp_per_tile))

    #- 3. Sum exposures across all (TARGETID, TILEID) pairs within each UNIQPIX
    upix_nspectra = zcat_tt.groupby('UNIQPIX')['NEXP'].sum()
    upix_ntargets['NSPECTRA'] = upix_ntargets['UNIQPIX'].map(upix_nspectra).astype(int)

    #- join upix_tiles with frames to get NIGHT, EXPID, CAMERA
    log.info('Joining upix_tiles with frames')
    upix_frames = upix_tiles.merge(frames, on=['TILEID', 'PETAL_LOC'], how='inner')

    #- Add UNIQPIX -> NSIDE, HEALPIX columns
    log.info('Calculating NSIDE, HEALPIX from UNIQPIX')
    nside, healpix = desiutil.healpix.upix2hpix(upix_frames['UNIQPIX'])
    upix_frames['NSIDE'] = nside
    upix_frames['HEALPIX'] = healpix

    #- Reorder columns
    upix_frames = upix_frames[['NIGHT', 'EXPID', 'TILEID', 'PETAL_LOC', 'UNIQPIX', 'NSIDE', 'HEALPIX', 'NTARGETS']]

    #- Historical compatibility: rename PETAL_LOC to SPECTRO
    upix_frames.rename(columns={'PETAL_LOC': 'SPECTRO'}, inplace=True)

    nuniq = len(np.unique(upix_frames['UNIQPIX']))
    nexppetals = len(upix_frames[['EXPID', 'SPECTRO']].drop_duplicates())
    log.info(f'{nuniq} unique pixels covered by {nexppetals} exposure-petal combinations')

    #- Build hpix_ntargets: one row per fine healpix at nside_max
    #- Use get_hpix2upix_map for the geometric coverage (handles coarse UNIQPIX expanding
    #- to their fine children, and -1 for uncovered pixels), then scatter in target counts
    log.info(f'Building hpix_ntargets table at nside_max={nside_max}')
    hpix2upix, nside_max = get_hpix2upix_map(targets['UNIQPIX'].values, nside_max)
    npix = 12 * nside_max ** 2

    fine_healpix = radec2pix(nside_max, targets['TARGET_RA'].values, targets['TARGET_DEC'].values)
    ntargets_per_hpix = np.zeros(npix, dtype=np.int32)
    hpix_idx, counts = np.unique(fine_healpix, return_counts=True)
    ntargets_per_hpix[hpix_idx] = counts

    hpix_ntargets = Table({
        'HEALPIX': np.arange(npix, dtype=np.int64),
        'UNIQPIX': hpix2upix,
        'NTARGETS': ntargets_per_hpix,
    })
    hpix_ntargets.meta['NSIDE'] = nside_max
    hpix_ntargets.meta['HPXNSIDE'] = nside_max
    hpix_ntargets.meta['HPXNEST'] = True

    return Table.from_pandas(upix_frames), Table.from_pandas(upix_ntargets), hpix_ntargets



[docs]
def group_nspectra(nspectra, nmax, max_groupsize=None):
    """
    Return list of lists of indices, grouping nspectra array into groups with at most nmax spectra total.
    The exception is if nspectra[i]>nmax, it becomes a single-element group.

    Args:
        nspectra: array-like of integers with number of spectra
        nmax: maximum number of spectra allowable per group
        max_groupsize: if not None, maximum number of elements per group,
            regardless of whether the nmax threshold has been reached

    Returns list of lists of indices for each group

    e.g. group_nspectra([3,5,3,10,2,1], 5) -> [[3,], [1,], [0,], [2,4], [5,]]
    """
    nspectra = np.asarray(nspectra)
    sorted_indices = np.argsort(-nspectra)  # sort high -> low
    groups = list()
    current_group = list()
    current_sum = 0
    for i in sorted_indices:
        n = nspectra[i]
        if n > nmax:
            if current_group:
                groups.append(current_group)
                current_group = list()
                current_sum = 0
            groups.append([int(i)])
        elif current_sum + n <= nmax:
            if max_groupsize is not None and len(current_group) >= max_groupsize:
                groups.append(current_group)
                current_group = [int(i)]
                current_sum = n
            else:
                current_group.append(int(i))
                current_sum += n
        else:
            groups.append(current_group)
            current_group = [int(i)]
            current_sum = n

    if current_group:
        groups.append(current_group)

    return groups




[docs]
def get_hpix2upix_map(uniqpix, nside_max=None):
    """
    Given an array of UNIQ pixels (possibly at varying NSIDEs, nested scheme),
    return a map of healpix indices -> uniqpix values at requested or
    derived nside_max.

    Parameters
    ----------
    uniqpix : array-like of int
        Array of UNIQ pixel values.
    nside_max : int, optional
        Maximum NSIDE to use for the output map.
        If None, infer from input uniqpix.

    Returns
    -------
    healpix_map : np.ndarray of int, shape (12 * nside_max**2,)
        For each HEALPix pixel index at nside_max, the UNIQ pixel value
        that contains it. Pixels not covered by any UNIQ pixel are set to -1.
    nside_max : int
        The NSIDE of the healpix_map.
    """
    uniqpix = np.asarray(uniqpix, dtype=np.int64)
    nside, ipix = desiutil.healpix.upix2hpix(uniqpix)

    # Validate that all nside values from uniqpix are positive powers of 2
    for n in np.unique(nside):
        if n <= 0 or (n & (n - 1)) != 0:
            raise ValueError(f"nside={n} derived from uniqpix is not a positive power of 2")

    order = np.log2(nside).astype(int)

    if nside_max is None:
        nside_max = int(np.max(nside))
    else:
        if nside_max <= 0 or (nside_max & (nside_max - 1)) != 0:
            raise ValueError(f"{nside_max=} is not a positive power of 2")
        if nside_max < np.max(nside):
            raise ValueError(f"{nside_max=} is too small for the maximum nside {np.max(nside)} in uniqpix")

    order_max = int(np.log2(nside_max))
    npix_max = 12 * nside_max ** 2

    healpix_map = np.full(npix_max, -1, dtype=np.int64)

    # For each unique order present, expand pixels to nside_max
    for o in np.unique(order):
        mask = order == o
        ipix_o = ipix[mask]
        uniq_o = uniqpix[mask]

        # Each pixel at order o covers 4^(order_max - o) pixels at order_max
        scale = 4 ** (order_max - o)

        # The fine pixel indices covered by each coarse pixel
        # ipix_o[:, None] * scale + np.arange(scale) gives shape (n, scale)
        fine_pixels = ipix_o[:, None] * scale + np.arange(scale, dtype=np.int64)
        fine_pixels = fine_pixels.ravel()

        # Corresponding uniq values, repeated for each fine pixel
        uniq_repeated = np.repeat(uniq_o, scale)

        healpix_map[fine_pixels] = uniq_repeated

    return healpix_map, nside_max



#-----

[docs]
class FrameLite(object):
    '''
    Lightweight Frame object for regrouping

    This is intended for I/O without the overheads of float32 -> float64
    conversion, correcting endianness, etc.
    '''
    def __init__(self, wave, flux, ivar, mask, resolution_data, fibermap, header, scores=None, model=None, redshifts=None):
        '''
        Create a new FrameLite object

        Args:
            wave: 1D array of wavlengths
            flux: 2D[nspec, nwave] fluxes
            ivar: 2D[nspec, nwave] inverse variances of flux
            mask: 2D[nspec, nwave] mask of flux; 0=good
            resolution_data 3D[nspec, ndiag, nwave] Resolution matrix diagonals
            fibermap: fibermap table
            header: FITS header

        Options:
            scores: table of QA scores
            model: 2D[nspec, nwave] spectral models
            redshifts: table of best-fit redshifts
        '''
        self.wave = wave
        self.flux = flux
        self.ivar = ivar
        self.mask = mask
        self.resolution_data = resolution_data
        self.fibermap = fibermap
        self.header = header
        self.meta = header  #- for compatibility with Frame objects
        self.scores = scores
        self.model = model
        self.redshifts = redshifts

    def __getitem__(self, index):
        '''Return a subset of the original FrameLight'''
        if not isinstance(index, slice):
            index = np.atleast_1d(index)

        if self.scores is not None:
            scores = self.scores[index]
        else:
            scores = None
            
        if self.redshifts is not None:
            redshifts = self.redshifts[index]
        else:
            redshifts = None
            
        if self.model is not None:
            model = {}
            for b in self.flux.keys():
                model[b] = self.model[b][index]
        else:
            model = None

        return FrameLite(self.wave, self.flux[index], self.ivar[index],
            self.mask[index], self.resolution_data[index], self.fibermap[index],
            self.header, scores, model, redshifts)


[docs]
    @classmethod
    def read(cls, filename):
        '''
        Return FrameLite read from `filename`
        '''
        with fitsio.FITS(filename) as fx:
            header = fx[0].read_header()
            wave = fx['WAVELENGTH'].read()
            flux = fx['FLUX'].read()
            ivar = fx['IVAR'].read()
            mask = fx['MASK'].read()
            resolution_data = fx['RESOLUTION'].read()
            fibermap = fx['FIBERMAP'].read()
            fmhdr = fx['FIBERMAP'].read_header()
            if "MODEL" in fx:
                model = fx["MODEL"].read()
            else:
                model = None

            for col in ['SURVEY', 'PROGRAM']:
                if col in fmhdr and col not in header:
                    header[col] = fmhdr[col]

            if 'SCORES' in fx:
                scores = fx['SCORES'].read()
                if 'TARGETID' not in scores.dtype.names:
                    tmp = Table(scores)
                    tmp.add_column(fibermap['TARGETID'], index=0, name='TARGETID')
                    scores = np.asarray(tmp)
            else:
                scores = None

            if "REDSHIFTS" in fx:
                redshifts = Table(fx["REDSHIFTS"].read())
            else:
                redshifts = None
                
        #- Add extra fibermap columns NIGHT, EXPID, TILEID
        nspec = len(fibermap)
        night = np.tile(header['NIGHT'], nspec).astype('i4')
        expid = np.tile(header['EXPID'], nspec).astype('i4')
        tileid = np.tile(header['TILEID'], nspec).astype('i4')
        if 'MJD-OBS' in header:
            mjd = np.tile(header['MJD-OBS'], nspec).astype('f8')
        elif 'MJD' in header:
            mjd = np.tile(header['MJD'], nspec).astype('f8')
        else:
            mjd = np.zeros(nspec, dtype='f8')-1

        fibermap = Table(fibermap)
        fibermap['NIGHT'] = night
        fibermap['EXPID'] = expid
        fibermap['MJD'] = mjd
        fibermap['TILEID'] = tileid
        addkeys(fibermap.meta, fmhdr)

        fr = FrameLite(wave, flux, ivar, mask, resolution_data, fibermap, header, scores, model, redshifts)
        fr.filename = filename
        return fr



#-----

[docs]
class SpectraLite(object):
    '''
    Lightweight spectra I/O object for regrouping
    '''
    def __init__(self, bands, wave, flux, ivar, mask, resolution_data,
            fibermap, exp_fibermap=None, scores=None, model=None, redshifts=None):
        '''
        Create a SpectraLite object

        Args:
            bands: list of bands, e.g. ['b', 'r', 'z']
            wave: dict of wavelengths, keyed by band
            flux: dict of fluxes, keyed by band
            ivar: dict of inverse variances, keyed by band
            mask: dict of masks, keyed by band
            resolution_data: dict of Resolution sparse diagonals, keyed by band
            fibermap: fibermap table, applies to all bands

        Options:
            exp_fibermap: per-exposure fibermap table
            scores: scores table, applies to all bands
            model: 2D[nspec, nwave] spectral models
            redshifts: table of best-fit redshifts
        '''

        self.bands = bands[:]

        #- All inputs should have the same bands
        _bands = set(bands)
        assert set(wave.keys()) == _bands
        assert set(flux.keys()) == _bands
        assert set(ivar.keys()) == _bands
        assert set(mask.keys()) == _bands
        assert set(resolution_data.keys()) == _bands

        #- All bands should have the same number of spectra
        nspec = len(fibermap)
        for band in bands:
            assert flux[band].shape[0] == nspec
            assert ivar[band].shape[0] == nspec
            assert mask[band].shape[0] == nspec
            assert resolution_data[band].shape[0] == nspec

            #- Also check wavelength dimension consistency
            nwave = len(wave[band])
            assert flux[band].shape[1] == nwave
            assert ivar[band].shape[1] == nwave
            assert mask[band].shape[1] == nwave
            assert resolution_data[band].shape[2] == nwave  #- resolution_data[band].shape[1] is ndiag

        #- scores and fibermap should be row matched with same length
        if scores is not None:
            assert len(scores) == len(fibermap)

        self.wave = wave.copy()
        self.flux = flux.copy()
        self.ivar = ivar.copy()
        self.mask = mask.copy()
        self.resolution_data = resolution_data.copy()
        self.fibermap = Table(fibermap)

        if model is not None:
            self.model = model.copy()
        else:
            self.model = None
            
        #- optional tables
        if exp_fibermap is not None:
            self.exp_fibermap = Table(exp_fibermap)
        else:
            self.exp_fibermap = None

        if scores is not None:
            self.scores = Table(scores)
        else:
            self.scores = None

        if redshifts is not None:
            self.redshifts = Table(redshifts)
        else:
            self.redshifts = None
            
        #- for compatibility with full Spectra objects
        self.meta = None
        self.extra = None
        self.extra_catalog = None


[docs]
    def target_ids(self):
        """
        Return list of unique target IDs.

        The target IDs are sorted by the order that they first appear.

        Returns (array):
            an array of integer target IDs.
        """
        uniq, indices = np.unique(self.fibermap["TARGETID"], return_index=True)
        return uniq[indices.argsort()]



[docs]
    def num_spectra(self):
        """
        Get the number of spectra contained in this group.

        Returns (int):
            Number of spectra contained in this group.
        """
        if self.fibermap is not None:
            return len(self.fibermap)
        else:
            return 0




[docs]
    def num_targets(self):
        """
        Get the number of distinct targets.

        Returns (int):
            Number of unique targets with spectra in this object.
        """
        if self.fibermap is not None:
            return len(np.unique(self.fibermap["TARGETID"]))
        else:
            return 0


    def __add__(self, other):
        '''
        concatenate two SpectraLite objects into one
        '''
        assert self.bands == other.bands
        for band in self.bands:
            assert np.all(self.wave[band] == other.wave[band])
        if self.scores is not None:
            assert other.scores is not None

        bands = self.bands
        wave = self.wave
        flux = dict()
        ivar = dict()
        mask = dict()
        resolution_data = dict()

        if self.model is not None and other.model is not None:
            model = {}
        else:
            model = None
            
        for band in self.bands:
            flux[band] = np.vstack([self.flux[band], other.flux[band]])
            ivar[band] = np.vstack([self.ivar[band], other.ivar[band]])
            mask[band] = np.vstack([self.mask[band], other.mask[band]])
            resolution_data[band] = np.vstack([self.resolution_data[band], other.resolution_data[band]])
            if self.model is not None and other.model is not None:
                model[band] = np.vstack([self.model[band], other.model[band]])

        fibermap = vstack([self.fibermap, other.fibermap])
        if self.scores is not None:
            scores = np.hstack([self.scores, other.scores])
        else:
            scores = None

        if self.redshifts is not None and other.redshifts is not None:
            redshifts = np.hstack([self.redshifts, other.redshifts])
        else:
            redshifts = None

        if self.exp_fibermap is not None:
            exp_fibermap = np.hstack([self.exp_fibermap, other.exp_fibermap])
        else:
            exp_fibermap = None

        return SpectraLite(bands, wave, flux, ivar, mask, resolution_data,
                fibermap, exp_fibermap=exp_fibermap, scores=scores, model=model, redshifts=redshifts)


[docs]
    def write(self, filename, header=None):
        '''
        Write this SpectraLite object to `filename`
        '''
        log = get_logger()
        log.warning('SpectraLite.write() is deprecated; please use desispec.io.write_spectra() instead')

        #- create directory if missing
        dirname=os.path.dirname(filename)
        if dirname != '':
            os.makedirs(dirname, exist_ok=True)

        #- Have to first write non-gzip so that we can append
        if filename.endswith('.gz'):
            tmpout = get_tempfilename(filename[-3:])
        else:
            tmpout = get_tempfilename(filename)

        #- work around c/fitsio bug that appends spaces to string column values
        #- by using astropy Table to write fibermap

        from astropy.table import Table
        fm = Table(self.fibermap)
        fm.meta['EXTNAME'] = 'FIBERMAP'

        header = io.fitsheader(header)
        desiutil.depend.add_dependencies(header)
        hdus = fits.HDUList()
        hdus.append(fits.PrimaryHDU(None, header))
        hdus.append(fits.convenience.table_to_hdu(fm))

        if self.exp_fibermap is not None:
            expfm = Table(self.exp_fibermap)
            expfm.meta['EXTNAME'] = 'EXP_FIBERMAP'
            hdus.append(fits.convenience.table_to_hdu(expfm))

        if self.scores is not None:
            scores = Table(self.scores)
            scores.meta['EXTNAME'] = 'SCORES'
            hdus.append(fits.convenience.table_to_hdu(scores))

        if self.redshifts is not None:
            redshifts = Table(self.redshifts)
            redshifts.meta['EXTNAME'] = 'REDSHIFTS'
            hdus.append(fits.convenience.table_to_hdu(redshifts))

        hdus.writeto(tmpout, overwrite=True, checksum=True)

        #- then proceed with more efficient fitsio for everything else
        #- See https://github.com/esheldon/fitsio/issues/150 for why
        #- these are written one-by-one
        ### if self.scores is not None:
        ###     fitsio.write(tmpout, self.scores, extname='SCORES')

        for band in sorted(self.bands):
            upperband = band.upper()
            fitsio.write(tmpout, self.wave[band], extname=upperband+'_WAVELENGTH',
                    header=dict(BUNIT='Angstrom'))
            fitsio.write(tmpout, self.flux[band], extname=upperband+'_FLUX',
                    header=dict(BUNIT='10**-17 erg/(s cm2 Angstrom)'))
            fitsio.write(tmpout, self.ivar[band], extname=upperband+'_IVAR',
                header=dict(BUNIT='10**+34 (s2 cm4 Angstrom2) / erg2'))
            if self.model is not None:
                fitsio.write(tmpout, self.model[band], extname=upperband+'_MODEL',
                    header=dict(BUNIT='10**-17 erg/(s cm2 Angstrom)'))
            fitsio.write(tmpout, self.mask[band], extname=upperband+'_MASK', compress='gzip')
            fitsio.write(tmpout, self.resolution_data[band], extname=upperband+'_RESOLUTION')

        #- compress if needed (via another tempfile), otherwise just rename
        if filename.endswith('.gz'):
            tmpoutgz = get_tempfilename(filename)
            with open(tmpout, 'rb') as f_in:
                with gzip.open(tmpoutgz, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            os.rename(tmpoutgz, filename)
            os.remove(tmpout)
        else:
            os.rename(tmpout, filename)



[docs]
    @classmethod
    def read(cls, filename):
        '''
        Return a SpectraLite object read from `filename`
        '''
        with fitsio.FITS(filename) as fx:
            wave = dict()
            flux = dict()
            ivar = dict()
            mask = dict()
            resolution_data = dict()
            fibermap = fx['FIBERMAP'].read()
            model_exists = any("_MODEL" in fx)
            if model_exists:
                model = {}
            else:
                model = None
            if 'EXP_FIBERMAP' in fx:
                exp_fibermap = fx['EXP_FIBERMAP'].read()
            else:
                exp_fibermap = None

            if 'SCORES' in fx:
                scores = fx['SCORES'].read()
            else:
                scores = None

            if 'REDSHIFTS' in fx:
                redshifts = fx['REDSHIFTS'].read()
            else:
                redshifts = None

            bands = ['b', 'r', 'z']
            for band in bands:
                upperband = band.upper()
                wave[band] = fx[upperband+'_WAVELENGTH'].read()
                flux[band] = fx[upperband+'_FLUX'].read()
                ivar[band] = fx[upperband+'_IVAR'].read()
                mask[band] = fx[upperband+'_MASK'].read()
                resolution_data[band] = fx[upperband+'_RESOLUTION'].read()
                if model is not None:
                    model[band] = fx[upperband+'_MODEL'].read()

        return SpectraLite(bands, wave, flux, ivar, mask, resolution_data,
                fibermap, exp_fibermap=exp_fibermap, scores=scores, model=model, redshifts=redshifts)




[docs]
def add_missing_frames(frames):
    '''
    Adds any missing frames with ivar=0 FrameLite objects with correct shape
    to match those that do exist.

    Args:
        frames: dict of FrameLite objects, keyed by (night,expid,camera)

    Modifies `frames` in-place.

    Example: if `frames` has keys (2020,1,'b0') and (2020,1,'r0') but
    not (2020,1,'z0'), this will add a blank FrameLite object for z0.

    The purpose of this is to facilitate frames2spectra, which needs
    *something* for every spectro camera for every exposure that is included.
    '''

    log = get_logger()

    #- First figure out the number of wavelengths per band
    wave = dict()
    ndiag = dict()
    for (night, expid, camera), frame in frames.items():
        band = camera[0]
        if band not in wave:
            wave[band] = frame.wave
        if band not in ndiag:
            ndiag[band] = frame.resolution_data.shape[1]

    #- Now loop through all frames, filling in any missing bands
    bands = sorted(list(wave.keys()))
    for (night, expid, camera), frame in list(frames.items()):
        frameband = camera[0]
        spectro = camera[1:]
        for band in bands:
            if band == frameband:
                continue

            bandcam = band+spectro
            if (night, expid, bandcam) in frames:
                continue

            log.warning('Creating blank data for missing frame {}'.format(
                (night, expid, bandcam)))
            nwave = len(wave[band])
            nspec = frame.flux.shape[0]
            flux = np.zeros((nspec, nwave), dtype='f4')
            ivar = np.zeros((nspec, nwave), dtype='f4')
            mask = np.zeros((nspec, nwave), dtype='u4') + specmask.NODATA
            resolution_data = np.zeros((nspec, ndiag[band], nwave), dtype='f4')

            #- Copy the header and correct the camera keyword
            if type(frame.meta) is fits.header.Header:
                header = fitsio.FITSHDR( OrderedDict(frame.meta) )
            else:
                header = fitsio.FITSHDR(frame.meta)
            header['camera'] = bandcam

            #- Make new blank scores, replacing trailing band _B/R/Z
            if frame.scores is not None:
                dtype = [ ('TARGETID', 'i8') ]
                for name in frame.scores.dtype.names:
                    if name.endswith('_'+frameband.upper()):
                        bandname = name[0:-1] + band.upper()
                        dtype.append((bandname, type(frame.scores[name][0])))

                scores = np.zeros(nspec, dtype=dtype)
                scores['TARGETID'] = frame.scores['TARGETID']
            else:
                scores = None

            #- Add the blank FrameLite object
            frames[(night,expid,bandcam)] = FrameLite(
                wave[band], flux, ivar, mask, resolution_data,
                frame.fibermap, header, scores)



[docs]
def frames2spectra(frames, pix=None, nside=64, onetile=False):
    '''
    Combine a dict of FrameLite into a SpectraLite for healpix `pix`

    Args:
        frames: list or dict of FrameLight, keyed by (night, expid, camera)

    Options:
        pix: only include targets in this NESTED healpix pixel number
        nside: Healpix nside, must be power of 2
        onetile: All frames come from the same tile (impacts keyword propagation)

    Returns:
        SpectraLite object with subset of spectra from frames that are in
        the requested healpix pixel `pix`

    If `onetile` is True, propagate fibermap header keywords that apply
    to a single tile, e.g. TILEID and fiberassign keywords
    '''
    log = get_logger()

    #- support list or tuple instead of dict as input
    if isinstance(frames, (list, tuple)):
        framedict = dict()
        for fr in frames:
            night = fr.meta['NIGHT']
            expid = fr.meta['EXPID']
            camera = fr.meta['CAMERA']
            framedict[(night, expid, camera)] = fr

        frames = framedict

    #- shallow copy of frames dict in case we augment with blank frames
    frames = frames.copy()

    if pix is not None:
        log.info(f'Filtering by nside={nside} nested healpix={pix}')

    #- To support combining old+new data, recalculate TSNR2 if any
    #- frames are missing TSNR2* scores present in other frames of same bad.
    #- Assume longest list per camera is the one we want, because we also
    #- need to preserve order.  Ugh.
    frame_scores_columns = dict()
    scores_columns = dict(b=[], r=[], z=[])
    for (night,expid,cam), frame in frames.items():
        brz = cam.lower()[0]
        cols = frame.scores.dtype.names
        frame_scores_columns[(night,expid,cam)] = cols
        if len(cols) > len(scores_columns[brz]):
            scores_columns[brz] = cols

    for (night,expid,cam), frame in frames.items():
        brz = cam.lower()[0]
        if frame_scores_columns[(night,expid,cam)] != scores_columns[brz]:
            log.warning(f'Recalculating TSNR2 for ({night},{expid},{cam})')
            s = Table(frame.scores)
            tsnr2, alpha = calc_tsnr2_cframe(frame)
            for key in tsnr2:
                s[key] = tsnr2[key]

            #- standardize order
            frame.scores = np.array(s[scores_columns[brz]])

    #- Make sure that the given set of frames is complete
    #- If not, fill in missing cameras so that the stack works properly
    add_missing_frames(frames)

    #- Setup data structures to fill
    wave, flux, ivar, mask = dict(), dict(), dict(), dict()
    resolution_data, scores, fmaps = dict(), dict(), dict()

    #- Get the bands that exist in the input data
    #- identify all of the exposures for each band
    #- and instantiate some variables for the next loop
    bands, allkeys =  list(),  dict()
    for (night,expid,cam),frame in frames.items():
        band = cam[0]
        if band not in bands:
            bands.append(band)
            allkeys[band] = list()
            flux[band] = list()
            ivar[band] = list()
            mask[band] = list()
            resolution_data[band] = list()
            scores[band] = list()
            wave[band] = frame.wave
        if (cam[1],night,expid) not in fmaps.keys():
            fmaps[(cam[1],night,expid)] = dict()
        fmaps[(cam[1],night,expid)][band] = None
        allkeys[band].append((cam[1],night,expid,cam))

    bands = sorted(bands)
    for band in bands:
        assert len(allkeys[band]) != 0
        #- Select just the frames for this band using keys collected above
        #  Sort them so we are ordered by spectrograph, then night, then exposure
        band_keys = sorted(allkeys[band])

        #- Select flux, ivar, etc. for just the spectra on this healpix
        for spec,night,expid,cam in band_keys:
            bandframe = frames[(night,expid,cam)]
            if pix is not None:
                ra, dec = bandframe.fibermap['TARGET_RA'], bandframe.fibermap['TARGET_DEC']
                ok = ~np.isnan(ra) & ~np.isnan(dec)
                ra[~ok] = 0.0
                dec[~ok] = 0.0
                allpix = radec2pix(nside, ra, dec)
                ii = (allpix == pix) & ok
            else:
                ii = np.ones(bandframe.flux.shape[0]).astype(bool)

            #- Careful: very similar code below for non-filtered appending
            flux[band].append(bandframe.flux[ii])
            ivar[band].append(bandframe.ivar[ii])
            mask[band].append(bandframe.mask[ii])
            resolution_data[band].append(bandframe.resolution_data[ii])

            fmaps[(spec,night,expid)][band] = bandframe.fibermap[ii]

            if bandframe.scores is not None:
                scores[band].append(bandframe.scores[ii])


        flux[band] = np.vstack(flux[band])
        ivar[band] = np.vstack(ivar[band])
        mask[band] = np.vstack(mask[band])
        resolution_data[band] = np.vstack(resolution_data[band])

        if len(scores[band]) > 0:
            scores[band] = np.hstack(scores[band])

    #- Combine scores into a single table
    #- Why doesn't np.vstack work for this? (says invalid type promotion)
    if len(scores[bands[0]]) > 0:
        if len(bands) == 1:
            scores = scores[bands[0]]
        else:
            names = list()
            data = list()
            for band in bands[1:]:
                for colname in scores[band].dtype.names:
                    #- TARGETID appears in multiple bands; only add once
                    if colname != 'TARGETID':
                        names.append(colname)
                        data.append(scores[band][colname])
            scores = np.lib.recfunctions.append_fields(
                    scores[bands[0]], names, data)

    else:
        scores = None

    #- Go in order over all exposures, pick up fibermaps. When multiple, use the first
    #- but bitwise OR the fiberstatus.
    merged_over_cams_fmaps = list()
    sorted_keys = sorted(list(fmaps.keys()))
    for key in sorted_keys:
        banddict = fmaps[key]
        outmap = None
        for band in bands:
            if banddict[band] is None:
                continue
            if outmap is None:
                outmap = banddict[band]
            else:
                outmap['FIBERSTATUS'] |= banddict[band]['FIBERSTATUS']
        merged_over_cams_fmaps.append(outmap)

    #- Only keep fibermap keywords that apply to combined spectra, not individual exp
    general_keywords = [
            'SURVEY', 'PROGRAM', 'EXTNAME', 'LONGSTRN',
            'INSTRUME', 'OBSERVAT', 'OBS-LAT', 'OBS-LONG', 'OBS-ELEV', 'TELESCOP'
            ]

    tile_keywords = [
            'TILEID', 'TILERA', 'TILEDEC', 'FIELDROT',
            'FILEDROT', 'FA_PLAN', 'FA_HA', 'FA_RUN', 'FA_M_GFA',
            'FA_M_PET', 'FA_M_POS', 'REQRA', 'REQDEC', 'FIELDNUM',
            'FA_VER', 'FA_SURV', 'FAFLAVOR', 'FAPRGRM', 'DESIROOT',
            'GFA', 'MTL', 'SCND', 'SCND2', 'SCND3', 'SCNDMTL', 'SKY',
            'SKYSUPP', 'TARG', 'TOO', 'FAARGS', 'FAOUTDIR',
            'NOWTIME', 'RUNDATE', 'PMCORR', 'PMTIME', 'MTLTIME',
            'OBSCON', 'GOALTIME', 'GOALTYPE', 'EBVFAC', 'SBPROF',
            'MINTFRAC', 'FASCRIPT', 'SVNDM', 'SVNMTL',
            ]

    if onetile:
        keep_keywords = general_keywords + tile_keywords
    else:
        keep_keywords = general_keywords

    for fm in merged_over_cams_fmaps:
        for key in list(fm.meta.keys()):
            if key not in keep_keywords:
                del fm.meta[key]

    #- Convert to Tables to facilitate column add/drop/reorder
    for i, fm in enumerate(merged_over_cams_fmaps):
        if not isinstance(fm, Table):
            merged_over_cams_fmaps[i] = Table(fm)

    #- Standardize all fibermaps to have the same columns in same order
    if len(merged_over_cams_fmaps) > 1:
        colnames = list(merged_over_cams_fmaps[0].colnames)
        for fm in merged_over_cams_fmaps[1:]:
            for drop in set(colnames) - set(fm.colnames):
                log.warning(f"Ignoring {drop}, missing from some fibermaps")
                colnames.remove(drop)
            #- Also warn about columns that were never in original list
            for drop in set(fm.colnames) - set(colnames):
                log.warning(f"Ignoring {drop}, missing from some fibermaps")

        for i in range(len(merged_over_cams_fmaps)):
            merged_over_cams_fmaps[i] = merged_over_cams_fmaps[i][colnames]

    #- Combine all the individual fibermaps from the exposures and spectrographs
    fibermap = vstack(merged_over_cams_fmaps)

    #- assemble_fibermap now sets NaN to 0,
    #- but reset here too if combining older data
    for col in [
        'FIBER_X', 'FIBER_Y',
        'DELTA_X', 'DELTA_Y',
        'FIBER_RA', 'FIBER_DEC',
        'GAIA_PHOT_G_MEAN_MAG',
        'GAIA_PHOT_BP_MEAN_MAG',
        'GAIA_PHOT_RP_MEAN_MAG',
        ]:
        ii = np.isnan(fibermap[col])
        if np.any(ii):
            n = np.sum(ii)
            log.warning(f'Setting {n} {col} NaN to 0.0')
            fibermap[col][ii] = 0.0

    return SpectraLite(bands, wave, flux, ivar, mask, resolution_data,
            fibermap, scores=scores)



[docs]
def update_frame_cache(frames, framekeys, specprod_dir=None):
    '''
    Update a cache of FrameLite objects to match requested frameskeys

    Args:
        frames: dict of FrameLite objects, keyed by (night, expid, camera)
        framekeys: list of desired (night, expid, camera)

    Updates `frames` in-place

    Notes:
        `frames` is dictionary, `framekeys` is list.
        When finished, the keys of `frames` match the entries in `framekeys`
    '''

    log = get_logger()

    #- Drop frames that we no longer need
    ndrop = 0
    for key in list(frames.keys()):
        if key not in framekeys:
            ndrop += 1
            del frames[key]

    nkeep = len(frames)

    #- Read and add the new frames that we do need
    nadd = 0
    for key in framekeys:
        if key not in frames.keys():
            night, expid, camera = key
            framefile = io.findfile('cframe', night, expid, camera,
                    specprod_dir=specprod_dir, readonly=True)
            log.debug('  Reading {}'.format(os.path.basename(framefile)))
            nadd += 1
            frames[key] = FrameLite.read(framefile)

    log.debug('Frame cache: {} kept, {} added, {} dropped, now have {}'.format(
         nkeep, nadd, ndrop, len(frames)))