Source code for desispec.scripts.reformat_exptables

"""
desispec.scripts.reformat_exptables
===================================

"""
import os
import sys
import numpy as np
import re
import time

from desispec.workflow.exptable import get_exposure_table_path, \
                                       get_exposure_table_name, \
                                       default_obstypes_for_exptable,\
                                       night_to_month, \
                                       get_exposure_table_column_defaults
from desispec.workflow.utils import define_variable_from_environment, listpath, \
                                    pathjoin
from desispec.workflow.tableio import write_table, load_table
from desispec.scripts.exposuretable import create_exposure_tables



## Directory names that look like a night in the 2020's, i.e. 202YMMDD
_NIGHT_DIR_PATTERN = re.compile(r'^202\d{5}$')


def _parse_requested_nights(nights):
    """Convert an explicit nights specification into a list of integer nights.

    Args:
        nights: str, int, or iterable. A comma separated string of nights,
            a single night, or an iterable of nights.

    Returns:
        list of int. One entry per requested night, in the order given.

    Raises:
        ValueError: if an entry cannot be converted to an integer.
    """
    if isinstance(nights, str):
        return [int(val.strip()) for val in nights.split(",")]
    if np.isscalar(nights):
        return [int(nights)]
    return [int(val) for val in nights]


def _parse_night_range(night_range):
    """Validate a 'YYYYMMDD,YYYYMMDD' string and return it as two integers.

    Args:
        night_range: str. Comma separated pair first_night,last_night.
            Whitespace around either night is ignored.

    Returns:
        tuple (first_night, last_night) of int.

    Raises:
        ValueError: if the input is not exactly two comma separated
            all-digit values.
    """
    pair = [val.strip() for val in str(night_range).split(',')]
    if len(pair) != 2 or not all(val.isdecimal() for val in pair):
        raise ValueError("night_range must be a comma separated pair of "
                         + "nights in form YYYYMMDD,YYYYMMDD")
    return int(pair[0]), int(pair[1])


def _swap_extension(pathname, old_ext, new_ext):
    """Return pathname with its trailing '.old_ext' replaced by '.new_ext'.

    Only the end of the path is rewritten, so a directory component that
    happens to contain '.old_ext' is left alone. If pathname does not end
    with '.old_ext' this falls back to replacing every occurrence.
    """
    old_suffix = f".{old_ext}"
    new_suffix = f".{new_ext}"
    if pathname.endswith(old_suffix):
        return pathname[:len(pathname) - len(old_suffix)] + new_suffix
    return pathname.replace(old_suffix, new_suffix)


def _reconcile_with_original(newtable, origtable, night):
    """Copy user-defined values from origtable into newtable, in place.

    Rows are matched on EXPID. For every column the two tables share, a
    value in the original that differs from both the column default and
    the regenerated value is kept: scalars overwrite the new value, array
    values are merged with the new value as a union of non-empty entries.
    Three kinds of outdated entries are dropped from the original value
    before that comparison (see the inline comments).

    Args:
        newtable: table. Freshly generated exposure table; modified in place.
        origtable: table. Exposure table previously saved on disk.
        night: int. Night being processed; only used in printed messages.
    """
    mutual_colnames = [col for col in newtable.colnames
                       if col in origtable.colnames]
    coldefs = get_exposure_table_column_defaults(asdict=True)
    new_has_efftime = 'EFFTIME_ETC' in newtable.colnames
    orig_has_expflag = 'EXPFLAG' in origtable.colnames

    for newloc, expid in enumerate(newtable['EXPID']):
        ## Match to the row in the original table
        matches = np.where(origtable['EXPID'] == expid)[0]
        if len(matches) > 1:
            print(f"ERROR on night {night}: found more than one exposure"
                  + f" matching expid {expid}")
            continue
        if len(matches) == 0:
            print(f"New exposure identified: {newtable[newloc]}")
            continue
        origloc = matches[0]

        ## A positive EFFTIME_ETC in the new table is what marks the old
        ## 'aborted' flag and its exptime comment as outdated
        has_efftime = new_has_efftime and newtable['EFFTIME_ETC'][newloc] > 0.

        ## For colnames that the two tables share, compare values.
        for col in mutual_colnames:
            raw_origval = origtable[col][origloc]
            origval = raw_origval
            newval = newtable[col][newloc]

            ## Clean up three special cases of bad flags/comments in early data
            if col == 'EXPFLAG' and has_efftime and 'aborted' in origval:
                origorigval = origval.copy()
                origval = origval[np.where(origval != 'aborted')]
                print("Identified outdated aborted exposure flag. "
                      + "Removing that. Original set: "
                      + f"{origorigval}, Updated origset: {origval}")

            if col == 'COMMENTS' and has_efftime and orig_has_expflag \
                    and 'aborted' in origtable['EXPFLAG'][origloc]:
                origorigval = origval.copy()
                valcheck = np.array([('For EXPTIME:' not in val)
                                     for val in origval])
                origval = origval[valcheck]
                print("Identified outdated aborted exptime COMMENT. "
                      + "Removing that. Original set: "
                      + f"{origorigval}, Updated origset: {origval}")

            if col == 'HEADERERR' and 'PURPOSE:->' in origval:
                origorigval = origval.copy()
                valcheck = (np.array(origval) != 'PURPOSE:->')
                origval = origval[valcheck]
                print("Identified outdated PURPOSE null->null HEADERERR."
                      + " Removing that. Original set: "
                      + f"{origorigval}, Updated origset: {origval}")

            ## If columns differ and original isn't a default value,
            ## then take the original user-defined value
            if np.isscalar(raw_origval):
                if origval != coldefs[col] and newval != origval:
                    print(f"Difference detected for Night {night}, exp {expid}, "
                          + f"col {col}: orig={origval}, new={newval}. "
                          + "Taking the original value. ")
                    newtable[col][newloc] = origval
            elif not np.array_equal(origval, coldefs[col]) \
                    and not np.array_equal(newval, origval):
                print(f"Difference detected for Night {night}, exp {expid}, "
                      + f"col {col}: orig={origval}, new={newval}. "
                      + "Taking union of the two arrays.")
                ## New entries first, then any original entries not
                ## already present; empty strings are dropped
                combined_val = newval[newval != '']
                for val in origval:
                    if val != '' and val not in newval:
                        combined_val = np.append(combined_val, [val])
                newtable[col][newloc] = combined_val


def update_exposure_tables(nights=None, night_range=None, path_to_data=None,
                           exp_table_path=None, obstypes=None,
                           orig_filetype='csv', out_filetype='csv',
                           verbose=False, no_specprod=False, dry_run=False):
    """
    Generates updated exposure tables for the nights requested. Requires
    exposure tables to exist on disk.

    For each night a fresh table is generated with create_exposure_tables
    into a temporary file, user-defined values found in the existing table
    are carried over, and the existing table is then archived and replaced
    (unless dry_run is set). The temporary file is removed afterwards, also
    when processing of the night fails.

    Args:
        nights: str, int, or comma separated list. The night(s) to generate
            exposure tables for. Use 'all' (or None together with a
            night_range) to select every night found in path_to_data.
        night_range: str. comma separated pair of nights in form
            YYYYMMDD,YYYYMMDD for first_night,last_night specifying the
            beginning and end of a range of nights to be generated.
            last_night should be inclusive.
        path_to_data: str. The path to the raw data and request*.json and
            manifest* files. If None it is resolved from the
            DESI_SPECTRO_DATA environment variable.
        exp_table_path: str. Full path to where to exposure tables should be
            saved, WITHOUT the monthly directory included.
        obstypes: str. The exposure OBSTYPE's that you want to include in
            the exposure table. Can be a comma separated list.
        orig_filetype: str. The file extension (without the '.') of the
            exposure tables.
        out_filetype: str. The file extension for the outputted exposure
            tables (without the '.').
        verbose: boolean. Whether to give verbose output information or not.
            True prints more information.
        no_specprod: boolean. Create exposure table in repository location
            rather than the SPECPROD location
        dry_run: boolean. If True, print the table that would be written and
            a cell-by-cell comparison with the freshly generated table, but
            do not archive or overwrite the existing exposure table.

    Returns:
        Nothing

    Raises:
        ValueError: if neither nights nor night_range is given, or if either
            of them is malformed.
        AssertionError: if the regenerated table has fewer rows than, or is
            missing exposures from, the original table (restricted to the
            requested obstypes).
    """
    ## Make sure user specified what nights to run on
    if nights is None and night_range is None:
        raise ValueError("Must specify either nights or night_range."
                         + " To process all nights give nights=all")

    ## Validate the user input before touching the file system
    night_bounds = None
    if night_range is not None:
        night_bounds = _parse_night_range(night_range)
    use_all_nights = nights is None \
        or (isinstance(nights, str) and nights.strip() == 'all')
    requested_nights = None
    if not use_all_nights:
        requested_nights = _parse_requested_nights(nights)

    ## Define where to find the data
    if path_to_data is None:
        path_to_data = define_variable_from_environment(env_name='DESI_SPECTRO_DATA',
                                                        var_descr="The data path")

    ## Get all nights in 2020's with data
    nights_with_data = [n for n in listpath(path_to_data)
                        if _NIGHT_DIR_PATTERN.match(n)]
    available_nights = set(nights_with_data)

    ## If unspecified or given "all", set nights to all nights with data.
    ## Nights derived from disk don't need to be checked again; explicitly
    ## requested nights do.
    if use_all_nights:
        nights = [int(night) for night in nights_with_data]
    else:
        nights = requested_nights
    check_night = not use_all_nights
    nights = np.array(nights, dtype=int)

    ## If user specified a night range, cut nights to that range of dates
    if night_bounds is not None:
        first_night, last_night = night_bounds
        nights = nights[(nights >= first_night) & (nights <= last_night)]

    ## Parse the obstypes of the input
    if obstypes is not None:
        obstypes = [val.strip('\t ') for val in obstypes.split(",")]
    else:
        obstypes = default_obstypes_for_exptable()
    obstypes_str = ','.join(obstypes)

    ## Define where to save the data
    usespecprod = (not no_specprod)
    if exp_table_path is None:
        exp_table_path = get_exposure_table_path(night=None, usespecprod=usespecprod)

    ## Tell user the final list of nights and starting looping over them
    print("Nights: ", nights)
    for night in nights:
        if check_night and str(night) not in available_nights:
            print(f'Night {night} not in data directory: {path_to_data}. Skipping')
            continue

        ## Define where we should be looking for the exposure tables
        month = night_to_month(night)
        exptab_path = pathjoin(exp_table_path, month)
        orig_name = get_exposure_table_name(night, extension=orig_filetype)
        orig_pathname = pathjoin(exptab_path, orig_name)

        ## If the exposure table doesn't exist, skip, since we are updating
        ## not generating.
        if not os.path.exists(orig_pathname):
            print(f'Could not find exposure table for night={night} at:'
                  + f' {orig_pathname}. Skipping this night.')
            continue

        ## Create a temporary file pathname
        temp_filetype = f"updatetemp.{out_filetype}"
        temp_pathname = _swap_extension(orig_pathname, orig_filetype, temp_filetype)

        try:
            ## Create a fresh version of the exposure table using the current
            ## code and save it to the temporary pathname
            create_exposure_tables(nights=str(night), night_range=None,
                                   path_to_data=path_to_data,
                                   exp_table_path=exp_table_path,
                                   obstypes=obstypes_str,
                                   exp_filetype=temp_filetype,
                                   cameras=None, bad_cameras=None, badamps=None,
                                   verbose=verbose, no_specprod=no_specprod,
                                   overwrite_files=False)

            ## Load the old and new tables to compare
            newtable = load_table(temp_pathname, tabletype='exptab',
                                  use_specprod=usespecprod)
            origtable = load_table(orig_pathname, tabletype='exptab',
                                   use_specprod=usespecprod)

            ## Print some useful information and do some sanity checks that
            ## the new table has as much or more data than the old
            print(f"\n\nNumber of rows in original: {len(origtable)}"
                  + f", Number of rows in new: {len(newtable)}")

            if 'OBSTYPE' in origtable.colnames \
                    and not set(origtable['OBSTYPE']).issubset(set(obstypes)):
                subset_rows = np.array([obs in obstypes
                                        for obs in origtable['OBSTYPE']])
                subset_orig = origtable[subset_rows]
            else:
                subset_orig = origtable

            ## Raised explicitly rather than via `assert` so that these
            ## guards against losing exposures are not stripped by python -O
            if len(newtable) < len(subset_orig):
                raise AssertionError("Tables for given obstypes must be of "
                                     + "greater or equal length")
            new_expids = set(newtable['EXPID'])
            if not all(exp in new_expids for exp in subset_orig['EXPID']):
                raise AssertionError("All old exposures of given obstype must "
                                     + "be present in the new table")

            ## Go through exposure by exposure and check each columns value
            ## in the new vs the original
            _reconcile_with_original(newtable, origtable, night)

            ## If just testing, print the table and a cell-by-cell equality
            ## test for the scalar columns
            ## If not testing, move the original table to an archived filename
            ## and save the updated table to the official exptable pathname
            if dry_run:
                print("\n\nOutput file would have been:")
                newtable.pprint_all()
                names = [col for col in newtable.colnames
                         if col not in ['HEADERERR', 'EXPFLAG', 'COMMENTS']]
                t1 = newtable[names]
                t2 = load_table(temp_pathname, tabletype='exptab',
                                use_specprod=usespecprod)[names]
                t1.values_equal(t2).pprint_all()
            else:
                ftime = time.strftime("%Y%m%d_%Hh%Mm")
                replaced_pathname = _swap_extension(orig_pathname, orig_filetype,
                                                    f"replaced-{ftime}.{orig_filetype}")
                print(f"Moving original file from {orig_pathname} to {replaced_pathname}")
                os.rename(orig_pathname, replaced_pathname)
                time.sleep(0.1)
                out_pathname = _swap_extension(orig_pathname, orig_filetype,
                                               out_filetype)
                write_table(newtable, out_pathname)
                print(f"Updated file saved to {out_pathname}. "
                      + f"Original archived as {replaced_pathname}")
        finally:
            ## Cleanup the temporary table created with the fresh version of
            ## the create_exposure_table script, even if this night failed.
            ## NOTE(review): create_exposure_tables is called with
            ## overwrite_files=False, so a leftover temporary file would
            ## presumably be reused on the next run -- confirm.
            if os.path.exists(temp_pathname):
                os.remove(temp_pathname)
                print(f"Removed the temporary file {temp_pathname}")

        print("\n\n")

        ## Flush the outputs
        sys.stdout.flush()
        sys.stderr.flush()

    print("Exposure table regenerations complete")