Single Baseline LST-Stacker and Re-Inpainter¶
by Josh Dillon and Tyler Cox, last updated September 17, 2025
This notebook performs LST-stacking (a.k.a. LST-binning) of whole-JD, single-baseline, all-polarization files. Most parameters are controlled by a toml config file, such as this one. In addition to the single-baseline files, this notebook also requires UVFlag-compatible where_inpainted files, which tell us where inpainting was previously done.
In addition to LST-stacking, which includes rephasing to a common grid, this notebook also performs re-inpainting. Data that are outliers among the other nights (in terms of a high modified $z$-score) and had previously been inpainted are now re-inpainted (on a whole-band, single-integration basis) using information from other nights, as well as feathering to prevent discontinuities. Next, a bit of "ex-painting" is done to ensure that all nights span the same frequency range (or are completely flagged). This is again informed by what's going on on other nights. Despite the potentially large amount of inpainting, inpainted data are considered to have Nsamples=0 in the final LST-stacked data products.
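For intuition, the "feathering" used throughout is a logistic taper in each channel's distance to the nearest unflagged channel, so re-inpainted models blend smoothly into the surrounding data rather than switching on abruptly. Here is a minimal sketch of those relative weights, using parameters (INPAINT_DELAY, INPAINT_WIDTH_FACTOR, INPAINT_ZERO_DIST_WEIGHT) and the channel width df that are defined later in this notebook; distances is stand-in example data:
import numpy as np
distances = np.arange(50)  # example: each channel's distance to the nearest unflagged channel
width = (1e-9 * INPAINT_DELAY)**-1 / df * INPAINT_WIDTH_FACTOR  # inpainting kernel scale, in channels
# logistic feather: ~INPAINT_ZERO_DIST_WEIGHT at distance 0, exactly 0.5 at `width`, approaching 1 far from flags
rel_weights = (1 + np.exp(-np.log(INPAINT_ZERO_DIST_WEIGHT**-1 - 1) / width * (distances - width)))**-1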
Finally, this notebook performs an optional per-night FR=0 filter, under the theory that per-night FR=0 systematics might vary from night to night in a way that can be mitigated with per-night filtering. This data product is saved separately.
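Schematically, that FR=0 filter fits a DPSS model confined to fringe rates within ±FR0_HALFWIDTH mHz of zero along the time axis and subtracts it, one night at a time. Here's a sketch of the call pattern, mirroring the FR=0 Filter section below; wf and wgts are stand-ins for one night's single-polarization waterfall and weights, and the other names are defined later in this notebook:
from hera_filters.dspec import fourier_filter
fr0_mdl, _, _ = fourier_filter(bin_times * 24 * 3600,  # times in seconds
                               wf, wgts=wgts, filter_centers=[0],
                               filter_half_widths=[FR0_HALFWIDTH / 1000],  # mHz -> Hz
                               mode='dpss_solve', eigenval_cutoff=[EIGENVAL_CUTOFF],
                               suppression_factors=[EIGENVAL_CUTOFF], filter_dims=0)
wf_filtered = wf - fr0_mdl  # remove FR~0 structure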
Here's a set of links to skip to particular figures and tables:
• Figure 1: East-Polarized LST-Stacked Amplitude, Phase, and Nsamples after Re-Inpainting¶
• Figure 2: North-Polarized LST-Stacked Amplitude, Phase, and Nsamples after Re-Inpainting¶
• Figure 3: Modified z-Score Across Nights, Before and After Re-Inpainting¶
import time
tstart = time.time()
!hostname
herapost002
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin # REQUIRED to have the compression plugins available
import numpy as np
import scipy
import copy
import toml
from astropy import units
from functools import reduce
import matplotlib.pyplot as plt
import matplotlib
import warnings
from pyuvdata import UVData
from hera_cal import lst_stack, utils, io, flag_utils
from hera_cal.lst_stack import calibration
from hera_cal.frf import sky_frates, get_FR_buffer_from_spectra
from hera_cal.lst_stack.binning import SingleBaselineStacker
from hera_qm.time_series_metrics import true_stretches
from hera_filters.dspec import fourier_filter, dpss_operator, sparse_linear_fit_2D
from IPython.display import display, HTML
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))
_ = np.seterr(all='ignore') # get rid of red warnings
%config InlineBackend.figure_format = 'retina'
Parse Options¶
toml_file = os.environ.get('TOML_FILE', '/lustre/aoc/projects/hera/h6c-analysis/IDR3/src/hera_pipelines/pipelines/h6c/idr3/v1/lstbin/single_bl_lst_stack.toml')
print(f'toml_file = "{toml_file}"')
baseline_string = os.environ.get('BASELINE_STRING', None)
print(f'baseline_string = "{baseline_string}"')
PRELIMINARY = (os.environ.get('PRELIMINARY', "FALSE").upper() == "TRUE")
print(f'PRELIMINARY = {PRELIMINARY}')
toml_file = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/makeflow-lststack/IDR3/single_bl_lst_stack.toml" baseline_string = "0_4" PRELIMINARY = True
# get options from toml file, print them out, and update globals
toml_options = toml.load(toml_file)
print(f"Now setting the following global variables from {toml_file}:\n")
globals().update({'lst_branch_cut': toml_options['FILE_CFG']['lst_branch_cut']})
print(f"lst_branch_cut = {lst_branch_cut}")
globals().update({'where_inpainted_file_rules': toml_options['FILE_CFG']['where_inpainted_file_rules']})
print(f"where_inpainted_file_rules = {where_inpainted_file_rules}")
if PRELIMINARY:
# this is used for an initial stacking of a handful of baselines, which are then used for LSTCal
toml_options['LST_STACK_OPTS']['FNAME_FORMAT'] = toml_options['LST_STACK_OPTS']['FNAME_FORMAT'].replace('.sum.uvh5', '.preliminary.sum.uvh5')
for key, val in toml_options['LSTCAL_OPTS'].items():
if isinstance(val, str):
print(f'{key} = "{val}"')
else:
print(f'{key} = {val}')
globals().update(toml_options['LSTCAL_OPTS'])
for key, val in toml_options['LST_STACK_OPTS'].items():
if isinstance(val, str):
print(f'{key} = "{val}"')
else:
print(f'{key} = {val}')
globals().update(toml_options['LST_STACK_OPTS'])
Now setting the following global variables from /lustre/aoc/projects/hera/h6c-analysis/IDR3/makeflow-lststack/IDR3/single_bl_lst_stack.toml:
lst_branch_cut = 5.2
where_inpainted_file_rules = [['.inpainted.uvh5', '.where_inpainted.h5']]
NBLS_FOR_LSTCAL = 30
RUN_AMPLITUDE_CAL = True
RUN_TIP_TILT_PHASE_CAL = True
RUN_CROSS_POL_PHASE_CAL = True
INCLUDE_AUTOS = False
FREQ_SMOOTHING_SCALE = 30.0
TIME_SMOOTHING_SCALE = 2500
BLACKLIST_TIMESCALE_FACTOR = 10
BLACKLIST_RELATIVE_ERROR_THRESH = 0.25
WHERE_INPAINTED_WGTS = 0.0001
LSTCAL_FNAME_FORMAT = "single_bl_reinpaint_1000ns/zen.{night}.lstcal.hdf5"
OUTDIR = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/lststack-outputs-full-season"
FNAME_FORMAT = "single_bl_reinpaint_1000ns/zen.LST.baseline.{bl_str}.preliminary.sum.uvh5"
USE_LSTCAL_GAINS = True
FM_LOW_FREQ = 87.5
FM_HIGH_FREQ = 108.0
EIGENVAL_CUTOFF = 1e-12
INPAINT_DELAY = 1000
AUTO_INPAINT_DELAY = 100
REINPAINT_AUTOS = False
INPAINT_WIDTH_FACTOR = 0.5
INPAINT_ZERO_DIST_WEIGHT = 0.01
MIN_NIGHTS_PER_BIN = 3
MAX_CHANNEL_NIGHTLY_FLAG_FRAC = 0.25
MOD_Z_TO_REINPAINT = 5
AUTO_FR_SPECTRUM_FILE = "/lustre/aoc/projects/hera/zmartino/hera_frf/spectra_cache/spectra_cache_hera_auto.h5"
GAUSS_FIT_BUFFER_CUT = 1e-05
CG_TOL = 1e-06
CG_ITER_LIM = 500
FR0_FILTER = True
FR0_HALFWIDTH = 0.01
# figure out outfiles
OUTFILE = os.path.join(OUTDIR, FNAME_FORMAT.replace('{bl_str}', baseline_string))
print(f'OUTFILE = "{OUTFILE}"')
if FR0_FILTER:
FR0_FILT_OUTFILE = OUTFILE.replace('.uvh5', '.FR0filt.uvh5')
print(f'FR0_FILT_OUTFILE = "{FR0_FILT_OUTFILE}"')
# if necessary, create the output directory
if not os.path.exists(os.path.dirname(OUTFILE)):
os.makedirs(os.path.dirname(OUTFILE), exist_ok=True)
OUTFILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/lststack-outputs-full-season/single_bl_reinpaint_1000ns/zen.LST.baseline.0_4.preliminary.sum.uvh5" FR0_FILT_OUTFILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/lststack-outputs-full-season/single_bl_reinpaint_1000ns/zen.LST.baseline.0_4.preliminary.sum.FR0filt.uvh5"
Load Data¶
# build configurator from toml file
configurator = lst_stack.config.LSTBinConfiguratorSingleBaseline.from_toml(toml_file)
if not PRELIMINARY and USE_LSTCAL_GAINS:
configurator.build_visfile_to_calfile_map(
os.path.join(OUTDIR, LSTCAL_FNAME_FORMAT)
)
cal_file_loader = calibration.load_single_baseline_lstcal_gains
else:
cal_file_loader = None
auto_baseline_string = [s for s in configurator.bl_to_file_map if (p := s.split('_'))[0] == p[1]][0]
# get key data properties from a single file
hd = io.HERAData(configurator.bl_to_file_map[auto_baseline_string][0])
pol_convention = hd.pol_convention
df = np.median(np.diff(hd.freqs))
dlst = np.median(np.diff(hd.lsts))
lst_grid = lst_stack.config.make_lst_grid(dlst, begin_lst=0, lst_width=(2 * np.pi))
lst_bin_edges = np.concatenate([lst_grid - dlst / 2, (lst_grid[-1] + dlst / 2)[None]])
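# bin edges straddle each LST grid center by +/- dlst / 2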
low_band = slice(0, np.searchsorted(hd.freqs, FM_LOW_FREQ * 1e6))
high_band = slice(np.searchsorted(hd.freqs, FM_HIGH_FREQ * 1e6), len(hd.freqs))
# load autocorrelations for weights
autos = SingleBaselineStacker.from_configurator(configurator,
auto_baseline_string,
lst_bin_edges,
lst_branch_cut=lst_branch_cut,
where_inpainted_file_rules=where_inpainted_file_rules,
cal_file_loader=cal_file_loader)
# compute average autocorrelations across nights
lst_avg_auto_data, lst_avg_auto_flags, lst_avg_auto_nsamples = autos.average_over_nights(inpainted_data_are_samples=True)
slice_kept = copy.deepcopy(autos.slice_kept)
auto_hd = copy.deepcopy(autos.hd)
auto_times_in_bins = copy.deepcopy(autos.times_in_bins)
auto_bin_lst = autos.bin_lst.copy()
del autos
Getting antpos from the first file only. This is almost always correct, but will be wrong if different files have different antenna_position arrays.
crosses = SingleBaselineStacker.from_configurator(configurator,
baseline_string,
lst_bin_edges,
lst_branch_cut=lst_branch_cut,
to_keep_slice=slice_kept,
where_inpainted_file_rules=where_inpainted_file_rules,
cal_file_loader=cal_file_loader)
Getting antpos from the first file only. This is almost always correct, but will be wrong if different files have different antenna_position arrays.
# convert bin_lsts to JD, starting with the first day
lat, lon, alt = hd.telescope.location_lat_lon_alt_degrees
bin_times = utils.LST2JD(auto_bin_lst, int(auto_times_in_bins[0][0]), allow_other_jd=True, latitude=lat, longitude=lon, altitude=alt)
Perform re-inpainting of data with high modified z-score relative to other nights¶
# compute modified z-scores across nights for each night
mod_zs = []
modz_const = 2**.5 * scipy.special.erfinv(.5)
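# modz_const = Phi^-1(3/4) ~ 0.6745; for Gaussian data MAD ~ 0.6745 * sigma, making mod-z comparable to a standard z-score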
for d, f in list(zip(crosses.data, crosses.flags)):
ma = np.ma.array(d, mask=f)
med = np.ma.median(ma, axis=0, keepdims=True)
MAD = np.ma.median(np.abs(ma - med), axis=0, keepdims=True)
mod_zs.append(modz_const * (ma - med) / MAD)
# perform Fourier filtering with cached DPSS operators
CACHE = {}
def freq_filter(freqs, data, wgts, filter_half_widths=[INPAINT_DELAY * 1e-9], eigenval_cutoff=[EIGENVAL_CUTOFF]):
'''Thin wrapper around hera_filters.dspec.fourier_filter'''
return fourier_filter(freqs,
data,
wgts=wgts,
filter_centers=[0],
filter_half_widths=filter_half_widths,
mode='dpss_solve',
eigenval_cutoff=eigenval_cutoff,
suppression_factors=eigenval_cutoff,
max_contiguous_edge_flags=len(hd.freqs),
filter_dims=1,
cache_solver_products=False,
cache=CACHE)
# re-inpaint inpainted data with high z-scores
# Get antennas that make up the baseline
ai, aj = list(map(int, baseline_string.split('_')))
if ai != aj or REINPAINT_AUTOS:
for d, f, n, aa, mod_z, wip in list(zip(crosses.data, crosses.flags, crosses.nsamples, lst_avg_auto_data, mod_zs, crosses.where_inpainted)):
enough_nights = np.sum(~f, axis=0) >= MIN_NIGHTS_PER_BIN
f[:, ~enough_nights] = True # flag bins with not enough nights
to_reip = np.zeros_like(f)
for pol in crosses.hd.pols:
pidx = crosses.hd.pols.index(pol)
# get indices for indexing into autocorrelations for weights
p1, p2 = utils.split_pol(pol)
pidx1 = crosses.hd.pols.index(utils.join_pol(p1, p1))
pidx2 = crosses.hd.pols.index(utils.join_pol(p2, p2))
for band in [low_band, high_band]:
for tidx in range(mod_z.shape[0]):
if np.any(wip[tidx, band, pidx] &
(~f[tidx, band, pidx]) &
enough_nights[band, pidx] &
(np.abs(mod_z.data[tidx, band, pidx]) > MOD_Z_TO_REINPAINT)):
to_reip[tidx, band, pidx] = True
if np.any(to_reip[:, band, pidx]):
# weighted average data across nights, excluding nights that are to be reinpainted
nights_with_reip = np.any(to_reip[:, band, pidx], axis=1)
if np.all(f[~nights_with_reip, band, pidx]):
f[:, band, :] = True # there are no unflagged nights with consistently good z-scores, so flag the whole integration
continue
avg_data_here = np.nansum(np.where(f[~nights_with_reip, band, pidx], np.nan,
d[~nights_with_reip, band, pidx] * n[~nights_with_reip, band, pidx]), axis=0)
avg_data_here /= np.sum(np.where(f[~nights_with_reip, band, pidx], 0,
n[~nights_with_reip, band, pidx]), axis=0)
# fit that average with DPSS
samples_here = np.sum([n[tidx, band, pidx] * ~f[tidx, band, pidx]
for tidx in range(n.shape[0])], axis=0)
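# weights approximate inverse noise variance, since sigma^2 for V_ij scales as auto_ii * auto_jj / nsamples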
wgts = np.where(~np.isfinite(avg_data_here), 0, aa[band, pidx1]**-1 * aa[band, pidx2]**-1 * samples_here)
avg_mdl, *_ = freq_filter(hd.freqs[band], np.where(~np.isfinite(avg_data_here), 0, avg_data_here), wgts=wgts)
# perform re-inpainting with feathered weights
for tidx in range(d.shape[0]):
if np.any(to_reip[tidx, band, pidx]):
# figure out feathered weights
distances = flag_utils.distance_to_nearest_nonzero(~wip[tidx, band, pidx])
width = (1e-9 * INPAINT_DELAY)**-1 / df * INPAINT_WIDTH_FACTOR
rel_weights = (1 + np.exp(-np.log(INPAINT_ZERO_DIST_WEIGHT**-1 - 1) / width * (distances - width)))**-1
wgts_here = np.where(wip[tidx, band, pidx], wgts * rel_weights, wgts)
# re-inpaint with DPSS
to_fit = np.where(wip[tidx, band, pidx] | f[tidx, band, pidx],
avg_mdl, d[tidx, band, pidx])
ts = true_stretches(wgts_here > 0)[0]
assert len(true_stretches(wgts_here > 0)) == 1, "Expected only one stretch of non-zero wgts_here"
reip_mdl = np.zeros_like(to_fit)
reip_mdl[ts], *_ = freq_filter(hd.freqs[band][ts], to_fit[ts], wgts=wgts_here[ts])
d[tidx, band, pidx] = np.where(wip[tidx, band, pidx], reip_mdl, d[tidx, band, pidx])
# flag channels that are too often flagged across nights
fully_flagged = np.array([np.all(f, axis=0) for f in crosses.flags])
for pidx in range(fully_flagged.shape[-1]):
for band in [low_band, high_band]:
tslice = flag_utils.get_minimal_slices(fully_flagged[:, band, pidx])[0][0]
if tslice is not None:
too_often_flagged_chans = np.mean(fully_flagged[tslice, band, pidx], axis=0) > MAX_CHANNEL_NIGHTLY_FLAG_FRAC
for f in crosses.flags:
f[:, band, pidx] |= too_often_flagged_chans
# if the vast majority of the waterfall (typically defined by other pols) is flagged
if np.mean(fully_flagged[:, band, pidx]) > 0.95:
for f in crosses.flags:
f[:, band, :] = True
# Harmonize flags across polarization
for band in [low_band, high_band]:
for fi, f in enumerate(crosses.flags):
if np.any(np.all(f[:, band], axis=(0, 1))):
f[:, band, :] = True
all_flagged = np.array([np.all(f, axis=0) for f in crosses.flags])
def _max_abs_or_nan(mz: np.ndarray) -> np.ndarray:
if mz.shape[0] == 0:
return np.full(mz.shape[1:], np.nan)
return np.max(np.abs(mz), axis=0)
# compute summary statistics for modified z-scores, then delete them to save memory
max_mod_z_before = np.where(all_flagged, np.nan, np.array([_max_abs_or_nan(mz) for mz in mod_zs]))
del mod_zs
Expaint band edges (i.e. use other nights to extrapolate)¶
for d, f, n, aa, wip in list(zip(crosses.data, crosses.flags, crosses.nsamples, lst_avg_auto_data, crosses.where_inpainted)):
for pol in crosses.hd.pols:
pidx = crosses.hd.pols.index(pol)
# get indices for indexing into autocorrelations for weights
p1, p2 = utils.split_pol(pol)
pidx1 = crosses.hd.pols.index(utils.join_pol(p1, p1))
pidx2 = crosses.hd.pols.index(utils.join_pol(p2, p2))
for band in [low_band, high_band]:
d_here, f_here, n_here = d[:, band, pidx], f[:, band, pidx], n[:, band, pidx]
if np.all(f_here):
continue
night_to_last_unflagged = {}
night_to_first_unflagged = {}
for tidx in range(f.shape[0]):
if np.all(f[tidx, band, pidx]):
continue
# find the first and last unflagged channels on this night, for this band and pol
unflagged_here = ~f[tidx, band, pidx]
night_to_first_unflagged[tidx] = unflagged_here.argmax()
night_to_last_unflagged[tidx] = len(unflagged_here) - 1 - unflagged_here[::-1].argmax(axis=-1)
def _expaint_edge(to_fit_slice, nights_to_avg, tidx):
# average over nights with more data
avg_data = np.nansum(d_here[nights_to_avg, to_fit_slice] * n_here[nights_to_avg, to_fit_slice], axis=0)
avg_data /= np.sum(n_here[nights_to_avg, to_fit_slice], axis=0)
wgts = (aa[band, pidx1]**-1 * aa[band, pidx2]**-1)[to_fit_slice] # don't need nsamples, because it's flat
avg_mdl, *_ = freq_filter(hd.freqs[band][to_fit_slice], avg_data, wgts=wgts)
# perform re-inpainting with feathered weights
distances = flag_utils.distance_to_nearest_nonzero(~f_here[tidx, to_fit_slice])
width = (1e-9 * INPAINT_DELAY)**-1 / df * INPAINT_WIDTH_FACTOR
rel_weights = (1 + np.exp(-np.log(INPAINT_ZERO_DIST_WEIGHT**-1 - 1) / width * (distances - width)))**-1
wgts_here = np.where(f_here[tidx, to_fit_slice], wgts * rel_weights, wgts)
# re-inpaint with DPSS
to_fit = np.where(f_here[tidx, to_fit_slice], avg_mdl, d_here[tidx, to_fit_slice])
xp_mdl, *_ = freq_filter(hd.freqs[band][to_fit_slice], to_fit, wgts=wgts_here)
# modify data, flags, and where_inpainted arrays in place
freq_indices = np.arange(len(hd.freqs))[band][to_fit_slice]
d[tidx, freq_indices, pidx] = np.where(f_here[tidx, to_fit_slice], xp_mdl, d_here[tidx, to_fit_slice])
f[tidx, freq_indices, pidx] = False
wip[tidx, freq_indices, pidx] = True
# first, perform ex-painting at the top of the band (extending nights whose last unflagged channel falls short of the latest across nights)
sorted_nights = sorted(night_to_last_unflagged.keys(), key=lambda x: night_to_last_unflagged[x], reverse=True)
target_last_unflagged = night_to_last_unflagged[sorted_nights[0]]
for i, tidx in enumerate(sorted_nights):
if night_to_last_unflagged[tidx] == target_last_unflagged:
continue # no additional extrapolation necessary
# figure out which channels to fit on this particular night to get a good model to expaint with
Nchans_to_fit = (target_last_unflagged - night_to_last_unflagged[tidx])
if Nchans_to_fit < (2 / (INPAINT_DELAY * 1e-9) / df):
Nchans_to_fit += int(np.ceil(2 / (INPAINT_DELAY * 1e-9) / df))
else:
Nchans_to_fit *= 2
to_fit_slice = slice(target_last_unflagged - Nchans_to_fit + 1, target_last_unflagged + 1)
_expaint_edge(to_fit_slice, sorted_nights[:i], tidx)
# now, perform ex-painting at the bottom of the band (extending nights whose first unflagged channel starts after the earliest across nights)
sorted_nights = sorted(night_to_first_unflagged.keys(), key=lambda x: night_to_first_unflagged[x])
target_first_unflagged = night_to_first_unflagged[sorted_nights[0]]
for i, tidx in enumerate(sorted_nights):
if night_to_first_unflagged[tidx] == target_first_unflagged:
continue # no additional extrapolation necessary
# figure out which channels to fit on this particular night to get a good model to expaint with
Nchans_to_fit = (night_to_first_unflagged[tidx] - target_first_unflagged)
if Nchans_to_fit < (2 / (INPAINT_DELAY * 1e-9) / df):
Nchans_to_fit += int(np.ceil(2 / (INPAINT_DELAY * 1e-9) / df))
else:
Nchans_to_fit *= 2
to_fit_slice = slice(target_first_unflagged, target_first_unflagged + Nchans_to_fit)
_expaint_edge(to_fit_slice, sorted_nights[:i], tidx)
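To keep the DPSS fits above well-conditioned, the window of channels to fit is padded whenever the stretch to extrapolate is shorter than two delay-widths of the inpainting kernel, 2 / (INPAINT_DELAY · df). As a quick worked number (a sketch, assuming HERA H6C's ≈122.07 kHz channel spacing for df):
min_pad = int(np.ceil(2 / (INPAINT_DELAY * 1e-9) / df))  # 2 / (1000 ns * ~122.07 kHz) = ceil(~16.4) = 17 channels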
2D-informed expainting to get even band edges¶
FR_CENTER_AND_HW_CACHE = {}
def cache_fr_center_and_hw(hd, antpair, tslice, band):
'''Figures out the range of FRs in Hz spanned for a given band and tslice, buffered by the size of the autocorrelation FR kernel,
and stores the result in FR_CENTER_AND_HW_CACHE (if it hasn't already been computed).'''
if (tslice is not None) and (band is not None) and ((antpair, tslice, band) not in FR_CENTER_AND_HW_CACHE):
# calculate fringe rate center and half-width and then update cache
fr_buffer = get_FR_buffer_from_spectra(AUTO_FR_SPECTRUM_FILE, hd.times[tslice], hd.freqs[band],
gauss_fit_buffer_cut=GAUSS_FIT_BUFFER_CUT)
hd_here = hd.select(inplace=False, frequencies=hd.freqs[band])
fr_center = list(sky_frates(hd_here)[0].values())[0] / 1e3 # converts to Hz
fr_hw = (list(sky_frates(hd_here)[1].values())[0] + fr_buffer) / 1e3
FR_CENTER_AND_HW_CACHE[(antpair, tslice, band)] = fr_center, fr_hw
def fit_2D_DPSS(data, weights, filter_delay, tslices, bands, **kwargs):
'''Fit a 2D DPSS model to all the baselines in data. The time-dimension basis is set by sky FRs
and the FR spectrum of the autos. fr_centers and fr_hws are drawn from FR_CENTER_AND_HW_CACHE.
Arguments:
data: datacontainer mapping baselines to complex visibility waterfalls
weights: datacontainer mapping baselines to real weight waterfalls.
filter_delay: maximum delay in ns for the 2D filter
tslices: dictionary mapping bl to time slices corresponding to low and high bands
bands: dictionary mapping bl to low band and high band frequency slices
kwargs: kwargs to pass into sparse_linear_fit_2D()
Returns:
dpss_fit: datacontainer mapping baselines to 2D DPSS models
'''
dpss_fit = copy.deepcopy(data)
for bl in data.keys():
# set to all nans by default
dpss_fit[bl] *= np.nan
for tslice, band in zip(tslices[bl], bands[bl]):
if (tslice is None) or (band is None) or np.all(weights[bl][tslice, band] == 0):
continue
# perform 2D DPSS filter
fr_center, fr_hw = FR_CENTER_AND_HW_CACHE[(bl[0:2], tslice, band)]
time_filters, _ = dpss_operator((data.times[tslice] - data.times[tslice][0]) * 3600 * 24,
[fr_center], [fr_hw], eigenval_cutoff=[EIGENVAL_CUTOFF])
freq_filters, _ = dpss_operator(data.freqs[band], [0.0], [filter_delay / 1e9], eigenval_cutoff=[EIGENVAL_CUTOFF])
fit, meta = sparse_linear_fit_2D(
data=data[bl][tslice, band],
weights=weights[bl][tslice, band],
axis_1_basis=time_filters,
axis_2_basis=freq_filters,
precondition_solver=True,
iter_lim=CG_ITER_LIM,
**kwargs,
)
dpss_fit[bl][tslice, band] = time_filters.dot(fit).dot(freq_filters.T)
return dpss_fit
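A hypothetical call pattern for fit_2D_DPSS (data, wgts, tslices, and bands here are placeholder names, not variables defined in this notebook); note that FR_CENTER_AND_HW_CACHE must be populated before fitting:
for bl in data:
    for tslice, band in zip(tslices[bl], bands[bl]):
        cache_fr_center_and_hw(hd, bl[:2], tslice, band)  # no-op for None slices or already-cached keys
dpss_models = fit_2D_DPSS(data, wgts, INPAINT_DELAY, tslices, bands, atol=CG_TOL, btol=CG_TOL)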
nnights_unflagged = np.array([np.sum((~f[:, :, :]).astype(float), axis=0) for f in crosses.flags])
ntimes_unflagged = np.sum(nnights_unflagged, axis=0)
# perform feathered ex-painting on all pols, bands (top and bottom), and nights
for pol in crosses.hd.pols:
pidx = crosses.hd.pols.index(pol)
# get indices for indexing into autocorrelations for weights
p1, p2 = utils.split_pol(pol)
pidx1 = crosses.hd.pols.index(utils.join_pol(p1, p1))
pidx2 = crosses.hd.pols.index(utils.join_pol(p2, p2))
for band in [low_band, high_band]:
# find the range of frequencies that could need expainting
max_unflagged = np.max(ntimes_unflagged[band, pidx])
if max_unflagged == 0:
continue
first_unflagged_channel = np.where(ntimes_unflagged[band, pidx] > 0)[0][0]
first_minimally_flagged_channel = np.where(ntimes_unflagged[band, pidx] == max_unflagged)[0][0]
last_minimally_flagged_channel = np.where(ntimes_unflagged[band, pidx] == max_unflagged)[0][-1]
last_unflagged_channel = np.where(ntimes_unflagged[band, pidx] > 0)[0][-1]
tslice = flag_utils.get_minimal_slices(nnights_unflagged[:, band, pidx] == 0)[0][0]
def _expaint_2D(fslice):
'''Fits a 2D DPSS model to the data we do have, then performs feathered ex-painting'''
cache_fr_center_and_hw(hd, hd.antpairs[0], tslice, fslice)
data_here = np.array([np.nansum(np.where(f[:, fslice, pidx], np.nan,
d[:, fslice, pidx] * n[:, fslice, pidx]), axis=0)
for d, f, n in zip(crosses.data, crosses.flags, crosses.nsamples)])[tslice]
nsamples_here = np.array([np.sum(n[:, fslice, pidx] * ~f[:, fslice, pidx], axis=0)
for f, n in zip(crosses.flags, crosses.nsamples)])[tslice]
data_here = np.where(nsamples_here > 0, data_here / nsamples_here, 0.0)
wgts_here = np.array([aa[fslice, pidx1]**-1 * aa[fslice, pidx2]**-1 for aa in lst_avg_auto_data])[tslice] * nsamples_here
wgts_here = np.where(np.isfinite(wgts_here), wgts_here, 0.0)
# perform 2D DPSS filter
fr_center, fr_hw = FR_CENTER_AND_HW_CACHE[(hd.antpairs[0], tslice, fslice)]
time_filters, _ = dpss_operator((bin_times[tslice] - bin_times[tslice][0]) * 3600 * 24,
[fr_center], [fr_hw], eigenval_cutoff=[EIGENVAL_CUTOFF])
freq_filters, _ = dpss_operator(hd.freqs[fslice], [0.0], [INPAINT_DELAY / 1e9], eigenval_cutoff=[EIGENVAL_CUTOFF])
fit, meta = sparse_linear_fit_2D(data=data_here, weights=wgts_here, precondition_solver=True,
axis_1_basis=time_filters, axis_2_basis=freq_filters,
iter_lim=CG_ITER_LIM, atol=CG_TOL, btol=CG_TOL)
dpss_fit = time_filters.dot(fit).dot(freq_filters.T)
# perform feathered expainting on a per LST and per night basis
for lidx, (d, f, aa, wip) in list(enumerate(zip(crosses.data, crosses.flags, lst_avg_auto_data, crosses.where_inpainted)))[tslice]:
for tidx in range(d.shape[0]):
if np.any(f[tidx, fslice, pidx]) and not np.all(f[tidx, fslice, pidx]):
wgts = aa[fslice, pidx1]**-1 * aa[fslice, pidx2]**-1 # don't need nsamples, because it's flat
wgts[~np.isfinite(wgts)] = np.min(wgts[np.isfinite(wgts)]) # handle case where we're expainting beyond autos
distances = flag_utils.distance_to_nearest_nonzero(~f[tidx, fslice, pidx])
width = (1e-9 * INPAINT_DELAY)**-1 / df * INPAINT_WIDTH_FACTOR
rel_weights = (1 + np.exp(-np.log(INPAINT_ZERO_DIST_WEIGHT**-1 - 1) / width * (distances - width)))**-1
to_fit = np.where(f[tidx, fslice, pidx], dpss_fit[lidx - tslice.start], d[tidx, fslice, pidx])
wgts = np.where(f[tidx, fslice, pidx], wgts * rel_weights, wgts)
xp_mdl, *_ = freq_filter(hd.freqs[fslice], to_fit, wgts=wgts)
# modify data, flags, and where_inpainted arrays in place
d[tidx, fslice, pidx] = np.where(f[tidx, fslice, pidx], xp_mdl, d[tidx, fslice, pidx])
wip[tidx, fslice, pidx] = np.where(f[tidx, fslice, pidx], True, wip[tidx, fslice, pidx])
f[tidx, fslice, pidx] = False
if first_unflagged_channel < first_minimally_flagged_channel:
Nchans_to_fit = first_minimally_flagged_channel - first_unflagged_channel
if Nchans_to_fit < (2 / (INPAINT_DELAY * 1e-9) / df):
Nchans_to_fit += int(np.ceil(2 / (INPAINT_DELAY * 1e-9) / df))
else:
Nchans_to_fit *= 2
fslice = slice(band.start + first_unflagged_channel,
min(band.start + first_unflagged_channel + Nchans_to_fit, band.stop))
_expaint_2D(fslice)
if last_minimally_flagged_channel < last_unflagged_channel:
Nchans_to_fit = last_unflagged_channel - last_minimally_flagged_channel
if Nchans_to_fit < (2 / (INPAINT_DELAY * 1e-9) / df):
Nchans_to_fit += int(np.ceil(2 / (INPAINT_DELAY * 1e-9) / df))
else:
Nchans_to_fit *= 2
fslice = slice(max(band.start + last_unflagged_channel + 1 - Nchans_to_fit, band.start),
band.start + last_unflagged_channel + 1)
_expaint_2D(fslice)
Mean of empty slice
Mean of empty slice
Now Actually Average Over Nights¶
lst_avg_data, lst_avg_flags, lst_avg_nsamples = crosses.average_over_nights()
mod_zs_after = []
modz_const = 2**.5 * scipy.special.erfinv(.5)
for d, f in list(zip(crosses.data, crosses.flags)):
ma = np.ma.array(d, mask=f)
med = np.ma.median(ma, axis=0, keepdims=True)
MAD = np.ma.median(np.abs(ma - med), axis=0, keepdims=True)
mod_zs_after.append(modz_const * (ma - med) / MAD)
# compute summary statistics for modified z-scores after re-inpainting, then delete them to save memory
max_mod_z_after = np.where(lst_avg_flags, np.nan, np.array([_max_abs_or_nan(mz) for mz in mod_zs_after]))
del mod_zs_after
FR=0 Filter¶
# compute 2D DPSS smooth autocorrelations to use for weighting without introducing spectral structure
if FR0_FILTER:
filtered_autos = np.full_like(lst_avg_auto_data, np.nan)
for pol in ['ee', 'nn']:
pidx = auto_hd.pols.index(pol)
tslices, fslices = flag_utils.get_minimal_slices(lst_avg_flags[:, :, pidx], freqs=auto_hd.freqs, freq_cuts=[(FM_HIGH_FREQ + FM_LOW_FREQ) * .5e6])
for tslice, fslice in zip(tslices, fslices):
if (tslice is None) or (fslice is None):
continue
cache_fr_center_and_hw(auto_hd, auto_hd.antpairs[0], tslice, fslice)
autos_here = lst_avg_auto_data[tslice, fslice, pidx]
nsamples_here = lst_avg_auto_nsamples[tslice, fslice, pidx]
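# inverse-variance-style weights: the radiometric noise variance of an autocorrelation scales as autos**2 / nsamples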
weights_here = autos_here**-2 * nsamples_here
fr_center, fr_hw = FR_CENTER_AND_HW_CACHE[(auto_hd.antpairs[0], tslice, fslice)]
time_filters, _ = dpss_operator((bin_times[tslice] - bin_times[tslice][0]) * 3600 * 24,
[fr_center], [fr_hw], eigenval_cutoff=[EIGENVAL_CUTOFF])
freq_filters, _ = dpss_operator(auto_hd.freqs[fslice], [0.0], [AUTO_INPAINT_DELAY / 1e9], eigenval_cutoff=[EIGENVAL_CUTOFF])
fit, meta = sparse_linear_fit_2D(data=autos_here, weights=weights_here, precondition_solver=True,
axis_1_basis=time_filters, axis_2_basis=freq_filters,
iter_lim=CG_ITER_LIM, atol=CG_TOL, btol=CG_TOL)
dpss_fit = time_filters.dot(fit).dot(freq_filters.T)
filtered_autos[tslice, fslice, pidx] = np.abs(dpss_fit)
# Perform FR=0 filter on crosses on a per-night basis
if FR0_FILTER:
for jd in [int(jd) for jd in crosses.configurator.nights]:
data_here, flags_here, nsamples_here, tindices = [], [], [], []
for d, f, n, tib in zip(crosses.data, crosses.flags, crosses.nsamples, crosses.times_in_bins):
# find the indices of the times that are in this JD
tidx = np.argwhere(np.floor(tib).astype(int) == jd)
if len(tidx) > 0:
data_here.append(np.where(f[tidx[0][0]], np.nan, d[tidx[0][0]]))
flags_here.append(f[tidx[0][0]])
nsamples_here.append(n[tidx[0][0]])
tindices.append(tidx[0][0])
else:
data_here.append(np.full(d.shape[1:], np.nan))
flags_here.append(np.full(f.shape[1:], True))
nsamples_here.append(np.zeros(n.shape[1:], dtype=n.dtype))
tindices.append(None)
data_here, flags_here, nsamples_here = np.array(data_here), np.array(flags_here), np.array(nsamples_here)
for pol in crosses.hd.pols:
pidx = crosses.hd.pols.index(pol)
# get indices for indexing into autocorrelations for weights
p1, p2 = utils.split_pol(pol)
pidx1 = crosses.hd.pols.index(utils.join_pol(p1, p1))
pidx2 = crosses.hd.pols.index(utils.join_pol(p2, p2))
weights_here = np.where(flags_here[:, :, pidx] | ~np.isfinite(filtered_autos[:, :, pidx1]) | ~np.isfinite(filtered_autos[:, :, pidx2]),
0, nsamples_here[:, :, pidx] * filtered_autos[:, :, pidx1]**-1 * filtered_autos[:, :, pidx2]**-1)
tslices, fslices = flag_utils.get_minimal_slices(flags_here[:, :, pidx], freqs=auto_hd.freqs, freq_cuts=[(FM_HIGH_FREQ + FM_LOW_FREQ) * .5e6])
for tslice, fslice in zip(tslices, fslices):
if (tslice is None) or (fslice is None):
continue
d_mdl, _, info = fourier_filter(bin_times[tslice] * 24 * 60 * 60,
np.where(weights_here[tslice, fslice] == 0, 0, data_here[tslice, fslice, pidx]),
wgts=weights_here[tslice, fslice],
filter_centers=[0],
filter_half_widths=[FR0_HALFWIDTH / 1000],
mode='dpss_solve',
eigenval_cutoff=[EIGENVAL_CUTOFF],
suppression_factors=[EIGENVAL_CUTOFF],
max_contiguous_edge_flags=len(bin_times[tslice]),
filter_dims=0)
data_here[tslice, fslice, pidx] -= d_mdl
# update data in crosses
for d, d_filt, tidx in zip(crosses.data, data_here, tindices):
if tidx is not None:
d[tidx, :, :] = d_filt
if FR0_FILTER:
lst_avg_fr0_filt_data, _, _ = crosses.average_over_nights()
Visualization¶
def plot_waterfall(data, flags, nsamples, freqs, lsts, bl_label):
'''Plots data (amplitude and phase) as well as nsamples waterfalls for a baseline.'''
if np.all(flags):
print('This waterfall is entirely flagged. Nothing to plot.')
return
lsts_in_hours = np.where(lsts > lsts[-1], lsts - 2 * np.pi, lsts) * 12 / np.pi  # unwrap past the branch cut, then convert radians to hours
extent = [freqs[0]/1e6, freqs[-1]/1e6, lsts_in_hours[-1], lsts_in_hours[0]]
fig, axes = plt.subplots(1, 3, figsize=(20, 12), sharex=True, sharey=True, gridspec_kw={'wspace': 0}, dpi=100)
im = axes[0].imshow(np.where(flags, np.nan, np.abs(data)), aspect='auto', norm=matplotlib.colors.LogNorm(), interpolation='none', cmap='inferno', extent=extent)
fig.colorbar(im, ax=axes[0], location='top', pad=.02).set_label(f'{bl_label}: Amplitude (Jy)', fontsize=16)
im = axes[1].imshow(np.where(flags, np.nan, np.angle(data)), aspect='auto', cmap='twilight', interpolation='none', extent=extent)
fig.colorbar(im, ax=axes[1], location='top', pad=.02).set_label(f'{bl_label}: Phase (Radians)', fontsize=16)
im = axes[2].imshow(np.where(flags, np.nan, nsamples), aspect='auto', interpolation='none', extent=extent)
fig.colorbar(im, ax=axes[2], location='top', pad=.02).set_label(f'{bl_label}: Number of Samples', fontsize=16)
plt.tight_layout()
axes[0].set_ylabel('LST (hours)')
for ax in axes:
ax.set_xlabel('Frequency (MHz)')
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
ax.set_yticklabels([f'{(int(val) if np.isclose(val, int(val)) else val) % 24:n}' for val in ax.get_yticks()])
plt.tight_layout()
Figure 1: East-Polarized LST-Stacked Amplitude, Phase, and Nsamples after Re-Inpainting¶
pidx = hd.pols.index('ee')
plot_waterfall(lst_avg_data[:, :, pidx], lst_avg_flags[:, :, pidx], lst_avg_nsamples[:, :, pidx],
hd.freqs, crosses.bin_lst, crosses.hd.antpairs[0] + ('ee',))