Calibration Smoothing¶
by Josh Dillon, last updated February 26, 2025
This notebook runs calibration smoothing to the gains coming out of file_calibration notebook. It removes any flags founds on by that notebook and replaces them with flags generated from full_day_rfi and full_day_antenna_flagging. It also plots the results for a couple of antennas.
Here's a set of links to skip to particular figures and tables:
• Figure 1: Identifying and Blacklisting abscal Failures¶
• Figure 2: Full-Day Gain Amplitudes Before and After smooth_cal¶
• Figure 3: Full-Day Gain Phases Before and After smooth_cal¶
• Figure 4: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶
• Figure 5: Average $\chi^2$ per Antenna vs. Time and Frequency¶
import time
tstart = time.time()
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML
Parse inputs¶
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459867/zen.2459867.43004.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 30.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 1e4)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
PER_POL_REFANT = os.environ.get("PER_POL_REFANT", "False").upper() == "TRUE"
BLACKLIST_TIMESCALE_FACTOR = float(os.environ.get("BLACKLIST_TIMESCALE_FACTOR", 10))
BLACKLIST_RELATIVE_ERROR_THRESH = float(os.environ.get("BLACKLIST_RELATIVE_ERROR_THRESH", 1))
BLACKLIST_RELATIVE_WEIGHT = float(os.environ.get("BLACKLIST_RELATIVE_WEIGHT", 0.1))
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
                'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF', 
                'PER_POL_REFANT', 'BLACKLIST_TIMESCALE_FACTOR', 'BLACKLIST_RELATIVE_ERROR_THRESH', 
                'BLACKLIST_RELATIVE_WEIGHT', 'FM_LOW_FREQ', 'FM_HIGH_FREQ',]:
    if issubclass(type(eval(setting)), str):
        print(f'{setting} = "{eval(setting)}"')
    else:
        print(f'{setting} = {eval(setting)}')
SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459870/zen.2459870.25293.sum.uvh5" SUM_SUFFIX = "sum.uvh5" CAL_SUFFIX = "sum.omni.calfits" SMOOTH_CAL_SUFFIX = "sum.smooth.calfits" ANT_FLAG_SUFFIX = "sum.antenna_flags.h5" RFI_FLAG_SUFFIX = "sum.flag_waterfall.h5" FREQ_SMOOTHING_SCALE = 30.0 TIME_SMOOTHING_SCALE = 10000.0 EIGENVAL_CUTOFF = 1e-12 PER_POL_REFANT = False BLACKLIST_TIMESCALE_FACTOR = 10.0 BLACKLIST_RELATIVE_ERROR_THRESH = 1.0 BLACKLIST_RELATIVE_WEIGHT = 0.1 FM_LOW_FREQ = 87.5 FM_HIGH_FREQ = 108.0
Load files and select reference antenna(s)¶
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1862 *.sum.omni.calfits files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459870/zen.2459870.25293.sum.omni.calfits.
rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')
Found 1862 *.sum.flag_waterfall.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459870/zen.2459870.25293.sum.flag_waterfall.h5.
ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')
Found 1862 *.sum.antenna_flags.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459870/zen.2459870.25293.sum.antenna_flags.h5.
cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files),
                                    ignore_calflags=True, pick_refant=False, load_chisq=True, load_cspa=True)
cs.refant = smooth_cal.pick_reference_antenna(cs.gain_grids, cs.flag_grids, cs.freqs, per_pol=True)
for pol in cs.refant:
    print(f'Reference antenna {cs.refant[pol][0]} selected for smoothing {pol} gains.')
if not PER_POL_REFANT:
    # in this case, rephase both pols separately before smoothing, but also smooth the relative polarization calibration phasor
    overall_refant = smooth_cal.pick_reference_antenna({ant: cs.gain_grids[ant] for ant in cs.refant.values()}, 
                                                       {ant: cs.flag_grids[ant] for ant in cs.refant.values()}, 
                                                       cs.freqs, per_pol=False)
    print(f'Overall reference antenna {overall_refant} selected.')
    other_refant = [ant for ant in cs.refant.values() if ant != overall_refant][0]
    relative_pol_phasor = cs.gain_grids[overall_refant] * cs.gain_grids[other_refant].conj() # TODO: is this conjugation right?
    relative_pol_phasor /= np.abs(relative_pol_phasor)
Reference antenna 85 selected for smoothing Jee gains. Reference antenna 151 selected for smoothing Jnn gains.
Overall reference antenna (np.int64(85), 'Jee') selected.
# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags in zip(antnums, flags_per_antnum) if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
                  and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {}
for pol in ['Jee', 'Jnn']:
    for antnum in ants_to_plot:
        refant_here = (cs.refant[pol] if PER_POL_REFANT else overall_refant)
        abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] * np.abs(cs.gain_grids[refant_here]) / cs.gain_grids[refant_here]
cs.rephase_to_refant(propagate_refant_flags=True)
lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24
Find consistent outliers in relative error after a coarse smoothing¶
These are typically a sign of failures of abscal.
relative_error_samples = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
sum_relative_error = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
# perform a 2D DPSS filter with a BLACKLIST_TIMESCALE_FACTOR longer timescale, averaging the results per-pol
for ant in cs.gain_grids:
    if np.all(cs.flag_grids[ant]):
        continue
    filtered, _ = smooth_cal.time_freq_2D_filter(gains=cs.gain_grids[ant], 
                                                 wgts=(~cs.flag_grids[ant]).astype(float),
                                                 freqs=cs.freqs,
                                                 times=cs.time_grid,
                                                 freq_scale=FREQ_SMOOTHING_SCALE,
                                                 time_scale=TIME_SMOOTHING_SCALE * BLACKLIST_TIMESCALE_FACTOR,
                                                 eigenval_cutoff=EIGENVAL_CUTOFF,
                                                 method='DPSS', 
                                                 fit_method='lu_solve', 
                                                 fix_phase_flips=True, 
                                                 flag_phase_flip_ints=True,
                                                 skip_flagged_edges=True, 
                                                 freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],
                                                ) 
    relative_error = np.where(cs.flag_grids[ant], 0, np.abs(cs.gain_grids[ant] - filtered) / np.abs(filtered))
    relative_error_samples[ant[1]] += (~cs.flag_grids[ant]).astype(float)
    sum_relative_error[ant[1]] += relative_error
# figure out per-antpol cuts for where to set weights to 0 for the main smooth_cal (but not necessarily flags)
cs.blacklist_wgt = BLACKLIST_RELATIVE_WEIGHT
for pol in ['Jee', 'Jnn']:
    avg_rel_error = sum_relative_error[pol] / relative_error_samples[pol]
    to_blacklist = np.where(relative_error_samples[pol] > 0, avg_rel_error > BLACKLIST_RELATIVE_ERROR_THRESH, False)
    for ant in cs.ants:
        if ant[1] == pol:
            cs.waterfall_blacklist[ant] = to_blacklist
invalid value encountered in divide
def plot_relative_error():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig, axes = plt.subplots(1, 3, figsize=(14, 7))
        extent = [cs.freqs[0] / 1e6, cs.freqs[-1] / 1e6, lst_grid[-1], lst_grid[0]]
        cmap = plt.get_cmap('Greys', 256)
        cmap.set_over('red')
        for ax, pol in zip(axes[0:2], ['Jee', 'Jnn']):
            to_plot = sum_relative_error[pol] / relative_error_samples[pol]
            im = ax.imshow(np.where(np.isfinite(to_plot), to_plot, np.nan), aspect='auto', interpolation='none', 
                           vmin=0, vmax=BLACKLIST_RELATIVE_ERROR_THRESH, extent=extent, cmap=cmap)
            ax.set_title(pol)
            ax.set_yticklabels(ax.get_yticks() % 24)
            ax.set_ylabel('LST (hours)')
            ax.set_xlabel('Frequency (MHz)')
        plt.colorbar(im, ax=axes[0:2], location='top', extend='max', label='Average Relative Error on Initial Smoothing')
        
        for pol in ['Jee', 'Jnn']:
            axes[2].hist((sum_relative_error[pol] / relative_error_samples[pol]).ravel(), bins=np.arange(0,2,.01), alpha=.5, label=pol)
        axes[2].set_yscale('log')
        axes[2].set_ylabel('Number of Waterfall Pixels')
        axes[2].set_xlabel('Relative Error')
        axes[2].axvline(BLACKLIST_RELATIVE_ERROR_THRESH, ls='--', c='r', label='Blacklist Threshold')
        axes[2].legend()
Figure 1: Identifying and Blacklisting abscal Failures¶
This plot highlights regions of the waterfall that are per-polarization blacklisted (i.e. given 0 weight in the main smooth_cal fit, but not necessarily flagged). This is usually a sign of problems with abscal and often occurs because
plot_relative_error()
Perform smoothing¶
if not PER_POL_REFANT:
    # treat the relative_pol_phasor as if it were antenna -1
    cs.gain_grids[(-1, other_refant[1])] = relative_pol_phasor
    cs.flag_grids[(-1, other_refant[1])] = cs.flag_grids[overall_refant] | cs.flag_grids[other_refant]
    cs.waterfall_blacklist[(-1, other_refant[1])] = cs.waterfall_blacklist[cs.ants[0][0], 'Jee'] | cs.waterfall_blacklist[cs.ants[0][0], 'Jnn'] 
cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE,
                       time_scale=TIME_SMOOTHING_SCALE,
                       eigenval_cutoff=EIGENVAL_CUTOFF,
                       method='DPSS', 
                       fit_method='lu_solve',
                       fix_phase_flips=True,
                       flag_phase_flip_ints=True,
                       skip_flagged_edges=True,
                       freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],
                      )
1 phase flips detected on antenna (np.int64(144), 'Jnn'). A total of 2189 integrations were phase-flipped relative to the 0th integration between 2459870.424619694 and 2459870.6693433714.
if not PER_POL_REFANT:
    # put back in the smoothed phasor, ensuring the amplitude is 1 and that data are flagged anywhere either polarization's refant is flagged
    smoothed_relative_pol_phasor = cs.gain_grids[(-1, other_refant[-1])] / np.abs(cs.gain_grids[(-1, other_refant[-1])])
    for ant in cs.gain_grids:
        if ant[0] >= 0 and ant[1] == other_refant[1]:
            cs.gain_grids[ant] /= smoothed_relative_pol_phasor
        cs.flag_grids[ant] |= (cs.flag_grids[(-1, other_refant[1])])
    cs.refant = overall_refant
Plot results¶
def amplitude_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Pick vmax to not saturate 90% of the abscal gains
        vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])
        display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))    
        # Plot abscal gain amplitude waterfalls for a single antenna
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
        # Now flagged plot abscal waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot mean gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')
        # Now plot mean gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)
            ax.legend(loc='upper left')
        plt.tight_layout()
        plt.show()    
def phase_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")    
        display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        
        # Plot phase waterfalls for a single antenna    
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
        # Now plot abscal phase waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Abscal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot median gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.legend(loc='upper left')
        # # Now plot median gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])    
            ax.set_xlabel('LST (hours)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xticklabels(ax.get_xticks() % 24)    
            ax.legend(loc='upper left')
        plt.tight_layout()
        plt.show()
Figure 2: Full-Day Gain Amplitudes Before and After smooth_cal¶
Here we plot abscal and smooth_cal gain amplitudes for both of the sample antennas. We also show means across time/frequency, excluding frequencies/times that are frequently flagged.
for ant_to_plot in ants_to_plot:
    amplitude_plot(ant_to_plot)
Antenna 7 Amplitude Waterfalls
Antenna 239 Amplitude Waterfalls
Figure 3: Full-Day Gain Phases Before and After smooth_cal¶
Here we plot abscal and smooth_cal phases relative to each polarization's reference antenna for both of the sample antennas. We also show medians across time/frequency, excluding frequencies/times that are frequently flagged.
for ant_to_plot in ants_to_plot:
    phase_plot(ant_to_plot)
Antenna 7 Phase Waterfalls
Antenna 239 Phase Waterfalls
Examine $\chi^2$¶
def chisq_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
    extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
        im = ax.imshow(np.where(cs.flag_grids[refant], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5, 
                       aspect='auto', cmap='turbo', interpolation='none', extent=extent)
        ax.set_yticklabels(ax.get_yticks() % 24)
        ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
        ax.set_xlabel('Frequency (MHz)')
    axes[0].set_ylabel('LST (hours)')
    plt.tight_layout()
    fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)
Figure 4: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶
Here we plot $\chi^2$ per degree of freedom from redundant-baseline calibration for both polarizations separately. While this plot is a little out of place, as it was not produced by this notebook, it is a convenient place where all the necessary components are readily available. If the array were perfectly redundant and any non-redundancies in the calibrated visibilities were explicable by thermal noise alone, this waterfall should be all 1.
chisq_plot()
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
Mean of empty slice
Mean of empty slice
def cspa_vs_time_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_time:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], label=ant, zorder=100)
                else:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)
    plt.ylim([1, 5.4])
    plt.tight_layout()
def cspa_vs_freq_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_freq:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=ant, zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('Frequency (MHz)')
    plt.ylim([1, 5.4])
    plt.tight_layout()
Figure 5: Average $\chi^2$ per Antenna vs. Time and Frequency¶
Here we plot $\chi^2$ per antenna from redundant-baseline calibration, separating polarizations and averaging the unflagged pixels in the waterfalls over frequency or time. The worst 5% of antennas are shown in color and highlighted in the legends, the rest are shown in grey.
cspa_vs_time_plot()
cspa_vs_freq_plot()
Mean of empty slice Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11. To keep the current behavior, cast the sequence to string before passing. set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. Mean of empty slice Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11. To keep the current behavior, cast the sequence to string before passing.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. Mean of empty slice Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11. To keep the current behavior, cast the sequence to string before passing.
Save Results¶
add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)
Mean of empty slice
Metadata¶
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.7.5.dev34+geabd8f66 hera_qm: 2.2.1.dev4+gf6d0211 hera_filters: 0.1.7.dev1+gcbb4f93
hera_notebook_templates: 0.1.dev1138+g6fe413d pyuvdata: 3.2.1
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 190.80 minutes.