Calibration Smoothing¶

by Josh Dillon, last updated September 3, 2025

This notebook runs calibration smoothing to the gains coming out of file_calibration notebook. It removes any flags founds on by that notebook and replaces them with flags generated from full_day_rfi and full_day_antenna_flagging. It flags antennas with high relative difference between the original gains and smoothed gains. It also plots the results for a couple of antennas.

Here's a set of links to skip to particular figures and tables:

• Figure 1: Identifying and Blacklisting abscal Failures¶

• Figure 2: Antenna Phases with Identified Phase Flips¶

• Figure 3: Full-Day Gain Amplitudes Before and After smooth_cal¶

• Figure 4: Full-Day Gain Phases Before and After smooth_cal¶

• Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

• Figure 6: Average $\chi^2$ per Antenna¶

• Figure 7: Relative Difference Before and After Smoothing¶

In [1]:
import time
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML

Parse inputs¶

In [3]:
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459893/zen.2459893.25258.sum.uvh5"
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 30.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 1e4)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
PER_POL_REFANT = os.environ.get("PER_POL_REFANT", "False").upper() == "TRUE"
BLACKLIST_TIMESCALE_FACTOR = float(os.environ.get("BLACKLIST_TIMESCALE_FACTOR", 4.0))
BLACKLIST_RELATIVE_ERROR_THRESH = float(os.environ.get("BLACKLIST_RELATIVE_ERROR_THRESH", 1))
BLACKLIST_RELATIVE_WEIGHT = float(os.environ.get("BLACKLIST_RELATIVE_WEIGHT", 0.1))
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
SC_RELATIVE_DIFF_CUTOFF = float(os.environ.get("SC_RELATIVE_DIFF_CUTOFF", 0.2))

for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
                'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF', 
                'PER_POL_REFANT', 'BLACKLIST_TIMESCALE_FACTOR', 'BLACKLIST_RELATIVE_ERROR_THRESH', 
                'BLACKLIST_RELATIVE_WEIGHT', 'FM_LOW_FREQ', 'FM_HIGH_FREQ', 'SC_RELATIVE_DIFF_CUTOFF']:
    if issubclass(type(eval(setting)), str):
        print(f'{setting} = "{eval(setting)}"')
    else:
        print(f'{setting} = {eval(setting)}')
SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459903/zen.2459903.25274.sum.uvh5"
SUM_SUFFIX = "sum.uvh5"
CAL_SUFFIX = "sum.omni.calfits"
SMOOTH_CAL_SUFFIX = "sum.smooth.calfits"
ANT_FLAG_SUFFIX = "sum.antenna_flags.h5"
RFI_FLAG_SUFFIX = "sum.flag_waterfall.h5"
FREQ_SMOOTHING_SCALE = 30.0
TIME_SMOOTHING_SCALE = 10000.0
EIGENVAL_CUTOFF = 1e-12
PER_POL_REFANT = False
BLACKLIST_TIMESCALE_FACTOR = 4.0
BLACKLIST_RELATIVE_ERROR_THRESH = 1.0
BLACKLIST_RELATIVE_WEIGHT = 0.1
FM_LOW_FREQ = 87.5
FM_HIGH_FREQ = 108.0
SC_RELATIVE_DIFF_CUTOFF = 0.2

Load files and select reference antenna(s)¶

In [4]:
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1850 *.sum.omni.calfits files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459903/zen.2459903.25274.sum.omni.calfits.
In [5]:
rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')
Found 1850 *.sum.flag_waterfall.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459903/zen.2459903.25274.sum.flag_waterfall.h5.
In [6]:
ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')
Found 1850 *.sum.antenna_flags.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459903/zen.2459903.25274.sum.antenna_flags.h5.
In [7]:
cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files),
                                    ignore_calflags=True, pick_refant=False, load_chisq=True, load_cspa=True)
In [8]:
cs.refant = smooth_cal.pick_reference_antenna(cs.gain_grids, cs.flag_grids, cs.freqs, per_pol=True)
for pol in cs.refant:
    print(f'Reference antenna {cs.refant[pol][0]} selected for smoothing {pol} gains.')

if not PER_POL_REFANT:
    # in this case, rephase both pols separately before smoothing, but also smooth the relative polarization calibration phasor
    overall_refant = smooth_cal.pick_reference_antenna({ant: cs.gain_grids[ant] for ant in cs.refant.values()}, 
                                                       {ant: cs.flag_grids[ant] for ant in cs.refant.values()}, 
                                                       cs.freqs, per_pol=False)
    print(f'Overall reference antenna {overall_refant} selected.')
    other_refant = [ant for ant in cs.refant.values() if ant != overall_refant][0]

    relative_pol_phasor = cs.gain_grids[overall_refant] * cs.gain_grids[other_refant].conj() # TODO: is this conjugation right?
    relative_pol_phasor /= np.abs(relative_pol_phasor)

abscal_refants = {cs.refant[pol]: cs.gain_grids[cs.refant[pol]] for pol in ['Jee', 'Jnn']}
Reference antenna 129 selected for smoothing Jnn gains.
Reference antenna 202 selected for smoothing Jee gains.
Overall reference antenna (np.int64(202), 'Jee') selected.
In [9]:
cs.rephase_to_refant(propagate_refant_flags=True)
In [10]:
lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24

Find consistent outliers in relative error after a coarse smoothing¶

These are typically a sign of failures of abscal.

In [11]:
relative_error_samples = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
sum_relative_error = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
per_ant_avg_relative_error = {} 

# perform a 2D DPSS filter with a BLACKLIST_TIMESCALE_FACTOR longer timescale, averaging the results per-pol
for ant in cs.gain_grids:
    if np.all(cs.flag_grids[ant]):
        continue
    filtered, _ = smooth_cal.time_freq_2D_filter(gains=cs.gain_grids[ant], 
                                                 wgts=(~cs.flag_grids[ant]).astype(float),
                                                 freqs=cs.freqs,
                                                 times=cs.time_grid,
                                                 freq_scale=FREQ_SMOOTHING_SCALE,
                                                 time_scale=TIME_SMOOTHING_SCALE * BLACKLIST_TIMESCALE_FACTOR,
                                                 eigenval_cutoff=EIGENVAL_CUTOFF,
                                                 method='DPSS', 
                                                 fit_method='lu_solve', 
                                                 fix_phase_flips=True, 
                                                 flag_phase_flip_ints=True,
                                                 skip_flagged_edges=True, 
                                                 freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],
                                                ) 
    relative_error = np.where(cs.flag_grids[ant], 0, np.abs(cs.gain_grids[ant] - filtered) / np.abs(filtered))
    per_ant_avg_relative_error[ant] = np.nanmean(np.where(cs.flag_grids[ant], np.nan, relative_error))
    relative_error_samples[ant[1]] += (~cs.flag_grids[ant]).astype(float)
    sum_relative_error[ant[1]] += relative_error

# figure out per-antpol cuts for where to set weights to 0 for the main smooth_cal (but not necessarily flags)
cs.blacklist_wgt = BLACKLIST_RELATIVE_WEIGHT
for pol in ['Jee', 'Jnn']:
    avg_rel_error = sum_relative_error[pol] / relative_error_samples[pol]
    to_blacklist = np.where(relative_error_samples[pol] > 0, avg_rel_error > BLACKLIST_RELATIVE_ERROR_THRESH, False)
    for ant in cs.ants:
        if ant[1] == pol:
            cs.waterfall_blacklist[ant] = to_blacklist
invalid value encountered in divide
In [12]:
def plot_relative_error():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        fig, axes = plt.subplots(1, 3, figsize=(14, 7))
        extent = [cs.freqs[0] / 1e6, cs.freqs[-1] / 1e6, lst_grid[-1], lst_grid[0]]
        cmap = plt.get_cmap('Greys', 256)
        cmap.set_over('red')
        for ax, pol in zip(axes[0:2], ['Jee', 'Jnn']):
            to_plot = sum_relative_error[pol] / relative_error_samples[pol]
            im = ax.imshow(np.where(np.isfinite(to_plot), to_plot, np.nan), aspect='auto', interpolation='none', 
                           vmin=0, vmax=BLACKLIST_RELATIVE_ERROR_THRESH, extent=extent, cmap=cmap)
            ax.set_title(pol)
            ax.set_yticklabels(ax.get_yticks() % 24)
            ax.set_ylabel('LST (hours)')
            ax.set_xlabel('Frequency (MHz)')
        plt.colorbar(im, ax=axes[0:2], location='top', extend='max', label='Average Relative Error on Initial Smoothing')
        
        for pol in ['Jee', 'Jnn']:
            axes[2].hist((sum_relative_error[pol] / relative_error_samples[pol]).ravel(), bins=np.arange(0,2,.01), alpha=.5, label=pol)
        axes[2].set_yscale('log')
        axes[2].set_ylabel('Number of Waterfall Pixels')
        axes[2].set_xlabel('Relative Error')
        axes[2].axvline(BLACKLIST_RELATIVE_ERROR_THRESH, ls='--', c='r', label='Blacklist Threshold')
        axes[2].legend()

Figure 1: Identifying and Blacklisting abscal Failures¶

This plot highlights regions of the waterfall that are per-polarization blacklisted (i.e. given 0 weight in the main smooth_cal fit, but not necessarily flagged). This is usually a sign of problems with abscal and often occurs because

In [13]:
plot_relative_error()
No description has been provided for this image
In [14]:
# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
larger_relative_error = np.array([np.max([per_ant_avg_relative_error.get((ant, pol), np.inf) for pol in ['Jee', 'Jnn']]) for ant in antnums])
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags, rel_err in zip(antnums, flags_per_antnum, larger_relative_error) 
                  if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
                  and (rel_err <= SC_RELATIVE_DIFF_CUTOFF)
                  and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {}
for pol in ['Jee', 'Jnn']:
    for antnum in ants_to_plot:
        if PER_POL_REFANT:
            abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] * np.abs(abscal_refants[cs.refant[pol]]) / abscal_refants[cs.refant[pol]]
        else:
            abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] / np.abs(abscal_refants[cs.refant[pol]]) * abscal_refants[cs.refant[pol]]
            abscal_gains[antnum, pol] *= np.abs(abscal_refants[overall_refant]) / abscal_refants[overall_refant]

Perform smoothing¶

In [15]:
if not PER_POL_REFANT:
    # treat the relative_pol_phasor as if it were antenna -1
    cs.gain_grids[(-1, other_refant[1])] = relative_pol_phasor
    cs.flag_grids[(-1, other_refant[1])] = cs.flag_grids[overall_refant] | cs.flag_grids[other_refant]
    cs.waterfall_blacklist[(-1, other_refant[1])] = cs.waterfall_blacklist[cs.ants[0][0], 'Jee'] | cs.waterfall_blacklist[cs.ants[0][0], 'Jnn'] 
In [16]:
meta = cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE,
                              time_scale=TIME_SMOOTHING_SCALE,
                              eigenval_cutoff=EIGENVAL_CUTOFF,
                              method='DPSS', 
                              fit_method='lu_solve',
                              fix_phase_flips=True,
                              flag_phase_flip_ints=True,
                              skip_flagged_edges=True,
                              freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],)
1 phase flips detected on antenna (np.int64(144), 'Jnn'). A total of 3108 integrations were phase-flipped relative to the 0th integration between 2459903.3189503164 and 2459903.6664624126.
In [17]:
# Pick out antennas with too high relative differences before and after smoothing and flag them.
avg_relative_diffs = {ant: np.nanmean(rel_diff) for ant, rel_diff in meta['freq_avg_rel_diff'].items()}
to_cut = sorted([ant for ant, diff in avg_relative_diffs.items() if ant[0] >= 0 and diff > SC_RELATIVE_DIFF_CUTOFF])
if len(to_cut) > 0:
    for ant in to_cut:
        print(f'Flagging antenna {ant[0]}{ant[1][-1]} with a relative difference before and after smoothing of {avg_relative_diffs[ant]:.2%} '
              f'(compared to the {SC_RELATIVE_DIFF_CUTOFF:.2%} cutoff).')
        cs.flag_grids[ant] |= True
else:
    print(f'No antennas have a relative difference above the {SC_RELATIVE_DIFF_CUTOFF:.2%} cutoff.')
Flagging antenna 136n with a relative difference before and after smoothing of 20.23% (compared to the 20.00% cutoff).
In [18]:
if not PER_POL_REFANT:
    # put back in the smoothed phasor, ensuring the amplitude is 1 and that data are flagged anywhere either polarization's refant is flagged
    smoothed_relative_pol_phasor = cs.gain_grids[(-1, other_refant[-1])] / np.abs(cs.gain_grids[(-1, other_refant[-1])])
    for ant in cs.gain_grids:
        if ant[0] >= 0 and ant[1] == other_refant[1]:
            cs.gain_grids[ant] /= smoothed_relative_pol_phasor
        cs.flag_grids[ant] |= (cs.flag_grids[(-1, other_refant[1])])
    cs.refant = overall_refant
In [19]:
def phase_flip_diagnostic_plot():
    '''Shows time-smoothed antenna avg phases after taking out a delay and filtering in time.'''
    if not np.any([np.any(meta['phase_flipped'][ant]) for ant in meta['phase_flipped']]):
        print("No antennas have phase flips identified. Nothing to plot.")
        return
    
    plt.figure(figsize=(14,4))
    for ant in meta['phase_flipped']:
        if np.any(meta['phase_flipped'][ant]):
            plt.plot(cs.time_grid - int(cs.time_grid[0]), 
                     np.angle(np.exp(1.0j * (meta['phases'][ant] - meta['time_smoothed_phases'][ant]))), label=f'{ant[0]}{ant[1][-1]}')
    plt.legend(title='Antennas with Identified Phase Flips', ncol=4)
    plt.xlabel(f'JD - {int(cs.time_grid[0])}')
    plt.ylabel('Average Phase After Filtering (radians)')
    plt.tight_layout()

Figure 2: Antenna Phases with Identified Phase Flips¶

In [20]:
phase_flip_diagnostic_plot()
No description has been provided for this image

Plot results¶

In [21]:
def amplitude_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Pick vmax to not saturate 90% of the abscal gains
        vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])

        display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))    

        # Plot abscal gain amplitude waterfalls for a single antenna
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now flagged plot abscal waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot mean gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')

        # Now plot mean gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()    
In [22]:
def phase_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")    
        display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        
        # Plot phase waterfalls for a single antenna    
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)

            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now plot abscal phase waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Abscal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot median gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.legend(loc='upper left')

        # # Now plot median gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])    
            ax.set_xlabel('LST (hours)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xticklabels(ax.get_xticks() % 24)    
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()

Figure 3: Full-Day Gain Amplitudes Before and After smooth_cal¶

Here we plot abscal and smooth_cal gain amplitudes for both of the sample antennas. We also show means across time/frequency, excluding frequencies/times that are frequently flagged.

In [23]:
for ant_to_plot in ants_to_plot:
    amplitude_plot(ant_to_plot)

Antenna 5 Amplitude Waterfalls

No description has been provided for this image

Antenna 238 Amplitude Waterfalls

No description has been provided for this image

Figure 4: Full-Day Gain Phases Before and After smooth_cal¶

Here we plot abscal and smooth_cal phases relative to each polarization's reference antenna for both of the sample antennas. We also show medians across time/frequency, excluding frequencies/times that are frequently flagged.

In [24]:
for ant_to_plot in ants_to_plot:
    phase_plot(ant_to_plot)

Antenna 5 Phase Waterfalls

No description has been provided for this image

Antenna 238 Phase Waterfalls

No description has been provided for this image

Examine $\chi^2$¶

In [25]:
def chisq_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
    extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
        im = ax.imshow(np.where(cs.flag_grids[refant], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5, 
                       aspect='auto', cmap='turbo', interpolation='none', extent=extent)
        ax.set_yticklabels(ax.get_yticks() % 24)
        ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
        ax.set_xlabel('Frequency (MHz)')

    axes[0].set_ylabel('LST (hours)')
    plt.tight_layout()
    fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)

Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

Here we plot $\chi^2$ per degree of freedom from redundant-baseline calibration for both polarizations separately. While this plot is a little out of place, as it was not produced by this notebook, it is a convenient place where all the necessary components are readily available. If the array were perfectly redundant and any non-redundancies in the calibrated visibilities were explicable by thermal noise alone, this waterfall should be all 1.

In [26]:
chisq_plot()
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
No description has been provided for this image
In [27]:
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
avg_cspa = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant])) for ant in cs.ants}
Mean of empty slice
Mean of empty slice
Mean of empty slice
In [28]:
def cspa_vs_time_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_time:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)

    plt.ylim([1, 5.4])
    plt.tight_layout()
In [29]:
def cspa_vs_freq_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_freq:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('Frequency (MHz)')

    plt.ylim([1, 5.4])
    plt.tight_layout()
In [30]:
def avg_cspa_array_plot():
    hd = io.HERAData(SUM_FILE)
    
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'wspace': 0})
    for pol, ax in zip(['Jee', 'Jnn'], axes):

        ants_here = [ant for ant in avg_cspa if np.isfinite(avg_cspa[ant]) and ant[1] == pol if ant[0] in hd.antpos]
        avg_chisqs = [avg_cspa[ant] for ant in ants_here]
        xs = [hd.antpos[ant[0]][0] for ant in ants_here]
        ys = [hd.antpos[ant[0]][1] for ant in ants_here]
        names = [ant[0] for ant in ants_here]
        
        im = ax.scatter(x=xs, y=ys, c=avg_chisqs, s=200, vmin=1, vmax=3, cmap='turbo')
        ax.set_aspect('equal')
        for x,y,n in zip(xs, ys, names):
            ax.text(x, y, str(n), va='center', ha='center', fontsize=8)
        ax.set_title(pol)
        ax.set_xlabel('East-West Antenna Position (m)')
    
    axes[0].set_ylabel('North-South Antenna Position (m)')

    plt.tight_layout()
    plt.colorbar(im, ax=axes, location='top', aspect=60, pad=.04, label='Mean Unflagged $\\chi^2$ per Antenna', extend='both')

Figure 6: Average $\chi^2$ per Antenna¶

Here we plot $\chi^2$ per antenna from redundant-baseline calibration, separating polarizations and averaging the unflagged pixels in the waterfalls over frequency or time. The worst 5% of antennas are shown in color and highlighted in the legends, the rest are shown in grey. We also show time- and frequency-averaged $\chi^2$ for each antennas as a scatter plot with array position.

In [31]:
cspa_vs_freq_plot()
cspa_vs_time_plot()
avg_cspa_array_plot()
Mean of empty slice
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
Mean of empty slice
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Examine relative differences before and after smoothing¶

In [32]:
def time_avg_diff_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(diff) for ant, diff in meta['time_avg_rel_diff'].items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(diff))], 95)    
        for ant, rel_diff in meta['time_avg_rel_diff'].items():
            if ant[0] >= 0 and ant[1] == pol and np.any(np.isfinite(rel_diff)):
                if np.nanmean(rel_diff) > detail_cutoff:
                    if np.all(cs.flag_grids[ant]):
                        ax.plot(cs.freqs / 1e6, rel_diff, label=str((int(ant[0]), ant[1])), zorder=99, ls='--', c='r', lw=.5)    
                    else:
                        ax.plot(cs.freqs / 1e6, rel_diff, label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, rel_diff, c='grey', alpha=.2, lw=.5)
        med_rel_diff = np.nanmedian([diff for ant, diff in meta['time_avg_rel_diff'].items() if ant[1] == pol], axis=0)
        ax.plot(cs.freqs / 1e6, med_rel_diff, 'k--', label='Median')
        ax.set_ylim([0, 1.05])
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Time-Averaged Relative Difference\nBefore and After Smoothing')
        ax.set_xlabel('Frequency (MHz)')
    plt.tight_layout()
In [33]:
def freq_avg_diff_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in meta['freq_avg_rel_diff'].items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)    
        for ant, rel_diff in meta['freq_avg_rel_diff'].items():
            if ant[0] >= 0 and ant[1] == pol and np.any(np.isfinite(rel_diff)):
                if np.nanmean(rel_diff) > detail_cutoff:
                    if np.all(cs.flag_grids[ant]):
                        ax.plot(lst_grid, rel_diff, label=str((int(ant[0]), ant[1])), zorder=99, ls='--', c='r', lw=.5)    
                    else:
                        ax.plot(lst_grid, rel_diff, label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(lst_grid, rel_diff, c='grey', alpha=.2, lw=.5)
        
        med_rel_diff = np.nanmedian([diff for ant, diff in meta['freq_avg_rel_diff'].items() if ant[1] == pol], axis=0)
        ax.plot(lst_grid, med_rel_diff, 'k--', label='Median', zorder=101)
        ax.set_ylim([0, 1.05])
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Frequency-Averaged Relative Difference\nBefore and After Smoothing')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)
    plt.tight_layout()
In [34]:
def avg_difference_array_plot():
    hd = io.HERAData(SUM_FILE)
    
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'wspace': 0})
    for pol, ax in zip(['Jee', 'Jnn'], axes):
    
        avg_diffs = [np.nanmean(meta['time_avg_rel_diff'][ant]) for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        xs = [hd.antpos[ant[0]][0] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        ys = [hd.antpos[ant[0]][1] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        names = [ant[0] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        
        im = ax.scatter(x=xs, y=ys, c=avg_diffs, s=200, vmin=0, vmax=.25, cmap='turbo')
        ax.set_aspect('equal')
        for x,y,n in zip(xs, ys, names):
            color = ('w' if np.all(cs.flag_grids[n, pol]) else 'k')
            ax.text(x, y, str(n), va='center', ha='center', fontsize=8, c=color)
        ax.set_title(pol)
        ax.set_xlabel('East-West Antenna Position (m)')
    
    axes[0].set_ylabel('North-South Antenna Position (m)')

    plt.tight_layout()
    plt.colorbar(im, ax=axes, location='top', aspect=60, pad=.04, label='Average Relative Difference Before and After Smoothing', extend='max')

Figure 7: Relative Difference Before and After Smoothing¶

Similar to the above plots, here we show the relative difference before and after smoothing, compared to the magnitude of the smoothed calibration solution. Totally flagged antennas (because they are above the SC_RELATIVE_DIFF_CUTOFF) are red in the first two plots, and their numbers are white in the last plot.

In [35]:
time_avg_diff_plot()
freq_avg_diff_plot()
avg_difference_array_plot()
All-NaN slice encountered
All-NaN slice encountered
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
All-NaN slice encountered
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Save Results¶

In [36]:
add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
In [37]:
cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)
Mean of empty slice

Metadata¶

In [38]:
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.7.7.dev38+g49184245e
hera_qm: 2.2.1.dev4+gf6d0211
hera_filters: 0.1.7
hera_notebook_templates: 0.1.dev1186+g5d4205b
pyuvdata: 3.2.3.dev10+g11d3f658
In [39]:
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 179.84 minutes.