Calibration Smoothing¶

by Josh Dillon, last updated February 26, 2025

This notebook runs calibration smoothing to the gains coming out of file_calibration notebook. It removes any flags founds on by that notebook and replaces them with flags generated from full_day_rfi and full_day_antenna_flagging. It also plots the results for a couple of antennas.

Here's a set of links to skip to particular figures and tables:

• Figure 1: Identifying and Blacklisting abscal Failures¶

• Figure 2: Full-Day Gain Amplitudes Before and After smooth_cal¶

• Figure 3: Full-Day Gain Phases Before and After smooth_cal¶

• Figure 4: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

• Figure 5: Average $\chi^2$ per Antenna vs. Time and Frequency¶

In [1]:
import time
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML

Parse inputs¶

In [3]:
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/lustre/aoc/projects/hera/h6c-analysis/IDR2/2459867/zen.2459867.43004.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 30.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 1e4)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
PER_POL_REFANT = os.environ.get("PER_POL_REFANT", "False").upper() == "TRUE"
BLACKLIST_TIMESCALE_FACTOR = float(os.environ.get("BLACKLIST_TIMESCALE_FACTOR", 10))
BLACKLIST_RELATIVE_ERROR_THRESH = float(os.environ.get("BLACKLIST_RELATIVE_ERROR_THRESH", .75))

for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
                'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF', 
                'PER_POL_REFANT', 'BLACKLIST_TIMESCALE_FACTOR', 'BLACKLIST_RELATIVE_ERROR_THRESH']:
    if issubclass(type(eval(setting)), str):
        print(f'{setting} = "{eval(setting)}"')
    else:
        print(f'{setting} = {eval(setting)}')
SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459876/zen.2459876.25284.sum.uvh5"
SUM_SUFFIX = "sum.uvh5"
CAL_SUFFIX = "sum.omni.calfits"
SMOOTH_CAL_SUFFIX = "sum.smooth.calfits"
ANT_FLAG_SUFFIX = "sum.antenna_flags.h5"
RFI_FLAG_SUFFIX = "sum.flag_waterfall.h5"
FREQ_SMOOTHING_SCALE = 30.0
TIME_SMOOTHING_SCALE = 10000.0
EIGENVAL_CUTOFF = 1e-12
PER_POL_REFANT = False
BLACKLIST_TIMESCALE_FACTOR = 10.0
BLACKLIST_RELATIVE_ERROR_THRESH = 0.75

Load files and select reference antenna(s)¶

In [4]:
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1862 *.sum.omni.calfits files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459876/zen.2459876.25284.sum.omni.calfits.
In [5]:
rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')
Found 1862 *.sum.flag_waterfall.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459876/zen.2459876.25284.sum.flag_waterfall.h5.
In [6]:
ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')
Found 1862 *.sum.antenna_flags.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459876/zen.2459876.25284.sum.antenna_flags.h5.
In [7]:
cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files),
                                    ignore_calflags=True, pick_refant=False, load_chisq=True, load_cspa=True)
In [8]:
cs.refant = smooth_cal.pick_reference_antenna(cs.gain_grids, cs.flag_grids, cs.freqs, per_pol=True)
for pol in cs.refant:
    print(f'Reference antenna {cs.refant[pol][0]} selected for smoothing {pol} gains.')

if not PER_POL_REFANT:
    # in this case, rephase both pols separately before smoothing, but also smooth the relative polarization calibration phasor
    overall_refant = smooth_cal.pick_reference_antenna({ant: cs.gain_grids[ant] for ant in cs.refant.values()}, 
                                                       {ant: cs.flag_grids[ant] for ant in cs.refant.values()}, 
                                                       cs.freqs, per_pol=False)
    print(f'Overall reference antenna {overall_refant} selected.')
    other_refant = [ant for ant in cs.refant.values() if ant != overall_refant][0]

    relative_pol_phasor = cs.gain_grids[overall_refant] * cs.gain_grids[other_refant].conj() # TODO: is this conjugation right?
    relative_pol_phasor /= np.abs(relative_pol_phasor)
Reference antenna 128 selected for smoothing Jee gains.
Reference antenna 221 selected for smoothing Jnn gains.
Overall reference antenna (np.int64(221), 'Jnn') selected.
In [9]:
# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags in zip(antnums, flags_per_antnum) if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
                  and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {}
for pol in ['Jee', 'Jnn']:
    for antnum in ants_to_plot:
        refant_here = (cs.refant[pol] if PER_POL_REFANT else overall_refant)
        abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] * np.abs(cs.gain_grids[refant_here]) / cs.gain_grids[refant_here]
In [10]:
cs.rephase_to_refant(propagate_refant_flags=True)
In [11]:
lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24

Find consistent outliers in relative error after a coarse smoothing¶

These are typically a sign of failures of abscal.

In [12]:
relative_error_samples = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
sum_relative_error = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}

# perform a 2D DPSS filter with a BLACKLIST_TIMESCALE_FACTOR longer timescale, averaging the results per-pol
for ant in cs.gain_grids:
    if np.all(cs.flag_grids[ant]):
        continue
    filtered, _ = smooth_cal.time_freq_2D_filter(gains=cs.gain_grids[ant], 
                                                 wgts=(~cs.flag_grids[ant]).astype(float),
                                                 freqs=cs.freqs,
                                                 times=cs.time_grid,
                                                 freq_scale=FREQ_SMOOTHING_SCALE,
                                                 time_scale=TIME_SMOOTHING_SCALE * BLACKLIST_TIMESCALE_FACTOR,
                                                 eigenval_cutoff=EIGENVAL_CUTOFF,
                                                 method='DPSS', 
                                                 fit_method='lu_solve', 
                                                 fix_phase_flips=True, 
                                                 flag_phase_flip_ints=True,
                                                ) 
    relative_error = np.where(cs.flag_grids[ant], 0, np.abs(cs.gain_grids[ant] - filtered) / np.abs(filtered))
    relative_error_samples[ant[1]] += (~cs.flag_grids[ant]).astype(float)
    sum_relative_error[ant[1]] += relative_error

# figure out per-antpol cuts for where to set weights to 0 for the main smooth_cal (but not necessarily flags)
for pol in ['Jee', 'Jnn']:
    avg_rel_error = sum_relative_error[pol] / relative_error_samples[pol]
    to_blacklist = np.where(relative_error_samples[pol] > 0, avg_rel_error > BLACKLIST_RELATIVE_ERROR_THRESH, False)
    for ant in cs.ants:
        if ant[1] == pol:
            cs.waterfall_blacklist[ant] = to_blacklist
invalid value encountered in divide
In [13]:
def plot_relative_error():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        fig, axes = plt.subplots(1, 3, figsize=(14, 7))
        extent = [cs.freqs[0] / 1e6, cs.freqs[-1] / 1e6, lst_grid[-1], lst_grid[0]]
        cmap = plt.get_cmap('Greys', 256)
        cmap.set_over('red')
        for ax, pol in zip(axes[0:2], ['Jee', 'Jnn']):
            to_plot = sum_relative_error[pol] / relative_error_samples[pol]
            im = ax.imshow(np.where(np.isfinite(to_plot), to_plot, np.nan), aspect='auto', interpolation='none', 
                           vmin=0, vmax=BLACKLIST_RELATIVE_ERROR_THRESH, extent=extent, cmap=cmap)
            ax.set_title(pol)
            ax.set_yticklabels(ax.get_yticks() % 24)
            ax.set_ylabel('LST (hours)')
            ax.set_xlabel('Frequency (MHz)')
        plt.colorbar(im, ax=axes[0:2], location='top', extend='max', label='Average Relative Error on Initial Smoothing')
        
        for pol in ['Jee', 'Jnn']:
            axes[2].hist((sum_relative_error[pol] / relative_error_samples[pol]).ravel(), bins=np.arange(0,2,.01), alpha=.5, label=pol)
        axes[2].set_yscale('log')
        axes[2].set_ylabel('Number of Waterfall Pixels')
        axes[2].set_xlabel('Relative Error')
        axes[2].axvline(BLACKLIST_RELATIVE_ERROR_THRESH, ls='--', c='r', label='Blacklist Threshold')
        axes[2].legend()

Figure 1: Identifying and Blacklisting abscal Failures¶

This plot highlights regions of the waterfall that are per-polarization blacklisted (i.e. given 0 weight in the main smooth_cal fit, but not necessarily flagged). This is usually a sign of problems with abscal and often occurs because

In [14]:
plot_relative_error()
No description has been provided for this image

Perform smoothing¶

In [15]:
if not PER_POL_REFANT:
    # treat the relative_pol_phasor as if it were antenna -1
    cs.gain_grids[(-1, other_refant[1])] = relative_pol_phasor
    cs.flag_grids[(-1, other_refant[1])] = cs.flag_grids[overall_refant] | cs.flag_grids[other_refant]
    cs.waterfall_blacklist[(-1, other_refant[1])] = cs.waterfall_blacklist[cs.ants[0][0], 'Jee'] | cs.waterfall_blacklist[cs.ants[0][0], 'Jnn'] 
In [16]:
cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE, time_scale=TIME_SMOOTHING_SCALE, eigenval_cutoff=EIGENVAL_CUTOFF, 
                       method='DPSS', fit_method='lu_solve', fix_phase_flips=True, flag_phase_flip_ints=True)
1 phase flips detected on antenna (np.int64(144), 'Jnn'). A total of 1322 integrations were phase-flipped relative to the 0th integration between 2459876.521498564 and 2459876.6692499253.
In [17]:
if not PER_POL_REFANT:
    # put back in the smoothed phasor, ensuring the amplitude is 1 and that data are flagged anywhere either polarization's refant is flagged
    smoothed_relative_pol_phasor = cs.gain_grids[(-1, other_refant[-1])] / np.abs(cs.gain_grids[(-1, other_refant[-1])])
    for ant in cs.gain_grids:
        if ant[0] >= 0 and ant[1] == other_refant[1]:
            cs.gain_grids[ant] /= smoothed_relative_pol_phasor
        cs.flag_grids[ant] |= (cs.flag_grids[(-1, other_refant[1])])
    cs.refant = overall_refant

Plot results¶

In [18]:
def amplitude_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Pick vmax to not saturate 90% of the abscal gains
        vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])

        display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))    

        # Plot abscal gain amplitude waterfalls for a single antenna
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now flagged plot abscal waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot mean gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')

        # Now plot mean gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()    
In [19]:
def phase_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")    
        display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        
        # Plot phase waterfalls for a single antenna    
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)

            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now plot abscal phase waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Abscal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot median gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.legend(loc='upper left')

        # # Now plot median gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])    
            ax.set_xlabel('LST (hours)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xticklabels(ax.get_xticks() % 24)    
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()

Figure 2: Full-Day Gain Amplitudes Before and After smooth_cal¶

Here we plot abscal and smooth_cal gain amplitudes for both of the sample antennas. We also show means across time/frequency, excluding frequencies/times that are frequently flagged.

In [20]:
for ant_to_plot in ants_to_plot:
    amplitude_plot(ant_to_plot)

Antenna 3 Amplitude Waterfalls

No description has been provided for this image

Antenna 238 Amplitude Waterfalls

No description has been provided for this image

Figure 3: Full-Day Gain Phases Before and After smooth_cal¶

Here we plot abscal and smooth_cal phases relative to each polarization's reference antenna for both of the sample antennas. We also show medians across time/frequency, excluding frequencies/times that are frequently flagged.

In [21]:
for ant_to_plot in ants_to_plot:
    phase_plot(ant_to_plot)

Antenna 3 Phase Waterfalls

No description has been provided for this image

Antenna 238 Phase Waterfalls

No description has been provided for this image

Examine $\chi^2$¶

In [22]:
def chisq_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
    extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
        im = ax.imshow(np.where(cs.flag_grids[refant], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5, 
                       aspect='auto', cmap='turbo', interpolation='none', extent=extent)
        ax.set_yticklabels(ax.get_yticks() % 24)
        ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
        ax.set_xlabel('Frequency (MHz)')

    axes[0].set_ylabel('LST (hours)')
    plt.tight_layout()
    fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)

Figure 4: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

Here we plot $\chi^2$ per degree of freedom from redundant-baseline calibration for both polarizations separately. While this plot is a little out of place, as it was not produced by this notebook, it is a convenient place where all the necessary components are readily available. If the array were perfectly redundant and any non-redundancies in the calibrated visibilities were explicable by thermal noise alone, this waterfall should be all 1.

In [23]:
chisq_plot()
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
No description has been provided for this image
In [24]:
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
Mean of empty slice
Mean of empty slice
In [25]:
def cspa_vs_time_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_time:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], label=ant, zorder=100)
                else:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)

    plt.ylim([1, 5.4])
    plt.tight_layout()
In [26]:
def cspa_vs_freq_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_freq:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=ant, zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('Frequency (MHz)')

    plt.ylim([1, 5.4])
    plt.tight_layout()

Figure 5: Average $\chi^2$ per Antenna vs. Time and Frequency¶

Here we plot $\chi^2$ per antenna from redundant-baseline calibration, separating polarizations and averaging the unflagged pixels in the waterfalls over frequency or time. The worst 5% of antennas are shown in color and highlighted in the legends, the rest are shown in grey.

In [27]:
cspa_vs_time_plot()
cspa_vs_freq_plot()
Mean of empty slice
Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11.  To keep the current behavior, cast the sequence to string before passing.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
Mean of empty slice
Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11.  To keep the current behavior, cast the sequence to string before passing.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
Mean of empty slice
Passing label as a length 2 sequence when plotting a single dataset is deprecated in Matplotlib 3.9 and will error in 3.11.  To keep the current behavior, cast the sequence to string before passing.
No description has been provided for this image
No description has been provided for this image

Save Results¶

In [28]:
add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
In [29]:
cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)
Mean of empty slice

Metadata¶

In [30]:
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.7.5.dev25+g553b4f5a
hera_qm: 2.2.1.dev4+gf6d0211
hera_filters: 0.1.7.dev1+gcbb4f93
hera_notebook_templates: 0.1.dev1126+g777c416
pyuvdata: 3.2.1
In [31]:
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 92.11 minutes.