Calibration Smoothing¶

by Josh Dillon, last updated March 29, 2023

This notebook runs calibration smoothing to the gains coming out of file_calibration notebook. It removes any flags founds on by that notebook and replaces them with flags generated from full_day_rfi and full_day_antenna_flagging. It also plots the results for a couple of antennas.

Here's a set of links to skip to particular figures and tables:

• Figure 1: Full-Day Gain Amplitudes Before and After smooth_cal¶

• Figure 2: Full-Day Gain Phases Before and After smooth_cal¶

• Figure 3: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

• Figure 4: Average $\chi^2$ per Antenna vs. Time and Frequency¶

In [1]:
import time
tstart = time.time()
In [2]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML

Parse inputs¶

In [3]:
# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = '/users/jsdillon/lustre/H6C/abscal/2459853/zen.2459853.25518.sum.uvh5'
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 10.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 6e5)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))

for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
                'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF']:
        print(f'{setting} = {eval(setting)}')
SUM_FILE = /lustre/aoc/projects/hera/h6c-analysis/IDR2/2459868/zen.2459868.25282.sum.uvh5
SUM_SUFFIX = sum.uvh5
CAL_SUFFIX = sum.omni.calfits
SMOOTH_CAL_SUFFIX = sum.smooth.calfits
ANT_FLAG_SUFFIX = sum.antenna_flags.h5
RFI_FLAG_SUFFIX = sum.flag_waterfall.h5
FREQ_SMOOTHING_SCALE = 10.0
TIME_SMOOTHING_SCALE = 600000.0
EIGENVAL_CUTOFF = 1e-12

Load files¶

In [4]:
sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')
Found 1862 *.sum.omni.calfits files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR2/2459868/zen.2459868.25282.sum.omni.calfits.
In [5]:
rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')
Found 1862 *.sum.flag_waterfall.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR2/2459868/zen.2459868.25282.sum.flag_waterfall.h5.
In [6]:
ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')
Found 1862 *.sum.antenna_flags.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR2/2459868/zen.2459868.25282.sum.antenna_flags.h5.
In [7]:
cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files), ignore_calflags=True,
                                    pick_refant=True, propagate_refant_flags=True, load_chisq=True, load_cspa=True)
for pol in cs.refant:
    print(f'Reference antenna {cs.refant[pol][0]} selected for {pol}.')
Mean of empty slice
Reference antenna 19 selected for Jee.
Reference antenna 89 selected for Jnn.
In [8]:
# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags in zip(antnums, flags_per_antnum) if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
                  and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {(ant, pol): np.array(cs.gain_grids[(ant, pol)]) for ant in ants_to_plot for pol in ['Jee', 'Jnn']}

Perform smoothing¶

In [9]:
cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE, time_scale=TIME_SMOOTHING_SCALE, eigenval_cutoff=EIGENVAL_CUTOFF, 
                       method='DPSS', fit_method='lu_solve', fix_phase_flips=True, flag_phase_flip_ints=True)
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)

Plot results¶

In [10]:
lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24
In [11]:
def amplitude_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Pick vmax to not saturate 90% of the abscal gains
        vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])

        display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))    

        # Plot abscal gain amplitude waterfalls for a single antenna
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now flagged plot abscal waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot mean gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')

        # Now plot mean gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()    
In [12]:
def phase_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")    
        display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})

        # Plot phase waterfalls for a single antenna    
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now plot abscal phase waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            ax.set_title(f'Abscal Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot median gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}}}$ / g$_{{{cs.refant[pol][0]}}}$')
            ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')

        # # Now plot median gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])    
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}}}$ / g$_{{{cs.refant[pol][0]}}}$')
            ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]} / Ant {cs.refant[pol][0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)    
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()

Figure 1: Full-Day Gain Amplitudes Before and After smooth_cal¶

Here we plot abscal and smooth_cal gain amplitudes for both of the sample antennas. We also show means across time/frequency, excluding frequencies/times that are frequently flagged.

In [13]:
for ant_to_plot in ants_to_plot:
    amplitude_plot(ant_to_plot)

Antenna 3 Amplitude Waterfalls

Antenna 239 Amplitude Waterfalls

Figure 2: Full-Day Gain Phases Before and After smooth_cal¶

Here we plot abscal and smooth_cal phases relative to each polarization's reference antenna for both of the sample antennas. We also show medians across time/frequency, excluding frequencies/times that are frequently flagged.

In [14]:
for ant_to_plot in ants_to_plot:
    phase_plot(ant_to_plot)

Antenna 3 Phase Waterfalls

Antenna 239 Phase Waterfalls

Examine $\chi^2$¶

In [15]:
def chisq_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
    extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
    for ax, pol in zip(axes, ['Jee', 'Jnn']):

        im = ax.imshow(np.where(cs.flag_grids[cs.refant[pol]], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5, 
                       aspect='auto', cmap='turbo', interpolation='none', extent=extent)
        ax.set_yticklabels(ax.get_yticks() % 24)
        ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
        ax.set_xlabel('Frequency (MHz)')

    axes[0].set_ylabel('LST (hours)')
    plt.tight_layout()
    fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)

Figure 3: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

Here we plot $\chi^2$ per degree of freedom from redundant-baseline calibration for both polarizations separately. While this plot is a little out of place, as it was not produced by this notebook, it is a convenient place where all the necessary components are readily available. If the array were perfectly redundant and any non-redundancies in the calibrated visibilities were explicable by thermal noise alone, this waterfall should be all 1.

In [16]:
chisq_plot()
FixedFormatter should only be used together with FixedLocator
In [17]:
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
Mean of empty slice
Mean of empty slice
In [18]:
def cspa_vs_time_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_time:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], label=ant, zorder=100)
                else:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)

    plt.ylim([1, 5.4])
    plt.tight_layout()
In [19]:
def cspa_vs_freq_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_freq:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=ant, zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('Frequency (MHz)')

    plt.ylim([1, 5.4])
    plt.tight_layout()

Figure 4: Average $\chi^2$ per Antenna vs. Time and Frequency¶

Here we plot $\chi^2$ per antenna from redundant-baseline calibration, separating polarizations and averaging the unflagged pixels in the waterfalls over frequency or time. The worst 5% of antennas are shown in color and highlighted in the legends, the rest are shown in grey.

In [20]:
cspa_vs_time_plot()
cspa_vs_freq_plot()
Mean of empty slice
FixedFormatter should only be used together with FixedLocator

Save Results¶

In [21]:
add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65
In [22]:
cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)
Mean of empty slice

Metadata¶

In [23]:
for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')
hera_cal: 3.3.0
hera_qm: 2.1.1
hera_filters: 0.1.4.dev2+ga4ff591
hera_notebook_templates: 0.1.dev531+gfe314a8
pyuvdata: 2.4.1.dev9+gc10c105e
In [24]:
print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')
Finished execution in 37.37 minutes.