import time
tstart = time.time()

import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
import h5py
import hdf5plugin  # REQUIRED to have the compression plugins available
import numpy as np
import glob
import copy
import warnings
import matplotlib
import matplotlib.pyplot as plt
from hera_cal import io, utils, smooth_cal
from hera_qm.time_series_metrics import true_stretches
%matplotlib inline
from IPython.display import display, HTML

# get files
SUM_FILE = os.environ.get("SUM_FILE", None)
# SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459893/zen.2459893.25258.sum.uvh5"
SUM_SUFFIX = os.environ.get("SUM_SUFFIX", 'sum.uvh5')
CAL_SUFFIX = os.environ.get("CAL_SUFFIX", 'sum.omni.calfits')
SMOOTH_CAL_SUFFIX = os.environ.get("SMOOTH_CAL_SUFFIX", 'sum.smooth.calfits')
ANT_FLAG_SUFFIX = os.environ.get("ANT_FLAG_SUFFIX", 'sum.antenna_flags.h5')
RFI_FLAG_SUFFIX = os.environ.get("RFI_FLAG_SUFFIX", 'sum.flag_waterfall.h5')
FREQ_SMOOTHING_SCALE = float(os.environ.get("FREQ_SMOOTHING_SCALE", 30.0)) # MHz
TIME_SMOOTHING_SCALE = float(os.environ.get("TIME_SMOOTHING_SCALE", 1e4)) # seconds
EIGENVAL_CUTOFF = float(os.environ.get("EIGENVAL_CUTOFF", 1e-12))
PER_POL_REFANT = os.environ.get("PER_POL_REFANT", "False").upper() == "TRUE"
BLACKLIST_TIMESCALE_FACTOR = float(os.environ.get("BLACKLIST_TIMESCALE_FACTOR", 4.0))
BLACKLIST_RELATIVE_ERROR_THRESH = float(os.environ.get("BLACKLIST_RELATIVE_ERROR_THRESH", 1))
BLACKLIST_RELATIVE_WEIGHT = float(os.environ.get("BLACKLIST_RELATIVE_WEIGHT", 0.1))
FM_LOW_FREQ = float(os.environ.get("FM_LOW_FREQ", 87.5)) # in MHz
FM_HIGH_FREQ = float(os.environ.get("FM_HIGH_FREQ", 108.0)) # in MHz
SC_RELATIVE_DIFF_CUTOFF = float(os.environ.get("SC_RELATIVE_DIFF_CUTOFF", 0.2))

for setting in ['SUM_FILE', 'SUM_SUFFIX', 'CAL_SUFFIX', 'SMOOTH_CAL_SUFFIX', 'ANT_FLAG_SUFFIX',
                'RFI_FLAG_SUFFIX', 'FREQ_SMOOTHING_SCALE', 'TIME_SMOOTHING_SCALE', 'EIGENVAL_CUTOFF', 
                'PER_POL_REFANT', 'BLACKLIST_TIMESCALE_FACTOR', 'BLACKLIST_RELATIVE_ERROR_THRESH', 
                'BLACKLIST_RELATIVE_WEIGHT', 'FM_LOW_FREQ', 'FM_HIGH_FREQ', 'SC_RELATIVE_DIFF_CUTOFF']:
    if issubclass(type(eval(setting)), str):
        print(f'{setting} = "{eval(setting)}"')
    else:
        print(f'{setting} = {eval(setting)}')

SUM_FILE = "/lustre/aoc/projects/hera/h6c-analysis/IDR3/2459943/zen.2459943.21276.sum.uvh5"
SUM_SUFFIX = "sum.uvh5"
CAL_SUFFIX = "sum.omni.calfits"
SMOOTH_CAL_SUFFIX = "sum.smooth.calfits"
ANT_FLAG_SUFFIX = "sum.antenna_flags.h5"
RFI_FLAG_SUFFIX = "sum.flag_waterfall.h5"
FREQ_SMOOTHING_SCALE = 30.0
TIME_SMOOTHING_SCALE = 10000.0
EIGENVAL_CUTOFF = 1e-12
PER_POL_REFANT = False
BLACKLIST_TIMESCALE_FACTOR = 4.0
BLACKLIST_RELATIVE_ERROR_THRESH = 1.0
BLACKLIST_RELATIVE_WEIGHT = 0.1
FM_LOW_FREQ = 87.5
FM_HIGH_FREQ = 108.0
SC_RELATIVE_DIFF_CUTOFF = 0.2

sum_glob = '.'.join(SUM_FILE.split('.')[:-3]) + '.*.' + SUM_SUFFIX
cal_files_glob = sum_glob.replace(SUM_SUFFIX, CAL_SUFFIX)
cal_files = sorted(glob.glob(cal_files_glob))
print(f'Found {len(cal_files)} *.{CAL_SUFFIX} files starting with {cal_files[0]}.')

Found 1851 *.sum.omni.calfits files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459943/zen.2459943.21276.sum.omni.calfits.

rfi_flag_files_glob = sum_glob.replace(SUM_SUFFIX, RFI_FLAG_SUFFIX)
rfi_flag_files = sorted(glob.glob(rfi_flag_files_glob))
print(f'Found {len(rfi_flag_files)} *.{RFI_FLAG_SUFFIX} files starting with {rfi_flag_files[0]}.')

Found 1851 *.sum.flag_waterfall.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459943/zen.2459943.21276.sum.flag_waterfall.h5.

ant_flag_files_glob = sum_glob.replace(SUM_SUFFIX, ANT_FLAG_SUFFIX)
ant_flag_files = sorted(glob.glob(ant_flag_files_glob))
print(f'Found {len(ant_flag_files)} *.{ANT_FLAG_SUFFIX} files starting with {ant_flag_files[0]}.')

Found 1851 *.sum.antenna_flags.h5 files starting with /lustre/aoc/projects/hera/h6c-analysis/IDR3/2459943/zen.2459943.21276.sum.antenna_flags.h5.

cs = smooth_cal.CalibrationSmoother(cal_files, flag_file_list=(ant_flag_files + rfi_flag_files),
                                    ignore_calflags=True, pick_refant=False, load_chisq=True, load_cspa=True)

cs.refant = smooth_cal.pick_reference_antenna(cs.gain_grids, cs.flag_grids, cs.freqs, per_pol=True)
for pol in cs.refant:
    print(f'Reference antenna {cs.refant[pol][0]} selected for smoothing {pol} gains.')

if not PER_POL_REFANT:
    # in this case, rephase both pols separately before smoothing, but also smooth the relative polarization calibration phasor
    overall_refant = smooth_cal.pick_reference_antenna({ant: cs.gain_grids[ant] for ant in cs.refant.values()}, 
                                                       {ant: cs.flag_grids[ant] for ant in cs.refant.values()}, 
                                                       cs.freqs, per_pol=False)
    print(f'Overall reference antenna {overall_refant} selected.')
    other_refant = [ant for ant in cs.refant.values() if ant != overall_refant][0]

    relative_pol_phasor = cs.gain_grids[overall_refant] * cs.gain_grids[other_refant].conj() # TODO: is this conjugation right?
    relative_pol_phasor /= np.abs(relative_pol_phasor)

abscal_refants = {cs.refant[pol]: cs.gain_grids[cs.refant[pol]] for pol in ['Jee', 'Jnn']}

Reference antenna 125 selected for smoothing Jee gains.
Reference antenna 127 selected for smoothing Jnn gains.

Overall reference antenna (np.int64(127), 'Jnn') selected.

cs.rephase_to_refant(propagate_refant_flags=True)

lst_grid = utils.JD2LST(cs.time_grid) * 12 / np.pi
lst_grid[lst_grid > lst_grid[-1]] -= 24

relative_error_samples = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
sum_relative_error = {pol: np.zeros_like(cs.gain_grids[cs.refant[pol]], dtype=float) for pol in ['Jee', 'Jnn']}
per_ant_avg_relative_error = {} 

# perform a 2D DPSS filter with a BLACKLIST_TIMESCALE_FACTOR longer timescale, averaging the results per-pol
for ant in cs.gain_grids:
    if np.all(cs.flag_grids[ant]):
        continue
    filtered, _ = smooth_cal.time_freq_2D_filter(gains=cs.gain_grids[ant], 
                                                 wgts=(~cs.flag_grids[ant]).astype(float),
                                                 freqs=cs.freqs,
                                                 times=cs.time_grid,
                                                 freq_scale=FREQ_SMOOTHING_SCALE,
                                                 time_scale=TIME_SMOOTHING_SCALE * BLACKLIST_TIMESCALE_FACTOR,
                                                 eigenval_cutoff=EIGENVAL_CUTOFF,
                                                 method='DPSS', 
                                                 fit_method='lu_solve', 
                                                 fix_phase_flips=True, 
                                                 phase_flip_time_scale = TIME_SMOOTHING_SCALE / 2,
                                                 flag_phase_flip_ints=True,
                                                 skip_flagged_edges=True, 
                                                 freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],
                                                ) 
    relative_error = np.where(cs.flag_grids[ant], 0, np.abs(cs.gain_grids[ant] - filtered) / np.abs(filtered))
    per_ant_avg_relative_error[ant] = np.nanmean(np.where(cs.flag_grids[ant], np.nan, relative_error))
    relative_error_samples[ant[1]] += (~cs.flag_grids[ant]).astype(float)
    sum_relative_error[ant[1]] += relative_error

# figure out per-antpol cuts for where to set weights to 0 for the main smooth_cal (but not necessarily flags)
cs.blacklist_wgt = BLACKLIST_RELATIVE_WEIGHT
for pol in ['Jee', 'Jnn']:
    avg_rel_error = sum_relative_error[pol] / relative_error_samples[pol]
    to_blacklist = np.where(relative_error_samples[pol] > 0, avg_rel_error > BLACKLIST_RELATIVE_ERROR_THRESH, False)
    for ant in cs.ants:
        if ant[1] == pol:
            cs.waterfall_blacklist[ant] = to_blacklist

invalid value encountered in divide

def plot_relative_error():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        fig, axes = plt.subplots(1, 3, figsize=(14, 7))
        extent = [cs.freqs[0] / 1e6, cs.freqs[-1] / 1e6, lst_grid[-1], lst_grid[0]]
        cmap = plt.get_cmap('Greys', 256)
        cmap.set_over('red')
        for ax, pol in zip(axes[0:2], ['Jee', 'Jnn']):
            to_plot = sum_relative_error[pol] / relative_error_samples[pol]
            im = ax.imshow(np.where(np.isfinite(to_plot), to_plot, np.nan), aspect='auto', interpolation='none', 
                           vmin=0, vmax=BLACKLIST_RELATIVE_ERROR_THRESH, extent=extent, cmap=cmap)
            ax.set_title(pol)
            ax.set_yticklabels(ax.get_yticks() % 24)
            ax.set_ylabel('LST (hours)')
            ax.set_xlabel('Frequency (MHz)')
        plt.colorbar(im, ax=axes[0:2], location='top', extend='max', label='Average Relative Error on Initial Smoothing')
        
        for pol in ['Jee', 'Jnn']:
            axes[2].hist((sum_relative_error[pol] / relative_error_samples[pol]).ravel(), bins=np.arange(0,2,.01), alpha=.5, label=pol)
        axes[2].set_yscale('log')
        axes[2].set_ylabel('Number of Waterfall Pixels')
        axes[2].set_xlabel('Relative Error')
        axes[2].axvline(BLACKLIST_RELATIVE_ERROR_THRESH, ls='--', c='r', label='Blacklist Threshold')
        axes[2].legend()

plot_relative_error()

# duplicate a small number of abscal gains for plotting
antnums = set([ant[0] for ant in cs.ants])
flags_per_antnum = [np.sum(cs.flag_grids[ant, 'Jnn']) + np.sum(cs.flag_grids[ant, 'Jee']) for ant in antnums]
larger_relative_error = np.array([np.max([per_ant_avg_relative_error.get((ant, pol), np.inf) for pol in ['Jee', 'Jnn']]) for ant in antnums])
refant_nums = [ant[0] for ant in cs.refant.values()]
candidate_ants = [ant for ant, nflags, rel_err in zip(antnums, flags_per_antnum, larger_relative_error) 
                  if (ant not in refant_nums) and (nflags <= np.percentile(flags_per_antnum, 25))
                  and (rel_err <= SC_RELATIVE_DIFF_CUTOFF)
                  and not np.all(cs.flag_grids[ant, 'Jee']) and not np.all(cs.flag_grids[ant, 'Jnn'])]
ants_to_plot = [func(candidate_ants) for func in (np.min, np.max)]
abscal_gains = {}
for pol in ['Jee', 'Jnn']:
    for antnum in ants_to_plot:
        if PER_POL_REFANT:
            abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] * np.abs(abscal_refants[cs.refant[pol]]) / abscal_refants[cs.refant[pol]]
        else:
            abscal_gains[antnum, pol] = cs.gain_grids[(antnum, pol)] / np.abs(abscal_refants[cs.refant[pol]]) * abscal_refants[cs.refant[pol]]
            abscal_gains[antnum, pol] *= np.abs(abscal_refants[overall_refant]) / abscal_refants[overall_refant]

if not PER_POL_REFANT:
    # treat the relative_pol_phasor as if it were antenna -1
    cs.gain_grids[(-1, other_refant[1])] = relative_pol_phasor
    cs.flag_grids[(-1, other_refant[1])] = cs.flag_grids[overall_refant] | cs.flag_grids[other_refant]
    cs.waterfall_blacklist[(-1, other_refant[1])] = cs.waterfall_blacklist[cs.ants[0][0], 'Jee'] | cs.waterfall_blacklist[cs.ants[0][0], 'Jnn']

meta = cs.time_freq_2D_filter(freq_scale=FREQ_SMOOTHING_SCALE,
                              time_scale=TIME_SMOOTHING_SCALE,
                              eigenval_cutoff=EIGENVAL_CUTOFF,
                              method='DPSS', 
                              fit_method='lu_solve',
                              fix_phase_flips=True,
                              phase_flip_time_scale = TIME_SMOOTHING_SCALE / 2,
                              flag_phase_flip_ints=True,
                              skip_flagged_edges=True,
                              freq_cuts=[(FM_LOW_FREQ + FM_HIGH_FREQ) * .5e6],)

1 phase flips detected on antenna (np.int64(144), 'Jnn'). A total of 2727 integrations were phase-flipped relative to the 0th integration between 2459943.3218111764 and 2459943.6267091404.

# calculate average chi^2 per antenna before additional flagging
avg_cspa_vs_time = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=1) for ant in cs.ants}
avg_cspa_vs_freq = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant]), axis=0) for ant in cs.ants}
avg_cspa = {ant: np.nanmean(np.where(cs.flag_grids[ant], np.nan, cs.cspa_grids[ant])) for ant in cs.ants}

Mean of empty slice

Mean of empty slice

Mean of empty slice

# Pick out antennas with too high relative differences before and after smoothing and flag them.
avg_relative_diffs = {ant: np.nanmean(rel_diff) for ant, rel_diff in meta['freq_avg_rel_diff'].items()}
to_cut = sorted([ant for ant, diff in avg_relative_diffs.items() if ant[0] >= 0 and diff > SC_RELATIVE_DIFF_CUTOFF])
if len(to_cut) > 0:
    for ant in to_cut:
        print(f'Flagging antenna {ant[0]}{ant[1][-1]} with a relative difference before and after smoothing of {avg_relative_diffs[ant]:.2%} '
              f'(compared to the {SC_RELATIVE_DIFF_CUTOFF:.2%} cutoff).')
        cs.flag_grids[ant] |= True
else:
    print(f'No antennas have a relative difference above the {SC_RELATIVE_DIFF_CUTOFF:.2%} cutoff.')

No antennas have a relative difference above the 20.00% cutoff.

if not PER_POL_REFANT:
    # put back in the smoothed phasor, ensuring the amplitude is 1 and that data are flagged anywhere either polarization's refant is flagged
    smoothed_relative_pol_phasor = cs.gain_grids[(-1, other_refant[-1])] / np.abs(cs.gain_grids[(-1, other_refant[-1])])
    for ant in cs.gain_grids:
        if ant[0] >= 0 and ant[1] == other_refant[1]:
            cs.gain_grids[ant] /= smoothed_relative_pol_phasor
        cs.flag_grids[ant] |= (cs.flag_grids[(-1, other_refant[1])])
    cs.refant = overall_refant

def phase_flip_diagnostic_plot():
    '''Shows time-smoothed antenna avg phases after taking out a delay and filtering in time.'''
    if not np.any([np.any(meta['phase_flipped'][ant]) for ant in meta['phase_flipped']]):
        print("No antennas have phase flips identified. Nothing to plot.")
        return
    
    plt.figure(figsize=(14,4))
    for ant in meta['phase_flipped']:
        if np.any(meta['phase_flipped'][ant]):
            plt.plot(cs.time_grid - int(cs.time_grid[0]), 
                     np.angle(np.exp(1.0j * (meta['phases'][ant] - meta['time_smoothed_phases'][ant]))), label=f'{ant[0]}{ant[1][-1]}')
    plt.legend(title='Antennas with Identified Phase Flips', ncol=4)
    plt.xlabel(f'JD - {int(cs.time_grid[0])}')
    plt.ylabel('Average Phase After Filtering (radians)')
    plt.tight_layout()

phase_flip_diagnostic_plot()

def amplitude_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Pick vmax to not saturate 90% of the abscal gains
        vmax = np.max([np.percentile(np.abs(cs.gain_grids[ant_to_plot, pol][~cs.flag_grids[ant_to_plot, pol]]), 99) for pol in ['Jee', 'Jnn']])

        display(HTML(f'<h2>Antenna {ant_to_plot} Amplitude Waterfalls</h2>'))    

        # Plot abscal gain amplitude waterfalls for a single antenna
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Smoothcal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now flagged plot abscal waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=0, vmax=vmax, extent=extent)
            ax.set_title(f'Abscal Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized' )
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot mean gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.legend(loc='upper left')

        # Now plot mean gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.abs(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([0, vmax])
            ax.set_xlabel('LST (hours)')
            ax.set_ylabel('|g| (unitless)')
            ax.set_title(f'Mean Infrequently-Flagged Gain Amplitude of Antenna {ant[0]}: {pol[-1]}-polarized')
            ax.set_xticklabels(ax.get_xticks() % 24)
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()

def phase_plot(ant_to_plot):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")    
        display(HTML(f'<h2>Antenna {ant_to_plot} Phase Waterfalls</h2>'))
        fig, axes = plt.subplots(4, 2, figsize=(14,14), gridspec_kw={'height_ratios': [1, 1, .4, .4]})
        
        # Plot phase waterfalls for a single antenna    
        for ax, pol in zip(axes[0], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)

            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Smoothcal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)

        # Now plot abscal phase waterfall    
        for ax, pol in zip(axes[1], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            extent=[cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
            im = ax.imshow(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), aspect='auto', cmap='inferno', 
                           interpolation='nearest', vmin=-np.pi, vmax=np.pi, extent=extent)
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_title(f'Abscal Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xlabel('Frequency (MHz)')
            ax.set_ylabel('LST (Hours)')
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])
            ax.set_yticklabels(ax.get_yticks() % 24)
            plt.colorbar(im, ax=ax,  orientation='horizontal', pad=.15)
            
        # Now plot median gain spectra 
        for ax, pol in zip(axes[2], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)   
            nflags_spectrum = np.sum(cs.flag_grids[ant], axis=0)
            to_plot = nflags_spectrum <= np.percentile(nflags_spectrum, 75)
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=0)[to_plot], 'r.', label='Abscal')        
            ax.plot(cs.freqs[to_plot] / 1e6, np.nanmedian(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=0)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])
            ax.set_xlim([cs.freqs[0]/1e6, cs.freqs[-1]/1e6])    
            ax.set_xlabel('Frequency (MHz)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Median Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.legend(loc='upper left')

        # # Now plot median gain time series
        for ax, pol in zip(axes[3], ['Jee', 'Jnn']):
            ant = (ant_to_plot, pol)
            nflags_series = np.sum(cs.flag_grids[ant], axis=1)
            to_plot = nflags_series <= np.percentile(nflags_series, 75)
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(abscal_gains[ant])), axis=1)[to_plot], 'r.', label='Abscal')        
            ax.plot(lst_grid[to_plot], np.nanmean(np.where(cs.flag_grids[ant], np.nan, np.angle(cs.gain_grids[ant])), axis=1)[to_plot], 'k.', ms=2, label='Smoothed')        
            ax.set_ylim([-np.pi, np.pi])    
            ax.set_xlabel('LST (hours)')
            refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
            ax.set_ylabel(f'Phase of g$_{{{ant[0]}{pol[-1]}}}$ / g$_{{{refant[0]}{refant[1][-1]}}}$')
            ax.set_title(f'Mean Infrequently-Flagged Gain Phase of Ant {ant[0]}{pol[-1]} / Ant {refant[0]}{refant[1][-1]}')
            ax.set_xticklabels(ax.get_xticks() % 24)    
            ax.legend(loc='upper left')

        plt.tight_layout()
        plt.show()

for ant_to_plot in ants_to_plot:
    amplitude_plot(ant_to_plot)

for ant_to_plot in ants_to_plot:
    phase_plot(ant_to_plot)

def chisq_plot():
    fig, axes = plt.subplots(1, 2, figsize=(14, 10), sharex=True, sharey=True)
    extent = [cs.freqs[0]/1e6, cs.freqs[-1]/1e6, lst_grid[-1], lst_grid[0]]
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        refant = (cs.refant[pol] if isinstance(cs.refant, dict) else cs.refant)
        im = ax.imshow(np.where(cs.flag_grids[refant], np.nan, cs.chisq_grids[pol]), vmin=1, vmax=5, 
                       aspect='auto', cmap='turbo', interpolation='none', extent=extent)
        ax.set_yticklabels(ax.get_yticks() % 24)
        ax.set_title(f'{pol[1:]}-Polarized $\\chi^2$ / DoF')
        ax.set_xlabel('Frequency (MHz)')

    axes[0].set_ylabel('LST (hours)')
    plt.tight_layout()
    fig.colorbar(im, ax=axes, pad=.07, label='$\\chi^2$ / DoF', orientation='horizontal', extend='both', aspect=50)

chisq_plot()

set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.

def cspa_vs_time_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_time.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_time:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_time[ant]) > detail_cutoff:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(lst_grid, avg_cspa_vs_time[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)

    plt.ylim([1, 5.4])
    plt.tight_layout()

def cspa_vs_freq_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in avg_cspa_vs_freq.items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)
        for ant in avg_cspa_vs_freq:
            if ant[1] == pol and not np.all(cs.flag_grids[ant]):
                if np.nanmean(avg_cspa_vs_freq[ant]) > detail_cutoff:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, avg_cspa_vs_freq[ant], c='grey', alpha=.2, lw=.5)
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Mean Unflagged $\\chi^2$ per Antenna')
        ax.set_xlabel('Frequency (MHz)')

    plt.ylim([1, 5.4])
    plt.tight_layout()

def avg_cspa_array_plot():
    hd = io.HERAData(SUM_FILE)
    
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'wspace': 0})
    for pol, ax in zip(['Jee', 'Jnn'], axes):

        ants_here = [ant for ant in avg_cspa if np.isfinite(avg_cspa[ant]) and ant[1] == pol if ant[0] in hd.antpos]
        avg_chisqs = [avg_cspa[ant] for ant in ants_here]
        xs = [hd.antpos[ant[0]][0] for ant in ants_here]
        ys = [hd.antpos[ant[0]][1] for ant in ants_here]
        names = [ant[0] for ant in ants_here]
        
        im = ax.scatter(x=xs, y=ys, c=avg_chisqs, s=200, vmin=1, vmax=3, cmap='turbo')
        ax.set_aspect('equal')
        for x,y,n in zip(xs, ys, names):
            ax.text(x, y, str(n), va='center', ha='center', fontsize=8)
        ax.set_title(pol)
        ax.set_xlabel('East-West Antenna Position (m)')
    
    axes[0].set_ylabel('North-South Antenna Position (m)')

    plt.tight_layout()
    plt.colorbar(im, ax=axes, location='top', aspect=60, pad=.04, label='Mean Unflagged $\\chi^2$ per Antenna', extend='both')

cspa_vs_freq_plot()
cspa_vs_time_plot()
avg_cspa_array_plot()

Mean of empty slice

set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
Mean of empty slice
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.

def time_avg_diff_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(diff) for ant, diff in meta['time_avg_rel_diff'].items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(diff))], 95)    
        for ant, rel_diff in meta['time_avg_rel_diff'].items():
            if ant[0] >= 0 and ant[1] == pol and np.any(np.isfinite(rel_diff)):
                if np.nanmean(rel_diff) > detail_cutoff:
                    if np.all(cs.flag_grids[ant]):
                        ax.plot(cs.freqs / 1e6, rel_diff, label=str((int(ant[0]), ant[1])), zorder=99, ls='--', c='r', lw=.5)    
                    else:
                        ax.plot(cs.freqs / 1e6, rel_diff, label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(cs.freqs / 1e6, rel_diff, c='grey', alpha=.2, lw=.5)
        med_rel_diff = np.nanmedian([diff for ant, diff in meta['time_avg_rel_diff'].items() if ant[1] == pol], axis=0)
        ax.plot(cs.freqs / 1e6, med_rel_diff, 'k--', label='Median')
        ax.set_ylim([0, 1.05])
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Time-Averaged Relative Difference\nBefore and After Smoothing')
        ax.set_xlabel('Frequency (MHz)')
    plt.tight_layout()

def freq_avg_diff_plot():
    fig, axes = plt.subplots(2, 1, figsize=(14, 6), sharex=True, sharey=True, gridspec_kw={'hspace': 0})
    for ax, pol in zip(axes, ['Jee', 'Jnn']):
        detail_cutoff = np.percentile([np.nanmean(m) for ant, m in meta['freq_avg_rel_diff'].items() 
                                       if ant[1] == pol and np.isfinite(np.nanmean(m))], 95)    
        for ant, rel_diff in meta['freq_avg_rel_diff'].items():
            if ant[0] >= 0 and ant[1] == pol and np.any(np.isfinite(rel_diff)):
                if np.nanmean(rel_diff) > detail_cutoff:
                    if np.all(cs.flag_grids[ant]):
                        ax.plot(lst_grid, rel_diff, label=str((int(ant[0]), ant[1])), zorder=99, ls='--', c='r', lw=.5)    
                    else:
                        ax.plot(lst_grid, rel_diff, label=str((int(ant[0]), ant[1])), zorder=100)
                else:
                    ax.plot(lst_grid, rel_diff, c='grey', alpha=.2, lw=.5)
        
        med_rel_diff = np.nanmedian([diff for ant, diff in meta['freq_avg_rel_diff'].items() if ant[1] == pol], axis=0)
        ax.plot(lst_grid, med_rel_diff, 'k--', label='Median', zorder=101)
        ax.set_ylim([0, 1.05])
        ax.legend(title=f'{pol[1:]}-Polarized', ncol=2)
        ax.set_ylabel('Frequency-Averaged Relative Difference\nBefore and After Smoothing')
        ax.set_xlabel('LST (hours)')
        ax.set_xticklabels(ax.get_xticks() % 24)
    plt.tight_layout()

def avg_difference_array_plot():
    hd = io.HERAData(SUM_FILE)
    
    fig, axes = plt.subplots(1, 2, figsize=(14, 8), sharex=True, sharey=True, gridspec_kw={'wspace': 0})
    for pol, ax in zip(['Jee', 'Jnn'], axes):
    
        avg_diffs = [np.nanmean(meta['time_avg_rel_diff'][ant]) for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        xs = [hd.antpos[ant[0]][0] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        ys = [hd.antpos[ant[0]][1] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        names = [ant[0] for ant in meta['time_avg_rel_diff'] if ant[1] == pol if ant[0] in hd.antpos]
        
        im = ax.scatter(x=xs, y=ys, c=avg_diffs, s=200, vmin=0, vmax=.25, cmap='turbo')
        ax.set_aspect('equal')
        for x,y,n in zip(xs, ys, names):
            color = ('w' if np.all(cs.flag_grids[n, pol]) else 'k')
            ax.text(x, y, str(n), va='center', ha='center', fontsize=8, c=color)
        ax.set_title(pol)
        ax.set_xlabel('East-West Antenna Position (m)')
    
    axes[0].set_ylabel('North-South Antenna Position (m)')

    plt.tight_layout()
    plt.colorbar(im, ax=axes, location='top', aspect=60, pad=.04, label='Average Relative Difference Before and After Smoothing', extend='max')

time_avg_diff_plot()
freq_avg_diff_plot()
avg_difference_array_plot()

All-NaN slice encountered

All-NaN slice encountered
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
All-NaN slice encountered
set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.

add_to_history = 'Produced by calibration_smoothing notebook with the following environment:\n' + '=' * 65 + '\n' + os.popen('conda env export').read() + '=' * 65

cs.write_smoothed_cal(output_replace=(CAL_SUFFIX, SMOOTH_CAL_SUFFIX), add_to_history=add_to_history, clobber=True)

Mean of empty slice

for repo in ['hera_cal', 'hera_qm', 'hera_filters', 'hera_notebook_templates', 'pyuvdata']:
    exec(f'from {repo} import __version__')
    print(f'{repo}: {__version__}')

hera_cal: 3.7.7.dev69+g9adaeb01c
hera_qm: 2.2.1.dev4+gf6d0211
hera_filters: 0.1.7

hera_notebook_templates: 0.0.1.dev1285+g2b96f023a
pyuvdata: 3.2.3.dev10+g11d3f658

print(f'Finished execution in {(time.time() - tstart) / 60:.2f} minutes.')

Finished execution in 549.55 minutes.

Calibration Smoothing¶

• Figure 1: Identifying and Blacklisting `abscal` Failures ¶

• Figure 2: Antenna Phases with Identified Phase Flips ¶

• Figure 3: Full-Day Gain Amplitudes Before and After `smooth_cal`¶

• Figure 4: Full-Day Gain Phases Before and After `smooth_cal`¶

• Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration ¶

• Figure 6: Average $\chi^2$ per Antenna ¶

• Figure 7: Relative Difference Before and After Smoothing ¶

Parse inputs¶

Load files and select reference antenna(s)¶

Find consistent outliers in relative error after a coarse smoothing¶

Figure 1: Identifying and Blacklisting `abscal` Failures¶

Perform smoothing¶

Figure 2: Antenna Phases with Identified Phase Flips¶

Plot results¶

Figure 3: Full-Day Gain Amplitudes Before and After `smooth_cal`¶

Antenna 5 Amplitude Waterfalls

Antenna 261 Amplitude Waterfalls

Figure 4: Full-Day Gain Phases Before and After `smooth_cal`¶

Antenna 5 Phase Waterfalls

Antenna 261 Phase Waterfalls

Examine $\chi^2$¶

Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

Figure 6: Average $\chi^2$ per Antenna¶

Examine relative differences before and after smoothing¶

Figure 7: Relative Difference Before and After Smoothing¶

Save Results¶

Metadata¶

Calibration Smoothing¶

• Figure 1: Identifying and Blacklisting abscal Failures¶

• Figure 2: Antenna Phases with Identified Phase Flips¶

• Figure 3: Full-Day Gain Amplitudes Before and After smooth_cal¶

• Figure 4: Full-Day Gain Phases Before and After smooth_cal¶

• Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

• Figure 6: Average $\chi^2$ per Antenna¶

• Figure 7: Relative Difference Before and After Smoothing¶

Parse inputs¶

Load files and select reference antenna(s)¶

Find consistent outliers in relative error after a coarse smoothing¶

Figure 1: Identifying and Blacklisting abscal Failures¶

Perform smoothing¶

Figure 2: Antenna Phases with Identified Phase Flips¶

Plot results¶

Figure 3: Full-Day Gain Amplitudes Before and After smooth_cal¶

Antenna 5 Amplitude Waterfalls

Antenna 261 Amplitude Waterfalls

Figure 4: Full-Day Gain Phases Before and After smooth_cal¶

Antenna 5 Phase Waterfalls

Antenna 261 Phase Waterfalls

Examine $\chi^2$¶

Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration¶

Figure 6: Average $\chi^2$ per Antenna¶

Examine relative differences before and after smoothing¶

Figure 7: Relative Difference Before and After Smoothing¶

Save Results¶

Metadata¶

• Figure 1: Identifying and Blacklisting `abscal` Failures ¶

• Figure 2: Antenna Phases with Identified Phase Flips ¶

• Figure 3: Full-Day Gain Amplitudes Before and After `smooth_cal`¶

• Figure 4: Full-Day Gain Phases Before and After `smooth_cal`¶

• Figure 5: Full-Day $\chi^2$ / DoF Waterfall from Redundant-Baseline Calibration ¶

• Figure 6: Average $\chi^2$ per Antenna ¶

• Figure 7: Relative Difference Before and After Smoothing ¶

Figure 1: Identifying and Blacklisting `abscal` Failures¶

Figure 3: Full-Day Gain Amplitudes Before and After `smooth_cal`¶

Figure 4: Full-Day Gain Phases Before and After `smooth_cal`¶