Source code for calibration.calibration

###################################################
# imports

import numpy as np
import ehtim as eh
import ngehtsim.obs.obs_generator as og
import ngehtsim.const_def as const
from astropy.time import Time
import copy
try:
    from eat.io.hops import read_alist
except ImportError:
    print('Warning: eat not installed! Cannot use calibration functionality.')
import pandas as pd

###################################################
# define various dictionaries

# station code conversion dictionary
known_station_dict = {'A': 'ALMA',
                      'G': 'GLT',
                      'J': 'JCMT',
                      'K': 'KP',
                      'L': 'LMT',
                      'N': 'NOEMA',
                      'P': 'IRAM',
                      'S': 'SMA',
                      'X': 'APEX',
                      'Y': 'SPT',
                      'Z': 'SMT'}

###################################################
# function definitions

[docs]def argunique(array): """ Return the indices associated with the unique elements of an array. Analogous in spirit to functions like argmin and argsort. Args: array (numpy.array): The array for which to return the unique-element arguments Returns: (numpy.array): An array of arguments for each unique element """ arr_unique = np.unique(array) ind_unique = np.zeros(len(arr_unique),dtype=int) for i in range(len(arr_unique)): ind_unique[i] = np.min(np.nonzero(array == arr_unique[i])) return ind_unique
[docs]def apriorical(filename,sourcename,bandwidth,debias=True,remove_autocorr=True, SNR_cut=0.0,station_codes={},return_coords=False,**kwargs): """ Read an alist data file and carry out a priori flux density calibration. Args: filename (str): The name of the input alist data file sourcename (str): The name of the source whose data to calibrate bandwidth (float): The bandwidth over which the data have been averaged, in GHz debias (bool): Flag for whether to debias the amplitudes stored in the alist file remove_autocorr (bool): Flag for whether to remove autocorrelations during calibration SNR_cut (float): SNR cut to apply station_codes (dict): Dictionary containing conversions between single- and multi-letter station codes return_coords (bool): Flag for whether to return RA and DEC of the source Returns: (pandas.DataFrame): pandas DataFrame containing the calibrated data and associated metainfo """ ############################################ # check inputs # update the station codes if needed station_dict = copy.deepcopy(known_station_dict) station_dict.update(station_codes) ############################################ # parse the alist file # read in the alist file using eat df = read_alist(filename) # extract the relevant quantities datetime_orig = df.datetime timetag_orig = np.array(df.timetag) source_orig = np.array(df.source) bl_orig = np.array(df.baseline) pl_orig = np.array(df.polarization) tint_orig = np.array(df.duration, dtype=float) amp_orig = np.array(df.amp, dtype=float) snr_orig = np.array(df.snr, dtype=float) phase_orig = np.array(df.resid_phas, dtype=float) elev1_orig = np.array(df.ref_elev, dtype=float) elev2_orig = np.array(df.rem_elev, dtype=float) az1_orig = np.array(df.ref_az, dtype=float) az2_orig = np.array(df.rem_az, dtype=float) u_orig = np.array(df.u, dtype=float) v_orig = np.array(df.v, dtype=float) freq_orig = np.array(df.ref_freq, dtype=float) ra_orig = np.array(df.ra_hrs, dtype=float) dec_orig = np.array(df.dec_deg, dtype=float) # debias SNR if desired if debias: snr_orig = np.sqrt((snr_orig**2.0) - 1.0) # determine uncertainties amp_err_orig = amp_orig / snr_orig # determine time in hours t_orig = np.zeros(len(datetime_orig)) day_orig = np.zeros(len(datetime_orig)) for i, dt in enumerate(datetime_orig): t_orig[i] = dt.hour + (dt.minute/60.0) + (dt.second/3600.0) day_orig[i] = float(timetag_orig[i].split('-')[0]) # convert (u,v) to lambda u_orig *= (1.0e6) v_orig *= (1.0e6) # convert frequency to GHz freq_orig /= (1.0e3) ############################################ # extract only the data associated with the desired source ind = (source_orig == sourcename) datetime = datetime_orig[ind] datetime.index = np.arange(len(datetime)) timetag = timetag_orig[ind] t_hr = t_orig[ind] t = t_hr + (day_orig[ind]*24.0) day = day_orig[ind] bl = bl_orig[ind] pl = pl_orig[ind] u = u_orig[ind] v = v_orig[ind] freq = freq_orig[ind] amp = amp_orig[ind] phase = phase_orig[ind] * (np.pi/180.0) snr = snr_orig[ind] elev1 = elev1_orig[ind] * (np.pi/180.0) elev2 = elev2_orig[ind] * (np.pi/180.0) az1 = az1_orig[ind] * (np.pi/180.0) az2 = az2_orig[ind] * (np.pi/180.0) ra = ra_orig[ind] dec = dec_orig[ind] amp_err = amp_err_orig[ind] tint = tint_orig[ind] # construct complex visibilities vis = amp*np.exp((1j)*phase) # time info days = np.empty(len(datetime),dtype=int) months = np.empty(len(datetime),dtype=int) years = np.empty(len(datetime),dtype=int) for i in range(len(datetime)): days[i] = datetime[i].day months[i] = datetime[i].month years[i] = datetime[i].year ############################################ # remove autocorrelations if remove_autocorr: ind = np.ones(len(timetag),dtype=bool) for i in range(len(timetag)): if (bl[i][0] == bl[i][1]): ind[i] = False datetime = datetime[ind] datetime.index = np.arange(len(datetime)) days = days[ind] months = months[ind] years = years[ind] timetag = timetag[ind] t = t[ind] t_hr = t_hr[ind] day = day[ind] bl = bl[ind] pl = pl[ind] u = u[ind] v = v[ind] freq = freq[ind] amp = amp[ind] phase = phase[ind] snr = snr[ind] elev1 = elev1[ind] elev2 = elev2[ind] az1 = az1[ind] az2 = az2[ind] ra = ra[ind] dec = dec[ind] amp_err = amp_err[ind] tint = tint[ind] vis = vis[ind] if SNR_cut > 0.0: ind = (snr >= SNR_cut) datetime = datetime[ind] datetime.index = np.arange(len(datetime)) days = days[ind] months = months[ind] years = years[ind] timetag = timetag[ind] t = t[ind] t_hr = t_hr[ind] day = day[ind] bl = bl[ind] pl = pl[ind] u = u[ind] v = v[ind] freq = freq[ind] amp = amp[ind] phase = phase[ind] snr = snr[ind] elev1 = elev1[ind] elev2 = elev2[ind] az1 = az1[ind] az2 = az2[ind] ra = ra[ind] dec = dec[ind] amp_err = amp_err[ind] tint = tint[ind] vis = vis[ind] # flag if there's more than one RA/DEC if ((len(np.unique(ra)) > 1) | (len(np.unique(dec)) > 1)): raise Exception('More than one RA and/or DEC value for this source is listed! Check the alist file.') RA = np.mean(ra) DEC = np.mean(dec) ############################################ # simulate the observation with ngehtsim months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] # make dummy input model mod = eh.model.Model() mod = mod.add_point(1.0) # loop through scans t_uniq = np.unique(t) t_uniq_arg = argunique(t) datetime_uniq = datetime[t_uniq_arg] datetime_uniq.index = np.arange(len(datetime_uniq)) vis_corrected = np.zeros_like(vis) amp_corrected = np.zeros_like(amp) amp_err_corrected = np.zeros_like(amp_err) for itime in range(len(t_uniq)): there = t_uniq[itime] ind = (t == there) there -= (np.mean(day[ind])*24.0) # determine the sites that are observing bl_here = bl[ind] sites = list() for b in bl_here: if (b[0] not in station_dict.keys()): raise Exception('Unrecognized station with single-letter code '+b[0]+'; please specify this station using the station_codes keyword argument.') if (b[1] not in station_dict.keys()): raise Exception('Unrecognized station with single-letter code '+b[1]+'; please specify this station using the station_codes keyword argument.') sites.append(station_dict[b[0]]) sites.append(station_dict[b[1]]) sites = list(np.unique(sites)) # other relevant quantities freq_here = np.mean(freq[ind]) pl_here = pl[ind] vis_here = vis[ind] amp_here = amp[ind] amp_err_here = amp_err[ind] tint_here = tint[ind] dt = np.max(tint_here) if len(np.unique(tint_here)) > 1: print('Warning: Some baselines in the scan at time '+str(there)+' have different integration times than others. Assumption here is to use a single integration time corresponding to the maximum value in the scan, but some baselines may be poorly-calibrated as a result.') # settings settings = {'source': sourcename, 'RA': RA, 'DEC': DEC, 'frequency': freq_here, 'bandwidth': bandwidth, 'day': str(datetime_uniq[itime].day), 'month': months[datetime_uniq[itime].month-1], 'year': str(datetime_uniq[itime].year), 't_start': there, 'dt': (dt/3600.0), 't_int': dt, 't_rest': dt, 'fringe_finder': ['naive', 0.0], 'sites': sites, 'weather': 'exact'} # simulate this scan, tracking SEFDs obsgen = og.obs_generator(settings,weight=1,**kwargs) obs = obsgen.make_obs(mod,addnoise=False,addgains=False,flagwind=False,el_min=0.0,el_max=90.0) obs = obs.switch_polrep('circ') # flag any unwanted times UT_start_hour = there-0.0001 UT_stop_hour = there+0.0001 UT_mask = obs.unpack('time')['time'] <= UT_start_hour UT_mask = UT_mask + (obs.unpack('time')['time'] >= UT_stop_hour) what_mask = np.array([False for j in range(len(UT_mask))]) mask = UT_mask | what_mask SEFD1 = obsgen.SEFD1[~mask] SEFD2 = obsgen.SEFD2[~mask] with eh.parloop.HiddenPrints(): obs = obs.flag_UT_range(UT_start_hour, UT_stop_hour, output='flagged') # determine amplitude scaling factor sites_arr = np.array(sites) bw = bandwidth * (1.0e9) t1 = obs.data['t1'] t2 = obs.data['t2'] scale_factor = np.zeros(len(amp_here)) for j in range(len(amp_here)): ind_here = ((t1 == station_dict[bl_here[j][0]]) & (t2 == station_dict[bl_here[j][1]])) ind_here |= ((t2 == station_dict[bl_here[j][0]]) & (t1 == station_dict[bl_here[j][1]])) scale_factor[j] = (1.0e-4)*np.sqrt(SEFD1[ind_here]*SEFD2[ind_here]) # apply the scaling factor vis_corrected[ind] = vis_here * scale_factor amp_corrected[ind] = amp_here * scale_factor amp_err_corrected[ind] = amp_err_here * scale_factor ############################################ # return the calibrated data struct = {'freq': freq, 't': t, 't_hr': t_hr, 'bl': bl, 'pl': pl, 'u': u, 'v': v, 'elev1': elev1, 'elev2': elev2, 'az1': az1, 'az2': az2, 'datetime': datetime, 'vis': vis_corrected, 'err': amp_err_corrected, 'snr': snr} df_out = pd.DataFrame(struct) if return_coords: return df_out, RA, DEC else: return df_out
[docs]def write_dlist(filename,sourcename,bandwidth,outname,debias=True,remove_autocorr=True, SNR_cut=0.0,station_codes={},**kwargs): """ Write a "dlist" data file from an "alist" file. Args: filename (str): The name of the input alist data file sourcename (str): The name of the source whose data to calibrate bandwidth (float): The bandwidth over which the data have been averaged, in GHz outname (str): The name of the output dlist data file debias (bool): Flag for whether to debias the amplitudes stored in the alist file remove_autocorr (bool): Flag for whether to remove autocorrelations during calibration SNR_cut (float): SNR cut to apply station_codes (dict): Dictionary containing conversions between single- and multi-letter station codes Returns: Writes a dlist file to disk """ ############################################ # check inputs # update the station codes if needed station_dict = copy.deepcopy(known_station_dict) station_dict.update(station_codes) ############################################ # carry out the flux density calibration df, RA, DEC = apriorical(filename,sourcename,bandwidth,debias=debias,remove_autocorr=remove_autocorr,station_codes=station_codes,return_coords=True,**kwargs) freq = df.freq t = df.t t_hr = df.t_hr bl = df.bl pl = df.pl u = df.u v = df.v elev1 = df.elev1 elev2 = df.elev2 az1 = df.az1 az2 = df.az2 datetime = df.datetime vis_corrected = df.vis err_corrected = df.err snr = df.snr ############################################ # write reformatted "dlist" table # convert source RA and DEC to radians RA_rad = RA*15.0*(np.pi/180.0) DEC_rad = DEC*(np.pi/180.0) # write file with open(outname,'w') as f: header = '' header += 'Freq'.ljust(24) header += 'Time'.ljust(24) header += 'Station1'.ljust(12) header += 'Station2'.ljust(12) header += 'Pol1'.ljust(6) header += 'Pol2'.ljust(6) header += 'u'.ljust(24) header += 'v'.ljust(24) header += 'elev1'.ljust(24) header += 'elev2'.ljust(24) header += 'parang1'.ljust(24) header += 'parang2'.ljust(24) header += 'Re(vis)'.ljust(24) header += 'Im(vis)'.ljust(24) header += 'sigma' + '\n' f.write(header) f.write('-'*292 + '\n') for i in range(len(vis_corrected)): # populate table entries strhere = '' strhere += str(freq[i]).ljust(24) strhere += str(np.round(t[i],12)).ljust(24) strhere += station_dict[bl[i][0]].ljust(12) strhere += station_dict[bl[i][1]].ljust(12) strhere += pl[i][0].ljust(6) strhere += pl[i][1].ljust(6) strhere += str(np.round(u[i],10)).ljust(24) strhere += str(np.round(v[i],10)).ljust(24) strhere += str(np.round(elev1[i],10)).ljust(24) strhere += str(np.round(elev2[i],10)).ljust(24) # compute parallactic angle timestr = str(datetime[i].year) + '-' + str(datetime[i].month).zfill(2) + '-' + str(datetime[i].day).zfill(2) + ' ' + str(datetime[i].hour).zfill(2) + ':' + str(datetime[i].minute).zfill(2) + ':' + str(datetime[i].second).zfill(2) timeobj = Time(timestr,format='iso') MJD = np.floor(timeobj.mjd) gst = (np.pi/12.0)*eh.observing.obs_helpers.utc_to_gmst(t_hr[i],MJD) lat1 = const.known_latitudes[station_dict[bl[i][0]]]*(np.pi/180.0) lat2 = const.known_latitudes[station_dict[bl[i][1]]]*(np.pi/180.0) lon1 = const.known_longitudes[station_dict[bl[i][0]]]*(np.pi/180.0) lon2 = const.known_longitudes[station_dict[bl[i][1]]]*(np.pi/180.0) hr_angle1 = eh.observing.obs_helpers.hr_angle(gst,lon1,RA_rad) hr_angle2 = eh.observing.obs_helpers.hr_angle(gst,lon2,RA_rad) par_angle1 = eh.observing.obs_helpers.par_angle(hr_angle1, lat1, DEC_rad) par_angle2 = eh.observing.obs_helpers.par_angle(hr_angle2, lat2, DEC_rad) # continue populating table entries strhere += str(np.round(par_angle1,10)).ljust(24) strhere += str(np.round(par_angle2,10)).ljust(24) strhere += str(np.round(np.real(vis_corrected[i]),12)).ljust(24) strhere += str(np.round(np.imag(vis_corrected[i]),12)).ljust(24) strhere += str(np.round(err_corrected[i],12)) strhere += '\n' f.write(strhere)