diff --git a/SSINS/incoherent_noise_spectrum.py b/SSINS/incoherent_noise_spectrum.py index 4fa38a74..afd3d728 100644 --- a/SSINS/incoherent_noise_spectrum.py +++ b/SSINS/incoherent_noise_spectrum.py @@ -215,21 +215,26 @@ def mean_subtract(self, freq_slice=slice(None), return_coeffs=False): def mask_to_flags(self): """ Propagates the mask to construct flags for the original - (non time-differenced) data. If a time is flagged in the INS, then both + (non differenced) data. If a time is flagged in the INS, then both times that could have contributed to that time in the sky-subtraction step are flagged in the new array. Returns: - tp_flags (array): The time-propagated flags + p_flags (array): The (time or frequency)-propagated flags """ # Propagate the flags shape = list(self.metric_array.shape) - tp_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool) - tp_flags[:-1] = self.metric_array.mask - tp_flags[1:] = np.logical_or(tp_flags[1:], tp_flags[:-1]) - - return(tp_flags) + # no else here between dif/time, it is possible to have a doubly differenced set + if self.extra_keywords['dif_time'] is True: + p_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool) + p_flags[:-1] = self.metric_array.mask + p_flags[1:] = np.logical_or(p_flags[1:], p_flags[:-1]) + if self.extra_keywords['dif_freq'] is True: + p_flags = np.zeros([shape[0], shape[1] + 1, shape[2]], dtype=bool) + p_flags[:,:-1] = self.metric_array.mask + p_flags[:,1:] = np.logical_or(p_flags[:,1:], p_flags[:,:-1]) + return(p_flags) def flag_uvf(self, uvf, inplace=False): """ @@ -253,13 +258,20 @@ def flag_uvf(self, uvf, inplace=False): raise ValueError("UVFlag object must be in flag mode to write flags from INS object.") if uvf.type != 'waterfall': raise ValueError("UVFlag object must be in waterfall mode to write flags from INS object.") - try: - test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:]) - time_compat = np.all(self.time_array == test_times) - assert time_compat - except Exception: - raise ValueError("UVFlag object's times do not match those of INS object.") - + if self.extra_keywords['dif_time'] is True: + try: + test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:]) + time_compat = np.all(self.time_array == test_times) + assert time_compat + except Exception: + raise ValueError("UVFlag object's times do not match those of INS object.") + if self.extra_keywords['dif_freq'] is True: + try: + test_freqs = 0.5 * (uvf.freq_array[:-1] + uvf.freq_array[1:]) + freq_compat = np.all(self.freq_array == test_freqs) + assert freq_compat + except Exception: + raise ValueError("UVFlag object's frequencies do not match those of INS object.") new_flags = self.mask_to_flags() if inplace: diff --git a/SSINS/sky_subtract.py b/SSINS/sky_subtract.py index 5ced6f51..66b8832f 100644 --- a/SSINS/sky_subtract.py +++ b/SSINS/sky_subtract.py @@ -26,8 +26,8 @@ def __init__(self): """Array of length Nfreqs that stores maximum likelihood estimators for each frequency, calculated using the MLE_calc method""" - def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None, - **kwargs): + def read(self, filename, diff=False, diff_freq=False, flag_choice=None, INS=None, custom=None, + override_keyword=None, **kwargs): """ Reads in a file that is compatible with UVData object by first calling @@ -37,29 +37,68 @@ def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None, Args: filename (str or list of str): The filepath(s) to read in. diff (bool): If True, and data was read in, then difference the visibilities in time + diff_freq (bool): If True, and data was read in, then difference the visibilities in freq flag_choice: Sets flags for the data array on read using apply_flags method. INS: An INS object for apply_flags() custom: A custom flag array for apply_flags() kwargs: Additional kwargs are passed to UVData.read() + override_keyword (str): sets a keyword to override to True regardless of internal diffs, + can be set to `dif_freq`, `dif_time`, or `both`. """ - warnings.warn("SS.read will be renamed to SS.read_data soon to avoid" - " conflicts with UVData.read.", category=PendingDeprecationWarning) super().read(filename, **kwargs) - + # detect if there is contridicting override_keyword and diff/diff_freq terms + if diff or diff_freq is True and override_keyword is not None: + warnings.warn("diff or diff_freq set to true and override keyword has" + " been set. Please ensure that this does not cause unwanted" + " behavior, since the override_keyword will override diff" + " setting internal attributes") + + if override_keyword != "dif_freq" or override_keyword != "dif_time" or override_keyword != "both": + warnings.warn("override_keyword passed but is not 'dif_time'," + " 'dif_freq', or 'both': override_keyword will have no" + " effect") + + # always set extra keywords, else key errors when trying to check if (self.data_array is not None): if diff: self.diff() self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom) - else: + self.extra_keywords['dif_time'] = True + self.extra_keywords['dif_freq'] = False + if diff_freq: + self.diff_freq() + self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom) + self.extra_keywords['dif_freq'] = True + self.extra_keywords['dif_time'] = False + if not diff_freq and not diff: # This warning will be issued when diff is False and there is some data read in # If filename is a list of files, then this warning will get issued in the recursive call in UVData.read warnings.warn("diff on read defaults to False now. Please double" " check SS.read call and ensure the appropriate" " keyword arguments for your intended use case.") + self.extra_keywords['dif_time'] = False + self.extra_keywords['dif_freq'] = False if flag_choice is not None: warnings.warn("flag_choice will be ignored on read since" " diff is being skipped.") + if diff_freq and diff: + self.extra_keywords['dif_time'] = True + self.extra_keywords['dif_freq'] = True + else: + self.extra_keywords['dif_time'] = False + self.extra_keywords['dif_freq'] = False + # hardcoded cases for override_keyword to set dif_freq/dif_time (or both) + # useful if data is pre-differenced rather than internally diff-ed by read() itself + # applied after the internal flags, and thus overwrites them + if (override_keyword is not None): + if override_keyword == 'dif_time': + self.extra_keywords['dif_time'] = True + elif override_keyword == 'dif_freq': + self.extra_keywords['dif_freq'] = True + elif override_keyword == 'both': + self.extra_keywords['dif_freq'] = True + self.extra_keywords['dif_time'] = True def apply_flags(self, flag_choice=None, INS=None, custom=None): """ @@ -225,6 +264,25 @@ def diff(self): super().set_lsts_from_time_array() self.data_array = np.ma.masked_array(self.data_array) + def diff_freq(self): + + """ + Differences the visibilities in freq. Does so independently for each time. + The flags are propagated by taking the boolean OR of the entries that correspond + to the visibilities that are differenced from one another. Other metadata + attributes are also adjusted so that the resulting SS object passes + UVData.check() + """ + + diff_dat = np.diff(self.data_array, axis=2) #axis 2: nfreqs (see uvdata object) + self.data_array = diff_dat + + self.Nfreqs -= 1 + self.data_array = np.ma.masked_array(self.data_array) + self.freq_array = (self.freq_array[:,1:] + self.freq_array[:,:-1]) / 2.0 + self.nsample_array = (self.nsample_array[:,:,1:,:] + self.nsample_array[:,:,:-1,:]) / 2.0 + self.flag_array = np.logical_or(self.flag_array[:,:,1:,:], self.flag_array[:,:,:-1,:]) + def MLE_calc(self): """ diff --git a/SSINS/tests/test_INS.py b/SSINS/tests/test_INS.py index 6845a675..027d7f6d 100644 --- a/SSINS/tests/test_INS.py +++ b/SSINS/tests/test_INS.py @@ -35,6 +35,25 @@ def test_init(): # Check that the weights summed correctly assert np.all(test_weights == ins.weights_array), "Weights did not sum properly" +def test_extra_keywords(): + obs = '1061313128_99bl_1pol_half_time' + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + file_type = 'uvfits' + + ss = SS() + ss.read(testfile, flag_choice='original', diff=True) + ins = INS(ss) + + assert ss.extra_keywords['dif_time'] is True + assert ins.extra_keywords['dif_time'] is True + + ss.read(testfile, flag_choice='original', diff=False) + ins = INS(ss) + + assert ss.extra_keywords['dif_time'] is False + assert ins.extra_keywords['dif_time'] is False + + def test_no_diff_start(): obs = '1061313128_99bl_1pol_half_time' @@ -109,6 +128,36 @@ def test_polyfit(): ins.metric_ms = ins.mean_subtract() assert np.all(ins.metric_ms.mask), "The metric_ms array was not all masked" +def test_flag_uvf_freq(): + obs = '1061313128_99bl_1pol_half_time' + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + file_type = 'uvfits' + prefix = os.path.join(DATA_PATH, '%s_test' % obs) + flags_outfile = '%s_SSINS_flags.h5' % prefix + + ss = SS() + ss.read(testfile, diff_freq=True) + + uvd = UVData() + uvd.read(testfile) + + uvf = UVFlag(uvd, mode='flag', waterfall=True) + # start with some flags so that we can test the intended OR operation + uvf.flag_array[6, :] = True + ins = INS(ss) + + # Check error handling + with pytest.raises(ValueError): + bad_uvf = UVFlag(uvd, mode='metric', waterfall=True) + err_uvf = ins.flag_uvf(uvf=bad_uvf) + with pytest.raises(ValueError): + bad_uvf = UVFlag(uvd, mode='flag', waterfall=False) + err_uvf = ins.flag_uvf(uvf=bad_uvf) + with pytest.raises(ValueError): + bad_uvf = UVFlag(uvd, mode='flag', waterfall=True) + # Pretend the data is off by 1 freq + bad_uvf.freq_array += 1 + err_uvf = ins.flag_uvf(uvf=bad_uvf) @pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read") def test_mask_to_flags(): @@ -236,6 +285,10 @@ def test_write_mwaf(): testfile = os.path.join(DATA_PATH, '%s.h5' % obs) prefix = os.path.join(DATA_PATH, '%s_test' % obs) ins = INS(testfile) + #to override the fact that the data files don't have dif_ keywords set + ins.extra_keywords['dif_time'] = True + ins.extra_keywords['dif_freq'] = False + mwaf_files = [os.path.join(DATA_PATH, '1061313128_12.mwaf')] bad_mwaf_files = [os.path.join(DATA_PATH, 'bad_file_path')] metafits_file = os.path.join(DATA_PATH, '1061313128.metafits') @@ -431,3 +484,17 @@ def test_add(): assert np.all(combo_ins.metric_array.mask == first_ins.metric_array.mask) assert np.all(combo_ins.metric_array.data == truth_ins.metric_array.data) assert np.all(combo_ins.metric_array.mask == truth_ins.metric_array.mask) + +def test_mask_to_flags_2(): + #verify array sizes + ss = SS() + filepath = 'SSINS/data/1061313128_99bl_1pol_half_time.uvfits' + ss.read(filepath, diff=True) + ins = INS(ss) + flags = ins.mask_to_flags() + ss.read(filepath, diff=False, diff_freq=True) + ins = INS(ss) + flags = ins.mask_to_flags() + ss.read(filepath, diff=True, diff_freq=True) + ins = INS(ss) + flags = ins.mask_to_flags() diff --git a/SSINS/tests/test_SS.py b/SSINS/tests/test_SS.py index b73553d3..2b5ddafa 100644 --- a/SSINS/tests/test_SS.py +++ b/SSINS/tests/test_SS.py @@ -9,19 +9,20 @@ Tests the various capabilities of the sky_subtract class """ - @pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read") + def test_SS_read(): obs = '1061313128_99bl_1pol_half_time' testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) ss = SS() - # Test reading in only metadata skips if block and warning - with pytest.warns(PendingDeprecationWarning, match="SS.read will be renamed"): - ss.read(testfile, read_data=False) + ss.read(testfile, read_data=False) assert ss.data_array is None, "Data array is not None" + # See that it is not yet flagged as diffed + assert ss.extra_keywords['dif_freq'] is False + # Test select on read and diff ss.read(testfile, times=np.unique(ss.time_array)[1:10], diff=True) assert ss.Ntimes == 8, "Number of times after diff disagrees!" @@ -64,11 +65,74 @@ def test_diff(): assert np.all(ss.uvw_array == diff_uvw), "uvw_arrays disagree!" assert np.all(ss.ant_1_array == np.array([0, 0])), "ant_1_array disagrees!" assert np.all(ss.ant_2_array == np.array([1, 2])), "ant_2_array disagrees!" + assert ss.extra_keywords['dif_freq'] is False + assert ss.extra_keywords['dif_time'] is True + + +def test_keyword_override_time(): + obs = '1061313128_99bl_1pol_half_time' + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + ss = SS() + ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_time') + assert ss.extra_keywords['dif_time'] is True + + +def test_keyword_override_freq(): + obs = '1061313128_99bl_1pol_half_time' + ss = SS() + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_freq') + assert ss.extra_keywords['dif_freq'] is True + + +def test_keyword_override_both(): + obs = '1061313128_99bl_1pol_half_time' + ss = SS() + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='both') + assert ss.extra_keywords['dif_time'] is True + assert ss.extra_keywords['dif_freq'] is True +#checks whether diff_freq reads in and out, and the diff values are sane +def test_diff_freq(): + obs = '1061313128_99bl_1pol_half_time' + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + + ss = SS() + uv = UVData() + + # Read in two times and two baselines of data, so that the diff is obvious. + uv.read(testfile, read_data=False) + times = np.unique(uv.time_array)[:2] + bls = [(0, 1), (0, 2)] + uv.read(testfile, times=times, bls=bls) + + diff_dat = np.diff(uv.data_array, axis=2) + + ss.read(testfile, diff=False, diff_freq=True, times=times, bls=bls) + print(ss._data_array.form) + #ss.reorder_blts(order='baseline') + assert np.all(ss.data_array == diff_dat), "Data values are different!" + assert ss.extra_keywords['dif_freq'] is True + assert ss.extra_keywords['dif_time'] is False + +#checks whether diff_freq masks properly +def test_diff_freq_mask(): + obs = '1061313128_99bl_1pol_half_time' + testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) + ss = SS() + + #read in test file + ss.read(testfile, read_data=True, diff=False, diff_freq=False) + ss.apply_flags(flag_choice='original') + assert ss.flag_array is not None + temp_array = np.logical_or(ss.flag_array, ss.flag_array) + nonzero_or = np.count_nonzero(temp_array) + nonzero_flags = np.count_nonzero(ss.flag_array[::2]) + assert (nonzero_or > nonzero_flags) @pytest.mark.filterwarnings("ignore:SS.read", "ignore:Reordering") def test_apply_flags(): - obs = '1061313128_99bl_1pol_half_time' testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs) file_type = 'uvfits' diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b373c7f1..b27c7394 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -43,9 +43,20 @@ Generating the sky-subtracted visibilities >>> # The following lines make use of the time_array attribute (metadata) to >>> # read in all but the first and last integrations >>> times = np.unique(ss.time_array)[1:-1] + >>> # This read() call uses the `diff` keyword to difference the data automatically along the time axis >>> ss.read(filepath, read_data=True, times=times, diff=True) -(c) Applying flags +(c) Differencing along the frequency axis +***************************************** +:: + >>> # We can opt to difference visibilities along the frequency axis instead of the default time axis + >>> # To do this, use the separate keyword `diff_freq` and set it to true when you call the read() function + >>> # The two modes work otherwise the same except for the axis the visibilities are differenced upon. + + >>> # Note that this will override the time-differenced data if you ran ss.read() from section (b) + >>> ss.read(filepath, read_data=True, times=times, diff_freq=True) + +(d) Applying flags ****************** :: >>> # SS.data_array is a numpy masked array. To "apply flags" is to change the mask of the data_array. @@ -72,7 +83,7 @@ Generating the sky-subtracted visibilities >>> print(np.any(ss.data_array.mask)) False -(d) Plotting using Catalog_Plot +(e) Plotting using Catalog_Plot ******************************* :: >>> from SSINS import Catalog_Plot as cp