mwilensky768 · no-lex · Nov 6, 2020 · Nov 6, 2020 · Dec 11, 2020 · Dec 11, 2020
diff --git a/SSINS/incoherent_noise_spectrum.py b/SSINS/incoherent_noise_spectrum.py
@@ -215,21 +215,26 @@ def mean_subtract(self, freq_slice=slice(None), return_coeffs=False):
     def mask_to_flags(self):
         """
         Propagates the mask to construct flags for the original
-        (non time-differenced) data. If a time is flagged in the INS, then both
+        (non differenced) data. If a time is flagged in the INS, then both
         times that could have contributed to that time in the sky-subtraction
         step are flagged in the new array.
 
         Returns:
-            tp_flags (array): The time-propagated flags
+            p_flags (array): The (time or frequency)-propagated flags
         """
 
         # Propagate the flags
         shape = list(self.metric_array.shape)
-        tp_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool)
-        tp_flags[:-1] = self.metric_array.mask
-        tp_flags[1:] = np.logical_or(tp_flags[1:], tp_flags[:-1])
-
-        return(tp_flags)
+        # no else here between dif/time, it is possible to have a doubly differenced set
+        if self.extra_keywords['dif_time'] is True:
+            p_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool)
+            p_flags[:-1] = self.metric_array.mask
+            p_flags[1:] = np.logical_or(p_flags[1:], p_flags[:-1])
+        if self.extra_keywords['dif_freq'] is True:
+            p_flags = np.zeros([shape[0], shape[1] + 1, shape[2]], dtype=bool)
+            p_flags[:,:-1] = self.metric_array.mask
+            p_flags[:,1:] = np.logical_or(p_flags[:,1:], p_flags[:,:-1])
+        return(p_flags)
 
     def flag_uvf(self, uvf, inplace=False):
         """
@@ -253,13 +258,20 @@ def flag_uvf(self, uvf, inplace=False):
             raise ValueError("UVFlag object must be in flag mode to write flags from INS object.")
         if uvf.type != 'waterfall':
             raise ValueError("UVFlag object must be in waterfall mode to write flags from INS object.")
-        try:
-            test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:])
-            time_compat = np.all(self.time_array == test_times)
-            assert time_compat
-        except Exception:
-            raise ValueError("UVFlag object's times do not match those of INS object.")
-
+        if self.extra_keywords['dif_time'] is True:
+            try:
+                test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:])
+                time_compat = np.all(self.time_array == test_times)
+                assert time_compat
+            except Exception:
+                raise ValueError("UVFlag object's times do not match those of INS object.")
+        if self.extra_keywords['dif_freq'] is True:
+            try:
+                test_freqs = 0.5 * (uvf.freq_array[:-1] + uvf.freq_array[1:])
+                freq_compat = np.all(self.freq_array == test_freqs)
+                assert freq_compat
+            except Exception:
+                raise ValueError("UVFlag object's frequencies do not match those of INS object.")
         new_flags = self.mask_to_flags()
 
         if inplace:

diff --git a/SSINS/sky_subtract.py b/SSINS/sky_subtract.py
@@ -26,8 +26,8 @@ def __init__(self):
         """Array of length Nfreqs that stores maximum likelihood estimators for
         each frequency, calculated using the MLE_calc method"""
 
-    def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None,
-             **kwargs):
+    def read(self, filename, diff=False, diff_freq=False, flag_choice=None, INS=None, custom=None,
+             override_keyword=None, **kwargs):
 
         """
         Reads in a file that is compatible with UVData object by first calling
@@ -37,29 +37,68 @@ def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None,
         Args:
             filename (str or list of str): The filepath(s) to read in.
             diff (bool): If True, and data was read in, then difference the visibilities in time
+            diff_freq (bool): If True, and data was read in, then difference the visibilities in freq
             flag_choice: Sets flags for the data array on read using apply_flags method.
             INS: An INS object for apply_flags()
             custom: A custom flag array for apply_flags()
             kwargs: Additional kwargs are passed to UVData.read()
+            override_keyword (str): sets a keyword to override to True regardless of internal diffs,
+                can be set to `dif_freq`, `dif_time`, or `both`.
         """
-        warnings.warn("SS.read will be renamed to SS.read_data soon to avoid"
-                      " conflicts with UVData.read.", category=PendingDeprecationWarning)
 
         super().read(filename, **kwargs)
-
+        # detect if there is contridicting override_keyword and diff/diff_freq terms
+        if diff or diff_freq is True and override_keyword is not None:
+            warnings.warn("diff or diff_freq set to true and override keyword has"
+                          " been set. Please ensure that this does not cause unwanted"
+                          " behavior, since the override_keyword will override diff"
+                          " setting internal attributes")
+
+        if override_keyword != "dif_freq" or override_keyword != "dif_time" or override_keyword != "both":
+            warnings.warn("override_keyword passed but is not 'dif_time',"
+                          " 'dif_freq', or 'both': override_keyword will have no"
+                          " effect")
+
+        # always set extra keywords, else key errors when trying to check
         if (self.data_array is not None):
             if diff:
                 self.diff()
                 self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom)
-            else:
+                self.extra_keywords['dif_time'] = True
+                self.extra_keywords['dif_freq'] = False
+            if diff_freq:
+                self.diff_freq()
+                self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom)
+                self.extra_keywords['dif_freq'] = True
+                self.extra_keywords['dif_time'] = False
+            if not diff_freq and not diff:
                 # This warning will be issued when diff is False and there is some data read in
                 # If filename is a list of files, then this warning will get issued in the recursive call in UVData.read
                 warnings.warn("diff on read defaults to False now. Please double"
                               " check SS.read call and ensure the appropriate"
                               " keyword arguments for your intended use case.")
+                self.extra_keywords['dif_time'] = False
+                self.extra_keywords['dif_freq'] = False
                 if flag_choice is not None:
                     warnings.warn("flag_choice will be ignored on read since"
                                   " diff is being skipped.")
+            if diff_freq and diff:
+                self.extra_keywords['dif_time'] = True
+                self.extra_keywords['dif_freq'] = True
+        else:
+            self.extra_keywords['dif_time'] = False
+            self.extra_keywords['dif_freq'] = False
+        # hardcoded cases for override_keyword to set dif_freq/dif_time (or both)
+        # useful if data is pre-differenced rather than internally diff-ed by read() itself
+        # applied after the internal flags, and thus overwrites them
+        if (override_keyword is not None):
+            if override_keyword == 'dif_time':
+                self.extra_keywords['dif_time'] = True
+            elif override_keyword == 'dif_freq':
+                self.extra_keywords['dif_freq'] = True
+            elif override_keyword == 'both':
+                self.extra_keywords['dif_freq'] = True
+                self.extra_keywords['dif_time'] = True
 
     def apply_flags(self, flag_choice=None, INS=None, custom=None):
         """
@@ -225,6 +264,25 @@ def diff(self):
         super().set_lsts_from_time_array()
         self.data_array = np.ma.masked_array(self.data_array)
 
+    def diff_freq(self):
+
+        """
+        Differences the visibilities in freq. Does so independently for each time.
+        The flags are propagated by taking the boolean OR of the entries that correspond
+        to the visibilities that are differenced from one another. Other metadata
+        attributes are also adjusted so that the resulting SS object passes
+        UVData.check()
+        """
+
+        diff_dat = np.diff(self.data_array, axis=2) #axis 2: nfreqs (see uvdata object)
+        self.data_array = diff_dat
+
+        self.Nfreqs -= 1
+        self.data_array = np.ma.masked_array(self.data_array)
+        self.freq_array = (self.freq_array[:,1:] + self.freq_array[:,:-1]) / 2.0
+        self.nsample_array = (self.nsample_array[:,:,1:,:] + self.nsample_array[:,:,:-1,:]) / 2.0
+        self.flag_array = np.logical_or(self.flag_array[:,:,1:,:], self.flag_array[:,:,:-1,:])
+
     def MLE_calc(self):
 
         """

diff --git a/SSINS/tests/test_INS.py b/SSINS/tests/test_INS.py
@@ -35,6 +35,25 @@ def test_init():
     # Check that the weights summed correctly
     assert np.all(test_weights == ins.weights_array), "Weights did not sum properly"
 
+def test_extra_keywords():
+    obs = '1061313128_99bl_1pol_half_time'
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    file_type = 'uvfits'
+
+    ss = SS()
+    ss.read(testfile, flag_choice='original', diff=True)
+    ins = INS(ss)
+
+    assert ss.extra_keywords['dif_time'] is True
+    assert ins.extra_keywords['dif_time'] is True
+
+    ss.read(testfile, flag_choice='original', diff=False)
+    ins = INS(ss)
+
+    assert ss.extra_keywords['dif_time'] is False
+    assert ins.extra_keywords['dif_time'] is False
+
+
 
 def test_no_diff_start():
     obs = '1061313128_99bl_1pol_half_time'
@@ -109,6 +128,36 @@ def test_polyfit():
     ins.metric_ms = ins.mean_subtract()
     assert np.all(ins.metric_ms.mask), "The metric_ms array was not all masked"
 
+def test_flag_uvf_freq():
+    obs = '1061313128_99bl_1pol_half_time'
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    file_type = 'uvfits'
+    prefix = os.path.join(DATA_PATH, '%s_test' % obs)
+    flags_outfile = '%s_SSINS_flags.h5' % prefix
+
+    ss = SS()
+    ss.read(testfile, diff_freq=True)
+
+    uvd = UVData()
+    uvd.read(testfile)
+
+    uvf = UVFlag(uvd, mode='flag', waterfall=True)
+    # start with some flags so that we can test the intended OR operation
+    uvf.flag_array[6, :] = True
+    ins = INS(ss)
+
+    # Check error handling
+    with pytest.raises(ValueError):
+        bad_uvf = UVFlag(uvd, mode='metric', waterfall=True)
+        err_uvf = ins.flag_uvf(uvf=bad_uvf)
+    with pytest.raises(ValueError):
+        bad_uvf = UVFlag(uvd, mode='flag', waterfall=False)
+        err_uvf = ins.flag_uvf(uvf=bad_uvf)
+    with pytest.raises(ValueError):
+        bad_uvf = UVFlag(uvd, mode='flag', waterfall=True)
+        # Pretend the data is off by 1 freq
+        bad_uvf.freq_array += 1
+        err_uvf = ins.flag_uvf(uvf=bad_uvf)
 
 @pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read")
 def test_mask_to_flags():
@@ -236,6 +285,10 @@ def test_write_mwaf():
     testfile = os.path.join(DATA_PATH, '%s.h5' % obs)
     prefix = os.path.join(DATA_PATH, '%s_test' % obs)
     ins = INS(testfile)
+    #to override the fact that the data files don't have dif_ keywords set
+    ins.extra_keywords['dif_time'] = True
+    ins.extra_keywords['dif_freq'] = False
+
     mwaf_files = [os.path.join(DATA_PATH, '1061313128_12.mwaf')]
     bad_mwaf_files = [os.path.join(DATA_PATH, 'bad_file_path')]
     metafits_file = os.path.join(DATA_PATH, '1061313128.metafits')
@@ -431,3 +484,17 @@ def test_add():
     assert np.all(combo_ins.metric_array.mask == first_ins.metric_array.mask)
     assert np.all(combo_ins.metric_array.data == truth_ins.metric_array.data)
     assert np.all(combo_ins.metric_array.mask == truth_ins.metric_array.mask)
+
+def test_mask_to_flags_2():
+    #verify array sizes
+    ss = SS()
+    filepath = 'SSINS/data/1061313128_99bl_1pol_half_time.uvfits'
+    ss.read(filepath, diff=True)
+    ins = INS(ss)
+    flags = ins.mask_to_flags()
+    ss.read(filepath, diff=False, diff_freq=True)
+    ins = INS(ss)
+    flags = ins.mask_to_flags()
+    ss.read(filepath, diff=True, diff_freq=True)
+    ins = INS(ss)
+    flags = ins.mask_to_flags()
diff --git a/SSINS/tests/test_SS.py b/SSINS/tests/test_SS.py
@@ -9,19 +9,20 @@
 Tests the various capabilities of the sky_subtract class
 """
 
-
 @pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read")
+
 def test_SS_read():
     obs = '1061313128_99bl_1pol_half_time'
     testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
 
     ss = SS()
 
-    # Test reading in only metadata skips if block and warning
-    with pytest.warns(PendingDeprecationWarning, match="SS.read will be renamed"):
-        ss.read(testfile, read_data=False)
+    ss.read(testfile, read_data=False)
     assert ss.data_array is None, "Data array is not None"
 
+    # See that it is not yet flagged as diffed
+    assert ss.extra_keywords['dif_freq'] is False
+
     # Test select on read and diff
     ss.read(testfile, times=np.unique(ss.time_array)[1:10], diff=True)
     assert ss.Ntimes == 8, "Number of times after diff disagrees!"
@@ -64,11 +65,74 @@ def test_diff():
     assert np.all(ss.uvw_array == diff_uvw), "uvw_arrays disagree!"
     assert np.all(ss.ant_1_array == np.array([0, 0])), "ant_1_array disagrees!"
     assert np.all(ss.ant_2_array == np.array([1, 2])), "ant_2_array disagrees!"
+    assert ss.extra_keywords['dif_freq'] is False
+    assert ss.extra_keywords['dif_time'] is True
+
+
+def test_keyword_override_time():
+    obs = '1061313128_99bl_1pol_half_time'
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    ss = SS()
+    ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_time')
+    assert ss.extra_keywords['dif_time'] is True
+
+
+def test_keyword_override_freq():
+    obs = '1061313128_99bl_1pol_half_time'
+    ss = SS()
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_freq')
+    assert ss.extra_keywords['dif_freq'] is True
+
+
+def test_keyword_override_both():
+    obs = '1061313128_99bl_1pol_half_time'
+    ss = SS()
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='both')
+    assert ss.extra_keywords['dif_time'] is True
+    assert ss.extra_keywords['dif_freq'] is True
 
+#checks whether diff_freq reads in and out, and the diff values are sane
+def test_diff_freq():
+    obs = '1061313128_99bl_1pol_half_time'
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+
+    ss = SS()
+    uv = UVData()
+
+    # Read in two times and two baselines of data, so that the diff is obvious.
+    uv.read(testfile, read_data=False)
+    times = np.unique(uv.time_array)[:2]
+    bls = [(0, 1), (0, 2)]
+    uv.read(testfile, times=times, bls=bls)
+
+    diff_dat = np.diff(uv.data_array, axis=2)
+
+    ss.read(testfile, diff=False, diff_freq=True, times=times, bls=bls)
+    print(ss._data_array.form)
+    #ss.reorder_blts(order='baseline')
+    assert np.all(ss.data_array == diff_dat), "Data values are different!"
+    assert ss.extra_keywords['dif_freq'] is True
+    assert ss.extra_keywords['dif_time'] is False
+
+#checks whether diff_freq masks properly
+def test_diff_freq_mask():
+    obs = '1061313128_99bl_1pol_half_time'
+    testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
+    ss = SS()
+
+    #read in test file
+    ss.read(testfile, read_data=True, diff=False, diff_freq=False)
+    ss.apply_flags(flag_choice='original')
+    assert ss.flag_array is not None
+    temp_array = np.logical_or(ss.flag_array, ss.flag_array)
+    nonzero_or = np.count_nonzero(temp_array)
+    nonzero_flags = np.count_nonzero(ss.flag_array[::2])
+    assert (nonzero_or > nonzero_flags)
 
 @pytest.mark.filterwarnings("ignore:SS.read", "ignore:Reordering")
 def test_apply_flags():
-
     obs = '1061313128_99bl_1pol_half_time'
     testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
     file_type = 'uvfits'

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -43,9 +43,20 @@ Generating the sky-subtracted visibilities
   >>> # The following lines make use of the time_array attribute (metadata) to
   >>> # read in all but the first and last integrations
   >>> times = np.unique(ss.time_array)[1:-1]
+  >>> # This read() call uses the `diff` keyword to difference the data automatically along the time axis
   >>> ss.read(filepath, read_data=True, times=times, diff=True)
 
-(c) Applying flags
+(c) Differencing along the frequency axis
+*****************************************
+::
+  >>> # We can opt to difference visibilities along the frequency axis instead of the default time axis
+  >>> # To do this, use the separate keyword `diff_freq` and set it to true when you call the read() function
+  >>> # The two modes work otherwise the same except for the axis the visibilities are differenced upon.
+
+  >>> # Note that this will override the time-differenced data if you ran ss.read() from section (b)
+  >>> ss.read(filepath, read_data=True, times=times, diff_freq=True)
+
+(d) Applying flags
 ******************
 ::
   >>> # SS.data_array is a numpy masked array. To "apply flags" is to change the mask of the data_array.
@@ -72,7 +83,7 @@ Generating the sky-subtracted visibilities
   >>> print(np.any(ss.data_array.mask))
   False
 
-(d) Plotting using Catalog_Plot
+(e) Plotting using Catalog_Plot
 *******************************
 ::
   >>> from SSINS import Catalog_Plot as cp