Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
17d1064
remove read() deprecation warning, add diff_freq extra keyword
no-lex Nov 6, 2020
305f31c
shorten keyword to 8 chars per standard
no-lex Nov 6, 2020
558f8c6
trigger dif_freq write in all cases && add test cases
no-lex Dec 11, 2020
b673059
properly use 'is' in dif_freq test
no-lex Dec 11, 2020
94193d0
add INS extra keywords test
no-lex Dec 11, 2020
3b31495
add diff_freq test
no-lex Feb 8, 2021
b10a0f3
add crude implementation of diff_freq
no-lex Feb 8, 2021
ad33658
fix conditional statements to have SS pass keywords test
no-lex Feb 16, 2021
02e7871
fix test_SS_read test by always setting extra_keywords
no-lex Feb 17, 2021
8932d26
add test_diff_freq_mask test to test_ss
no-lex Feb 22, 2021
8296354
average freq array entries && or the flag array
no-lex Feb 22, 2021
e487242
scrub freq_mask test for ss
no-lex Feb 22, 2021
173ff52
add ability to override extra_keywords at read() level
no-lex Mar 1, 2021
38199bd
resolve bounced tests caused by merge
no-lex Mar 5, 2021
bf3bf8d
fix keyword_override_freq testing time instead of freq
no-lex Mar 5, 2021
ba595d9
average nsample array in diff_freq
no-lex Mar 8, 2021
71c664a
remove reorder from diff_freq
no-lex Mar 10, 2021
15088d0
assert keyword values after read() with tests
no-lex Mar 12, 2021
3c850ee
add tutorial section about frequency differencing
no-lex Mar 12, 2021
8fda8ef
add override_keyword keywords to diff() comments
no-lex Mar 12, 2021
2c76b05
remove diff_freq print statement, array comment
no-lex Mar 12, 2021
ea50606
add case for both diffs setting both extra_keywords
no-lex Mar 12, 2021
b2e9d1a
add freq_compattest to ins flag_uvf()
no-lex Mar 15, 2021
e638152
fix axis typo in tutorial
no-lex Mar 15, 2021
ed71833
use equals instead of assignment in INS flag_uvf() key checks
no-lex Mar 15, 2021
3c5e0ab
fix time mention in flag_uvf frequency check
no-lex Mar 15, 2021
09d1b4e
add freq_diff support to INS mask_to_flags()
no-lex Mar 26, 2021
e41a693
use is to compare to singletons in INS flag_uvf()
no-lex Mar 26, 2021
74f72eb
have ss.read() apply_flags() in diff_freq
no-lex Mar 26, 2021
a6987f0
properly arrange arrays for frequency slicing in INS.mask_to_flags
no-lex Mar 26, 2021
89106d5
add test_mask_to_flags testo test_INS
no-lex Mar 26, 2021
6fdab99
add freq diff test for flag_uvf in INS
no-lex Mar 29, 2021
ebd737b
comment flag propagation if/if statement
no-lex Apr 16, 2021
a3a222c
warn on invalid extra_keywords entries
no-lex Apr 16, 2021
689ff30
fix missing closing paren on diff override warning
no-lex May 7, 2021
96a3d69
read data when testing masks
no-lex Nov 12, 2021
432dad9
add masktoflags test but for freq
no-lex Dec 3, 2021
7f4a7a5
Revert "add masktoflags test but for freq"
no-lex Dec 3, 2021
5169e78
trivially rename test_mask_to_flags second test to avoid collision
no-lex Jan 7, 2022
1cfb820
add extra mask_to_flags_2 test for diff freq+time
no-lex Jan 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions SSINS/incoherent_noise_spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,21 +215,26 @@ def mean_subtract(self, freq_slice=slice(None), return_coeffs=False):
def mask_to_flags(self):
"""
Propagates the mask to construct flags for the original
(non time-differenced) data. If a time is flagged in the INS, then both
(non differenced) data. If a time is flagged in the INS, then both
times that could have contributed to that time in the sky-subtraction
step are flagged in the new array.

Returns:
tp_flags (array): The time-propagated flags
p_flags (array): The (time or frequency)-propagated flags
"""

# Propagate the flags
shape = list(self.metric_array.shape)
tp_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool)
tp_flags[:-1] = self.metric_array.mask
tp_flags[1:] = np.logical_or(tp_flags[1:], tp_flags[:-1])

return(tp_flags)
# no else here between dif/time, it is possible to have a doubly differenced set
if self.extra_keywords['dif_time'] is True:
p_flags = np.zeros([shape[0] + 1] + shape[1:], dtype=bool)
p_flags[:-1] = self.metric_array.mask
p_flags[1:] = np.logical_or(p_flags[1:], p_flags[:-1])
if self.extra_keywords['dif_freq'] is True:
p_flags = np.zeros([shape[0], shape[1] + 1, shape[2]], dtype=bool)
p_flags[:,:-1] = self.metric_array.mask
p_flags[:,1:] = np.logical_or(p_flags[:,1:], p_flags[:,:-1])
return(p_flags)

def flag_uvf(self, uvf, inplace=False):
"""
Expand All @@ -253,13 +258,20 @@ def flag_uvf(self, uvf, inplace=False):
raise ValueError("UVFlag object must be in flag mode to write flags from INS object.")
if uvf.type != 'waterfall':
raise ValueError("UVFlag object must be in waterfall mode to write flags from INS object.")
try:
test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:])
time_compat = np.all(self.time_array == test_times)
assert time_compat
except Exception:
raise ValueError("UVFlag object's times do not match those of INS object.")

if self.extra_keywords['dif_time'] is True:
try:
test_times = 0.5 * (uvf.time_array[:-1] + uvf.time_array[1:])
time_compat = np.all(self.time_array == test_times)
assert time_compat
except Exception:
raise ValueError("UVFlag object's times do not match those of INS object.")
if self.extra_keywords['dif_freq'] is True:
try:
test_freqs = 0.5 * (uvf.freq_array[:-1] + uvf.freq_array[1:])
freq_compat = np.all(self.freq_array == test_freqs)
assert freq_compat
except Exception:
raise ValueError("UVFlag object's frequencies do not match those of INS object.")
new_flags = self.mask_to_flags()

if inplace:
Expand Down
70 changes: 64 additions & 6 deletions SSINS/sky_subtract.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def __init__(self):
"""Array of length Nfreqs that stores maximum likelihood estimators for
each frequency, calculated using the MLE_calc method"""

def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None,
**kwargs):
def read(self, filename, diff=False, diff_freq=False, flag_choice=None, INS=None, custom=None,
override_keyword=None, **kwargs):

"""
Reads in a file that is compatible with UVData object by first calling
Expand All @@ -37,29 +37,68 @@ def read(self, filename, diff=False, flag_choice=None, INS=None, custom=None,
Args:
filename (str or list of str): The filepath(s) to read in.
diff (bool): If True, and data was read in, then difference the visibilities in time
diff_freq (bool): If True, and data was read in, then difference the visibilities in freq
flag_choice: Sets flags for the data array on read using apply_flags method.
INS: An INS object for apply_flags()
custom: A custom flag array for apply_flags()
kwargs: Additional kwargs are passed to UVData.read()
override_keyword (str): sets a keyword to override to True regardless of internal diffs,
can be set to `dif_freq`, `dif_time`, or `both`.
"""
warnings.warn("SS.read will be renamed to SS.read_data soon to avoid"
" conflicts with UVData.read.", category=PendingDeprecationWarning)

super().read(filename, **kwargs)

# detect if there is contridicting override_keyword and diff/diff_freq terms
if diff or diff_freq is True and override_keyword is not None:
warnings.warn("diff or diff_freq set to true and override keyword has"
" been set. Please ensure that this does not cause unwanted"
" behavior, since the override_keyword will override diff"
" setting internal attributes")

if override_keyword != "dif_freq" or override_keyword != "dif_time" or override_keyword != "both":
warnings.warn("override_keyword passed but is not 'dif_time',"
" 'dif_freq', or 'both': override_keyword will have no"
" effect")

# always set extra keywords, else key errors when trying to check
if (self.data_array is not None):
if diff:
self.diff()
self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom)
else:
self.extra_keywords['dif_time'] = True
Comment thread
mwilensky768 marked this conversation as resolved.
self.extra_keywords['dif_freq'] = False
if diff_freq:
self.diff_freq()
self.apply_flags(flag_choice=flag_choice, INS=INS, custom=custom)
self.extra_keywords['dif_freq'] = True
self.extra_keywords['dif_time'] = False
if not diff_freq and not diff:
# This warning will be issued when diff is False and there is some data read in
# If filename is a list of files, then this warning will get issued in the recursive call in UVData.read
warnings.warn("diff on read defaults to False now. Please double"
" check SS.read call and ensure the appropriate"
" keyword arguments for your intended use case.")
self.extra_keywords['dif_time'] = False
self.extra_keywords['dif_freq'] = False
if flag_choice is not None:
warnings.warn("flag_choice will be ignored on read since"
" diff is being skipped.")
if diff_freq and diff:
self.extra_keywords['dif_time'] = True
self.extra_keywords['dif_freq'] = True
else:
self.extra_keywords['dif_time'] = False
self.extra_keywords['dif_freq'] = False
# hardcoded cases for override_keyword to set dif_freq/dif_time (or both)
# useful if data is pre-differenced rather than internally diff-ed by read() itself
# applied after the internal flags, and thus overwrites them
if (override_keyword is not None):
if override_keyword == 'dif_time':
self.extra_keywords['dif_time'] = True
elif override_keyword == 'dif_freq':
self.extra_keywords['dif_freq'] = True
elif override_keyword == 'both':
self.extra_keywords['dif_freq'] = True
self.extra_keywords['dif_time'] = True

def apply_flags(self, flag_choice=None, INS=None, custom=None):
"""
Expand Down Expand Up @@ -225,6 +264,25 @@ def diff(self):
super().set_lsts_from_time_array()
self.data_array = np.ma.masked_array(self.data_array)

def diff_freq(self):

"""
Differences the visibilities in freq. Does so independently for each time.
The flags are propagated by taking the boolean OR of the entries that correspond
to the visibilities that are differenced from one another. Other metadata
attributes are also adjusted so that the resulting SS object passes
UVData.check()
"""

diff_dat = np.diff(self.data_array, axis=2) #axis 2: nfreqs (see uvdata object)
self.data_array = diff_dat

self.Nfreqs -= 1
self.data_array = np.ma.masked_array(self.data_array)
self.freq_array = (self.freq_array[:,1:] + self.freq_array[:,:-1]) / 2.0
self.nsample_array = (self.nsample_array[:,:,1:,:] + self.nsample_array[:,:,:-1,:]) / 2.0
self.flag_array = np.logical_or(self.flag_array[:,:,1:,:], self.flag_array[:,:,:-1,:])

def MLE_calc(self):

"""
Expand Down
67 changes: 67 additions & 0 deletions SSINS/tests/test_INS.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,25 @@ def test_init():
# Check that the weights summed correctly
assert np.all(test_weights == ins.weights_array), "Weights did not sum properly"

def test_extra_keywords():
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test doesn't appear to check whether diff_freq is set, etc.

obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
file_type = 'uvfits'

ss = SS()
ss.read(testfile, flag_choice='original', diff=True)
ins = INS(ss)

assert ss.extra_keywords['dif_time'] is True
assert ins.extra_keywords['dif_time'] is True

ss.read(testfile, flag_choice='original', diff=False)
ins = INS(ss)

assert ss.extra_keywords['dif_time'] is False
assert ins.extra_keywords['dif_time'] is False



def test_no_diff_start():
obs = '1061313128_99bl_1pol_half_time'
Expand Down Expand Up @@ -109,6 +128,36 @@ def test_polyfit():
ins.metric_ms = ins.mean_subtract()
assert np.all(ins.metric_ms.mask), "The metric_ms array was not all masked"

def test_flag_uvf_freq():
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test checks the error handling, but I don't see that it ever checks that the answer is right when the right calls are made.

obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
file_type = 'uvfits'
prefix = os.path.join(DATA_PATH, '%s_test' % obs)
flags_outfile = '%s_SSINS_flags.h5' % prefix

ss = SS()
ss.read(testfile, diff_freq=True)

uvd = UVData()
uvd.read(testfile)

uvf = UVFlag(uvd, mode='flag', waterfall=True)
# start with some flags so that we can test the intended OR operation
uvf.flag_array[6, :] = True
ins = INS(ss)

# Check error handling
with pytest.raises(ValueError):
bad_uvf = UVFlag(uvd, mode='metric', waterfall=True)
err_uvf = ins.flag_uvf(uvf=bad_uvf)
with pytest.raises(ValueError):
bad_uvf = UVFlag(uvd, mode='flag', waterfall=False)
err_uvf = ins.flag_uvf(uvf=bad_uvf)
with pytest.raises(ValueError):
bad_uvf = UVFlag(uvd, mode='flag', waterfall=True)
# Pretend the data is off by 1 freq
bad_uvf.freq_array += 1
err_uvf = ins.flag_uvf(uvf=bad_uvf)

@pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read")
def test_mask_to_flags():
Expand Down Expand Up @@ -236,6 +285,10 @@ def test_write_mwaf():
testfile = os.path.join(DATA_PATH, '%s.h5' % obs)
prefix = os.path.join(DATA_PATH, '%s_test' % obs)
ins = INS(testfile)
#to override the fact that the data files don't have dif_ keywords set
ins.extra_keywords['dif_time'] = True
ins.extra_keywords['dif_freq'] = False

mwaf_files = [os.path.join(DATA_PATH, '1061313128_12.mwaf')]
bad_mwaf_files = [os.path.join(DATA_PATH, 'bad_file_path')]
metafits_file = os.path.join(DATA_PATH, '1061313128.metafits')
Expand Down Expand Up @@ -431,3 +484,17 @@ def test_add():
assert np.all(combo_ins.metric_array.mask == first_ins.metric_array.mask)
assert np.all(combo_ins.metric_array.data == truth_ins.metric_array.data)
assert np.all(combo_ins.metric_array.mask == truth_ins.metric_array.mask)

def test_mask_to_flags_2():
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test also does not check anything other than that the code runs through. I think the smaller test files should support a "gets the correct answer" test. If not, you can use "select-on-read" functionality to make a testable object where it is not too hard to calculate the answer ahead of time through alternate means (you could even modify the starting data array to make this easy).

#verify array sizes
ss = SS()
filepath = 'SSINS/data/1061313128_99bl_1pol_half_time.uvfits'
ss.read(filepath, diff=True)
ins = INS(ss)
flags = ins.mask_to_flags()
ss.read(filepath, diff=False, diff_freq=True)
ins = INS(ss)
flags = ins.mask_to_flags()
ss.read(filepath, diff=True, diff_freq=True)
ins = INS(ss)
flags = ins.mask_to_flags()
74 changes: 69 additions & 5 deletions SSINS/tests/test_SS.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@
Tests the various capabilities of the sky_subtract class
"""


@pytest.mark.filterwarnings("ignore:Reordering", "ignore:SS.read")

def test_SS_read():
obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)

ss = SS()

# Test reading in only metadata skips if block and warning
with pytest.warns(PendingDeprecationWarning, match="SS.read will be renamed"):
ss.read(testfile, read_data=False)
ss.read(testfile, read_data=False)
assert ss.data_array is None, "Data array is not None"

# See that it is not yet flagged as diffed
assert ss.extra_keywords['dif_freq'] is False

# Test select on read and diff
ss.read(testfile, times=np.unique(ss.time_array)[1:10], diff=True)
assert ss.Ntimes == 8, "Number of times after diff disagrees!"
Expand Down Expand Up @@ -64,11 +65,74 @@ def test_diff():
assert np.all(ss.uvw_array == diff_uvw), "uvw_arrays disagree!"
assert np.all(ss.ant_1_array == np.array([0, 0])), "ant_1_array disagrees!"
assert np.all(ss.ant_2_array == np.array([1, 2])), "ant_2_array disagrees!"
assert ss.extra_keywords['dif_freq'] is False
assert ss.extra_keywords['dif_time'] is True


def test_keyword_override_time():
obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
ss = SS()
ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_time')
assert ss.extra_keywords['dif_time'] is True


def test_keyword_override_freq():
obs = '1061313128_99bl_1pol_half_time'
ss = SS()
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='dif_freq')
assert ss.extra_keywords['dif_freq'] is True


def test_keyword_override_both():
obs = '1061313128_99bl_1pol_half_time'
ss = SS()
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
ss.read(testfile, read_data=False, diff=False, diff_freq=False, override_keyword='both')
assert ss.extra_keywords['dif_time'] is True
assert ss.extra_keywords['dif_freq'] is True

#checks whether diff_freq reads in and out, and the diff values are sane
def test_diff_freq():
obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)

ss = SS()
uv = UVData()

# Read in two times and two baselines of data, so that the diff is obvious.
uv.read(testfile, read_data=False)
times = np.unique(uv.time_array)[:2]
bls = [(0, 1), (0, 2)]
uv.read(testfile, times=times, bls=bls)

diff_dat = np.diff(uv.data_array, axis=2)

ss.read(testfile, diff=False, diff_freq=True, times=times, bls=bls)
print(ss._data_array.form)
#ss.reorder_blts(order='baseline')
assert np.all(ss.data_array == diff_dat), "Data values are different!"
assert ss.extra_keywords['dif_freq'] is True
assert ss.extra_keywords['dif_time'] is False

#checks whether diff_freq masks properly
def test_diff_freq_mask():
obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
ss = SS()

#read in test file
ss.read(testfile, read_data=True, diff=False, diff_freq=False)
ss.apply_flags(flag_choice='original')
assert ss.flag_array is not None
temp_array = np.logical_or(ss.flag_array, ss.flag_array)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line and those below confuse me. This line should just return a copy of ss.flag_array, since ORing an array with itself will just return an array with 1 everywhere that the original array was nonzero and 0 otherwise, and the flag array is already boolean. Due to this, the next two lines are guaranteed to satisfy the assertion, since ss.flag_array[::2] is now just a subarray of temp_array. Maybe you meant to index the ss.flag_array's being fed into the np.logical_or call, e.g. `np.logical_or(ss.flag_array[:, :-1], ss.flag_array[:, 1:])?

nonzero_or = np.count_nonzero(temp_array)
nonzero_flags = np.count_nonzero(ss.flag_array[::2])
assert (nonzero_or > nonzero_flags)

@pytest.mark.filterwarnings("ignore:SS.read", "ignore:Reordering")
def test_apply_flags():

obs = '1061313128_99bl_1pol_half_time'
testfile = os.path.join(DATA_PATH, '%s.uvfits' % obs)
file_type = 'uvfits'
Expand Down
15 changes: 13 additions & 2 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,20 @@ Generating the sky-subtracted visibilities
>>> # The following lines make use of the time_array attribute (metadata) to
>>> # read in all but the first and last integrations
>>> times = np.unique(ss.time_array)[1:-1]
>>> # This read() call uses the `diff` keyword to difference the data automatically along the time axis
>>> ss.read(filepath, read_data=True, times=times, diff=True)

(c) Applying flags
(c) Differencing along the frequency axis
*****************************************
::
>>> # We can opt to difference visibilities along the frequency axis instead of the default time axis
>>> # To do this, use the separate keyword `diff_freq` and set it to true when you call the read() function
>>> # The two modes work otherwise the same except for the axis the visibilities are differenced upon.

>>> # Note that this will override the time-differenced data if you ran ss.read() from section (b)
>>> ss.read(filepath, read_data=True, times=times, diff_freq=True)

(d) Applying flags
******************
::
>>> # SS.data_array is a numpy masked array. To "apply flags" is to change the mask of the data_array.
Expand All @@ -72,7 +83,7 @@ Generating the sky-subtracted visibilities
>>> print(np.any(ss.data_array.mask))
False

(d) Plotting using Catalog_Plot
(e) Plotting using Catalog_Plot
*******************************
::
>>> from SSINS import Catalog_Plot as cp
Expand Down