From 571947d618bcaba3baeb18aa2f6c68eefeb62a17 Mon Sep 17 00:00:00 2001 From: Eric Potash Date: Tue, 24 Oct 2017 18:49:01 -0500 Subject: [PATCH 1/2] handle partial join with missing index and add test --- drain/aggregation.py | 11 ++++++----- tests/test_aggregation.py | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drain/aggregation.py b/drain/aggregation.py index 9ff4c3c..fb59fa7 100644 --- a/drain/aggregation.py +++ b/drain/aggregation.py @@ -84,11 +84,12 @@ def join(self, left): logging.info('Joining %s %s' % (self.prefix, str(concat_args))) data.prefix_columns(df, self.args_prefix(concat_args)) if not set(df.index.names).issubset(left.columns): - logging.info("Skipping join since aggregation index not in left: %s" - % df.index.names) - continue - left = left.merge(df, left_on=df.index.names, - right_index=True, how='left', copy=False) + logging.info("Aggregation index not in left: %s" % df.index.names) + left = left.reindex(columns=left.columns + df.columns) + else: + left = left.merge(df, left_on=df.index.names, + right_index=True, how='left', copy=False) + fillna_value = fillna_value.append(self.fillna_value( df=df, left=left, diff --git a/tests/test_aggregation.py b/tests/test_aggregation.py index 4bde2d7..1e678b6 100644 --- a/tests/test_aggregation.py +++ b/tests/test_aggregation.py @@ -80,3 +80,10 @@ def test_spacetime_join_fillna(drain_setup, spacetime_crime_agg): 'date':[np.datetime64(date(2015,12,30)), np.datetime64(date(2015,12,31))]}) print(spacetime_crime_agg.join(left)) +def test_spacetime_partial_join(drain_setup, spacetime_crime_agg): + spacetime_crime_agg.execute() + + left = pd.DataFrame({'Community Area':[1,100], + 'date':[np.datetime64(date(2015,12,30)), np.datetime64(date(2015,12,31))]}) + print(spacetime_crime_agg.join(left)) + From 9b4ae26e63e47e97978891ac25a1bd487f4f90ba Mon Sep 17 00:00:00 2001 From: Eric Potash Date: Tue, 24 Oct 2017 19:02:04 -0500 Subject: [PATCH 2/2] do not use deprecated index add --- drain/aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drain/aggregation.py b/drain/aggregation.py index fb59fa7..c6155c9 100644 --- a/drain/aggregation.py +++ b/drain/aggregation.py @@ -85,7 +85,7 @@ def join(self, left): data.prefix_columns(df, self.args_prefix(concat_args)) if not set(df.index.names).issubset(left.columns): logging.info("Aggregation index not in left: %s" % df.index.names) - left = left.reindex(columns=left.columns + df.columns) + left = left.reindex(columns=list(left.columns) + list(df.columns)) else: left = left.merge(df, left_on=df.index.names, right_index=True, how='left', copy=False)