mhoogen · sheejantripathi · Jun 8, 2022 · Jun 9, 2022
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,3 @@
 *.pyc
-datasets
 PythonCode/intermediate_datafiles/
 PythonCode/Example_graphs/
diff --git a/Python3Code/.DS_Store b/Python3Code/.DS_Store
diff --git a/Python3Code/.python-version b/Python3Code/.python-version
@@ -0,0 +1 @@
+3.8.8
diff --git a/Python3Code/ch2_test.py b/Python3Code/ch2_test.py
@@ -0,0 +1,79 @@
+##############################################################
+#                                                            #
+#    Mark Hoogendoorn and Burkhardt Funk (2017)              #
+#    Machine Learning for the Quantified Self                #
+#    Springer                                                #
+#    Chapter 2                                               #
+#                                                            #
+##############################################################
+
+# Import the relevant classes.
+from Chapter2.CreateDataset import CreateDataset
+from util.VisualizeDataset import VisualizeDataset
+from util import util
+from pathlib import Path
+import copy
+import os
+import sys
+
+# Chapter 2: Initial exploration of the dataset.
+
+DATASET_PATH = Path('./datasets/translations/')
+RESULT_PATH = Path('./intermediate_datafiles/')
+RESULT_FNAME = 'chapter2_question1_result.csv'
+
+# Set a granularity (the discrete step size of our time series data). We'll use a coarse-grained granularity of one
+# instance per minute, and a fine-grained one with four instances per second.
+GRANULARITIES = [60000, 250]
+
+# Make any required directories (including missing parents) if they don't already exist.
+for path in (DATASET_PATH, RESULT_PATH):
+    path.mkdir(exist_ok=True, parents=True)
+
+print('Please wait, this will take a while to run!')
+
+datasets = []
+for milliseconds_per_instance in GRANULARITIES:
+    print(f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')
+
+    # Create an initial dataset object with the base directory for our data and a granularity
+    creator = CreateDataset(DATASET_PATH, milliseconds_per_instance)
+
+    # Debug output: show the dataset object before any measurements are added to it.
+    print(creator)
+
+    # Add the selected measurements to it.
+    creator.add_numerical_dataset('acc.csv', 'Time (s)', ['X (m/s^2)','Y (m/s^2)','Z (m/s^2)'], 'avg', 'acc_smartphone_')
+
+    # Get the resulting pandas data table
+    dataset = creator.data_table
+
+    # Plot the data (the visualizer is only needed by the plotting calls that are commented out below)
+    DataViz = VisualizeDataset(__file__)
+
+    # print(dataset)
+
+    # Boxplot
+    # DataViz.plot_dataset_boxplot(dataset, ['acc_smartphone_X (m/s^2)','acc_smartphone_Y (m/s^2)','acc_smartphone_Z (m/s^2)'])
+
+    # Plot all data
+    # DataViz.plot_dataset(dataset, ['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_', 'press_phone_', 'label'],
+    #                               ['like', 'like', 'like', 'like', 'like', 'like', 'like','like'],
+    #                               ['line', 'line', 'line', 'line', 'line', 'line', 'points', 'points'])
+
+    # And print a summary of the dataset. Keep a deep copy so each granularity's table is retained independently.
+    util.print_statistics(dataset)
+    datasets.append(copy.deepcopy(dataset))
+
+    # If needed, we could save the various versions of the dataset we create in the loop with logical filenames:
+    # dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}')
+
+
+# Make a table like the one shown in the book, comparing the two datasets produced.
+util.print_latex_table_statistics_two_datasets(datasets[0], datasets[1])
+
+# Finally, store the last dataset we generated (250 ms).
+datasets[-1].to_csv(RESULT_PATH / RESULT_FNAME)
+
+# Lastly, print a statement to know the code went through
+print('The code has run through successfully!')