Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
*.pyc
datasets
PythonCode/intermediate_datafiles/
PythonCode/Example_graphs/
Binary file added Python3Code/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions Python3Code/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.8.8
79 changes: 79 additions & 0 deletions Python3Code/ch2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
##############################################################
# #
# Mark Hoogendoorn and Burkhardt Funk (2017) #
# Machine Learning for the Quantified Self #
# Springer #
# Chapter 2 #
# #
##############################################################

# Import the relevant classes.
from Chapter2.CreateDataset import CreateDataset
from util.VisualizeDataset import VisualizeDataset
from util import util
from pathlib import Path
import copy
import os
import sys

# Chapter 2: Initial exploration of the dataset.

# Base directory for the input data, and the location/filename used to store this script's result.
DATASET_PATH = Path('./datasets/translations/')
RESULT_PATH = Path('./intermediate_datafiles/')
RESULT_FNAME = 'chapter2_question1_result.csv'

# Set a granularity (the discrete step size of our time series data). We'll use a coarse-grained granularity of one
# instance per minute, and a fine-grained one with four instances per second.
GRANULARITIES = [60000, 250]

# Make any required directories if they don't already exist. A plain loop is used (rather than a list
# comprehension) because mkdir() is called purely for its side effect; parents=True also creates any
# missing intermediate directories and exist_ok=True makes re-running the script harmless.
for path in (DATASET_PATH, RESULT_PATH):
    path.mkdir(exist_ok=True, parents=True)

print('Please wait, this will take a while to run!')

# One resulting data table per granularity, appended in the same order as GRANULARITIES.
datasets = []
for milliseconds_per_instance in GRANULARITIES:
    print(f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

    # Create an initial dataset object with the base directory for our data and a granularity.
    # It gets its own name so that `dataset` below only ever refers to the resulting data table.
    dataset_creator = CreateDataset(DATASET_PATH, milliseconds_per_instance)

    # Debug output: show the freshly created dataset object before any measurements are added.
    print(dataset_creator)

    # Add the selected measurements to it: the three accelerometer axes from acc.csv, timestamped by the
    # 'Time (s)' column. NOTE(review): presumably 'avg' averages the values within each time step and
    # 'acc_smartphone_' is prepended to the column names -- confirm against CreateDataset.
    dataset_creator.add_numerical_dataset(
        'acc.csv',
        'Time (s)',
        ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'],
        'avg',
        'acc_smartphone_',
    )

    # Get the resulting pandas data table. This name is still bound after the loop, where the table of
    # the last granularity is written to disk.
    dataset = dataset_creator.data_table

    # Plot the data
    DataViz = VisualizeDataset(__file__)

    # print(dataset)

    # Boxplot
    # DataViz.plot_dataset_boxplot(dataset, ['acc_smartphone_X (m/s^2)','acc_smartphone_Y (m/s^2)','acc_smartphone_Z (m/s^2)'])

    # Plot all data
    # DataViz.plot_dataset(dataset, ['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_', 'press_phone_', 'label'],
    #                               ['like', 'like', 'like', 'like', 'like', 'like', 'like','like'],
    #                               ['line', 'line', 'line', 'line', 'line', 'line', 'points', 'points'])

    # And print a summary of the dataset.
    util.print_statistics(dataset)

    # Keep an independent copy so that later changes to `dataset` cannot affect the stored version.
    datasets.append(copy.deepcopy(dataset))

    # If needed, we could save the various versions of the dataset we create in the loop with logical filenames:
    # dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}')


# Make a table like the one shown in the book, comparing the two datasets produced
# (the first granularity against the second one).
first_dataset = datasets[0]
second_dataset = datasets[1]
util.print_latex_table_statistics_two_datasets(first_dataset, second_dataset)

# Finally, store the last dataset we generated (250 ms).
result_file = RESULT_PATH / RESULT_FNAME
dataset.to_csv(result_file)

# Lastly, print a statement to know the code went through

print('The code has run through successfully!')
Loading