forked from benbrahim7777/GetDataProject
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
80 lines (50 loc) · 3.18 KB
/
run_analysis.R
File metadata and controls
80 lines (50 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
## Here are the data for the project:
# https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
# You should create one R script called run_analysis.R that does the following:
#   1. Merges the training and the test sets to create one data set.
#   2. Extracts only the measurements on the mean and standard deviation for each measurement.
#   3. Uses descriptive activity names to name the activities in the data set.
#   4. Appropriately labels the data set with descriptive variable names.
#   5. From the data set in step 4, creates a second, independent tidy data set with the average of each variable for each activity and each subject.
# Good luck!
# Environment setup ----
# Machine-specific locations used by the original author. Each one is applied
# only when it is usable on the current machine, so the script can also be run
# elsewhere by starting R in the project directory.
my_java_home <- "C:\\Program Files\\Java\\jre1.8.0_25"
my_project_dir <- "C:/DropBox Salah/Coursera/getdata/Week3/"

# NOTE(review): JAVA_HOME is presumably required by a package loaded in
# getLibrairies() -- confirm against getLibrairies.R.
if (dir.exists(my_java_home)) {
  Sys.setenv(JAVA_HOME = my_java_home)
}

# All paths below are relative to the project directory. If the author's
# directory is not available, report it and keep the current directory
# instead of aborting the whole script.
tryCatch(
  setwd(my_project_dir),
  error = function(e) {
    message(
      "Could not switch to ", my_project_dir,
      "; continuing in ", getwd(), "."
    )
  }
)

# getLibrairies() is defined in getLibrairies.R; presumably it loads the
# packages that provide as.data.table(), melt() and dcast() -- TODO confirm.
source("./getLibrairies.R")
getLibrairies()

# TODO: add code to download the zip file and extract it into ./data.

# Measurements ----
# Read the X (feature) files for the test and training sets through
# get_mean_stdDeviation(), defined in get_mean_stdDeviation.R.
# NOTE(review): by its name the helper keeps only the mean and standard
# deviation columns -- confirm against the helper.
source("get_mean_stdDeviation.R")
my_x_test_fileName <- "./data/UCI HAR Dataset/test/X_test.txt"
my_x_test_mean_std <- get_mean_stdDeviation(my_x_test_fileName)
my_x_train_fileName <- "./data/UCI HAR Dataset/train/X_train.txt"
my_x_train_mean_std <- get_mean_stdDeviation(my_x_train_fileName)

# Activity labels ----
# Read the y (activity) files for the test and training sets.
# NOTE(review): the id columns used further down suggest get_y_data() returns
# Activity_ID and Activity_Label columns -- confirm against the helper.
source("get_y_test_data.R")
my_y_test_fileName <- "./data/UCI HAR Dataset/test/y_test.txt"
my_y_test_labels <- get_y_data(my_y_test_fileName)
my_y_train_fileName <- "./data/UCI HAR Dataset/train/y_train.txt"
my_y_train_labels <- get_y_data(my_y_train_fileName)

# Subjects ----
# NOTE(review): get_subject_data() is not sourced explicitly in this script;
# presumably it is defined in one of the files sourced above -- confirm.
my_subject_test_fileName <- "./data/UCI HAR Dataset/test/subject_test.txt"
my_subject_test <- get_subject_data(my_subject_test_fileName)
my_subject_train_fileName <- "./data/UCI HAR Dataset/train/subject_train.txt"
my_subject_train <- get_subject_data(my_subject_train_fileName)

# Assemble and merge ----
# Put subject, activity and measurement columns side by side for each set.
my_test_data <- cbind(
  as.data.table(my_subject_test),
  my_y_test_labels,
  my_x_test_mean_std
)
my_train_data <- cbind(
  as.data.table(my_subject_train),
  my_y_train_labels,
  my_x_train_mean_std
)

# Merge the training and the test sets into one data set (test rows first).
my_meged_data <- rbind(my_test_data, my_train_data)

# Tidy averages ----
# Create a second, independent tidy data set with the average of each
# variable for each activity and each subject.
# Every column that is not an identifier is treated as a measurement.
my_id_labels <- c("subject", "Activity_ID", "Activity_Label")
my_dataVariables_labels <- setdiff(colnames(my_meged_data), my_id_labels)

# Long format: one row per (subject, activity, variable) observation.
my_melt_merged_data <- melt(
  my_meged_data,
  id.vars = my_id_labels,
  measure.vars = my_dataVariables_labels
)

# Wide format again, averaging every variable per subject and activity label.
# Activity_ID is left out of the formula, so it does not appear in the result.
my_tidy_average_merged_data <- dcast(
  my_melt_merged_data,
  subject + Activity_Label ~ variable,
  fun.aggregate = mean
)

# Save the tidy averages as a text file without row names.
write.table(
  my_tidy_average_merged_data,
  file = "./my_tidy_average_merged_data.txt",
  row.names = FALSE
)