# run_analysis.R -- from the GetDataProject repository (benbrahim777)

## Here are the data for the project:
#
#  https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
#
# You should create one R script called run_analysis.R that does the following:
#   1. Merges the training and the test sets to create one data set.
#   2. Extracts only the measurements on the mean and standard deviation for each measurement.
#   3. Uses descriptive activity names to name the activities in the data set.
#   4. Appropriately labels the data set with descriptive variable names.
#   5. From the data set in step 4, creates a second, independent tidy data set
#      with the average of each variable for each activity and each subject.
#
# Good luck!


# Java runtime location on the original author's Windows machine. JAVA_HOME is
# only set when that installation is actually present, so other machines keep
# whatever JAVA_HOME they already have instead of pointing at a missing path.
# NOTE(review): presumably needed by a Java-backed package loaded through
# getLibrairies() -- confirm.
my_java_home <- "C:\\Program Files\\Java\\jre1.8.0_25"
if (file.exists(my_java_home)) {
  Sys.setenv(JAVA_HOME = my_java_home)
}

# Switch to the author's project directory when it is available. On any other
# machine setwd() would abort the whole script, so the failure is reported and
# the script continues in the current working directory; every path used
# later is relative to that directory.
tryCatch(
  setwd("C:/DropBox Salah/Coursera/getdata/Week3/"),
  error = function(e) {
    message(
      "Could not change working directory (", conditionMessage(e),
      "); continuing in ", getwd()
    )
  }
)

# getLibrairies() is defined in getLibrairies.R; presumably it loads the
# packages this script relies on (as.data.table, melt, dcast) -- confirm.
source("./getLibrairies.R")
getLibrairies()

# TODO: add code to download and unzip the data set.


# ---- X data: measurements -------------------------------------------------
# get_mean_stdDeviation() comes from get_mean_stdDeviation.R. Going by its
# name and the assignment brief it keeps only the mean / standard-deviation
# measurement columns of an X file -- confirm against the helper's source.
source("get_mean_stdDeviation.R")
my_x_test_fileName <- "./data/UCI HAR Dataset/test/X_test.txt"
my_x_test_mean_std <- get_mean_stdDeviation(my_x_test_fileName)

my_x_train_fileName <- "./data/UCI HAR Dataset/train/X_train.txt"
my_x_train_mean_std <- get_mean_stdDeviation(my_x_train_fileName)

# ---- y data: activity labels ----------------------------------------------
# get_y_data() is expected to be defined by get_y_test_data.R; presumably it
# attaches descriptive activity names to the activity ids -- confirm.
source("get_y_test_data.R")
my_y_test_fileName <- "./data/UCI HAR Dataset/test/y_test.txt"
my_y_test_labels <- get_y_data(my_y_test_fileName)

my_y_train_fileName <- "./data/UCI HAR Dataset/train/y_train.txt"
my_y_train_labels <- get_y_data(my_y_train_fileName)

# ---- subject ids ----------------------------------------------------------
# NOTE(review): no file sourced in this section is named after
# get_subject_data(); presumably one of the helper files sourced above
# defines it -- confirm.
my_subject_test_fileName <- "./data/UCI HAR Dataset/test/subject_test.txt"
my_subject_test <- get_subject_data(my_subject_test_fileName)

my_subject_train_fileName <- "./data/UCI HAR Dataset/train/subject_train.txt"
my_subject_train <- get_subject_data(my_subject_train_fileName)


# Build one table per partition by placing the subject data, the activity
# labels and the selected measurements side by side (in that column order).
my_test_data <- cbind(
  as.data.table(my_subject_test),
  my_y_test_labels,
  my_x_test_mean_std
)
my_train_data <- cbind(
  as.data.table(my_subject_train),
  my_y_train_labels,
  my_x_train_mean_std
)


# Merge the training and the test sets into one data set (test rows first).
# NOTE: "meged" is a misspelling of "merged"; the variable name is kept as-is
# so anything that reads this global after the script runs keeps working.
my_meged_data <- rbind(my_test_data, my_train_data)


# Create a second, independent tidy data set with the average of each variable
# for each activity and each subject.

# Reshape to long format: every column that is not an identifier becomes a
# (variable, value) pair.
# NOTE(review): assumes the helper functions name the identifier columns
# exactly "subject", "Activity_ID" and "Activity_Label" -- confirm.
my_id_labels <- c("subject", "Activity_ID", "Activity_Label")
my_dataVariables_labels <- setdiff(colnames(my_meged_data), my_id_labels)
my_melt_merged_data <- melt(
  my_meged_data,
  id.vars = my_id_labels,
  measure.vars = my_dataVariables_labels
)

# Cast back to wide format with one row per subject / activity label, taking
# the mean of each measurement variable.
my_tidy_average_merged_data <- dcast(
  my_melt_merged_data,
  subject + Activity_Label ~ variable,
  fun.aggregate = mean
)

# Save the tidy averages to a text file, without row names.
write.table(
  my_tidy_average_merged_data,
  file = "./my_tidy_average_merged_data.txt",
  row.names = FALSE
)