-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathBoxplot.R
More file actions
36 lines (31 loc) · 1.42 KB
/
Boxplot.R
File metadata and controls
36 lines (31 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Boxplot.R
# Loads the 2013 thads data set, removes rows holding specific negative codes
# in VALUE, TOTSAL and ZSMHC, keeps rows with VALUE <= 300000, derives
# home_age, saves the cleaned data frame to 'thads.Rda', and draws a boxplot
# of VALUE with dollar-formatted axis labels.

library(sqldf)

# Directory holding the raw data files.
data_dir <- "L:/DePaul/CSC-324/R_projects/HousingAnalysis-master"

# Load only the 2013 data set and tag every row with its dataset year.
thads13 <- read.csv(file.path(data_dir, "thads2013n.txt"))
thads13$dataset_year <- 2013
thads <- thads13
# To also include the 2011 data set, load it and merge it in:
# thads11 <- read.csv(file.path(data_dir, "thads2011.txt"))
# thads11$dataset_year <- 2011
# thads <- merge(thads13, thads11, all = TRUE)

# Inspect the negative VALUE entries before filtering.
sqldf("SELECT DISTINCT VALUE FROM thads WHERE VALUE < 0")
sqldf("SELECT COUNT(*) FROM thads WHERE VALUE < 0")
# Remove rows where VALUE is equal to -6. subset() also drops rows whose
# condition evaluates to NA.
# NOTE(review): -6 is presumably a "not applicable"/missing code -- confirm
# against the data dictionary.
thads <- subset(thads, VALUE != -6)

# Count rows with a negative Total Salary (TOTSAL), then remove the rows
# coded -9. Only the exact value -9 is removed; any other negative TOTSAL
# value is kept.
sqldf("SELECT COUNT(*) FROM thads WHERE TOTSAL < 0")
thads <- subset(thads, TOTSAL != -9)

# Count rows with a negative Monthly Housing Cost (ZSMHC), then remove the
# rows coded -6. Only the exact value -6 is removed.
sqldf("SELECT COUNT(*) FROM thads WHERE ZSMHC < 0")
thads <- subset(thads, ZSMHC != -6)

# Explore the upper range of VALUE before restricting it.
sqldf("SELECT DISTINCT VALUE FROM thads WHERE VALUE > 2500000")
sqldf("SELECT COUNT(*) FROM thads WHERE VALUE = 2520000")
sqldf("SELECT COUNT(*) FROM thads WHERE VALUE BETWEEN 1000000 AND 1500000")
# The result column of this count is aliased as VALUE.
sqldf("SELECT COUNT(*) VALUE FROM thads WHERE VALUE > 300000")
# Keep only rows with VALUE of 300000 or less.
thads <- subset(thads, VALUE <= 300000)

# home_age = dataset year minus BUILT.
# NOTE(review): BUILT is presumably the construction year -- confirm.
thads$home_age <- thads$dataset_year - thads$BUILT
save(thads, file = 'thads.Rda')

# Plot settings: discourage scientific notation in printed numbers and
# shrink the axis label / axis annotation text.
options(scipen = 5)
par(cex.lab = 0.5)
par(cex.axis = 0.75)

# Draw the boxplot without the default y axis, then add a custom y axis
# whose labels are prefixed with "$".
boxplot(thads$VALUE, yaxt = "n", main = "2013 Market Value")
value_ticks <- axTicks(2)
# format(scientific = FALSE) renders the ticks in fixed notation explicitly,
# so the labels never appear as e.g. "$1e+05" and do not depend on the
# global scipen option; trim = TRUE avoids padding with leading spaces.
value_labels <- sprintf(
  "$%s",
  format(value_ticks, scientific = FALSE, trim = TRUE)
)
axis(2, at = value_ticks, labels = value_labels, las = 1)