-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinearRegression.py
More file actions
65 lines (49 loc) · 1.34 KB
/
linearRegression.py
File metadata and controls
65 lines (49 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from numpy import*
from sklearn.metrics import*
def computeCost(X, y, theta):
    """Return the halved mean squared error of the linear model X.dot(theta).

    The value computed is ``sum((X.dot(theta) - y) ** 2) / (2 * m)`` where
    ``m`` is the number of rows of ``X``.

    Args:
        X: array of inputs, one row per sample.
        y: array of targets.
        theta: array of model parameters.

    Returns:
        The cost as a floating-point scalar.

    NOTE(review): assumes ``y`` has the same shape as ``X.dot(theta)``
    (the script in this file passes column vectors); with mismatched shapes
    NumPy broadcasting would apply to the subtraction -- confirm at call
    sites.
    """
    m = X.shape[0]
    residuals = X.dot(theta) - y
    # 2.0 keeps the denominator a float, as the original ``2*m*1.0`` did.
    return (residuals ** 2).sum() / (2.0 * m)
def normalization(X, XX):
    """Standardize columns of XX in place using the column statistics of X.

    For every column index ``i`` from 1 up to ``X.shape[1] - 1``, the mean
    and standard deviation of ``X[:, i]`` are computed and column ``i`` of
    ``XX`` is replaced by ``(XX[:, i] - mean) / std``. Column 0 is never
    touched (the script in this file stores a constant 1 there).

    Args:
        X: 2-D array that supplies the per-column mean and std.
        XX: 2-D array that is rescaled; it is modified in place. It may be
            the same object as ``X`` because each column's statistics are
            read before that column is overwritten.

    Returns:
        ``XX`` itself (the same object that was passed in).

    NOTE(review): the result is written back into ``XX``, so an
    integer-typed ``XX`` would presumably lose the fractional part --
    confirm callers pass float arrays.
    """
    n = X.shape[1]
    for i in range(1, n):
        reference = X[:, i]
        mu = reference.mean()
        sigma = reference.std()
        if sigma == 0:
            # A constant column has zero spread; dividing by it would fill
            # the column with NaN/inf. Only centre it instead.
            sigma = 1.0
        XX[:, i] = (XX[:, i] - mu) / sigma
    return XX
def graDescent(X, y, theta, alpha, num_it):
    """Run ``num_it`` gradient-descent updates on theta and return the result.

    Each iteration applies
    ``theta = theta - (alpha / m) * X^H.dot(X.dot(theta) - y)``
    where ``m = y.shape[0]`` and ``X^H`` is the conjugate transpose of X.

    Args:
        X: array of inputs, one row per sample.
        y: array of targets.
        theta: starting parameters; it is not modified in place, each
            update builds a new array.
        alpha: step size.
        num_it: number of updates to perform; with 0 the input ``theta``
            is returned unchanged.

    Returns:
        The parameters after the final update.
    """
    m = y.shape[0]
    # float() keeps the division true even when alpha is an int, where
    # Python 2 would otherwise floor alpha / m to 0.
    step = float(alpha) / m
    # The conjugate transpose does not depend on theta, so compute it once.
    X_h = X.conj().transpose()
    for _ in range(num_it):
        theta = theta - step * X_h.dot(X.dot(theta) - y)
    return theta
# Script: fit a linear model to data11.txt with gradient descent and report
# its error on a held-out tail of the file.
#
# Each non-blank line of data11.txt is parsed as comma-separated floats; the
# last field is the target value and the preceding fields are the features.
import math

data = []
yvalue = []
with open("data11.txt", "r") as file1:
    for line in file1:
        if not line.strip():
            # Skip blank lines instead of crashing on float('').
            continue
        # A list comprehension gives a real list on both Python 2 and 3;
        # map() returns an iterator on Python 3, which has no insert().
        temp = [float(field) for field in line.rstrip().split(',')]
        temp.insert(0, 1)  # constant feature in column 0
        data.append(temp[:-1])
        yvalue.append([temp[-1]])

m = len(data)
n = len(data[0])

# The first 70% of the rows are used for fitting, the remainder for testing.
m1 = int(0.7 * m)
X = array(data[:m1])
y = array(yvalue[:m1])
X_test = array(data[m1:m])
y_test = array(yvalue[m1:m])

theta = zeros((X.shape[1], 1))
alpha = 0.01
ite = 10000

# Feature scaling is currently disabled. If re-enabled, X_test must be scaled
# before X because both use the statistics of the unscaled training set:
#   X_test = normalization(X, X_test)
#   X = normalization(X, X)
theta = graDescent(X, y, theta, alpha, ite)

# Single-argument print(...) calls emit the same text on Python 2 and 3.
print("cost on training data %s" % (computeCost(X, y, theta),))
print("cost on testing data %s" % (computeCost(X_test, y_test, theta),))
print(theta)

c = X_test.dot(theta)  # predictions for the held-out rows
print("mean absolute error")
print(mean_absolute_error(y_test, c))
# Root of the mean squared error on the held-out rows.
print(math.sqrt(mean_squared_error(y_test, c)))