-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDataframeFromDatFiles.py
More file actions
81 lines (60 loc) · 3.15 KB
/
DataframeFromDatFiles.py
File metadata and controls
81 lines (60 loc) · 3.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
'''
@author: Anurag Mishra
This module provides one function that reads the typical *.dat files exported from, or read by, EEMS and returns a dataframe. If the input *.dat file contains more than one series, the dataframe has one column per series.
The function makes the following assumptions:
1. Values are tab-separated
2. Typical EEMS output *.dat files have correct dates
'''
import pandas as pd
import os
import re
def dffromdatfile(filename):
    """Read an EEMS-style *.dat file and return its contents as a DataFrame.

    The layout is chosen from the first line of the file:

    * If it contains ``$`` or ``*`` the file is treated as a headered export.
      The ``NumberOfSeries=`` line gives the number of series to look for;
      each series is named by its ``Column2=`` line and is preceded by a
      non-header line whose first tab-separated field holds the number of
      data rows. Every series becomes one column, indexed by the parsed
      first column, and the columns are joined side by side.
    * If it splits into exactly two tab-separated fields, the whole file is
      read as a single series whose column is named after the file's base
      name. The index values are left unparsed in this case.
    * Otherwise an empty DataFrame is returned.

    In both readable layouts the value ``-999`` is treated as missing.

    Args:
        filename: Path of the tab-separated *.dat file to read.

    Returns:
        pandas.DataFrame: One column per series, indexed by the first column
        (named ``DateTime``); empty when the file is not recognised or no
        series could be located.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the ``NumberOfSeries=`` value is not an integer.
    """
    with open(filename, 'r') as fobj:
        firstline = fobj.readline().rstrip()

        if '$' in firstline or '*' in firstline:
            # Headered export: find out how many series to expect, then
            # rescan from the top to locate each one.
            total_series = _read_series_count(fobj)
            fobj.seek(0)
            series_layout = _locate_series(fobj, total_series)

            frames = []
            for n_points, column_name, skiprows in series_layout:
                # Each series is parsed independently from the start of the
                # file, skipping everything up to its first data row.
                fobj.seek(0)
                frames.append(
                    pd.read_csv(
                        fobj,
                        sep='\t',
                        header=None,
                        names=['DateTime', column_name],
                        nrows=n_points,
                        parse_dates=True,
                        index_col=[0],
                        skiprows=skiprows,
                        na_values=-999,
                    )
                )
            if not frames:
                # pd.concat raises on an empty list; an empty frame matches
                # what the unrecognised-file branch returns.
                return pd.DataFrame()
            return pd.concat(frames, axis=1)

        if len(firstline.split('\t')) == 2:
            # Plain two-column file. The first line was consumed above to
            # detect the layout, so rewind or the first data row is lost.
            fobj.seek(0)
            return pd.read_csv(
                fobj,
                sep='\t',
                skiprows=0,
                header=None,
                names=['DateTime', os.path.basename(filename)],
                index_col=0,
                na_values=-999,
            )

    # Neither layout matched: this is probably not a time series file.
    return pd.DataFrame()


# First run of digits in the leading field of a "number of data rows" line.
_COUNT_PATTERN = re.compile(r'\d+')


def _read_series_count(fobj):
    """Return the integer on the first ``NumberOfSeries=`` line, or 0 if absent."""
    for line in fobj:
        if 'NumberOfSeries' in line:
            return int(line.split('=')[1])
    return 0


def _locate_series(fobj, total_series):
    """Scan ``fobj`` from its current position and locate up to ``total_series`` series.

    Returns a list of ``(n_points, column_name, skiprows)`` tuples, where
    ``skiprows`` is the 1-based line number of the row-count line, i.e. the
    number of lines to skip to reach the first data row of that series.
    """
    located = []
    column_name = None
    skip_until = 0  # last line number belonging to the previous series' data
    numbered_lines = enumerate(fobj, start=1)

    for _ in range(total_series):
        for line_number, line in numbered_lines:
            if line_number <= skip_until:
                # Still inside the data rows of the series found previously.
                continue
            if 'Column2' in line:
                column_name = line.split('=')[1].rstrip()
            if '$' in line or '*' in line:
                # Header/comment line; cannot be the row-count line.
                continue
            match = _COUNT_PATTERN.search(line.split('\t')[0])
            if match is None or column_name is None:
                # Not a usable row-count line (no digits, or no column name
                # seen yet); keep scanning.
                continue
            n_points = int(match.group(0))
            located.append((n_points, column_name, line_number))
            skip_until = line_number + n_points
            break

    return located