UCNRS/wateryear_split.py at master · erczo/UCNRS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#!/usr/bin/python
# -*- coding: utf-8 -*-
################################################################################
#
#  wateryear_splitter.py
#  Author: Collin Bode
#  Date: March, 2016
#
#  Purpose: Take all Campbell Scientific CR1000 .dat files and split them by
#  water year (Oct.1 to Oct. 1).  Leave only current year in file.
#  Modified from wateryear_splitter.pl perl script.
#
#  Dependencies: python-dateutil (dateutil.parser)
#
################################################################################
import os
import sys
import datetime as dt
from dateutil.parser import parse
import gzip

def get_date(str_datetime):
    """Parse the first field of a logger row into a date/time value.

    Args:
        str_datetime: Text of the field; surrounding double quotes are
            stripped before parsing.

    Returns:
        Whatever dateutil's ``parse`` produces for the field, or the string
        ``'Not_Date'`` when the field is empty or cannot be parsed.  Callers
        in this file compare the result against ``'Not_Date'`` to tell header
        rows from data rows.
    """
    str_datetime = str_datetime.strip('"')
    if not str_datetime:
        # An empty field is never a timestamp; answer directly rather than
        # handing a placeholder string to the parser.
        return 'Not_Date'
    try:
        return parse(str_datetime)
    except Exception:
        # Any parser failure means "not a date".  Catching Exception (not a
        # bare except) lets KeyboardInterrupt and SystemExit propagate so the
        # script can still be interrupted while scanning large files.
        return 'Not_Date'

def get_wy(dt_datetime):
    """Return the water year for a date.

    Months October through December (month > 9) are counted in the following
    calendar year; every other month keeps its own calendar year.

    Args:
        dt_datetime: Any object exposing ``year`` and ``month`` attributes.

    Returns:
        The water year as an integer.
    """
    calendar_year = dt_datetime.year
    in_new_water_year = dt_datetime.month > 9
    return calendar_year + 1 if in_new_water_year else calendar_year

def create_wydir(localpath,wy):
    """Make sure the ``wy<wy>`` subdirectory of ``localpath`` exists.

    Args:
        localpath: Directory that holds the water-year subdirectories.
        wy: Water year used to build the subdirectory name.

    Returns:
        Path of the water-year directory (``localpath`` + separator + name).
    """
    dirname = 'wy' + str(wy)
    target = os.sep.join((localpath, dirname))
    if os.path.exists(target):
        # One pre-joined string gives the same output whether print is the
        # Python 2 statement or the print() function.
        print(dirname + ' ' + ' exists. using.')
        return target
    print('creating ' + dirname)
    os.mkdir(target)
    return target

def get_dat_stats(fdats):
    """Scan an open .dat file for its header rows and date coverage.

    A row counts as a header row when its first comma-separated field is not
    recognised as a date by ``get_date``; every other row is a data row.

    Args:
        fdats: Open, seekable file object that yields one row per iteration.
            It is rewound to the start both before and after the scan.

    Returns:
        Tuple ``(header, wy_list, dmin, dmax)``:
        header  - list of the non-date rows, unmodified;
        wy_list - distinct water years of the data rows, in order of first
                  appearance;
        dmin    - earliest timestamp seen (stays at the current time when no
                  data row is older than that);
        dmax    - latest timestamp seen (stays at 1990-01-01 when no data row
                  is newer than that).

    Exits the program via ``sys.exit()`` when a non-date row is found at a
    row index greater than 6 (counting from 0).
    """
    fdats.seek(0)   # always scan from the first row

    newest = dt.datetime.strptime('1990-01-01',"%Y-%m-%d")   # raised as newer rows are seen
    oldest = dt.datetime.now()                               # lowered as older rows are seen
    header = []
    water_years = []
    for rownum, line in enumerate(fdats):
        # Example rows:
        #   "TIMESTAMP","RECORD","BattV",...          -> header row
        #   "2011-10-06 08:30:00",0,10.4,9.44,...     -> data row
        stamp = get_date(line.split(',')[0].strip())

        if stamp == 'Not_Date':
            header.append(line)
            if rownum > 6:
                print("Too many header rows!")
                sys.exit()
            continue

        # Data row: widen the date range and record its water year
        if newest < stamp:
            newest = stamp
        if oldest > stamp:
            oldest = stamp
        year = get_wy(stamp)
        if year not in water_years:
            water_years.append(year)

    fdats.seek(0)   # leave the file rewound for the caller
    return header,water_years,oldest,newest

##########################################
# start
print 'WATERYEAR SPLIT START'
localpath = os.path.dirname(os.path.realpath(__file__))
localpath = localpath+os.sep+'LoggerNet_Test'
#localpath = localpath+os.sep+'ucnrs_Test'
print 'Data Directory: '+localpath
now = dt.date.today()
wy_current = get_wy(now)
print 'year: ',now.year,', month: ', now.month,', water year: ',wy_current

##########################################
# Create archive directory and subdirectory for backup
path_archive = localpath+os.sep+'archive'
if(os.path.exists(path_archive) == False):
    print 'Creating archive directory'
    os.mkdir(path_archive)
else:
    print 'Archive directory exists.'
# Now create today's archive
gdt = dt.datetime.strftime(dt.datetime.now(),"%Y-%m-%d")
backupdir = 'backup'+gdt
path_backupdir = path_archive+os.sep+backupdir

if(os.path.exists(path_backupdir) == True):
    print 'WARNING '+backupdir+' already exists. Renaming old directory.'
    j = 0
    backupdir2 = path_archive+os.sep+backupdir+'.'+str(j)+'.backup'
    while (os.path.exists(backupdir2) == True):
        print 'oops. '+backupdir2+' exists. trying another name...'
        j += 1
        backupdir2 = path_archive+os.sep+backupdir+'.'+str(j)+'.backup'
    print 'Old backup directory '+backupdir+' moved to '+backupdir2
    os.rename(path_backupdir,backupdir2)

if(os.path.exists(path_backupdir) == False):
    print 'Creating '+path_backupdir
    os.mkdir(path_backupdir)
else:
    sys.exit(path_backupdir+' still exists. wtf? quitting.')


i = 0
k = 0
##########################################
# List each .dat file in directory, get header, water years, min and max datetimes
for str_dat in os.listdir(localpath):
    k = k+1
    # parse file and directory names looking for .dat files
    pref,suf = os.path.splitext(str_dat)
    #print 'testing '+str_dat+', pref: '+pref+', suf: '+suf
    #adat = str_dat.split('.')
    #pref = adat[0]
    #suf = adat[1]

    # Dat file found, load and get stats
    if(suf == '.dat' or suf == '.backup'):
        parts_public = str_dat.split('_Public')
        parts_status = str_dat.split('_Status')
        if(len(parts_public) == 1 and len(parts_status) == 1):
            i = i+1
            #print k,i,str_dat
            #continue

            print k,i,'Moving '+str_dat+' to archive '+backupdir
            os.rename(localpath+os.sep+str_dat,path_backupdir+os.sep+str_dat)
            if(os.path.exists(localpath+os.sep+str_dat) == True):
                sys.exit('WARNING! '+str_dat+' remains in main directory. Something didnt work. Quiting.')

            # open file
            path_fdat = path_backupdir+os.sep+str_dat
            fdat = open(path_fdat)

            ##########################################
            # Function to get header rows and Date start and end
            header,wy_list,dmin,dmax = get_dat_stats(fdat)
            #print str_dat,' WY_LIST: ',wy_list
            print str_dat,' Date start: ',dmin,', end: ',dmax,wy_list
            '''
            if(i >10):
                print 'Stopping'
                break
            '''
            ##########################################
            # Open water year files and write rows, stop if already done
            for wyr in wy_list:
                print wyr
                str_wy = 'wy'+str(wyr)
                str_wydat = str_dat
                #wydatpath = localpath+os.sep+str_wy
                if(wyr == wy_current):
                    wydatpath = localpath
                else:
                    wydatpath = create_wydir(localpath,wyr)     # This function will create directory if it doesn't exist
                #print wydatpath+os.sep+str_wydat

                # Check if file already exists, if so, rename the old file
                if(os.path.exists(wydatpath+os.sep+str_wydat) == True):
                    print 'WARNING '+str_wydat+' already exists! Renaming old file'
                    j = 0
                    str_wydat_bak = str_dat+'.'+str_wy+'.backup'
                    while (os.path.exists(wydatpath+os.sep+str_wydat_bak) == True):
                        j += 1
                        str_wydat = str_dat+'.'+str_wy+'.'+str(j)+'.backup'
                    print 'Renaming old '+str_wydat+' to '+str_wydat_bak
                    os.rename(wydatpath+os.sep+str_wydat,wydatpath+os.sep+str_wydat_bak)

                # Once you have a unique filename, create and open file
                if(os.path.exists(wydatpath+os.sep+str_wydat) == False):
                    wydat = open(wydatpath+os.sep+str_wydat,'a')
                    print 'CREATING '+wydatpath+os.sep+str_wydat
                else:
                    sys.exit('WARNING! Failed to move previous file. Quitting.')

                # Header
                for row in header:
                    wydat.write(row)

                # Write data rows
                i = 0
                fdat.seek(0)    # reset file iterator back to beginning of file
                for row in fdat:
                    arow = row.split(',')
                    field1 = arow[0].strip()
                    dtrow = get_date(field1)
                    if(dtrow != 'Not_Date'):
                        dtyear = get_wy(dtrow)
                        if(dtyear == wyr):
                            wydat.write(row)
                            i += 1
                print str_wy,': ',i,' rows written to '+str_dat
                wydat.close()

            ##########################################
            # Gzip the DAT file into archive directory
            fdat.seek(0)
            j = 0
            # Make sure the gzip filename is unique
            # Check if file already exists, if so, rename the old gzip file
            path_gzip = path_fdat+'.gz'
            if(os.path.exists(path_gzip) == True):
                print 'WARNING gzip file already exists!'
                j = 0
                path_gzip_old = path_fdat+'.'+str(j)+'.gz'
                while (os.path.exists(path_gzip_old) == True):
                    j += 1
                    path_gzip_old = path_fdat+'.'+str(j)+'.gz'
                print 'Renaming old gzip file '+path_gzip+' --> '+path_gzip_old
                os.rename(path_gzip,path_gzip_old)
            # Gzip datfile
            if(os.path.exists(path_gzip) == False):
                print 'Gzip into archive '+path_gzip
                gzdat = gzip.open(path_gzip,'wb')
                gzdat.writelines(fdat)
                fdat.close()
                gzdat.close()
                print 'CLOSED '+str_dat

            # Delete DAT file - only do after confirming gzip file exists!
            if(os.path.exists(path_gzip) == True):
                os.remove(path_fdat)
                print 'Deleted source file '+str_dat
            else:
                sys.exit('GZIP Failed! Cowardly refusing to delete .dat file. Quitting.')
            print '\n'

# Done
print "\n"
print 'DONE! All ',i,' files of ',k,' in directory processed.'