forked from markmont/flux-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfreealloc
More file actions
executable file
·182 lines (147 loc) · 6.28 KB
/
freealloc
File metadata and controls
executable file
·182 lines (147 loc) · 6.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
#
# freealloc - display resources requested by jobs in an allocation
#
# Root of the flux-utils installation.  Its python2.7 site-packages directory
# is added to the module search path below (presumably so that the "torque"
# module imported further down can be found -- verify against the install).
INSTALL_ROOT = '/usr/local/flux-utils';
# Fallback core limit: main() uses this as MAXPROC for allocations whose name
# ends in "_fluxod" when the mdiag output reports no core limit.
fluxod_maxproc = 4385
import site
site.addsitedir( "%s/lib/python2.7/site-packages/" % INSTALL_ROOT )
import sys
import os
import re
import subprocess
from collections import defaultdict
import argparse
from torque import *
# Use libc's atoi() function per
# http://stackoverflow.com/questions/1665511/python-equivalent-to-atoi-atof
# ...because using regular expressions followed by int() seems overkill,
# and there are several points where we need to get integers that are at
# the beginning of strings.
import ctypes.util
# Handle to the C library; main() calls libc.atoi() on the "procs" and
# "nodes" resource strings of each job.
libc = ctypes.cdll.LoadLibrary(ctypes.util.find_library('c'))
def run_moab(argv, type='xml'):
    """Run a Moab command, retrying once per second for up to 60 attempts.

    Parameters:
        argv: the command and its arguments as a list, handed to
            subprocess.Popen unchanged.
        type: 'xml' or any other value.  For 'xml' the output is not parsed
            (so that lxml.etree need not be imported by this script) and
            None is returned.  For any other value the command's standard
            output is returned as a list of lines.  The name shadows the
            builtin but is kept because callers pass it by keyword.

    Returns:
        The list of output lines, or None when type is 'xml'.

    Exits the process with status 1 if the command still could not be run
    after 60 attempts.  The command's own exit status is not inspected.
    """
    # Imported locally: the retry path needs time.sleep() and the file's
    # import block does not import "time" explicitly.
    import time

    command_name = argv[0].split('/')[-1]
    # Stays None for type == 'xml'; previously the name was left unbound and
    # the final "return output" raised UnboundLocalError.
    output = None
    success = False
    attempt = 1
    while attempt <= 60:
        try:
            pipe = subprocess.Popen( argv, stdout=subprocess.PIPE )
            try:
                if type != 'xml':
                    output = pipe.stdout.readlines()
            finally:
                # Always release the pipe and reap the child, even when
                # reading the output failed.
                pipe.stdout.close()
                pipe.wait()
            success = True
            break
        except EnvironmentError:
            # Only launch/read failures (OSError, IOError) are worth a
            # retry; KeyboardInterrupt and programming errors propagate.
            pass
        sys.stderr.write( 'Trying %s.' % command_name if attempt == 1 else '.' )
        sys.stderr.flush()
        attempt += 1
        time.sleep( 1 )
    if not success:
        sys.stderr.write( "\nERROR: unable to run " + command_name )
        sys.exit( 1 )
    if attempt > 1:
        # Terminate the "Trying ...." progress line written while retrying.
        sys.stderr.write( "\n" )
        sys.stderr.flush()
    return output
# Patterns used while parsing "mdiag -a" output and job attributes; compiled
# once here instead of on every line / node spec / host entry.
_MAXPROC_RE = re.compile( r'\sMAXPROC=(\d+,)?(\d+)\s' )
_ALLOC_SUM_RE = re.compile( r'ALLOC_SUM:(\d+)' )
_MAXMEM_RE = re.compile( r'\sMAXMEM=(\d+,)?(\d+)\s' )
_PPN_RE = re.compile( r'ppn=(\d+)' )
_EXEC_HOST_RE = re.compile( r'\w+/\d+' )

# Column layout shared by the per-job header and the per-job rows, so the
# header always lines up with the data printed beneath it.
_JOB_ROW_FORMAT = "%-16s %-8s %5s %9s"


def _allocation_limits( mdiag_lines ):
    """Return (maxproc, maxmem_kb) parsed from "mdiag -a" output lines.

    Either value is -1 when the output does not report it.  MAXPROC is only
    used while no core limit has been found yet, whereas an ALLOC_SUM value
    always replaces whatever core limit was found before it.
    """
    maxproc = -1
    maxmem = -1  # memory in kb
    for line in mdiag_lines:
        found = _MAXPROC_RE.search( line )
        if found and maxproc == -1:
            maxproc = int( found.group(2) )
        found = _ALLOC_SUM_RE.search( line )
        if found:
            maxproc = int( found.group(1) )
        found = _MAXMEM_RE.search( line )
        if found:
            maxmem = int( found.group(2) ) * 1024  # convert mb to kb
    return maxproc, maxmem


def _job_cores( attrib, resources ):
    """Return the largest positive core-count estimate for a job, or -1.

    Estimates are taken from the procs_bitmap, procs and nodes entries of
    the job's resource list and from its exec_host attribute; the biggest
    one wins.  -1 means no positive estimate could be derived.
    """
    estimates = []
    if 'procs_bitmap' in resources:
        # see http://docs.adaptivecomputing.com/torque/4-2-6/help.htm#topics/2-jobs/requestingRes.htm#procs_bitmap
        estimates.append( len( resources['procs_bitmap'] ) )
    if 'procs' in resources:
        estimates.append( libc.atoi( resources['procs'] ) )
    if 'nodes' in resources:
        requested = 0
        for nodespec in resources['nodes'].split( '+' ):
            # atoi() reads a leading integer; anything below 1 (e.g. a spec
            # that does not start with a number) counts as one node.
            node_count = max( libc.atoi( nodespec ), 1 )
            ppn_match = _PPN_RE.search( nodespec )
            per_node = max( int( ppn_match.group(1) ), 1 ) if ppn_match else 1
            requested += node_count * per_node
        estimates.append( requested )
    if 'exec_host' in attrib:
        hosts = attrib['exec_host'].split( '+' )
        # we know how to handle host specs of the form nyx5678/3; if any
        # entry is something else, ignore exec_host altogether.
        if all( _EXEC_HOST_RE.search( host ) for host in hosts ):
            estimates.append( len( hosts ) )
    best = max( estimates ) if estimates else -1
    return best if best > 0 else -1


def _job_memory( resources, cores ):
    """Return the job's requested memory as reported by get_memory(), or -1.

    Uses the larger of pmem * cores (only when the core count is known) and
    the mem resource; -1 means neither yielded a positive value.
    """
    mem = -1
    if 'pmem' in resources and cores > 0:
        mem = get_memory( resources['pmem'] ) * cores
    if 'mem' in resources:
        whole_job = get_memory( resources['mem'] )
        if whole_job > mem and whole_job > 0:
            mem = whole_job
    return mem


def main():
    """Report the cores and memory in use by running jobs of an allocation.

    Command line: the allocation name, plus an optional --jobs flag that
    also prints one line per running job.  The allocation's limits are read
    from "mdiag -a <name>" and the running jobs are queried through PBS.

    Exit status: 1 for a malformed allocation name, 2 when no core limit
    can be determined for the allocation.

    Note: print(...) is always called with a single argument, which behaves
    the same as the print statement under Python 2.
    """
    parser = argparse.ArgumentParser(description='Displays unused cores and memory for an allocation')
    parser.add_argument("allocation_name", help="Name of the allocation")
    parser.add_argument("--jobs", help="display core and memory usage for each job", action="store_true")
    args = parser.parse_args()
    allocation = args.allocation_name

    if not re.search( r'^[a-zA-Z][a-zA-Z0-9_-]+$', allocation ):
        # The pattern above also accepts hyphens, so the message says so.
        print( "ERROR: bad allocation name: allocation names must begin with a letter followed by letters, digits, underscores, and hyphens." )
        sys.exit( 1 )

    is_fluxod = allocation.endswith( '_fluxod' )
    output = run_moab( [ '/opt/moab/bin/mdiag', '-a', allocation ], type='text' )
    maxproc, maxmem = _allocation_limits( output )
    if is_fluxod and maxproc == -1:
        maxproc = fluxod_maxproc
    if maxproc == -1:
        print( "ERROR: could not determine MAXPROC for allocation %s" % allocation )
        print( "It is possible that %s may not currently be active." % allocation )
        print( "To determine if this is the case, try running:\n mdiag -a %s" % allocation )
        sys.exit( 2 )
    if maxmem == -1:
        # No memory limit reported: assume 4 GB per core (value is in kb).
        maxmem = maxproc * 4 * 1024 * 1024
        if not is_fluxod:
            print( "WARNING: could not determine MAXMEM for allocation %s, assuming 4 GB per core" % allocation )

    total_cores = 0
    total_mem = 0  # memory in kb
    uncertain = False  # set when any job's cores or memory are unknown

    if args.jobs:
        print( _JOB_ROW_FORMAT % ( "Job ID", "User", "Cores", "Memory" ) )
        print( "---------------- -------- ----- ---------" )

    pbs = PBS()
    pbs.connect( pbs.default() )
    # Select only jobs whose job_state is "R".
    sel_list = ATTROPL( None, "job_state", None, "R", BATCH_OP.EQ )
    jobs = pbs.selstat( sel_list, None )

    for job in jobs:
        attrib = job['attrib']
        if 'Account_Name' not in attrib or attrib['Account_Name'] != allocation:
            continue
        resources = attrib['Resource_List']

        cores = _job_cores( attrib, resources )
        if cores > 0:
            total_cores += cores
        mem = _job_memory( resources, cores )
        if mem > 0:
            total_mem += mem
        if cores <= 0 or mem <= 0:
            uncertain = True

        if args.jobs:
            job_id = job['name'].split( '.' )[0]
            user = attrib['Job_Owner'].split( '@' )[0]
            print( _JOB_ROW_FORMAT % ( job_id, user, cores if cores > 0 else '???', show_memory( mem ) ) )

    # Never report a negative amount as available.
    cores_avail = max( maxproc - total_cores, 0 )
    mem_avail = max( maxmem - total_mem, 0 )

    if args.jobs:
        print( "" )
    print( "%s%d of %d core%s in use, %d %score%s available " % ( 'AT LEAST ' if uncertain else '', total_cores, maxproc, '' if maxproc == 1 else 's', cores_avail, 'OR FEWER ' if uncertain else '', '' if cores_avail == 1 else 's' ) )
    print( "%s%s of %s memory in use, %s %smemory available " % ( 'AT LEAST ' if uncertain else '', show_memory( total_mem ), show_memory( maxmem ), show_memory( mem_avail ), 'OR LESS ' if uncertain else '' ) )
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()