codeanalysis/csv_as_enclosure_json.py at master · jasonareid/codeanalysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/bin/env python

#######################################################################
## This program generates a JSON document suitable for a D3.js
## enclosure diagram visualization.
## The input data is read from two CSV files:
##  1) The complete system structure, including size metrics.
##  2) A hotspot analysis result used to assign weights to the modules.
#######################################################################

import argparse
import csv
import json
import sys
import math

class MergeError(Exception):
	""" Raised when the input content does not have the expected format. """
	def __init__(self, message):
		super(MergeError, self).__init__(message)

class Merged(object):
	""" Joins per-module frequencies with per-module complexity.

		Modules are first registered together with their complexity through
		record_detected(). Frequencies handed to extend_with() are only kept
		for modules that have been registered that way.
	"""
	def __init__(self):
		# module name -> complexity, for every registered module
		self._all_modules_with_complexity = {}
		# module name -> (freqs, complexity), for modules known to both inputs
		self._merged = {}

	def sorted_result(self):
		""" Return the merged entries as a list of (name, (freqs, complexity))
			tuples in descending order of frequency.

			The frequencies are compared numerically. When they are read from
			a CSV file they are strings, and a plain string comparison would
			rank '9' above '10'.
			Raises ValueError if a frequency cannot be read as a number.
		"""
		by_descending_freq = sorted(self._merged.items(), key=lambda item: float(item[1][0]), reverse=True)
		return by_descending_freq

	def extend_with(self, name, freqs):
		""" Attach freqs to the module called name.
			Modules that were never registered are silently ignored.
		"""
		if name in self._all_modules_with_complexity:
			complexity = self._all_modules_with_complexity[name]
			self._merged[name] = freqs, complexity

	def record_detected(self, name, complexity):
		""" Register the module called name together with its complexity. """
		self._all_modules_with_complexity[name] = complexity

def write_csv(stats):
	""" Write the merged statistics to stdout as CSV, heading line first.

		stats is an iterable of (name, (revisions, code)) tuples where all
		three values are strings.
	"""
	# sys.stdout.write replaces the print statement, which is a syntax
	# error on Python 3. The emitted text is the same on both versions.
	write = sys.stdout.write
	write('module,revisions,code\n')
	for name, (revisions, code) in stats:
		write(name + ',' + revisions + ',' + code + '\n')

def parse_complexity(merged, row):
	""" Register the module described by one row of the structure CSV.

		The module name is the second column minus its first two characters
		(presumably a leading './' - confirm against the tool that produces
		the file). The complexity is taken from the fifth column.
	"""
	merged.record_detected(row[1][2:], row[4])

def parse_freqs(merged, row):
	""" Hand one row of the frequency CSV over to merged.

		The first column is the module name, the second its frequencies.
	"""
	merged.extend_with(row[0], row[1])

def merge(revs_file, comp_file):
	""" Merge the revision frequencies in revs_file with the complexity
		figures in comp_file and write the result to stdout as CSV.

		The complexity file is parsed first, since frequencies are only
		kept for modules that have a recorded complexity.
	"""
	merged = Merged()
	# parse_csv takes (filename, parse_action, expected_format) and calls
	# the action with a single row, so each action is bound to the
	# accumulator with a closure. Passing merged as the first positional
	# argument, as before, made every call fail with a TypeError.
	parse_csv(comp_file,
		lambda row: parse_complexity(merged, row),
		expected_format='language,filename,blank,comment,code')
	parse_csv(revs_file,
		lambda row: parse_freqs(merged, row),
		expected_format='entity,n-revs')
	write_csv(merged.sorted_result())

######################################################################
## Parse input
######################################################################

def validate_content_by(heading, expected):
	""" Check that the heading fields start with the expected ones.

		heading is the list of fields on the heading row and expected is a
		comma-separated string of field names. A falsy expected disables
		the validation. Raises MergeError on a mismatch.
	"""
	if expected:
		wanted = expected.split(',')
		# Only the leading fields are compared, so extra columns are allowed.
		if heading[:len(wanted)] != wanted:
			raise MergeError('Erroneous content. Expected = ' + expected + ', got = ' + ','.join(heading))

def parse_csv(filename, parse_action, expected_format=None):
	""" Parse the CSV file and return a list holding the result of applying
		parse_action to every non-blank row that follows the heading.

		Blank lines before the heading are skipped. If expected_format is
		given, the heading has to start with those comma-separated fields.
		Raises MergeError if the file has no heading, or if the heading does
		not match expected_format.
	"""
	def read_heading_from(r):
		# Iterating the reader works on both Python 2 and 3, unlike its
		# .next() method which only exists on Python 2. Running out of
		# rows is reported as a MergeError instead of a bare StopIteration.
		for candidate in r:
			if candidate != []:
				return candidate
		raise MergeError('Erroneous content. No heading found in ' + filename)
	# The csv module wants binary mode on Python 2, but text mode with
	# newline='' on Python 3.
	if sys.version_info[0] >= 3:
		csvfile = open(filename, 'r', newline='')
	else:
		csvfile = open(filename, 'rb')
	with csvfile:
		r = csv.reader(csvfile, delimiter=',')
		heading = read_heading_from(r)
		validate_content_by(heading, expected_format)
		# Blank lines give empty rows, which no parse action can index into.
		return [parse_action(row) for row in r if row]

class StructuralElement(object):
	""" A module in the system structure: a '/'-separated name together
		with its complexity.
	"""
	def __init__(self, name, complexity):
		self.name, self.complexity = name, complexity

	def parts(self):
		""" Return the name split into its '/'-separated components. """
		full_name = self.name
		return full_name.split('/')

def parse_structural_element(csv_row):
	""" Build a StructuralElement from one row of the structure CSV.

		The complexity is read from the fifth column. The name is the
		second column with its first two characters dropped.
	"""
	# JR change: the name used to be the untouched second column.
	# NOTE(review): the two dropped characters are presumably a leading
	# './' in the file names - confirm against the structure file.
	filename_column = csv_row[1]
	return StructuralElement(filename_column[2:], csv_row[4])

def make_element_weight_parser(weight_column):
	""" Create a row parser that reads the weight from the given column.
		Parameterizing on the column lets the same code generate data from
		different analysis result types.
	"""
	def parse_element_weight(csv_row):
		# The first column is the name; the weight is converted to a float.
		# No check is made that the weight is non-zero.
		return csv_row[0], float(csv_row[weight_column])
	return parse_element_weight

######################################################################
## Calculating weights from the given CSV analysis file
######################################################################

def module_weight_calculator_from(analysis_results, normalizeweightsfactor):
	""" Create a function that maps a module name to its normalized weight.

		analysis_results is a sequence of (name, raw_weight) pairs. Each raw
		weight is raised to the power 1/normalizeweightsfactor and divided
		by the largest such value, so the heaviest module gets weight 1.0.
		The returned function gives 0.0 for unknown modules. It also gives
		0.0 for every module when there are no results at all or when the
		largest weight is zero, since nothing can be normalized then.
	"""
	results = list(analysis_results)
	normalized_weights = {}
	if results:
		exponent = 1 / (1.0 * normalizeweightsfactor)
		max_value = math.pow(max(raw for _, raw in results), exponent)
		# A zero maximum would mean dividing by zero below.
		if max_value != 0:
			scale = 1.0 / max_value
			normalized_weights = dict((name, scale * math.pow(raw, exponent)) for name, raw in results)
	def normalized_weight_for(module_name):
		return normalized_weights.get(module_name, 0.0)
	return normalized_weight_for

def dict_lists(list_of_lists):
	""" Turn a sequence of (name, value) pairs into a dictionary.
		For a repeated name, the last value wins.
	"""
	return {name: n for name, n in list_of_lists}

######################################################################
## Building the structure of the system
######################################################################

def _matching_part_in(hierarchy, part):
	return next((x for x in hierarchy if x['name']==part), None)

def _ensure_branch_exists(hierarchy, branch):
	""" Return the node named branch in hierarchy. If there is none, a new
		node without children is appended to hierarchy and returned.
	"""
	found = _matching_part_in(hierarchy, branch)
	if found:
		return found
	created = {'name':branch, 'children':[]}
	hierarchy.append(created)
	return created

def _add_leaf(hierarchy, module, weight_calculator, raw_weights, minrevs, name):
	# TODO: augment with weight here!
	revs = 0
	if module.name in raw_weights:
		revs = int(raw_weights[module.name])

	if revs < minrevs:
		return hierarchy

	new_leaf = {'name':name,
	            'size':module.complexity,
	            'weight':weight_calculator(module.name),
	            'revs': revs}
	hierarchy.append(new_leaf)
	return hierarchy

def _insert_parts_into(hierarchy, module, weight_calculator, raw_weights, minrevs, parts):
	""" Walk down the hierarchy along the individual parts of the module
		and insert the module at the end of that path.
		Every part except the last names a branch. A missing branch is
		created on the way down.
		The last part is the module name without its path. It is added as
		a leaf, which is where size and weight are attached.
		Returns the list of children the leaf was offered to.
	"""
	level = hierarchy
	for branch_name in parts[:-1]:
		level = _ensure_branch_exists(level, branch_name)['children']
	return _add_leaf(level, module, weight_calculator, raw_weights, minrevs, name=parts[-1])

def generate_structure_from(modules, weight_calculator, raw_weights, minrevs):
	""" Build the complete tree for the given modules and return it
		wrapped in a node named 'root'.
	"""
	top_level = []
	for module in modules:
		_insert_parts_into(top_level, module, weight_calculator, raw_weights, minrevs, module.parts())
	return {'name':'root', 'children':top_level}

######################################################################
## Output
######################################################################

def write_json(result):
	""" Serialize result as JSON and write it to stdout on a line of its own. """
	# sys.stdout.write replaces the print statement, which is a syntax
	# error on Python 3. The emitted text is the same on both versions.
	sys.stdout.write(json.dumps(result) + '\n')

######################################################################
## Main
######################################################################

# TODO: turn it around: parse the weights first and add them to individual elements
# as the raw structure list is built!

def run(args):
	""" Read the weights and the structure named in args, combine them
		into a weighted hierarchy and write that hierarchy as JSON.
	"""
	weight_parser = make_element_weight_parser(args.weightcolumn)
	raw_weights = parse_csv(args.weights, parse_action=weight_parser)
	weight_calculator = module_weight_calculator_from(raw_weights, args.normalizeweightsfactor)
	# The same weights keyed by module name, for looking up revisions.
	raw_weights_by_module_name = dict_lists(raw_weights)

	# Only the structure file has its heading validated.
	structure_input = parse_csv(args.structure,
		expected_format='language,filename,blank,comment,code',
		parse_action=parse_structural_element)
	write_json(generate_structure_from(structure_input,
		weight_calculator,
		raw_weights_by_module_name,
		args.minrevs))

if __name__ == "__main__":
	# Command line entry point: collect the options and hand them to run().
	parser = argparse.ArgumentParser(description='Generates a JSON document suitable for enclosure diagrams.')
	parser.add_argument('--structure', required=True, help='A CSV file generated by cloc')
	parser.add_argument('--weights', required=True, help='A CSV file with hotspot results from Code Maat')
	parser.add_argument('--weightcolumn', type=int, default=1, help="The index specifying the column to use in the weight table")
	parser.add_argument('--minrevs', required=False, type=int, default=0, help="Hide anything without at least this many revisions")
	parser.add_argument('--normalizeweightsfactor', required=False, type=int, default=1, help="Normalize weights by this int factor (1 default, 3 a lot)")
	# TODO: add arguments to specify which CSV columns to use!

	args = parser.parse_args()
	run(args)