-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathreport.py
More file actions
175 lines (141 loc) · 6.46 KB
/
report.py
File metadata and controls
175 lines (141 loc) · 6.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import random
import seaborn as sns
import colorsys
import csv
class LibraryUsagePlot:
    """Build an interactive Plotly report of library usage grouped by category.

    Three input files are combined: a JSON file mapping library names to
    category names, a '|'-separated CSV file of usage records, and a text
    file listing (one per line) the categories that are visible by default.
    """

    def __init__(self, category_file, git_file, enabled_categories_file):
        """
        Store the input file paths; no file is read until a method needs it.
        Parameters:
        category_file (str): Path of the JSON file mapping library -> category.
        git_file (str): Path of the '|'-separated CSV file with usage records.
        enabled_categories_file (str): Path of the text file listing enabled categories.
        """
        self.category_file = category_file
        self.git_file = git_file
        self.enabled_categories_file = enabled_categories_file

    def categorize_library(self, library, categories):
        """
        Categorize a library, assigning 'Other' to libraries with empty categories.
        Parameters:
        library (str): The name of the library.
        categories (dict): A dictionary of categories keyed by library names.
        Returns:
        str: The category name, or 'Other' for libraries not found in categories or with empty category names.
        """
        category = categories.get(library, 'Other')
        return 'Other' if category == '' else category

    def load_categories(self):
        """
        Load the categories from the category file.
        Returns:
        dict: The categories dictionary (empty when the file holds an empty mapping).
        """
        categories_df = pd.read_json(self.category_file, orient='index')
        # An empty mapping yields a frame without column 0; indexing it would
        # raise KeyError, so report "no categories" instead.
        if categories_df.empty:
            return {}
        # A flat {library: category} mapping is parsed as a single column named 0.
        return categories_df.to_dict()[0]

    def load_enabled_categories(self):
        """
        Load the enabled categories from the enabled categories file.
        The file is read as UTF-8. Surrounding whitespace is stripped from
        every line and blank lines are skipped, so a trailing newline never
        produces an empty category name.
        Returns:
        list: The list of enabled categories, in file order.
        """
        with open(self.enabled_categories_file, 'r', encoding='utf-8') as file:
            stripped_lines = (line.strip() for line in file)
            return [name for name in stripped_lines if name]

    def print_categories(self):
        """
        Print the categories and enabled categories.
        Both lists are printed sorted; empty category names are shown as 'Other'.
        """
        categories = self.load_categories()
        enabled_categories = self.load_enabled_categories()
        all_categories = {
            category if category != '' else 'Other'
            for category in categories.values()
        }
        print('\n# All categories:')
        for category in sorted(all_categories):
            print(category)
        print('\n# Enabled categories:')
        for category in sorted(enabled_categories):
            print(category)

    def generate_colors(self, num_colors):
        """
        Generate evenly spaced hex colors around the hue wheel.
        Parameters:
        num_colors (int): How many colors to generate.
        Returns:
        list: '#rrggbb' strings; empty when num_colors is 0.
        """
        saturation = 0.7  # Adjust saturation here (0 to 1)
        lightness = 0.5  # Adjust lightness here (0 to 1)
        colors = []
        for i in range(num_colors):
            # Vary hue from 0 to 1, keeping saturation and lightness constant.
            # Note the colorsys argument order: hue, lightness, saturation.
            red, green, blue = colorsys.hls_to_rgb(i / num_colors, lightness, saturation)
            colors.append(
                f'#{int(red * 255):02x}{int(green * 255):02x}{int(blue * 255):02x}'
            )
        return colors

    def _build_category_trace(self, category, category_df, color, visible):
        """Build the scatter trace for one category (start dates as circles, end dates as crosses)."""
        row_count = len(category_df)
        libraries = category_df['library'].tolist()
        projects = category_df['project'].tolist()
        return go.Scatter(
            # Start and end dates share one trace so a single legend entry toggles both.
            x=category_df['date_start'].tolist() + category_df['date_end'].tolist(),
            y=libraries + libraries,
            mode='markers',
            name=category,
            visible=visible,
            marker=dict(color=color,
                        symbol=['circle'] * row_count + ['x'] * row_count),
            text=projects + projects,
            customdata=[category] * (2 * row_count),  # Add category to customdata
            hovertemplate="<b>%{text}</b><br>Library: %{y}<br>Date: %{x}<br>Category: %{customdata}<extra></extra>"
        )

    def plot_library_usage_by_category(self, output_file='report.html', show=True):
        """
        Creates an interactive plot of library usage over time, colored by category.
        Each category can be toggled on or off in the plot. Categories that are not
        listed in the enabled categories file are disabled (visible in legend only)
        by default. Enabled categories are listed first in the legend and take their
        colors from Plotly's qualitative 'Bold' palette in legend order, so the
        output is the same on every run; the remaining categories use a seaborn
        'husl' palette.
        The CSV file must provide the columns 'library', 'project', 'date_start'
        and 'date_end'.
        Parameters:
        output_file (str): Path of the HTML file to write. Defaults to 'report.html'.
        show (bool): Whether to also open the figure with fig.show(). Defaults to True.
        """
        # Load the Git dataset
        df = pd.read_csv(self.git_file, sep='|', quoting=csv.QUOTE_MINIMAL, escapechar='\\')
        # Apply the categorization function to the 'library' column
        categories = self.load_categories()
        df['category'] = df['library'].apply(self.categorize_library, args=(categories,))
        # Sort the DataFrame by 'date_end' in descending order
        df_sorted = df.sort_values(by='date_end', ascending=False)
        # A set keeps the repeated membership tests below cheap
        enabled_categories = set(self.load_enabled_categories())
        # Give every category a fallback color from the husl palette
        all_categories = df_sorted['category'].unique()
        husl_colors = dict(zip(
            all_categories,
            sns.color_palette("husl", len(all_categories)).as_hex(),
        ))
        bold_colors = px.colors.qualitative.Bold
        # Sort the categories for legend, moving enabled categories to the top
        legend_order = sorted(
            all_categories,
            key=lambda c: (c not in enabled_categories, c),
        )
        fig = go.Figure()
        enabled_seen = 0
        # Add one trace per category, in the desired legend order
        for category in legend_order:
            category_df = df_sorted[df_sorted['category'] == category]
            if category in enabled_categories:
                # Walk the Bold palette in order (wrapping around) instead of
                # picking at random, which could repeat colors between categories.
                color = bold_colors[enabled_seen % len(bold_colors)]
                enabled_seen += 1
                visible = True
            else:
                color = husl_colors[category]
                visible = 'legendonly'
            fig.add_trace(
                self._build_category_trace(category, category_df, color, visible)
            )
        # Update layout
        fig.update_layout(
            title='Usage of Libraries Over Time by Category',
            xaxis_title='Date',
            yaxis_title='Libraries',
            legend_title='Categories',
            template="plotly_white"
        )
        # Save the plot as an HTML file
        fig.write_html(output_file)
        # Show the plot
        if show:
            fig.show()
def main():
    """Run the report against the default input files in the working directory."""
    report = LibraryUsagePlot('cat.json', 'git.csv', 'enabled_categories.txt')
    # List the known and enabled categories first, then build the plot.
    report.print_categories()
    report.plot_library_usage_by_category()


if __name__ == '__main__':
    main()