-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathplot.py
More file actions
107 lines (85 loc) · 3.53 KB
/
plot.py
File metadata and controls
107 lines (85 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#! /usr/bin/env python3
"""
Plot average throughput vs number of CPU threads per job.
Command examples:
+ Read 3 csv files and store OUTPUT.pdf and OUTPUT.png:
python3 patatrack-scripts/plot.py scan/reduced_hlt_{ecal,hcal,pixel}_w7900.csv --title Labels --labels ECAL HCAL Pixel -o OUTPUT
"""
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import sys
import os
import argparse
# Column names expected in every input CSV (after stripping whitespace).
THREADS_COLUMN = "CPU threads per job"
THROUGHPUT_COLUMN = "average throughput (ev/s)"


def parse_args(argv=None):
    """Parse and validate the command line.

    Args:
        argv: Argument list to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes ``title``,
        ``output``, ``x_axis``, ``labels`` and ``files``.

    Raises:
        SystemExit: On invalid arguments, including a number of ``--labels``
            that differs from the number of input files (exit status 2).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # Optional arguments
    parser.add_argument('-t', '--title', type=str, default="",
                        help="Title of the legend")
    parser.add_argument('-o', '--output', type=str, default="throughput_vs_threads",
                        help="Base filename for output files (PNG/PDF)")
    parser.add_argument('-x', '--x-axis', default='CPU threads per job',
                        help='Horizontal axis label.')
    parser.add_argument('--labels', nargs='+', default=None,
                        help="Labels to show in the legend instead of the CSV file names")
    # Positional arguments (CSV files)
    parser.add_argument('files', nargs='+', help="CSV files to process")

    args = parser.parse_args(argv)
    # Report a usage error instead of asserting: assertions are stripped
    # when Python runs with -O, which would let a mismatch slip through.
    if args.labels is not None and len(args.labels) != len(args.files):
        parser.error("The number of labels must match the number of input CSV files. Each label corresponds to one file, following the order they are provided.")
    return args


def clean_label(label):
    """Return *label* without a trailing ``.csv`` and with underscores as spaces.

    The same clean-up is applied to file names and to user-supplied labels.
    """
    if label.endswith(".csv"):
        label = label[:-4]
    return label.replace("_", " ")


def unique_label(label, taken):
    """Return *label*, or ``"label (N)"`` if *label* is already in *taken*.

    N is the smallest integer >= 2 that yields a label not in *taken*. This
    keeps two inputs that share a label (for example the same file name in
    different directories) from overwriting each other.
    """
    if label not in taken:
        return label
    suffix = 2
    while f"{label} ({suffix})" in taken:
        suffix += 1
    return f"{label} ({suffix})"


def summarize_throughput(df):
    """Compute mean and standard deviation of the throughput per thread count.

    Args:
        df: DataFrame holding the ``CPU threads per job`` and
            ``average throughput (ev/s)`` columns; surrounding whitespace in
            column names is ignored and any other column is dropped. The
            input frame is not modified.

    Returns:
        A DataFrame with the columns ``CPU threads per job``, ``mean`` and
        ``std``, sorted by ``CPU threads per job``.

    Raises:
        KeyError: If one of the two required columns is missing.
    """
    # rename() returns a new frame, leaving the caller's columns untouched.
    df = df.rename(columns=str.strip)
    df = df[[THREADS_COLUMN, THROUGHPUT_COLUMN]]
    return (
        df.groupby(THREADS_COLUMN)[THROUGHPUT_COLUMN]
        .agg(['mean', 'std'])
        .reset_index()
        .sort_values(THREADS_COLUMN)
    )


def load_datasets(files, labels=None):
    """Read every CSV file and return its summary keyed by legend label.

    Args:
        files: Paths of the CSV files to read, in plotting order.
        labels: Optional legend labels, one per file and in the same order.
            When ``None`` the file base names are used.

    Returns:
        A dict mapping each (cleaned, unique) label to the DataFrame produced
        by ``summarize_throughput``; insertion order follows *files*.

    Raises:
        ValueError: If *labels* is given and its length differs from *files*.
    """
    if labels is None:
        labels = [os.path.basename(path) for path in files]
    elif len(labels) != len(files):
        raise ValueError("The number of labels must match the number of input CSV files.")

    datasets = {}
    # Pair files and labels by position so that a file listed twice still
    # receives its own label.
    for path, raw_label in zip(files, labels):
        label = unique_label(clean_label(raw_label), datasets)
        datasets[label] = summarize_throughput(pd.read_csv(path))
    return datasets


def plot_datasets(datasets, x_label, legend_title, output_base):
    """Plot every dataset and save the figure as PNG and PDF.

    Args:
        datasets: Mapping of legend label to a DataFrame with the columns
            ``CPU threads per job``, ``mean`` and ``std``.
        x_label: Label of the horizontal axis.
        legend_title: Title of the legend; an empty string means no title.
        output_base: Output path without extension; ``.png`` and ``.pdf``
            are appended.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for label, stats in datasets.items():
        threads = stats[THREADS_COLUMN]
        # Dashed guide line through the means; its automatically chosen color
        # is reused so that markers and line of one dataset match.
        guide_line = ax.plot(threads, stats["mean"], '--', linewidth=1.5)[0]
        # "std" is NaN when a thread count has a single measurement; the
        # values are handed to matplotlib unchanged.
        ax.errorbar(threads, stats["mean"], yerr=stats["std"],
                    label=label, marker='o', markersize=8, capsize=5, capthick=2,
                    ls='none', color=guide_line.get_color())

    ax.set_xlabel(x_label)
    ax.set_ylabel("Average throughput (ev/s)")
    ax.set_title("Average throughput vs number of CPU threads per job")
    if legend_title:
        ax.legend(title=legend_title, title_fontsize='13', fontsize='11')
    else:
        ax.legend()

    # One major tick (and grid line) every 4 threads on the horizontal axis.
    ax.xaxis.set_major_locator(MultipleLocator(4))
    ax.grid(True, which='major', axis='both', alpha=0.7)
    ax.set_ylim(bottom=0)

    # Make the axes (plot area) white
    ax.set_facecolor('white')
    # Make only the figure background (outside axes) transparent
    fig.patch.set_facecolor('none')
    fig.patch.set_alpha(0)
    fig.tight_layout()

    # Save as PNG and PDF
    fig.savefig(f"{output_base}.png", dpi=600)
    fig.savefig(f"{output_base}.pdf")


def main(argv=None):
    """Run the script: parse arguments, aggregate the CSV files and plot them."""
    args = parse_args(argv)
    datasets = load_datasets(args.files, args.labels)
    plot_datasets(datasets, args.x_axis, args.title, args.output)


if __name__ == "__main__":
    main()