-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot.py
More file actions
177 lines (137 loc) · 5.48 KB
/
plot.py
File metadata and controls
177 lines (137 loc) · 5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import argparse
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
# Metric columns of the results CSV; the plotting functions in this module
# iterate over this list and produce one figure (or set of figures) per entry.
response_vars = ["d_avg", "d_avg_star", "ReID", "IVA", "OOVA", "OA", "MTE"]
def plot_step_vs_metric(df, args):
    """Plot the per-step mean of every metric as a line chart.

    Rows of ``df`` are grouped by their ``step`` value and each column in
    ``response_vars`` is averaged within the group (i.e. across all rows
    sharing that step). One PNG per metric is written to
    ``<args.path parent>/plots/step_vs_metric/``, and the averaged series
    is echoed to stdout.

    Args:
        df: Results table with a ``step`` column and the metric columns.
        args: Parsed CLI namespace; only ``args.path`` is read.
    """
    target_dir = args.path.parent / "plots" / "step_vs_metric"
    target_dir.mkdir(parents=True, exist_ok=True)

    # One row per step, one column per metric.
    per_step_mean = df.groupby("step")[response_vars].mean()
    step_axis = per_step_mean.index

    for metric in response_vars:
        series = per_step_mean[metric]

        plt.figure()
        plt.plot(step_axis, series, marker="o")
        plt.title(f"Effect of TTT Step on {metric}")
        plt.xlabel("Step")
        plt.ylabel(metric)
        plt.grid(True)

        png_path = target_dir / f"step_vs_metric_{metric}.png"
        plt.savefig(png_path)
        plt.close()

        print(metric, series)
        print(f"Saved plot for {metric} to {png_path}")
def plot_baseline_vs_best(df, args):
    """Compare the step-0 average of each metric with the average per-sample best.

    For every metric in ``response_vars`` a two-bar chart is produced:
    "Baseline" is the mean of the metric over rows with ``step == 0``;
    "Best Step" is the mean, over samples, of each sample's best value
    across all of its rows (minimum for ``MTE``, maximum for every other
    metric). Charts are saved under
    ``<args.path parent>/plots/baseline_vs_best/``.

    Args:
        df: Results table with ``sample``, ``step`` and the metric columns.
        args: Parsed CLI namespace; only ``args.path`` is read.
    """
    target_dir = args.path.parent / "plots" / "baseline_vs_best"
    target_dir.mkdir(parents=True, exist_ok=True)

    step_zero = df.loc[df["step"] == 0].set_index("sample")
    by_sample = df.groupby("sample")

    for metric in response_vars:
        per_sample = by_sample[metric]
        # MTE is treated as lower-is-better; all other metrics as higher-is-better.
        per_sample_best = per_sample.min() if metric == "MTE" else per_sample.max()

        bar_heights = [step_zero[metric].mean(), per_sample_best.mean()]

        plt.figure()
        rects = plt.bar(["Baseline", "Best Step"], bar_heights)
        plt.title(f"Average {metric}: Baseline vs Best TTT Step")
        plt.ylabel(metric)
        plt.grid(axis="y", linestyle="--", alpha=0.7)

        # Annotate each bar with its numeric value, centred just above the top edge.
        for rect in rects:
            top = rect.get_height()
            centre_x = rect.get_x() + rect.get_width() / 2.0
            plt.text(centre_x, top, f"{top:.4f}", ha="center", va="bottom")

        png_path = target_dir / f"baseline_vs_best_{metric}.png"
        plt.savefig(png_path)
        plt.close()
        print(f"Saved plot for {metric} to {png_path}")
def plot_percent_diff_histograms(df, args):
    """Plot histograms of each metric's percent change relative to step 0.

    For every metric in ``response_vars`` and every non-zero step present
    in ``df``, the percent change ``(value - baseline) / baseline * 100``
    is computed per row, where ``baseline`` is the metric value of the same
    sample at ``step == 0``. One histogram per (metric, step) pair is saved
    to ``<args.path parent>/plots/percent_diff_histograms/<metric>/``.

    Rows are excluded from a histogram when the percent change is
    undefined: the baseline is zero, the sample has no step-0 row (baseline
    is NaN after the join), or either value is NaN. If no row remains for a
    (metric, step) pair, that histogram is skipped.

    Args:
        df: Results table with ``sample``, ``step`` and the metric columns.
        args: Parsed CLI namespace; only ``args.path`` is read.
    """
    output_dir = args.path.parent / "plots" / "percent_diff_histograms"
    output_dir.mkdir(exist_ok=True, parents=True)

    # Step-0 metric values indexed by sample, with columns renamed to
    # "<metric>_0" so they do not collide with the originals when joined.
    df_baseline = (
        df[df["step"] == 0].set_index("sample")[response_vars].add_suffix("_0")
    )

    # Attach each row's own-sample baseline; samples without a step-0 row
    # get NaN in the "<metric>_0" columns.
    df_merged = df.join(df_baseline, on="sample")

    steps = sorted(s for s in df["step"].unique() if s != 0)

    for var in response_vars:
        var_dir = output_dir / var
        var_dir.mkdir(exist_ok=True, parents=True)

        for step in steps:
            df_step = df_merged[df_merged["step"] == step]
            baseline_vals = df_step[f"{var}_0"]
            current_vals = df_step[var]

            # Keep only rows where the percent change is well defined.
            # NaN != 0 evaluates to True, so missing baselines must be
            # rejected explicitly; otherwise an all-NaN group reaches
            # plt.hist, which fails on a non-finite bin range.
            valid_mask = (
                baseline_vals.notna()
                & current_vals.notna()
                & (baseline_vals != 0)
            )
            if not valid_mask.any():
                continue

            pct_diff = (
                (current_vals[valid_mask] - baseline_vals[valid_mask])
                / baseline_vals[valid_mask]
                * 100
            )

            plt.figure()
            plt.hist(pct_diff, bins=20, edgecolor="black", alpha=0.7)
            plt.xlabel(f"% Change in {var} from Baseline")
            plt.ylabel("Count")
            plt.title(f"Distribution of % Change in {var} (Step {step})")
            plt.grid(axis="y", linestyle="--", alpha=0.7)
            # Reference line at "no change".
            plt.axvline(0, color="red", linestyle="--", linewidth=1)

            output_path = var_dir / f"hist_{var}_step_{step}.png"
            plt.savefig(output_path)
            plt.close()
            print(f"Saved histogram for {var} step {step} to {output_path}")
def plot_scatter_sample_vs_mte(df, args):
    """Scatter MTE against sample for steps 0 and 4 on a single figure.

    Only those of steps 0 and 4 that actually occur in ``df["step"]`` are
    drawn, each as its own colour-coded series. The figure is saved as
    ``<args.path parent>/plots/sample_vs_mte/sample_vs_mte_scatter.png``.

    Args:
        df: Results table with ``sample``, ``step`` and ``MTE`` columns.
        args: Parsed CLI namespace; only ``args.path`` is read.
    """
    target_dir = args.path.parent / "plots" / "sample_vs_mte"
    target_dir.mkdir(parents=True, exist_ok=True)

    palette = ("tab:blue", "tab:orange", "tab:green", "tab:red", "tab:purple")

    # Restrict to the two steps of interest, skipping any absent from the data.
    steps_in_data = set(df["step"].unique())
    steps_to_draw = [candidate for candidate in (0, 4) if candidate in steps_in_data]

    plt.figure()
    for position, step in enumerate(steps_to_draw):
        rows = df[df["step"] == step]
        plt.scatter(
            rows["sample"],
            rows["MTE"],
            s=10,
            alpha=0.7,
            label=f"Step {step}",
            color=palette[position % len(palette)],
        )

    plt.title("Sample vs MTE across steps")
    plt.xlabel("Sample")
    plt.ylabel("MTE")
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.legend(title="TTT Step")

    png_path = target_dir / "sample_vs_mte_scatter.png"
    plt.savefig(png_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Saved scatter plot to {png_path}")
def main():
    """Parse the command line, load the results CSV and render every plot.

    The ``--path`` option selects the CSV file (default
    ``dataset/results/tuned.csv``); all plots are written beneath a
    ``plots`` directory next to that file by the individual plot functions.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--path", type=Path, default="dataset/results/tuned.csv")
    args = cli.parse_args()

    # columns: sample,step,d_avg,d_avg_star,ReID,IVA,OOVA,OA,MTE
    results = pd.read_csv(args.path)

    # Run each plot family in turn on the same table.
    plotters = (
        plot_step_vs_metric,
        plot_baseline_vs_best,
        plot_percent_diff_histograms,
        plot_scatter_sample_vs_mte,
    )
    for render in plotters:
        render(results, args)
# Run the plotting pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()