-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_chart_image.py
More file actions
121 lines (98 loc) · 5.2 KB
/
generate_chart_image.py
File metadata and controls
121 lines (98 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import requests
import json
import random
import os
from pathlib import Path
from tqdm import tqdm
import subprocess
from subprocess import STDOUT, check_output
from joblib import Parallel, delayed
import threading
def save_error_outputs(error_message, output_dir_path):
    """Append an error message to a log file.

    Args:
        error_message: Text to append. It is written verbatim; no trailing
            newline is added.
        output_dir_path: Path of the log *file* to append to (despite the
            parameter name, this is not a directory). The file is created
            if it does not exist; its parent directory must already exist.
    """
    # Captured subprocess output can contain non-ASCII characters; pin the
    # encoding so the write does not depend on the platform's locale default.
    with open(output_dir_path, 'a', encoding='utf-8') as f:
        f.write(error_message)
def run_command(cmd, error_count, output_dir_name, chart_type, image_name):
    """Run a command and, if it fails, log why to a per-image text file.

    The command is executed with stderr merged into stdout and a 10 second
    timeout. Any exception (non-zero exit status, timeout, failure to
    launch, ...) is treated as a failure: it is recorded in ``error_count``
    and a log is appended to
    ``<output_dir_name>/<chart_type>/<image_name with '.png' -> '.txt'>``.

    Args:
        cmd: Argument list passed to ``subprocess.check_output``.
        error_count: List used as a shared failure tally; ``1`` is appended
            for every failed command.
        output_dir_name: Root directory for error logs.
        chart_type: Sub-directory of ``output_dir_name`` for this chart type;
            created if missing.
        image_name: Name of the image the command should produce; used to
            derive the log file name.

    Returns:
        True if the command failed, False if it completed successfully.
    """
    log_dir = Path(os.path.join(output_dir_name, chart_type))
    log_dir.mkdir(parents=True, exist_ok=True)

    try:
        check_output(cmd, stderr=STDOUT, timeout=10)
    except Exception as e:
        # Deliberately broad: one failing script must not stop the batch.
        error_count.append(1)
        log_path = os.path.join(output_dir_name, chart_type, image_name.replace('.png', '.txt'))

        output = getattr(e, 'output', None)
        if isinstance(output, bytes):
            # Child scripts may print arbitrary bytes; never let a decoding
            # error escape from the error handler itself.
            message = output.decode('utf-8', errors='replace')
        elif output is not None:
            message = str(output)
        else:
            message = ''
        # With no captured output, fall back to the exception text so the
        # log still states the cause (exit status, timeout, ...).
        if not message:
            message = str(e)

        save_error_outputs(message, log_path)
        return True
    return False
def generate_chart_image(chart_type, exp_path, chart_img_output_dir, error_output_dir, sample_json_size=None, sample_script_size=None):
    """Render every (JSON data file, plotting script) pair of one chart type.

    Data files are read from ``<exp_path>/<chart_type>/json`` and scripts
    from ``<exp_path>/<chart_type>/code``. Each pair is executed as
    ``python3 <script> <json> <image>`` via ``run_command``; images go to
    ``<chart_img_output_dir>/<chart_type>/chart`` and are named
    ``<json stem>_<script stem>.png``. Existing images are not regenerated.
    A summary line is appended to ``<chart_img_output_dir>/error_rate.txt``.

    Args:
        chart_type: Name of the chart-type sub-directory to process.
        exp_path: Root directory holding the per-chart-type inputs.
        chart_img_output_dir: Root directory for generated images and the
            ``error_rate.txt`` summary.
        error_output_dir: Root directory for per-image error logs.
        sample_json_size: If given, process only a random sample of this
            many JSON files (capped at the number available).
        sample_script_size: If given, use only a random sample of this many
            scripts (capped at the number available).
    """
    error_count = []
    # Starts at 1 (not 0) so the error-rate divisions below never divide by
    # zero; it is also incremented for pairs that end up being skipped.
    total_count = 1

    # Directory that receives the rendered chart images.
    chart_output_dir_path = os.path.join(chart_img_output_dir, chart_type, 'chart')
    Path(chart_output_dir_path).mkdir(parents=True, exist_ok=True)

    json_dir = os.path.join(exp_path, chart_type, 'json')
    code_dir = os.path.join(exp_path, chart_type, 'code')

    json_files = sorted(os.listdir(json_dir))
    if sample_json_size is not None:
        # Cap the sample size: random.sample raises ValueError when asked
        # for more items than exist.
        json_files = random.sample(json_files, k=min(sample_json_size, len(json_files)))

    python_scripts = sorted(os.listdir(code_dir))
    if sample_script_size is not None:
        python_scripts = random.sample(python_scripts, k=min(sample_script_size, len(python_scripts)))

    # A script is skipped once its failures per already-iterated JSON file
    # exceed this ratio. NOTE: no minimum number of attempts is required,
    # so a script that fails on the very first JSON file is skipped for
    # all remaining files.
    threshold = 0.5
    fail_dictionary = dict.fromkeys(python_scripts, 0)
    json_count = 0  # number of JSON files fully iterated so far

    pbar = tqdm(json_files, leave=False)
    for json_file in pbar:
        pbar.set_description("Processing {}. Error rate: {} \n".format(chart_type, float(sum(error_count))/total_count))
        json_path = os.path.join(json_dir, json_file)

        for script_name in tqdm(python_scripts):
            total_count += 1

            # The 0.001 keeps the ratio defined while json_count is still 0.
            if fail_dictionary[script_name] / (json_count + 0.001) > threshold:
                continue

            image_name = json_file.split('.json')[0] + '_{}.png'.format(script_name.split('.')[0])
            image_save_path = os.path.join(chart_output_dir_path, image_name)
            # Resume support: do not regenerate images that already exist.
            if os.path.exists(image_save_path):
                continue

            python_script_path = os.path.join(code_dir, script_name)
            command = ['python3', python_script_path, json_path, image_save_path]
            if run_command(command, error_count, error_output_dir, chart_type, image_name):
                fail_dictionary[script_name] += 1

        json_count += 1

    error_rate = sum(error_count) / total_count
    print('Error rate: ', error_rate)
    with open(os.path.join(chart_img_output_dir, 'error_rate.txt'), 'a', encoding='utf-8') as f:
        f.write('Chart type: {}. Error rate: {}. \n'.format(chart_type, error_rate))
def main():
    """Generate chart images for every chart type listed in the metadata.

    Reads the core and advanced chart-type names from ``data/metadata.json``
    (spaces in names are replaced by underscores to form directory names),
    runs ``generate_chart_image`` for each one, and finally prints how many
    entries each chart type's output directory contains.
    """
    exp_path = 'data/final'
    # Images are written alongside the inputs
    # (alternative used previously: 'exp/full_gpt4_v1/outputs_chart_examples').
    chart_img_output_dir = exp_path
    error_output_dir = 'data/rawdata/merged_python_error_logs'
    metadata_path = 'data/metadata.json'
    # None means "process everything" (no random sub-sampling).
    sample_json_size = None
    sample_script_size = None

    os.makedirs(error_output_dir, exist_ok=True)

    # The metadata file lists the chart types to process.
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    core_type_list = [x.replace(' ', '_') for x in metadata['core_chart_type']]
    advanced_type_list = [x.replace(' ', '_') for x in metadata['advanced_chart_type']]
    all_type_list = core_type_list + advanced_type_list

    # n_jobs=1 runs the chart types one after another; raise it to process
    # several chart types concurrently.
    Parallel(n_jobs=1)(
        delayed(generate_chart_image)(
            chart_type, exp_path, chart_img_output_dir, error_output_dir,
            sample_json_size, sample_script_size,
        )
        for chart_type in all_type_list
    )

    # Report the number of entries in each chart type's image directory.
    for chart_type in all_type_list:
        chart_output_dir_path = os.path.join(chart_img_output_dir, chart_type, 'chart')
        print(chart_type, len(os.listdir(chart_output_dir_path)))


if __name__ == '__main__':
    main()