-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcheck_data_format.py
More file actions
26 lines (22 loc) · 912 Bytes
/
check_data_format.py
File metadata and controls
26 lines (22 loc) · 912 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import json
import os
import argparse
def check_integrity(json_file):
    """Check that ``json_file`` can be read as UTF-8 and parsed as JSON.

    Prints "Finished checking" when the file parses, or an
    "Error processing ..." message describing the problem when it does not.

    Args:
        json_file: Path of the JSON file to check.

    Returns:
        True if the file was decoded and parsed successfully, False if it
        contains invalid UTF-8 or invalid JSON.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    try:
        with open(json_file, 'r', encoding='utf-8') as file:
            # Only parseability is checked; the parsed content is discarded.
            # NOTE: entries are not validated against the
            # {"instruction", "output"} key schema.
            json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Bytes that are not valid UTF-8 are a format problem too, so report
        # them like malformed JSON instead of letting a traceback escape.
        print(f"Error processing {json_file}: {e}")
        return False
    print("Finished checking")
    return True
if __name__ == "__main__":
    # Command-line entry point: the single positional argument is the path
    # of the JSON file handed to check_integrity.
    cli = argparse.ArgumentParser()
    cli.add_argument("data_path")
    check_integrity(cli.parse_args().data_path)