-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path: mat2bin.py
More file actions
72 lines (55 loc) · 2.26 KB
/
mat2bin.py
File metadata and controls
72 lines (55 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import pickle
import subprocess
import sys
from argparse import ArgumentParser

import pandas as pd
from scipy.io import loadmat
# -------------------------------
# Sort out arguments
# -------------------------------
help_str = "full path of the target .mat file"
parser = ArgumentParser()
parser.add_argument("-f", "--file", dest="filename", help=help_str, metavar="FILE", required=True)
filename = parser.parse_args().filename

# When given a directory, convert every .mat file inside it: all but the
# last are handled by child processes; the last one falls through and is
# converted by this process.
if os.path.isdir(filename):
    # BUG FIX: the original joined with os.altsep, which is None on POSIX
    # and made the concatenation raise TypeError; os.path.join is portable.
    files = [os.path.join(filename, file) for file in os.listdir(filename) if file.endswith(".mat")]
    if not files:
        # BUG FIX: an empty directory previously crashed with a bare
        # IndexError on files[-1]; fail with a clear message instead.
        parser.error("no .mat files found in directory: " + filename)
    for file in files[:-1]:
        # BUG FIX: use the running interpreter (sys.executable) and the
        # script's absolute path — os.path.basename(__file__) only worked
        # when the cwd happened to be the script's own directory.
        subprocess.run([sys.executable, os.path.abspath(__file__), "-f", file])
    filename = files[-1]

print('converting ---> ' + filename)
# -------------------------------
# Reformat data
# -------------------------------
def mat2dict(mat_content: dict, struct: str):
    """Flatten one MATLAB struct from a loadmat dict into {field: 1-D array}.

    ``mat_content[struct]`` is expected to be the (1, 1) array that
    scipy.io.loadmat produces for a MATLAB struct; each field value is
    reshaped to a flat 1-D array.
    """
    record = mat_content[struct][0][0]
    field_names = record.dtype.names
    flattened = {}
    for name, field_value in zip(field_names, record):
        flattened[name] = field_value.reshape(-1)
    return flattened
MAT = loadmat(filename)
# Strip scipy's bookkeeping entries so only the MATLAB variables remain.
MAT.pop("__header__")
MAT.pop("__version__")
MAT.pop("__globals__")

# One DataFrame per MATLAB struct, keyed by the struct's variable name.
dataframes_dict = {}
for k in MAT:  # iterating the dict yields keys; .keys() was redundant
    temp_dict = mat2dict(MAT, k)
    # Find the maximum size among non-empty arrays.
    # BUG FIX: default=0 keeps a struct whose fields are ALL empty from
    # raising ValueError ("max() arg is an empty sequence").
    max_size = max((len(arr) for arr in temp_dict.values() if len(arr) > 0), default=0)
    # Replace empty arrays with None columns of equal length so the
    # DataFrame constructor accepts them.
    for key, value in temp_dict.items():
        if len(value) == 0:
            temp_dict[key] = [None] * max_size
    dataframes_dict[k] = pd.DataFrame(temp_dict)

# First record of SESSION_DATA holds the session-level scalars; any
# *_TABLE struct is attached alongside them as a whole DataFrame.
TMP = dataframes_dict["SESSION_DATA"].to_dict('records')[0]
TABLES = {df_name: df for df_name, df in dataframes_dict.items() if df_name.endswith("_TABLE")}
SESSION_DATA = {**TMP, **TABLES}
# -------------------------------
# Sort out directory paths and output file names
# -------------------------------
# Strip the extension robustly; the original hard-coded [:-4], which
# silently mangles any name whose extension is not exactly 4 chars long.
rawfilename = os.path.splitext(os.path.basename(filename))[0]
dest = os.path.dirname(filename) + os.path.sep

# -------------------------------
# Save data
# -------------------------------
# BUG FIX: a context manager guarantees the pickle file is closed even if
# pickle.dump raises (the original leaked the handle on error).
with open(dest + rawfilename + '.pickle', 'wb') as file_id:
    pickle.dump({"SESSION_DATA": SESSION_DATA, "TRIAL_DATA": dataframes_dict["TRIAL_DATA"]}, file_id)
dataframes_dict["TIME_SERIES_DATA"].to_feather(dest + rawfilename + '.feather', compression='zstd')