-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.sh
More file actions
118 lines (103 loc) · 4.27 KB
/
train.sh
File metadata and controls
118 lines (103 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash
# ==============================================================================
# TRAINING SCRIPT (Config-File-Only Approach)
# ==============================================================================
# This script invokes `main.py train` with config values loaded from YAML.
# All configuration is defined in:
# - assets/configs/tasks/train.yaml
# - assets/configs/data/train_data.yaml
# - assets/configs/envs/${PROBLEM}.yaml
# - assets/configs/models/${MODEL}.yaml
# ==============================================================================
set -e

# Handle memory fragmentation
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Colors for output (backslash escapes are expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Default to verbose mode
VERBOSE=true

# Configuration files
TASK_CONFIG="assets/configs/tasks/train.yaml"

#######################################
# Print the shell code that yaml_to_env.py generates for the given YAML
# files, dropping every line that contains "declare -A".
# Best-effort by design: the converter's stderr is discarded and the
# function always returns 0, so a missing tool or config prints nothing.
# Arguments: $@ - YAML config paths, forwarded to yaml_to_env.py
# Outputs:   shell code on stdout, meant to be passed to `eval`
#######################################
yaml_env_assignments() {
  uv run python logic/src/utils/configs/yaml_to_env.py "$@" 2>/dev/null \
    | grep -v "declare -A" || true
}

# Load Task Config first.
# The substitution is quoted so the generated code reaches `eval` verbatim:
# unquoted, it would be word-split and glob-expanded first, which joins all
# lines into one and mangles values containing repeated spaces or `*`.
# The eval itself runs at top level (not inside the helper) so every
# assignment lands in the script's global scope.
eval "$(yaml_env_assignments "$TASK_CONFIG")" 2>/dev/null || true

# Load environment config based on problem (PROBLEM may come from the
# calling environment or from the task config loaded above)
if [[ -n "${PROBLEM:-}" ]]; then
  ENV_CONFIG="assets/configs/envs/${PROBLEM}.yaml"
  if [[ -f "$ENV_CONFIG" ]]; then
    eval "$(yaml_env_assignments "$TASK_CONFIG" "$ENV_CONFIG")" 2>/dev/null || true
  fi
fi
# Parse CLI overrides
CLI_OVERRIDES=()

#######################################
# Parse the short command-line flags into global variables.
#   -q        quiet mode (VERBOSE=false)
#   -m NAME   model name        -> MODEL_NAME
#   -e N      number of epochs  -> EPOCHS and TRAIN_N_EPOCHS
#   -s N      graph size        -> SIZE and ENV_NUM_LOC
#   -n N      stored in N_DATA (not read anywhere else in this script)
# Globals:   VERBOSE, MODEL_NAME, EPOCHS, TRAIN_N_EPOCHS, SIZE, ENV_NUM_LOC,
#            N_DATA, OPTIND (written); RED, NC (read)
# Arguments: $@ - the script's command-line arguments
# Outputs:   error message on stderr for a bad flag
# Returns:   exits the shell with status 1 on an unknown flag or a flag
#            that is missing its argument
#######################################
parse_cli_flags() {
  local flag
  OPTIND=1
  # Leading ':' selects getopts' silent mode: OPTARG then holds the offending
  # option letter, so the messages below can actually name it.
  while getopts ":qm:e:s:n:" flag; do
    case "${flag}" in
      q) VERBOSE=false ;;
      m) MODEL_NAME="${OPTARG}" ;;
      e)
        # The default-resolution step prefers TRAIN_N_EPOCHS over EPOCHS,
        # so set both or a YAML-provided value would silently win over -e.
        EPOCHS="${OPTARG}"
        TRAIN_N_EPOCHS="${OPTARG}"
        ;;
      s)
        # Same precedence issue: ENV_NUM_LOC is preferred over SIZE.
        SIZE="${OPTARG}"
        ENV_NUM_LOC="${OPTARG}"
        ;;
      n) N_DATA="${OPTARG}" ;;
      :)
        echo -e "${RED}Option -${OPTARG} requires an argument${NC}" >&2
        exit 1
        ;;
      \?)
        echo -e "${RED}Invalid option: -${OPTARG}${NC}" >&2
        exit 1
        ;;
    esac
  done
}

parse_cli_flags "$@"
# Drop the parsed flags; whatever remains in "$@" is forwarded to main.py.
shift $((OPTIND - 1))
# Use loaded or default values.
# Where two names are consulted, the first non-empty one wins:
# ENV_NAME over PROBLEM, ENV_NUM_LOC over SIZE, TRAIN_N_EPOCHS over EPOCHS.
DATA_PROBLEM="${DATA_PROBLEM:-wcvrp}"
PROBLEM="${ENV_NAME:-${PROBLEM:-cwcvrp}}"
DATA_DISTRIBUTION="${DATA_DISTRIBUTION:-gamma1}"
AREA="${AREA:-riomaior}"
WASTE_TYPE="${WASTE_TYPE:-plastic}"
SIZE="${ENV_NUM_LOC:-${SIZE:-50}}"
EPOCHS="${TRAIN_N_EPOCHS:-${EPOCHS:-100}}"
B_SIZE="${TRAIN_BATCH_SIZE:-128}"
SEED="${SEED:-42}"
MODEL="${MODEL_NAME:-am}"
ENCODER="${MODEL_ENCODER_TYPE:-gat}"
TEMPORAL_HORIZON="${MODEL_TEMPORAL_HORIZON:-0}"

# Zero-based index of the last epoch, used in the checkpoint file name.
# `${EPOCHS-1}` would be the "default if unset" expansion and just yield
# EPOCHS again; real arithmetic is needed to get EPOCHS - 1.
# `10#` forces base 10 so a value such as "010" is not read as octal.
if [[ "$EPOCHS" =~ ^[0-9]+$ ]] && (( 10#$EPOCHS > 0 )); then
  LAST_EPOCH=$((10#$EPOCHS - 1))
else
  # Not a positive integer: keep the raw value rather than abort here.
  echo -e "${YELLOW}Warning: EPOCHS='${EPOCHS}' is not a positive integer; using it verbatim in the checkpoint name${NC}" >&2
  LAST_EPOCH="$EPOCHS"
fi

# TRAIN_LOAD_DATASET has no default here and expands to an empty string when unset.
LOAD_DATASET_PATH="data/datasets/${DATA_PROBLEM}${SIZE}_${DATA_DISTRIBUTION}_${TRAIN_LOAD_DATASET}${EPOCHS}_seed${SEED}.td"
FINAL_MODEL_PATH="assets/model_weights/${PROBLEM}${SIZE}_${AREA}_${WASTE_TYPE}/${DATA_DISTRIBUTION}/${MODEL}${ENCODER}${TEMPORAL_HORIZON}/epoch-${LAST_EPOCH}.pt"
# Startup banner. printf's %b expands the backslash escapes stored in the
# color variables, the same way `echo -e` does.
printf '%b\n' \
  "${BLUE}╔══════════════════════════════════════════╗${NC}" \
  "${BLUE}║ TRAINING MODULE (Hydra-based) ║${NC}" \
  "${BLUE}╚══════════════════════════════════════════╝${NC}"

# Summary of the resolved configuration: one "[CONFIG] label: value" row per
# setting, kept as two parallel arrays so the row format lives in one place.
config_labels=(
  "Problem"
  "Data Distribution"
  "Graph Size"
  "Area"
  "Epochs"
  "Model"
  "Encoder"
  "Temporal Horizon"
)
config_values=(
  "${PROBLEM}"
  "${DATA_DISTRIBUTION}"
  "${SIZE}"
  "${AREA}"
  "${EPOCHS}"
  "${MODEL}"
  "${ENCODER}"
  "${TEMPORAL_HORIZON}"
)
for row in "${!config_labels[@]}"; do
  printf '%b\n' "${CYAN}[CONFIG]${NC} ${config_labels[row]}: ${MAGENTA}${config_values[row]}${NC}"
done
printf '\n'
# Quiet mode: park the current stdout/stderr on fds 3 and 4, then send
# everything to /dev/null until they are restored below.
if [[ "$VERBOSE" == false ]]; then
  exec 3>&1 4>&2 1>/dev/null 2>&1
fi

# key=value overrides assembled from the resolved configuration
train_overrides=(
  "env.name='${PROBLEM}'"
  "env.num_loc=${SIZE}"
  "env.area='${AREA}'"
  "model.name='${MODEL}'"
  "model.encoder.type='${ENCODER}'"
  "train.n_epochs=${EPOCHS}"
  "train.batch_size=${B_SIZE}"
  "train.final_model_path='${FINAL_MODEL_PATH}'"
  "train.data_distribution='${DATA_DISTRIBUTION}'"
  "load_dataset='${LOAD_DATASET_PATH}'"
  "seed=${SEED}"
  "hpo.n_trials=0"
)

# Run the trainer; CLI_OVERRIDES and any remaining positional arguments are
# appended after the config-derived overrides.
uv run python main.py train "${train_overrides[@]}" "${CLI_OVERRIDES[@]}" "$@"

# Restore output: reattach stdout/stderr from fds 3/4 and close the spares
if [[ "$VERBOSE" == false ]]; then
  exec 1>&3 2>&4 3>&- 4>&-
fi

printf '\n'
printf '%b\n' "${GREEN}✓ [DONE] Training completed.${NC}"