Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions scripts/configs/grid_search_scaling_sasrec_bce_amazons.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
set -euo pipefail

# # Optionally pin the physical GPUs visible to the run (comma-separated ids).
# export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-4,5,6,7}"

# GPU groups used by grid_search: one array entry per group, each entry being
# a comma-separated list of GPU ids.
# Example: GPU_GROUPS=("4,5" "6,7") to allocate two GPUs per trial.
GPU_GROUPS=(
  "0,1,2,3,4,5,6,7"
)

# Abort early when no GPU group is configured.
(( ${#GPU_GROUPS[@]} > 0 )) || {
  echo "GPU_GROUPS must contain at least one entry" >&2
  exit 1
}

# Module executed for every trial.
MAIN_MODULE="genrec.main_seqrec"

# All paths are anchored at the directory containing this script; configs are
# expected in its ./configs/ subdirectory.
BASE_DIR="$(dirname "$(realpath "$0")")"
CONFIG_DIR="$BASE_DIR/configs"

# Template config shared by every search.
TEMPLATE_PATH="$CONFIG_DIR/seqrec/template.yaml"

# Per-scale search configs are addressed as "${SEARCH_PREFIX}_<scale>.yaml".
SCALING_SCALES=(
  "s"
  "b"
  "l"
  "xl"
  "xxl"
)
SEARCH_PREFIX="$CONFIG_DIR/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon"

# Root directory that receives the experiment outputs.
OUTPUT_ROOT="$BASE_DIR/../outputs/seqrec/scaling/sasrec_bce_amazons"

# Optional dryrun/rerun controls taken from the environment; each one falls
# back to the empty string when unset.
: "${DRYRUN_EXP_ID:=}" "${RERUN_EXP_ID:=}"

# DRYRUN_EXP_ID and RERUN_EXP_ID are mutually exclusive: at least one of them
# has to be empty.
[[ -z "${DRYRUN_EXP_ID}" || -z "${RERUN_EXP_ID}" ]] || {
  echo "Set either DRYRUN_EXP_ID or RERUN_EXP_ID, not both." >&2
  exit 1
}

# Every GPU group becomes its own --gpu_groups argument.
EXTRA_ARGS=()
for grp in "${GPU_GROUPS[@]}"; do
  EXTRA_ARGS+=("--gpu_groups" "${grp}")
done

# In dryrun/rerun mode the search config is the search.yaml located under that
# experiment's directory in OUTPUT_ROOT, and the matching flag is appended
# after the GPU group arguments.
if [[ -n "${DRYRUN_EXP_ID}" ]]; then
  SEARCH_PATH="${OUTPUT_ROOT}/${DRYRUN_EXP_ID}/search.yaml"
  EXTRA_ARGS+=("--dryrun" "${DRYRUN_EXP_ID}")
elif [[ -n "${RERUN_EXP_ID}" ]]; then
  SEARCH_PATH="${OUTPUT_ROOT}/${RERUN_EXP_ID}/search.yaml"
  EXTRA_ARGS+=("--rerun" "${RERUN_EXP_ID}")
fi

#######################################
# Runs scripts/grid_search.py once, through `poetry run python`, for a single
# search config. The script path is relative, so it is resolved against the
# current working directory.
# Globals:   TEMPLATE_PATH, MAIN_MODULE, OUTPUT_ROOT, EXTRA_ARGS (all read)
# Arguments: $1 - path of the search config passed as --search
# Returns:   exit status of the poetry invocation
#######################################
run_grid_search() {
  local search_config="$1"
  local -a cmd=(
    poetry run python scripts/grid_search.py
    --template "${TEMPLATE_PATH}"
    --search "${search_config}"
    --main "${MAIN_MODULE}"
    --output_root "${OUTPUT_ROOT}"
    "${EXTRA_ARGS[@]}"
  )
  "${cmd[@]}"
}

# Without a dryrun/rerun id, sweep the search config of every scale in order;
# otherwise run a single search against SEARCH_PATH.
if [[ -z "${DRYRUN_EXP_ID}" && -z "${RERUN_EXP_ID}" ]]; then
  for scale in "${SCALING_SCALES[@]}"; do
    run_grid_search "${SEARCH_PREFIX}_${scale}.yaml"
  done
else
  run_grid_search "${SEARCH_PATH}"
fi
78 changes: 78 additions & 0 deletions scripts/configs/grid_search_scaling_sasrec_bce_movielens.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bash
set -euo pipefail

# # Optionally pin the physical GPUs visible to the run (comma-separated ids).
# export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-4,5,6,7}"

# GPU groups used by grid_search: one array entry per group, each entry being
# a comma-separated list of GPU ids.
# Example: GPU_GROUPS=("4,5" "6,7") to allocate two GPUs per trial.
GPU_GROUPS=(
  "0,1,2,3,4,5,6,7"
)

# Abort early when no GPU group is configured.
(( ${#GPU_GROUPS[@]} > 0 )) || {
  echo "GPU_GROUPS must contain at least one entry" >&2
  exit 1
}

# Configs are expected in the ./configs/ subdirectory next to this script;
# every path below is anchored at the script's own directory.
BASE_DIR=$(dirname "$(realpath "$0")")
CONFIG_DIR="${BASE_DIR}/configs"

# Set up template config path.
TEMPLATE_PATH="${CONFIG_DIR}/seqrec/template.yaml"

# Set up search config path options. The search config of each scale is
# addressed as "${SEARCH_PREFIX}_<scale>.yaml".
# SCALING_SCALES=("xxs" "xs" "s" "b" "l" "xl" "xxl")
SCALING_SCALES=("s" "b")
# CONFIG_DIR already ends in /configs, so the prefix must not repeat that
# component (it would otherwise point into configs/configs/).
SEARCH_PREFIX="${CONFIG_DIR}/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens"

# Set up output directory.
OUTPUT_ROOT="${BASE_DIR}/../outputs/seqrec/scaling/sasrec_bce_movielens"

# Set up main module to run.
MAIN_MODULE="genrec.main_seqrec"

# Optional dryrun/rerun controls (set to desired exp_id or leave empty); both
# default to the empty string when not provided by the environment.
DRYRUN_EXP_ID="${DRYRUN_EXP_ID:-}"
RERUN_EXP_ID="${RERUN_EXP_ID:-}"

# DRYRUN_EXP_ID and RERUN_EXP_ID are mutually exclusive: at least one of them
# has to be empty.
[[ -z "${DRYRUN_EXP_ID}" || -z "${RERUN_EXP_ID}" ]] || {
  echo "Set either DRYRUN_EXP_ID or RERUN_EXP_ID, not both." >&2
  exit 1
}

# Every GPU group becomes its own --gpu_groups argument.
EXTRA_ARGS=()
for grp in "${GPU_GROUPS[@]}"; do
  EXTRA_ARGS+=("--gpu_groups" "${grp}")
done

# In dryrun/rerun mode the search config is the search.yaml located under that
# experiment's directory in OUTPUT_ROOT, and the matching flag is appended
# after the GPU group arguments.
if [[ -n "${DRYRUN_EXP_ID}" ]]; then
  SEARCH_PATH="${OUTPUT_ROOT}/${DRYRUN_EXP_ID}/search.yaml"
  EXTRA_ARGS+=("--dryrun" "${DRYRUN_EXP_ID}")
elif [[ -n "${RERUN_EXP_ID}" ]]; then
  SEARCH_PATH="${OUTPUT_ROOT}/${RERUN_EXP_ID}/search.yaml"
  EXTRA_ARGS+=("--rerun" "${RERUN_EXP_ID}")
fi

#######################################
# Runs scripts/grid_search.py once, through `poetry run python`, for a single
# search config. The script path is relative, so it is resolved against the
# current working directory.
# Globals:   TEMPLATE_PATH, MAIN_MODULE, OUTPUT_ROOT, EXTRA_ARGS (all read)
# Arguments: $1 - path of the search config passed as --search
# Returns:   exit status of the poetry invocation
#######################################
run_grid_search() {
  local search_config="$1"
  local -a cmd=(
    poetry run python scripts/grid_search.py
    --template "${TEMPLATE_PATH}"
    --search "${search_config}"
    --main "${MAIN_MODULE}"
    --output_root "${OUTPUT_ROOT}"
    "${EXTRA_ARGS[@]}"
  )
  "${cmd[@]}"
}

# Without a dryrun/rerun id, sweep the search config of every scale in order;
# otherwise run a single search against SEARCH_PATH.
if [[ -z "${DRYRUN_EXP_ID}" && -z "${RERUN_EXP_ID}" ]]; then
  for scale in "${SCALING_SCALES[@]}"; do
    run_grid_search "${SEARCH_PREFIX}_${scale}.yaml"
  done
else
  run_grid_search "${SEARCH_PATH}"
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SASRec (Base) + BCE on Amazon-2018

# global settings
pretrained_ckpt: null # optional path to a pretrained checkpoint to load
test_eval: false # whether to run evaluation on the test set instead of validation set
save_predictions: false # whether to save the predictions on the test set

# dataset settings
dataset:
type: seqrec

# specific path to interaction data file for Amazon-2018 dataset.
search__interaction_data_path:
- ./data/amazon2018/proc/user2item.pkl

max_seq_length: 100

# collator settings
collator:
type: seqrec

num_negative_samples: 16
negative_sampling_strategy: uniform

# model settings
model:
type: sasrec

config:
# base model parameters
search__hidden_size: [512]
search__num_attention_heads: [8]
search__num_hidden_layers: [6]

# subclass model parameters
search__attention_dropout: [0.1]

# trainer settings
trainer:
type: bce

config:
# training arguments - Run control
do_train: true
do_eval: true
do_predict: true

# training arguments - Optimization & schedule
search__num_train_epochs: [100]
per_device_train_batch_size: 256
per_device_eval_batch_size: 512
gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
search__learning_rate: [1.0e-3]
search__weight_decay: [0.1]
lr_scheduler_type: cosine
warmup_ratio: 0.05

# training arguments - Evaluation & checkpointing
metric_for_best_model: ndcg@5 # should exist in the metrics

# training arguments - Parallelism & precision
dataloader_num_workers: 4
gradient_checkpointing: true
bf16: true
tf32: false

# base trainer parameters
norm_embeddings: false # whether to L2-normalize user and item embeddings
eval_interval: 5 # run metrics every 5 epochs
metrics:
- ["hr", {}]
- ["ndcg", {}]
- ["popularity", {p: [0.1, 0.2]}]
- ["unpopularity", {p: [0.2, 0.4]}]
model_loss_weight: 1.0
top_k: [1, 5, 10]

Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SASRec (Large) + BCE on Amazon-2018

# global settings
pretrained_ckpt: null # optional path to a pretrained checkpoint to load
test_eval: false # whether to run evaluation on the test set instead of validation set
save_predictions: false # whether to save the predictions on the test set

# dataset settings
dataset:
type: seqrec

# specific path to interaction data file for Amazon-2018 dataset.
search__interaction_data_path:
- ./data/amazon2018/proc/user2item.pkl

max_seq_length: 100

# collator settings
collator:
type: seqrec

num_negative_samples: 16
negative_sampling_strategy: uniform

# model settings
model:
type: sasrec

config:
# base model parameters
search__hidden_size: [768]
search__num_attention_heads: [12]
search__num_hidden_layers: [8]

# subclass model parameters
search__attention_dropout: [0.1]

# trainer settings
trainer:
type: bce

config:
# training arguments - Run control
do_train: true
do_eval: true
do_predict: true

# training arguments - Optimization & schedule
search__num_train_epochs: [100]
per_device_train_batch_size: 256
per_device_eval_batch_size: 512
gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
search__learning_rate: [5.0e-4]
search__weight_decay: [0.1]
lr_scheduler_type: cosine
warmup_ratio: 0.05

# training arguments - Evaluation & checkpointing
metric_for_best_model: ndcg@5 # should exist in the metrics

# training arguments - Parallelism & precision
dataloader_num_workers: 4
gradient_checkpointing: true
bf16: true
tf32: false

# base trainer parameters
norm_embeddings: false # whether to L2-normalize user and item embeddings
eval_interval: 5 # run metrics every 5 epochs
metrics:
- ["hr", {}]
- ["ndcg", {}]
- ["popularity", {p: [0.1, 0.2]}]
- ["unpopularity", {p: [0.2, 0.4]}]
model_loss_weight: 1.0
top_k: [1, 5, 10]

Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SASRec (Small) + BCE on Amazon-2018

# global settings
pretrained_ckpt: null # optional path to a pretrained checkpoint to load
test_eval: false # whether to run evaluation on the test set instead of validation set
save_predictions: false # whether to save the predictions on the test set

# dataset settings
dataset:
type: seqrec

# specific path to interaction data file for Amazon-2018 dataset.
search__interaction_data_path:
- ./data/amazon2018/proc/user2item.pkl

max_seq_length: 100

# collator settings
collator:
type: seqrec

num_negative_samples: 16
negative_sampling_strategy: uniform

# model settings
model:
type: sasrec

config:
# base model parameters
search__hidden_size: [256]
search__num_attention_heads: [4]
search__num_hidden_layers: [4]

# subclass model parameters
search__attention_dropout: [0.1]

# trainer settings
trainer:
type: bce

config:
# training arguments - Run control
do_train: true
do_eval: true
do_predict: true

# training arguments - Optimization & schedule
search__num_train_epochs: [100]
per_device_train_batch_size: 256
per_device_eval_batch_size: 512
gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
search__learning_rate: [1.0e-3]
search__weight_decay: [0.1]
lr_scheduler_type: cosine
warmup_ratio: 0.05

# training arguments - Evaluation & checkpointing
metric_for_best_model: ndcg@5 # should exist in the metrics

# training arguments - Parallelism & precision
dataloader_num_workers: 4
gradient_checkpointing: true
bf16: true
tf32: false

# base trainer parameters
norm_embeddings: false # whether to L2-normalize user and item embeddings
eval_interval: 5 # run metrics every 5 epochs
metrics:
- ["hr", {}]
- ["ndcg", {}]
- ["popularity", {p: [0.1, 0.2]}]
- ["unpopularity", {p: [0.2, 0.4]}]
model_loss_weight: 1.0
top_k: [1, 5, 10]

Loading