diff --git a/scripts/configs/grid_search_scaling_sasrec_bce_amazons.sh b/scripts/configs/grid_search_scaling_sasrec_bce_amazons.sh new file mode 100644 index 0000000..cab2cee --- /dev/null +++ b/scripts/configs/grid_search_scaling_sasrec_bce_amazons.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail + +# # Configure visible GPUs for the run (comma-separated string of physical GPU ids). +# export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-4,5,6,7}" + +# Define GPU groups used by grid_search. Each entry is a comma-separated list of GPU ids. +# Example: GPU_GROUPS=("4,5" "6,7") to allocate two GPUs per trial. +GPU_GROUPS=("0,1,2,3,4,5,6,7") + +if [[ ${#GPU_GROUPS[@]} -eq 0 ]]; then + echo "GPU_GROUPS must contain at least one entry" >&2 + exit 1 +fi + +# Configs are expected in the configs/ directory next to this script (resolved via realpath of $0, not the caller's working directory). +BASE_DIR=$(dirname "$(realpath "$0")") +CONFIG_DIR="${BASE_DIR}/configs" + +# Set up template config path. +TEMPLATE_PATH="${CONFIG_DIR}/seqrec/template.yaml" + +# One search config per scale, named ${SEARCH_PREFIX}_<scale>.yaml. +SCALING_SCALES=("s" "b" "l" "xl" "xxl") +SEARCH_PREFIX="${CONFIG_DIR}/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon" + +# Set up output directory. +OUTPUT_ROOT="${BASE_DIR}/../outputs/seqrec/scaling/sasrec_bce_amazons" + +# Set up main module to run. +MAIN_MODULE="genrec.main_seqrec" + +# Optional dryrun/rerun controls taken from the environment (set one to the desired exp_id or leave both empty). +DRYRUN_EXP_ID="${DRYRUN_EXP_ID:-}" +RERUN_EXP_ID="${RERUN_EXP_ID:-}" + +if [[ -n "$DRYRUN_EXP_ID" && -n "$RERUN_EXP_ID" ]]; then + echo "Set either DRYRUN_EXP_ID or RERUN_EXP_ID, not both." >&2 + exit 1 +fi + +# If dryrun or rerun is specified, use ${OUTPUT_ROOT}/<exp_id>/search.yaml as the search config. 
+if [[ -n "$DRYRUN_EXP_ID" ]]; then + SEARCH_PATH="${OUTPUT_ROOT}/${DRYRUN_EXP_ID}/search.yaml" +elif [[ -n "$RERUN_EXP_ID" ]]; then + SEARCH_PATH="${OUTPUT_ROOT}/${RERUN_EXP_ID}/search.yaml" +fi + +EXTRA_ARGS=() +for grp in "${GPU_GROUPS[@]}"; do + EXTRA_ARGS+=(--gpu_groups "$grp") +done + +if [[ -n "$DRYRUN_EXP_ID" ]]; then + EXTRA_ARGS+=(--dryrun "$DRYRUN_EXP_ID") +elif [[ -n "$RERUN_EXP_ID" ]]; then + EXTRA_ARGS+=(--rerun "$RERUN_EXP_ID") +fi + +run_grid_search() { + local search_path="$1" + # Runs scripts/grid_search.py once for the search config given as $1; the script path is relative to the current working directory. + poetry run python scripts/grid_search.py \ + --template "${TEMPLATE_PATH}" \ + --search "${search_path}" \ + --main "${MAIN_MODULE}" \ + --output_root "${OUTPUT_ROOT}" \ + "${EXTRA_ARGS[@]}" +} + +if [[ -n "$DRYRUN_EXP_ID" || -n "$RERUN_EXP_ID" ]]; then + run_grid_search "${SEARCH_PATH}" +else + for scale in "${SCALING_SCALES[@]}"; do + run_grid_search "${SEARCH_PREFIX}_${scale}.yaml" + done +fi diff --git a/scripts/configs/grid_search_scaling_sasrec_bce_movielens.sh b/scripts/configs/grid_search_scaling_sasrec_bce_movielens.sh new file mode 100644 index 0000000..73098cb --- /dev/null +++ b/scripts/configs/grid_search_scaling_sasrec_bce_movielens.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +# # Configure visible GPUs for the run (comma-separated string of physical GPU ids). +# export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-4,5,6,7}" + +# Define GPU groups used by grid_search. Each entry is a comma-separated list of GPU ids. +# Example: GPU_GROUPS=("4,5" "6,7") to allocate two GPUs per trial. +GPU_GROUPS=("0,1,2,3,4,5,6,7") + +if [[ ${#GPU_GROUPS[@]} -eq 0 ]]; then + echo "GPU_GROUPS must contain at least one entry" >&2 + exit 1 +fi + +# Configs are expected in the configs/ directory next to this script (resolved via realpath of $0, not the caller's working directory). +BASE_DIR=$(dirname "$(realpath "$0")") +CONFIG_DIR="${BASE_DIR}/configs" + +# Set up template config path. +TEMPLATE_PATH="${CONFIG_DIR}/seqrec/template.yaml" + +# One search config per scale, named ${SEARCH_PREFIX}_<scale>.yaml. 
+# SCALING_SCALES=("xxs" "xs" "s" "b" "l" "xl" "xxl") +SCALING_SCALES=("s" "b") +SEARCH_PREFIX="${CONFIG_DIR}/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens" + +# Set up output directory. +OUTPUT_ROOT="${BASE_DIR}/../outputs/seqrec/scaling/sasrec_bce_movielens" + +# Set up main module to run. +MAIN_MODULE="genrec.main_seqrec" + +# Optional dryrun/rerun controls taken from the environment (set one to the desired exp_id or leave both empty). +DRYRUN_EXP_ID="${DRYRUN_EXP_ID:-}" +RERUN_EXP_ID="${RERUN_EXP_ID:-}" + +if [[ -n "$DRYRUN_EXP_ID" && -n "$RERUN_EXP_ID" ]]; then + echo "Set either DRYRUN_EXP_ID or RERUN_EXP_ID, not both." >&2 + exit 1 +fi + +# If dryrun or rerun is specified, use ${OUTPUT_ROOT}/<exp_id>/search.yaml as the search config. +if [[ -n "$DRYRUN_EXP_ID" ]]; then + SEARCH_PATH="${OUTPUT_ROOT}/${DRYRUN_EXP_ID}/search.yaml" +elif [[ -n "$RERUN_EXP_ID" ]]; then + SEARCH_PATH="${OUTPUT_ROOT}/${RERUN_EXP_ID}/search.yaml" +fi + +EXTRA_ARGS=() +for grp in "${GPU_GROUPS[@]}"; do + EXTRA_ARGS+=(--gpu_groups "$grp") +done + +if [[ -n "$DRYRUN_EXP_ID" ]]; then + EXTRA_ARGS+=(--dryrun "$DRYRUN_EXP_ID") +elif [[ -n "$RERUN_EXP_ID" ]]; then + EXTRA_ARGS+=(--rerun "$RERUN_EXP_ID") +fi + +run_grid_search() { + local search_path="$1" + # Runs scripts/grid_search.py once for the search config given as $1; the script path is relative to the current working directory. 
+ poetry run python scripts/grid_search.py \ + --template "${TEMPLATE_PATH}" \ + --search "${search_path}" \ + --main "${MAIN_MODULE}" \ + --output_root "${OUTPUT_ROOT}" \ + "${EXTRA_ARGS[@]}" +} + +if [[ -n "$DRYRUN_EXP_ID" || -n "$RERUN_EXP_ID" ]]; then + run_grid_search "${SEARCH_PATH}" +else + for scale in "${SCALING_SCALES[@]}"; do + run_grid_search "${SEARCH_PREFIX}_${scale}.yaml" + done +fi diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_b.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_b.yaml new file mode 100644 index 0000000..4fdcdeb --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_b.yaml @@ -0,0 +1,77 @@ +# SASRec (Base) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. 
+ search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [512] + search__num_attention_heads: [8] + search__num_hidden_layers: [6] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_l.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_l.yaml new file mode 100644 index 0000000..e32e018 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_l.yaml @@ -0,0 +1,77 @@ +# SASRec (Large) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained 
checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. + search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [768] + search__num_attention_heads: [12] + search__num_hidden_layers: [8] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [5.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_s.yaml 
b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_s.yaml new file mode 100644 index 0000000..a92bfe3 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_s.yaml @@ -0,0 +1,77 @@ +# SASRec (Small) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. + search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [256] + search__num_attention_heads: [4] + search__num_hidden_layers: [4] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + 
eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xl.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xl.yaml new file mode 100644 index 0000000..2bccc65 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xl.yaml @@ -0,0 +1,77 @@ +# SASRec (XL) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. + search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [1024] + search__num_attention_heads: [16] + search__num_hidden_layers: [10] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + 
metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xs.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xs.yaml new file mode 100644 index 0000000..1d79e03 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xs.yaml @@ -0,0 +1,77 @@ +# SASRec (XS) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. 
+ search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [128] + search__num_attention_heads: [2] + search__num_hidden_layers: [2] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxl.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxl.yaml new file mode 100644 index 0000000..5661ebc --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxl.yaml @@ -0,0 +1,77 @@ +# SASRec (XXL) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained 
checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. + search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [1536] + search__num_attention_heads: [24] + search__num_hidden_layers: [12] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxs.yaml 
b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxs.yaml new file mode 100644 index 0000000..691c9af --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_amazons/sasrec_bce_amazon_xxs.yaml @@ -0,0 +1,77 @@ +# SASRec (XXS) + BCE on Amazon-2018 + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Amazon-2018 dataset. + search__interaction_data_path: + - ./data/amazon2018/proc/user2item.pkl + + max_seq_length: 100 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 16 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [64] + search__num_attention_heads: [1] + search__num_hidden_layers: [1] + + # subclass model parameters + search__attention_dropout: [0.1] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 256 + per_device_eval_batch_size: 512 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + 
eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.1, 0.2]}] + - ["unpopularity", {p: [0.2, 0.4]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_b.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_b.yaml new file mode 100644 index 0000000..4cf577f --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_b.yaml @@ -0,0 +1,76 @@ +# SASRec (Base) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. + search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [512] + search__num_attention_heads: [8] + search__num_hidden_layers: [6] + + # subclass model parameters + search__attention_dropout: [0.1, 0.2, 0.4] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [5.0e-4, 1.0e-3] + search__weight_decay: [0.1, 0.2, 0.05] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training 
arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_l.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_l.yaml new file mode 100644 index 0000000..0ae974a --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_l.yaml @@ -0,0 +1,76 @@ +# SASRec (Large) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. 
+ search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [768] + search__num_attention_heads: [12] + search__num_hidden_layers: [8] + + # subclass model parameters + search__attention_dropout: [0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [5.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_s.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_s.yaml new file mode 100644 index 0000000..321198e --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_s.yaml @@ -0,0 +1,76 @@ +# SASRec (Small) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path 
to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. + search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [256] + search__num_attention_heads: [4] + search__num_hidden_layers: [4] + + # subclass model parameters + search__attention_dropout: [0, 0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1, 0.05, 0.01] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xl.yaml 
b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xl.yaml new file mode 100644 index 0000000..32906d3 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xl.yaml @@ -0,0 +1,76 @@ +# SASRec (XL) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. + search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [1024] + search__num_attention_heads: [16] + search__num_hidden_layers: [10] + + # subclass model parameters + search__attention_dropout: [0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and 
item embeddings + eval_interval: 5 # run metrics every epoch + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xs.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xs.yaml new file mode 100644 index 0000000..aa5bf00 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xs.yaml @@ -0,0 +1,76 @@ +# SASRec (XS) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. + search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [128] + search__num_attention_heads: [2] + search__num_hidden_layers: [2] + + # subclass model parameters + search__attention_dropout: [0, 0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1, 0.05, 0.01] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # 
training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every 5 epochs + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxl.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxl.yaml new file mode 100644 index 0000000..d188c85 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxl.yaml @@ -0,0 +1,77 @@ +# SASRec (XXL) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. 
+ search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [1536] + search__num_attention_heads: [24] + search__num_hidden_layers: [12] + + # subclass model parameters + search__attention_dropout: [0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-4] + search__weight_decay: [0.1] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every 5 epochs + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10] + diff --git a/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxs.yaml b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxs.yaml new file mode 100644 index 0000000..cc522b6 --- /dev/null +++ b/scripts/configs/seqrec/scaling/sasrec_bce_movielens/sasrec_bce_movielens_xxs.yaml @@ -0,0 +1,76 @@ +# SASRec (XXS) + BCE on Movielens-20M + +# global settings +pretrained_ckpt: null # 
optional path to a pretrained checkpoint to load +test_eval: false # whether to run evaluation on the test set instead of validation set +save_predictions: false # whether to save the predictions on the test set + +# dataset settings +dataset: + type: seqrec + + # specific path to interaction data file for Movielens-20M dataset. + search__interaction_data_path: + - ./data/movielens-20m/proc/user2item.pkl + + max_seq_length: 200 + +# collator settings +collator: + type: seqrec + + num_negative_samples: 32 + negative_sampling_strategy: uniform + +# model settings +model: + type: sasrec + + config: + # base model parameters + search__hidden_size: [64] + search__num_attention_heads: [1] + search__num_hidden_layers: [1] + + # subclass model parameters + search__attention_dropout: [0, 0.2] + +# trainer settings +trainer: + type: bce + + config: + # training arguments - Run control + do_train: true + do_eval: true + do_predict: true + + # training arguments - Optimization & schedule + search__num_train_epochs: [100] + per_device_train_batch_size: 128 + per_device_eval_batch_size: 256 + gradient_accumulation_steps: 1 # batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps + search__learning_rate: [1.0e-3] + search__weight_decay: [0.1, 0.05, 0.01] + lr_scheduler_type: cosine + warmup_ratio: 0.05 + + # training arguments - Evaluation & checkpointing + metric_for_best_model: ndcg@5 # should exist in the metrics + + # training arguments - Parallelism & precision + dataloader_num_workers: 4 + gradient_checkpointing: true + bf16: true + tf32: false + + # base trainer parameters + norm_embeddings: false # whether to L2-normalize user and item embeddings + eval_interval: 5 # run metrics every 5 epochs + metrics: + - ["hr", {}] + - ["ndcg", {}] + - ["popularity", {p: [0.05, 0.1]}] + - ["unpopularity", {p: [0.6, 0.8]}] + model_loss_weight: 1.0 + top_k: [1, 5, 10]