Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion scripts/prepare_hidden_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,18 @@ def parse_args():
help="Number of files per subdirectory.",
)

# vlm related args
vlm_group = parser.add_argument_group("vlm")
vlm_group.add_argument(
"--min-pixels", type=int, default=50176
) # 64*28*28 for qwen2.5-vl
vlm_group.add_argument(
"--max-pixels", type=int, default=802816
) # 1024*28*28 for qwen2.5-vl
Comment on lines +143 to +148
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

It's good practice to give every command-line argument a `help` string so that its purpose is shown in the script's `--help` output; this improves usability and maintainability. The formatting of these `add_argument` calls can also be made more conventional and readable by placing one argument per line, as a code formatter such as Black would produce.

Suggested change
vlm_group.add_argument(
"--min-pixels", type=int, default=50176
) # 64*28*28 for qwen2.5-vl
vlm_group.add_argument(
"--max-pixels", type=int, default=802816
) # 1024*28*28 for qwen2.5-vl
vlm_group.add_argument(
"--min-pixels",
type=int,
default=50176,
help="The minimum number of pixels for VLM processing.",
) # 64*28*28 for qwen2.5-vl
vlm_group.add_argument(
"--max-pixels",
type=int,
default=802816,
help="The maximum number of pixels for VLM processing.",
) # 1024*28*28 for qwen2.5-vl


sglang_group = parser.add_argument_group("sglang")
SGLangBackendArgs.add_args(sglang_group)

return parser.parse_args()


Expand Down Expand Up @@ -187,7 +197,11 @@ def build_target_model(
target_model.set_aux_hidden_states_layers(args.aux_hidden_states_layers)

if args.is_vlm:
processor = AutoProcessor.from_pretrained(args.target_model_path)
processor = AutoProcessor.from_pretrained(
args.target_model_path,
min_pixels=args.min_pixels,
max_pixels=args.max_pixels,
)
else:
processor = None

Expand Down Expand Up @@ -583,6 +597,8 @@ def main():
args.target_model_path, trust_remote_code=True
)
cache_params_string = f"{args.data_path}-{args.max_length}-{args.chat_template}-{args.target_model_path}-{args.num_samples}-{args.is_preformatted}"
if args.is_vlm:
cache_params_string = f'{cache_params_string}-{args.min_pixels}-{args.max_pixels}'
cache_key = hashlib.md5(cache_params_string.encode()).hexdigest()

# Preprocess on complete, un-sharded dataset
Expand Down
Loading