From 52ef10c67597a98d252e0b678cac8bf603dcb70e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 3 Jan 2026 20:38:42 +0000 Subject: [PATCH] feat: add tqdm progress bar to CLI cleaning operations - Replaced periodic logging with `tqdm` progress bar in `yogimass/processing.py` - Added `tqdm` to `pyproject.toml` dependencies - Removed unused `LOG_INTERVAL` constant This improves the CLI UX by providing real-time progress feedback and ETA for long-running cleaning operations. --- pyproject.toml | 1 + yogimass/processing.py | 22 +++++++--------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af8d045..9cabb73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "matchms>=0.23", "numpy>=1.24", "PyYAML>=6.0", + "tqdm>=4.66", ] [project.optional-dependencies] diff --git a/yogimass/processing.py b/yogimass/processing.py index 1b923c2..7a5b2fd 100644 --- a/yogimass/processing.py +++ b/yogimass/processing.py @@ -5,6 +5,7 @@ from __future__ import annotations import logging +from tqdm import tqdm from matchms.importing import load_from_mgf, load_from_msp from matchms.filtering import ( default_filters, @@ -25,9 +26,6 @@ logger = logging.getLogger(__name__) -# Interval for progress logging -LOG_INTERVAL = 1000 - def metadata_processing(spectrum): """ @@ -78,19 +76,16 @@ def clean_mgf_library(mgf_path: str) -> list: """ logger.info(f"Cleaning {mgf_path} library spectra...") library_list = list(load_from_mgf(mgf_path)) - + # Apply filters sequentially processed_spectra = [] - for i, s in enumerate(library_list): - if (i + 1) % LOG_INTERVAL == 0: - logger.info(f"Processed {i + 1} / {len(library_list)} spectra...") - + for s in tqdm(library_list, desc="Cleaning spectra", unit="spectrum"): meta_processed = metadata_processing(s) if meta_processed: peak_processed = peak_processing(meta_processed) if peak_processed: processed_spectra.append(peak_processed) - + logger.info(f"Retained {len(processed_spectra)} spectra after cleaning.") return processed_spectra @@ -101,17 +96,14 @@ def clean_msp_library(msp_path: str) -> list: """ logger.info(f"Cleaning {msp_path} library spectra...") library_list = list(load_from_msp(msp_path)) - - processed_spectra = [] - for i, s in enumerate(library_list): - if (i + 1) % LOG_INTERVAL == 0: - logger.info(f"Processed {i + 1} / {len(library_list)} spectra...") + processed_spectra = [] + for s in tqdm(library_list, desc="Cleaning spectra", unit="spectrum"): meta_processed = metadata_processing(s) if meta_processed: peak_processed = peak_processing(meta_processed) if peak_processed: processed_spectra.append(peak_processed) - + logger.info(f"Retained {len(processed_spectra)} spectra after cleaning.") return processed_spectra