diff --git a/docs/releases/2026.rst b/docs/releases/2026.rst index bf3e6759f..786a4447f 100644 --- a/docs/releases/2026.rst +++ b/docs/releases/2026.rst @@ -10,6 +10,18 @@ There are no new major paradigm shifts, though — pipelines, datasets, and components work as they do in the 2025 series, but with more features, some rough corners polished off the interfaces, and hopefully fewer bugs. +.. _2026.2.0: + +2026.2.0 +~~~~~~~~ + +Second release in the 2026 series, bringing a few smaller feature updates: + +- Added :meth:`~lenskit.data.EntityAttribute.list` and + :meth:`~lenskit.data.EntityAttribute.value` accessors to entity attributes. +- Improved typecasting of entity IDs in :meth:`lenskit.data.Vocabulary.numbers`. +- Added ``--related-items`` option to the ``lenskit recommend`` command. + .. _2026.1.0: 2026.1.0 diff --git a/src/lenskit/cli/recommend.py b/src/lenskit/cli/recommend.py index 68edc7f1d..6c146cc37 100644 --- a/src/lenskit/cli/recommend.py +++ b/src/lenskit/cli/recommend.py @@ -6,14 +6,16 @@ import pickle import sys +from json import dump as dump_json from pathlib import Path import click +from rich import print, print_json from xopen import xopen import lenskit.operations as ops from lenskit import batch -from lenskit.data import Dataset, ItemList, ListILC, UserIDKey +from lenskit.data import ID, Dataset, ItemList, ListILC, RecQuery, UserIDKey from lenskit.logging import Stopwatch, get_logger, item_progress from lenskit.pipeline import PipelineProfiler from lenskit.random import random_generator @@ -30,6 +32,7 @@ help="Output file for recommendations.", ) @click.option("--print/--no-print", "print_recs", default=True, help="Print recommendations.") +@click.option("--json", is_flag=True, help="Print and save recommendations in JSON.") @click.option("-n", "--list-length", type=int, help="Recommendation list length.") @click.option("--batch/--no-batch", "use_batch", default=False, help="Use batch.recommend.") @click.option("-j", "--process-count", 
type=int, help="Use specified number of worker processes.") @@ -37,13 +40,17 @@ @click.option("-d", "--dataset", metavar="DATA", type=Path, help="Use dataset DATA.") @click.option("-u", "--users-file", type=Path, metavar="FILE", help="Load list of users from FILE.") @click.option("--random-users", type=int, metavar="N", help="Recommend for N random users.") -@click.option("--profile", type=Path, metavar="FILE", help="Profile profile inference.") +@click.option("--profile", type=Path, metavar="FILE", help="Profile inference and save in FILE.") +@click.option( + "--related-items", is_flag=True, help="interpret USERS as items, and recommend related items" +) @click.argument("PIPE_FILE", type=Path) @click.argument("USERS", nargs=-1) def recommend( out_file: Path, users_file: Path | None, print_recs: bool, + json: bool, use_batch: bool, process_count: int | None, use_ray: bool, @@ -51,6 +58,7 @@ def recommend( list_length: int | None, dataset: Path | None, profile: Path | None, + related_items: bool, pipe_file: Path, users: list, ): @@ -83,32 +91,48 @@ def recommend( profiler = PipelineProfiler(pipe, profile) if use_batch or use_ray or process_count is not None: - if use_ray: - n_jobs = "ray" - else: - n_jobs = process_count - - all_recs = batch.recommend(pipe, users, list_length, n_jobs=n_jobs, profiler=profiler) + all_recs = batch.recommend( + pipe, users, list_length, n_jobs=process_count, profiler=profiler + ) else: timer = Stopwatch(start=False) - all_recs = None if out_file is None else ListILC(UserIDKey) + all_recs = None + if out_file is not None or json: + if related_items: + all_recs = ListILC(["ref_item_id"]) + else: + all_recs = ListILC(UserIDKey) + with item_progress("user recommendations", len(users)) as pb: for user in users: - ulog = log.bind(user=user) - ulog.debug("generating single-user recommendations") + if related_items: + ulog = log.bind(ref_item=user) + ulog.debug("generating related product recommendation") + query = 
RecQuery(context_items=ItemList([user])) + else: + ulog = log.bind(user=user) + ulog.debug("generating single-user recommendations") + query = RecQuery(user_id=user) + with timer.measure(accumulate=True): - recs = ops.recommend(pipe, user, list_length, profiler=profiler) + recs = ops.recommend(pipe, query, list_length, profiler=profiler) + ulog.info( - "recommended for user", + "generated recommendation list", length=len(recs), time="{:.1f}ms".format(timer.elapsed(accumulated=False) * 1000), ) if all_recs is not None: - all_recs.add(recs, user_id=user) + all_recs.add(recs, user) - if print_recs: - print_recommendation_list(recs, data) + if print_recs and not json: + print_recommendation_list( + recs, + data, + user=None if related_items else user, + ref_item=user if related_items else None, + ) pb.update() @@ -122,21 +146,46 @@ def recommend( if profiler is not None: profiler.close() + if print_recs and json: + print_json(all_recs.to_json()) + if out_file is not None: assert all_recs is not None log.info("saving recommendations to %s", str(out_file), count=len(all_recs)) - all_recs.save_parquet(out_file) + if json: + with open(out_file, "w") as jsf: + dump_json(all_recs.to_json_data(object=True), jsf) + jsf.write("\n") + else: + all_recs.save_parquet(out_file) -def print_recommendation_list(recs: ItemList, data: Dataset | None): +def print_recommendation_list( + recs: ItemList, + data: Dataset | None, + *, + ref_item: ID | None = None, + user: ID | None = None, +): + ref_title = None titles = None if data is not None: items = data.entities("item") if "title" in items.attributes: titles = items.select(ids=recs.ids()).attribute("title").pandas() + if ref_item is not None: + ref_title = items.select(ids=[ref_item]).attribute("title").value() + + if user is not None: + print("recommendations for user {}:".format(user)) + elif ref_item is not None: + if ref_title is not None: + print("related items for item {} ([italic]{}[/italic]):".format(ref_item, ref_title)) + else: 
+ print("related items for item {}:".format(ref_item)) for item in recs.ids(): if titles is not None: - print("item {}: {}".format(item, titles.loc[item])) + print(" [bold]item {}[/bold]: [italic]{}[/italic]".format(item, titles.loc[item])) else: - print("item {}".format(item)) + print(" [bold]item {}[/bold]".format(item)) diff --git a/tests/cli/test-train.sh b/tests/cli/test-train-recommend.sh similarity index 74% rename from tests/cli/test-train.sh rename to tests/cli/test-train-recommend.sh index c5952ca4d..30b0861ed 100644 --- a/tests/cli/test-train.sh +++ b/tests/cli/test-train-recommend.sh @@ -2,6 +2,7 @@ set -eo pipefail data="$TEST_WORK/ml-data" out="$TEST_WORK/als.pkl.gz" +recs="$TEST_WORK/recs.json" cat >>"$TEST_WORK/verify.py" <