From c817ec83f807049e06ac8d6414f4c82318854a76 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Thu, 23 Apr 2026 14:35:13 -0400 Subject: [PATCH 1/4] cli: add --related-items option to recommende --- src/lenskit/cli/recommend.py | 61 +++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/lenskit/cli/recommend.py b/src/lenskit/cli/recommend.py index 68edc7f1d..dece42f1d 100644 --- a/src/lenskit/cli/recommend.py +++ b/src/lenskit/cli/recommend.py @@ -9,11 +9,12 @@ from pathlib import Path import click +from rich import print from xopen import xopen import lenskit.operations as ops from lenskit import batch -from lenskit.data import Dataset, ItemList, ListILC, UserIDKey +from lenskit.data import ID, Dataset, ItemList, ListILC, RecQuery, UserIDKey from lenskit.logging import Stopwatch, get_logger, item_progress from lenskit.pipeline import PipelineProfiler from lenskit.random import random_generator @@ -37,7 +38,10 @@ @click.option("-d", "--dataset", metavar="DATA", type=Path, help="Use dataset DATA.") @click.option("-u", "--users-file", type=Path, metavar="FILE", help="Load list of users from FILE.") @click.option("--random-users", type=int, metavar="N", help="Recommend for N random users.") -@click.option("--profile", type=Path, metavar="FILE", help="Profile profile inference.") +@click.option("--profile", type=Path, metavar="FILE", help="Profile inference and save in FILE.") +@click.option( + "--related-items", is_flag=True, help="interpret USERS as items, and recommend related items" +) @click.argument("PIPE_FILE", type=Path) @click.argument("USERS", nargs=-1) def recommend( @@ -51,6 +55,7 @@ def recommend( list_length: int | None, dataset: Path | None, profile: Path | None, + related_items: bool, pipe_file: Path, users: list, ): @@ -91,24 +96,43 @@ def recommend( all_recs = batch.recommend(pipe, users, list_length, n_jobs=n_jobs, profiler=profiler) else: timer = Stopwatch(start=False) - all_recs = None if 
out_file is None else ListILC(UserIDKey) + all_recs = None + if out_file is not None: + if related_items: + all_recs = ListILC(["ref_item_id"]) + else: + all_recs = ListILC(UserIDKey) + with item_progress("user recommendations", len(users)) as pb: for user in users: - ulog = log.bind(user=user) - ulog.debug("generating single-user recommendations") + if related_items: + ulog = log.bind(ref_item=user) + ulog.debug("generating related product recommendation") + query = RecQuery(context_items=ItemList([user])) + else: + ulog = log.bind(user=user) + ulog.debug("generating single-user recommendations") + query = RecQuery(user_id=user) + with timer.measure(accumulate=True): - recs = ops.recommend(pipe, user, list_length, profiler=profiler) + recs = ops.recommend(pipe, query, list_length, profiler=profiler) + ulog.info( - "recommended for user", + "generated recommendation list", length=len(recs), time="{:.1f}ms".format(timer.elapsed(accumulated=False) * 1000), ) if all_recs is not None: - all_recs.add(recs, user_id=user) + all_recs.add(recs, user) if print_recs: - print_recommendation_list(recs, data) + print_recommendation_list( + recs, + data, + user=None if related_items else user, + ref_item=user if related_items else None, + ) pb.update() @@ -128,15 +152,28 @@ def recommend( all_recs.save_parquet(out_file) -def print_recommendation_list(recs: ItemList, data: Dataset | None): +def print_recommendation_list( + recs: ItemList, data: Dataset | None, ref_item: ID | None = None, user: ID | None = None +): + ref_title = None titles = None if data is not None: items = data.entities("item") if "title" in items.attributes: titles = items.select(ids=recs.ids()).attribute("title").pandas() + if ref_item is not None: + ref_title = items.select(ids=[ref_item]).attribute("title").value() + + if user is not None: + print("recommendations for user {}:".format(user)) + elif ref_item is not None: + if ref_title is not None: + print("related items for item {} 
([italic]{}[/italic]):".format(ref_item, ref_title)) + else: + print("related items for item {}:".format(ref_item)) for item in recs.ids(): if titles is not None: - print("item {}: {}".format(item, titles.loc[item])) + print(" [bold]item {}[/bold]: [italic]{}[/italic]".format(item, titles.loc[item])) else: - print("item {}".format(item)) + print(" [bold]item {}[/bold]".format(item)) From 4df929a7792d7a801ede3f3463dedb0f7f9411fc Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Thu, 23 Apr 2026 14:37:08 -0400 Subject: [PATCH 2/4] docs: release notes for entity & recommend updates --- docs/releases/2026.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/releases/2026.rst b/docs/releases/2026.rst index bf3e6759f..786a4447f 100644 --- a/docs/releases/2026.rst +++ b/docs/releases/2026.rst @@ -10,6 +10,18 @@ There are no new major paradigm shifts, though — pipelines, datasets, and components work as they do in the 2025 series, but with more features, some rough corners polished off the interfaces, and hopefully fewer bugs. +.. _2026.2.0: + +2026.2.0 +~~~~~~~~ + +Second release in the 2026 series, bringing a few smaller feature updates: + +- Added :meth:`~lenskit.data.EntityAttribute.list` and + :meth:`~lenskit.data.EntityAttribute.value` accessors to entity attributes. +- Improved typecasting of entity IDs in :meth:`lenskit.data.Vocabulary.numbers`. +- Added ``--related-items`` option to the ``lenskit recommend`` command. + .. 
_2026.1.0: 2026.1.0 From e4cbdb9a4ce283386cc1e92e7843e08a236c735b Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Fri, 24 Apr 2026 17:53:16 -0400 Subject: [PATCH 3/4] cli: add JSON output options --- src/lenskit/cli/recommend.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/lenskit/cli/recommend.py b/src/lenskit/cli/recommend.py index dece42f1d..6c146cc37 100644 --- a/src/lenskit/cli/recommend.py +++ b/src/lenskit/cli/recommend.py @@ -6,10 +6,11 @@ import pickle import sys +from json import dump as dump_json from pathlib import Path import click -from rich import print +from rich import print, print_json from xopen import xopen import lenskit.operations as ops @@ -31,6 +32,7 @@ help="Output file for recommendations.", ) @click.option("--print/--no-print", "print_recs", default=True, help="Print recommendations.") +@click.option("--json", is_flag=True, help="Print and save recommendations in JSON.") @click.option("-n", "--list-length", type=int, help="Recommendation list length.") @click.option("--batch/--no-batch", "use_batch", default=False, help="Use batch.recommend.") @click.option("-j", "--process-count", type=int, help="Use specified number of worker processes.") @@ -48,6 +50,7 @@ def recommend( out_file: Path, users_file: Path | None, print_recs: bool, + json: bool, use_batch: bool, process_count: int | None, use_ray: bool, @@ -88,16 +91,13 @@ def recommend( profiler = PipelineProfiler(pipe, profile) if use_batch or use_ray or process_count is not None: - if use_ray: - n_jobs = "ray" - else: - n_jobs = process_count - - all_recs = batch.recommend(pipe, users, list_length, n_jobs=n_jobs, profiler=profiler) + all_recs = batch.recommend( + pipe, users, list_length, n_jobs=process_count, profiler=profiler + ) else: timer = Stopwatch(start=False) all_recs = None - if out_file is not None: + if out_file is not None or json: if related_items: all_recs = ListILC(["ref_item_id"]) else: @@ -126,7 +126,7 
@@ def recommend( if all_recs is not None: all_recs.add(recs, user) - if print_recs: + if print_recs and not json: print_recommendation_list( recs, data, @@ -146,14 +146,26 @@ def recommend( if profiler is not None: profiler.close() + if print_recs and json: + print_json(all_recs.to_json()) + if out_file is not None: assert all_recs is not None log.info("saving recommendations to %s", str(out_file), count=len(all_recs)) - all_recs.save_parquet(out_file) + if json: + with open(out_file, "w") as jsf: + dump_json(all_recs.to_json_data(object=True), jsf) + jsf.write("\n") + else: + all_recs.save_parquet(out_file) def print_recommendation_list( - recs: ItemList, data: Dataset | None, ref_item: ID | None = None, user: ID | None = None + recs: ItemList, + data: Dataset | None, + *, + ref_item: ID | None = None, + user: ID | None = None, ): ref_title = None titles = None From 78ab1b3121738c1992564a05977e95807061fb13 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Fri, 24 Apr 2026 17:53:25 -0400 Subject: [PATCH 4/4] cli: test lenskit recommend --- tests/cli/{test-train.sh => test-train-recommend.sh} | 10 ++++++++++ 1 file changed, 10 insertions(+) rename tests/cli/{test-train.sh => test-train-recommend.sh} (74%) diff --git a/tests/cli/test-train.sh b/tests/cli/test-train-recommend.sh similarity index 74% rename from tests/cli/test-train.sh rename to tests/cli/test-train-recommend.sh index c5952ca4d..30b0861ed 100644 --- a/tests/cli/test-train.sh +++ b/tests/cli/test-train-recommend.sh @@ -2,6 +2,7 @@ set -eo pipefail data="$TEST_WORK/ml-data" out="$TEST_WORK/als.pkl.gz" +recs="$TEST_WORK/recs.json" cat >>"$TEST_WORK/verify.py" <