Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/releases/2026.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ There are no new major paradigm shifts, though — pipelines, datasets, and
components work as they do in the 2025 series, but with more features, some
rough corners polished off the interfaces, and hopefully fewer bugs.

.. _2026.2.0:

2026.2.0
~~~~~~~~

Second release in the 2026 series, bringing a few smaller feature updates:

- Added :meth:`~lenskit.data.EntityAttribute.list` and
:meth:`~lenskit.data.EntityAttribute.value` accessors to entity attributes.
- Improved typecasting of entity IDs in :meth:`lenskit.data.Vocabulary.numbers`.
- Added ``--related-items`` option to the ``lenskit recommend`` command.

.. _2026.1.0:

2026.1.0
Expand Down
89 changes: 69 additions & 20 deletions src/lenskit/cli/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@

import pickle
import sys
from json import dump as dump_json
from pathlib import Path

import click
from rich import print, print_json
from xopen import xopen

import lenskit.operations as ops
from lenskit import batch
from lenskit.data import Dataset, ItemList, ListILC, UserIDKey
from lenskit.data import ID, Dataset, ItemList, ListILC, RecQuery, UserIDKey
from lenskit.logging import Stopwatch, get_logger, item_progress
from lenskit.pipeline import PipelineProfiler
from lenskit.random import random_generator
Expand All @@ -30,27 +32,33 @@
help="Output file for recommendations.",
)
@click.option("--print/--no-print", "print_recs", default=True, help="Print recommendations.")
@click.option("--json", is_flag=True, help="Print and save recommendations in JSON.")
@click.option("-n", "--list-length", type=int, help="Recommendation list length.")
@click.option("--batch/--no-batch", "use_batch", default=False, help="Use batch.recommend.")
@click.option("-j", "--process-count", type=int, help="Use specified number of worker processes.")
@click.option("--ray", "use_ray", is_flag=True, help="Ue Ray for parallelism.")
@click.option("-d", "--dataset", metavar="DATA", type=Path, help="Use dataset DATA.")
@click.option("-u", "--users-file", type=Path, metavar="FILE", help="Load list of users from FILE.")
@click.option("--random-users", type=int, metavar="N", help="Recommend for N random users.")
@click.option("--profile", type=Path, metavar="FILE", help="Profile profile inference.")
@click.option("--profile", type=Path, metavar="FILE", help="Profile inference and save in FILE.")
@click.option(
"--related-items", is_flag=True, help="interpret USERS as items, and recommend related items"
)
@click.argument("PIPE_FILE", type=Path)
@click.argument("USERS", nargs=-1)
def recommend(
out_file: Path,
users_file: Path | None,
print_recs: bool,
json: bool,
use_batch: bool,
process_count: int | None,
use_ray: bool,
random_users: int | None,
list_length: int | None,
dataset: Path | None,
profile: Path | None,
related_items: bool,
pipe_file: Path,
users: list,
):
Expand Down Expand Up @@ -83,32 +91,48 @@ def recommend(
profiler = PipelineProfiler(pipe, profile)

if use_batch or use_ray or process_count is not None:
if use_ray:
n_jobs = "ray"
else:
n_jobs = process_count

all_recs = batch.recommend(pipe, users, list_length, n_jobs=n_jobs, profiler=profiler)
all_recs = batch.recommend(
pipe, users, list_length, n_jobs=process_count, profiler=profiler
)
else:
timer = Stopwatch(start=False)
all_recs = None if out_file is None else ListILC(UserIDKey)
all_recs = None
if out_file is not None or json:
if related_items:
all_recs = ListILC(["ref_item_id"])
else:
all_recs = ListILC(UserIDKey)

with item_progress("user recommendations", len(users)) as pb:
for user in users:
ulog = log.bind(user=user)
ulog.debug("generating single-user recommendations")
if related_items:
ulog = log.bind(ref_item=user)
ulog.debug("generating related product recommendation")
query = RecQuery(context_items=ItemList([user]))
else:
ulog = log.bind(user=user)
ulog.debug("generating single-user recommendations")
query = RecQuery(user_id=user)

with timer.measure(accumulate=True):
recs = ops.recommend(pipe, user, list_length, profiler=profiler)
recs = ops.recommend(pipe, query, list_length, profiler=profiler)

ulog.info(
"recommended for user",
"generated recommendation list",
length=len(recs),
time="{:.1f}ms".format(timer.elapsed(accumulated=False) * 1000),
)

if all_recs is not None:
all_recs.add(recs, user_id=user)
all_recs.add(recs, user)

if print_recs:
print_recommendation_list(recs, data)
if print_recs and not json:
print_recommendation_list(
recs,
data,
user=None if related_items else user,
ref_item=user if related_items else None,
)

pb.update()

Expand All @@ -122,21 +146,46 @@ def recommend(
if profiler is not None:
profiler.close()

if print_recs and json:
print_json(all_recs.to_json())

if out_file is not None:
assert all_recs is not None
log.info("saving recommendations to %s", str(out_file), count=len(all_recs))
all_recs.save_parquet(out_file)
if json:
with open(out_file, "w") as jsf:
dump_json(all_recs.to_json_data(object=True), jsf)
jsf.write("\n")
else:
all_recs.save_parquet(out_file)


def print_recommendation_list(recs: ItemList, data: Dataset | None):
def print_recommendation_list(
recs: ItemList,
data: Dataset | None,
*,
ref_item: ID | None = None,
user: ID | None = None,
):
ref_title = None
titles = None
if data is not None:
items = data.entities("item")
if "title" in items.attributes:
titles = items.select(ids=recs.ids()).attribute("title").pandas()
if ref_item is not None:
ref_title = items.select(ids=[ref_item]).attribute("title").value()

if user is not None:
print("recommendations for user {}:".format(user))
elif ref_item is not None:
if ref_title is not None:
print("related items for item {} ([italic]{}[/italic]):".format(ref_item, ref_title))
else:
print("related items for item {}:".format(ref_item))

for item in recs.ids():
if titles is not None:
print("item {}: {}".format(item, titles.loc[item]))
print(" [bold]item {}[/bold]: [italic]{}[/italic]".format(item, titles.loc[item]))
else:
print("item {}".format(item))
print(" [bold]item {}[/bold]".format(item))
10 changes: 10 additions & 0 deletions tests/cli/test-train.sh → tests/cli/test-train-recommend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set -eo pipefail

data="$TEST_WORK/ml-data"
out="$TEST_WORK/als.pkl.gz"
recs="$TEST_WORK/recs.json"

cat >>"$TEST_WORK/verify.py" <<EOF
import pickle
Expand All @@ -27,3 +28,12 @@ run-lenskit train --config pipelines/als-explicit.toml -o "$out" "$data"

require -f "$out"
run-python "$TEST_WORK/verify.py" "$out"

run-lenskit recommend -o "$recs" --json -n 10 "$out" 200
require -f "$recs"

n_users=$(jq 'length' <$recs)
require "$n_users" == 1

n_items=$(jq '.["200"] | length' <$recs)
require "$n_items" == 10
Loading