From 7d8f23c0ae2884eb7d2214b32e54262de43c0518 Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 19:10:19 +1000 Subject: [PATCH 1/7] Faster dataframe-based mentions computations for RankedProfiles --- src/votekit/utils.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 6badce3b..9b7a8ef7 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -368,8 +368,48 @@ def first_place_votes( profile, [1] + [0] * (profile.max_ranking_length - 1), tie_convention ) +def mentions(profile: RankProfile) -> dict[str, float]: + """ + Calculates total mentions for all candidates in a ``RankProfile``. + + Args: + profile (RankProfile): RankProfile of ballots. + + Returns: + dict[str, float]: + Dictionary mapping candidates to mention totals (values). + """ + if not isinstance(profile, RankProfile): + raise TypeError("Profile must be of type RankProfile.") + + assert profile.max_ranking_length is not None + + ranking_cols = [ + f"Ranking_{i}" + for i in range(1, profile.max_ranking_length + 1) + ] + + rank_sets = profile.df[ranking_cols].stack() + + tilde = frozenset({"~"}) + rank_sets = rank_sets[ + rank_sets.map(lambda s: bool(s) and s != tilde) + ] + + exploded = rank_sets.explode() + + weights = profile.df["Weight"].reindex( + exploded.index.get_level_values(0) + ) + + totals = weights.groupby(exploded).sum() + + return { + c: float(totals.get(c, 0.0)) + for c in profile.candidates + } -def mentions( +def old_mentions( profile: RankProfile, ) -> dict[str, float]: """ From 7a71f1c511734dd0a76d5eda0048a643981a20f4 Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 20:55:22 +1000 Subject: [PATCH 2/7] fix + add alternative fallback to old `mentions` function when ballots are already materialized --- src/votekit/utils.py | 64 ++++++++++++++++++++++++++++++++++++-------- tests/test_utils.py | 58 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 11 deletions(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 9b7a8ef7..40dbe493 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,5 +1,6 @@ import math import random +from fractions import Fraction from itertools import permutations from typing import Literal, Optional, Sequence, Union @@ -368,9 +369,24 @@ def first_place_votes( profile, [1] + [0] * (profile.max_ranking_length - 1), tie_convention ) -def mentions(profile: RankProfile) -> dict[str, float]: + +def _ballots_are_materialized(profile: RankProfile) -> bool: """ - Calculates total mentions for all candidates in a ``RankProfile``. + Checks if the ballots are materialized in a ``RankProfile``. + + Args: + profile (RankProfile): RankProfile of ballots. + + Returns: + bool: + True if ballots are materialized, False otherwise. + """ + return "ballots" in profile.__dict__ + + +def _mentions_from_df(profile: RankProfile) -> dict[str, Fraction]: + """ + Faster pandas-based mentions calculator for RankProfiles where ballots are not materialized. Args: profile (RankProfile): RankProfile of ballots. @@ -379,9 +395,6 @@ def mentions(profile: RankProfile) -> dict[str, float]: dict[str, float]: Dictionary mapping candidates to mention totals (values). """ - if not isinstance(profile, RankProfile): - raise TypeError("Profile must be of type RankProfile.") - assert profile.max_ranking_length is not None ranking_cols = [ @@ -389,27 +402,56 @@ def mentions(profile: RankProfile) -> dict[str, float]: for i in range(1, profile.max_ranking_length + 1) ] + tilde = frozenset({"~"}) + rank_sets = profile.df[ranking_cols].stack() - tilde = frozenset({"~"}) rank_sets = rank_sets[ - rank_sets.map(lambda s: bool(s) and s != tilde) + rank_sets.map( + lambda s: isinstance(s, frozenset) and bool(s) and s != tilde + ) ] exploded = rank_sets.explode() + if exploded.empty: + return {c: Fraction(0) for c in profile.candidates} + weights = profile.df["Weight"].reindex( exploded.index.get_level_values(0) - ) + ).to_numpy() - totals = weights.groupby(exploded).sum() + totals = pd.Series(weights).groupby(exploded.to_numpy(), sort=False).sum() return { - c: float(totals.get(c, 0.0)) + c: totals.get(c, Fraction(0)) for c in profile.candidates } -def old_mentions( + +def fast_mentions(profile: RankProfile) -> dict[str, Fraction]: + """ + Decides which way to compute mentions based on whether ballots are materialized in the profile. + If they are, uses the traditional mentions calculation. + If not, uses a faster pandas-based approach. + + Args: + profile (RankProfile): RankProfile of ballots. + + Returns: + dict[str, float]: + Dictionary mapping candidates to mention totals (values). + """ + if not isinstance(profile, RankProfile): + raise TypeError("Profile must be of type RankProfile.") + + if _ballots_are_materialized(profile): + return mentions(profile) + + return _mentions_from_df(profile) + + +def mentions( profile: RankProfile, ) -> dict[str, float]: """ diff --git a/tests/test_utils.py b/tests/test_utils.py index 8d52fd6a..59eb6e86 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,5 @@ from itertools import permutations +from pathlib import Path from typing import Literal, cast import pytest @@ -12,6 +13,7 @@ borda_scores, elect_cands_from_set_ranking, expand_tied_ballot, + fast_mentions, first_place_votes, index_to_lexicographic_ballot, mentions, @@ -24,6 +26,8 @@ validate_score_vector, ) +CSV_DIR = Path(__file__).resolve().parents[0] / "data" / "csv" + profile_no_ties = RankProfile( ballots=( RankBallot(ranking=tuple(map(frozenset, [{"A"}, {"B"}])), weight=1), @@ -40,6 +44,14 @@ ) ) +profile_with_duplicates = RankProfile( + ballots=( + RankBallot(ranking=tuple(map(frozenset, [{"A"}, {"B"}, {"B"}])), weight=1), + RankBallot(ranking=tuple(map(frozenset, [{"A"}, {"B"}, {"C"}])), weight=1 / 2), + RankBallot(ranking=tuple(map(frozenset, [{"B"}, {"B"}, {"B"}])), weight=3), + ) +) + profile_with_missing = RankProfile( ballots=( RankBallot(ranking=tuple(map(frozenset, [{"A", "B"}, {"D"}])), weight=1), @@ -251,11 +263,57 @@ def test_mentions(): assert isinstance(test["A"], float) +def test_mentions_with_ties(): + correct = {"A": 9 / 2, "B": 9 / 2, "C": 7 / 2} + test = mentions(profile_with_ties) + assert correct == test + assert isinstance(test["A"], float) + + +def test_mentions_with_duplicates(): + correct = {"A": 3 / 2, "B": 23 / 2, "C": 1 / 2} + test = mentions(profile_with_duplicates) + assert correct == test + assert isinstance(test["A"], float) + + +def test_fast_mentions(): + correct = {"A": 9 / 2, "B": 9 / 2, "C": 7 / 2} + test = fast_mentions(profile_no_ties) + assert correct == test + assert isinstance(test["A"], float) + + +def test_fast_mentions_with_ties(): + correct = {"A": 9 / 2, "B": 9 / 2, "C": 7 / 2} + test = fast_mentions(profile_with_ties) + assert correct == test + assert isinstance(test["A"], float) + + +def test_fast_mentions_with_duplicates(): + correct = {"A": 3 / 2, "B": 23 / 2, "C": 1 / 2} + test = fast_mentions(profile_with_duplicates) + assert correct == test + assert isinstance(test["A"], float) + + +@pytest.mark.slow +def test_fast_and_slow_mentions_are_same(): + profile = RankProfile.from_csv(CSV_DIR / "albany_profile.csv") + assert mentions(profile) == fast_mentions(profile) + + def test_mentions_errors(): with pytest.raises(TypeError, match="Profile must be of type RankProfile"): mentions(cast(RankProfile, ScoreProfile(ballots=(ScoreBallot(scores={"A": 3}),)))) +def test_fast_mentions_errors(): + with pytest.raises(TypeError, match="Profile must be of type RankProfile"): + fast_mentions(cast(RankProfile, ScoreProfile(ballots=(ScoreBallot(scores={"A": 3}),)))) + + def test_borda_no_ties(): true_borda = {"A": 15 / 2, "B": 9, "C": 19 / 2} From 3117f39ceeff71af7561302674865babb80e9b45 Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 21:06:13 +1000 Subject: [PATCH 3/7] ty negotiations --- src/votekit/utils.py | 43 ++++++++++++++++++++----------------------- tests/test_utils.py | 2 +- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 40dbe493..fff3903a 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,9 +1,8 @@ import math import random -from fractions import Fraction from itertools import permutations from typing import Literal, Optional, Sequence, Union - +from typing import Any, cast import numpy as np import pandas as pd from numpy.typing import NDArray @@ -384,17 +383,7 @@ def _ballots_are_materialized(profile: RankProfile) -> bool: return "ballots" in profile.__dict__ -def _mentions_from_df(profile: RankProfile) -> dict[str, Fraction]: - """ - Faster pandas-based mentions calculator for RankProfiles where ballots are not materialized. - - Args: - profile (RankProfile): RankProfile of ballots. - - Returns: - dict[str, float]: - Dictionary mapping candidates to mention totals (values). - """ +def _mentions_from_df(profile: RankProfile) -> dict[str, float]: assert profile.max_ranking_length is not None ranking_cols = [ @@ -404,32 +393,40 @@ def _mentions_from_df(profile: RankProfile) -> dict[str, Fraction]: tilde = frozenset({"~"}) - rank_sets = profile.df[ranking_cols].stack() + rank_sets = cast( + pd.Series[Any], + profile.df[ranking_cols].stack(), + ) - rank_sets = rank_sets[ + mask = cast( + pd.Series[Any], rank_sets.map( lambda s: isinstance(s, frozenset) and bool(s) and s != tilde - ) - ] + ), + ) + + rank_sets = rank_sets[mask] exploded = rank_sets.explode() if exploded.empty: - return {c: Fraction(0) for c in profile.candidates} + return {c: 0.0 for c in profile.candidates} - weights = profile.df["Weight"].reindex( - exploded.index.get_level_values(0) - ).to_numpy() + weights = ( + profile.df["Weight"] + .reindex(exploded.index.get_level_values(0)) + .to_numpy() + ) totals = pd.Series(weights).groupby(exploded.to_numpy(), sort=False).sum() return { - c: totals.get(c, Fraction(0)) + c: totals.get(c, 0.0) for c in profile.candidates } -def fast_mentions(profile: RankProfile) -> dict[str, Fraction]: +def fast_mentions(profile: RankProfile) -> dict[str, float]: """ Decides which way to compute mentions based on whether ballots are materialized in the profile. If they are, uses the traditional mentions calculation. diff --git a/tests/test_utils.py b/tests/test_utils.py index 59eb6e86..1a943623 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -300,7 +300,7 @@ def test_fast_mentions_with_duplicates(): @pytest.mark.slow def test_fast_and_slow_mentions_are_same(): - profile = RankProfile.from_csv(CSV_DIR / "albany_profile.csv") + profile = cast(RankProfile, RankProfile.from_csv(CSV_DIR / "albany_profile.csv")) assert mentions(profile) == fast_mentions(profile) From 7c0f04c431ffc34df78e02d4ebacaf5d841dc02b Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 21:18:20 +1000 Subject: [PATCH 4/7] don't subscript typing --- src/votekit/utils.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index fff3903a..e73ffbce 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,8 +1,8 @@ import math import random from itertools import permutations -from typing import Literal, Optional, Sequence, Union -from typing import Any, cast +from typing import Any, Literal, Optional, Sequence, Union, cast + import numpy as np import pandas as pd from numpy.typing import NDArray @@ -386,23 +386,18 @@ def _ballots_are_materialized(profile: RankProfile) -> bool: def _mentions_from_df(profile: RankProfile) -> dict[str, float]: assert profile.max_ranking_length is not None - ranking_cols = [ - f"Ranking_{i}" - for i in range(1, profile.max_ranking_length + 1) - ] + ranking_cols = [f"Ranking_{i}" for i in range(1, profile.max_ranking_length + 1)] tilde = frozenset({"~"}) rank_sets = cast( - pd.Series[Any], + pd.Series, profile.df[ranking_cols].stack(), ) mask = cast( - pd.Series[Any], - rank_sets.map( - lambda s: isinstance(s, frozenset) and bool(s) and s != tilde - ), + pd.Series, + rank_sets.map(lambda s: isinstance(s, frozenset) and bool(s) and s != tilde), ) rank_sets = rank_sets[mask] @@ -412,24 +407,17 @@ def _mentions_from_df(profile: RankProfile) -> dict[str, float]: if exploded.empty: return {c: 0.0 for c in profile.candidates} - weights = ( - profile.df["Weight"] - .reindex(exploded.index.get_level_values(0)) - .to_numpy() - ) + weights = profile.df["Weight"].reindex(exploded.index.get_level_values(0)).to_numpy() totals = pd.Series(weights).groupby(exploded.to_numpy(), sort=False).sum() - return { - c: totals.get(c, 0.0) - for c in profile.candidates - } + return {c: totals.get(c, 0.0) for c in profile.candidates} def fast_mentions(profile: RankProfile) -> dict[str, float]: """ - Decides which way to compute mentions based on whether ballots are materialized in the profile. - If they are, uses the traditional mentions calculation. + Decides which way to compute mentions based on whether ballots are materialized in the profile. + If they are, uses the traditional mentions calculation. If not, uses a faster pandas-based approach. Args: From 33e51dff720dbe950470c6f47f297e970a8957ba Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 21:22:28 +1000 Subject: [PATCH 5/7] fix unused import --- src/votekit/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index e73ffbce..1eb81b8c 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,7 +1,7 @@ import math import random from itertools import permutations -from typing import Any, Literal, Optional, Sequence, Union, cast +from typing import Literal, Optional, Sequence, Union, cast import numpy as np import pandas as pd From 936ee066d62c05d1267d75ac4a3e3ed7485c29f7 Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 22:26:54 +1000 Subject: [PATCH 6/7] not sure how I broke the 3.11 tests? --- src/votekit/utils.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 1eb81b8c..a5b3d38b 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,7 +1,7 @@ import math import random from itertools import permutations -from typing import Literal, Optional, Sequence, Union, cast +from typing import Any, Literal, Optional, Sequence, Union, cast import numpy as np import pandas as pd @@ -386,33 +386,37 @@ def _ballots_are_materialized(profile: RankProfile) -> bool: def _mentions_from_df(profile: RankProfile) -> dict[str, float]: assert profile.max_ranking_length is not None - ranking_cols = [f"Ranking_{i}" for i in range(1, profile.max_ranking_length + 1)] + ranking_cols = [ + f"Ranking_{i}" + for i in range(1, profile.max_ranking_length + 1) + ] tilde = frozenset({"~"}) - rank_sets = cast( - pd.Series, - profile.df[ranking_cols].stack(), - ) + rank_sets = cast(Any, profile.df[ranking_cols].stack()) - mask = cast( - pd.Series, - rank_sets.map(lambda s: isinstance(s, frozenset) and bool(s) and s != tilde), - ) + mask = rank_sets.map( + lambda s: isinstance(s, frozenset) and bool(s) and s != tilde + ) rank_sets = rank_sets[mask] - exploded = rank_sets.explode() if exploded.empty: return {c: 0.0 for c in profile.candidates} - weights = profile.df["Weight"].reindex(exploded.index.get_level_values(0)).to_numpy() + weights = ( + profile.df["Weight"] + .reindex(exploded.index.get_level_values(0)) + .to_numpy() + ) totals = pd.Series(weights).groupby(exploded.to_numpy(), sort=False).sum() - return {c: totals.get(c, 0.0) for c in profile.candidates} - + return { + c: totals.get(c, 0.0) + for c in profile.candidates + } def fast_mentions(profile: RankProfile) -> dict[str, float]: """ From b1870feb8483f60d7c8e36da74cd329cede35c7e Mon Sep 17 00:00:00 2001 From: Edouard Heitzmann Date: Sat, 23 May 2026 22:27:39 +1000 Subject: [PATCH 7/7] lint --- src/votekit/utils.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/votekit/utils.py b/src/votekit/utils.py index a5b3d38b..9631360f 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -386,18 +386,13 @@ def _ballots_are_materialized(profile: RankProfile) -> bool: def _mentions_from_df(profile: RankProfile) -> dict[str, float]: assert profile.max_ranking_length is not None - ranking_cols = [ - f"Ranking_{i}" - for i in range(1, profile.max_ranking_length + 1) - ] + ranking_cols = [f"Ranking_{i}" for i in range(1, profile.max_ranking_length + 1)] tilde = frozenset({"~"}) rank_sets = cast(Any, profile.df[ranking_cols].stack()) - mask = rank_sets.map( - lambda s: isinstance(s, frozenset) and bool(s) and s != tilde - ) + mask = rank_sets.map(lambda s: isinstance(s, frozenset) and bool(s) and s != tilde) rank_sets = rank_sets[mask] exploded = rank_sets.explode() @@ -405,18 +400,12 @@ def _mentions_from_df(profile: RankProfile) -> dict[str, float]: if exploded.empty: return {c: 0.0 for c in profile.candidates} - weights = ( - profile.df["Weight"] - .reindex(exploded.index.get_level_values(0)) - .to_numpy() - ) + weights = profile.df["Weight"].reindex(exploded.index.get_level_values(0)).to_numpy() totals = pd.Series(weights).groupby(exploded.to_numpy(), sort=False).sum() - return { - c: totals.get(c, 0.0) - for c in profile.candidates - } + return {c: totals.get(c, 0.0) for c in profile.candidates} + def fast_mentions(profile: RankProfile) -> dict[str, float]: """