diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index 88e12d9f..50c2c3db 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -25,6 +25,8 @@ _validate_score_csv_format, ) from votekit.pref_profile.utils import ( + _sum_rank_profiles, + _sum_score_profiles, convert_row_to_rank_ballot, convert_row_to_score_ballot, ) @@ -276,6 +278,9 @@ def __str__(self) -> str: def group_ballots(self) -> Self: raise NotImplementedError + def copy(self) -> Self: + raise NotImplementedError + @property def ballots(self) -> tuple[Ballot, ...]: raise NotImplementedError @@ -672,44 +677,7 @@ def __add__(self, other) -> RankProfile: """ Add two PreferenceProfiles by combining their ballot lists. """ - if not isinstance(other, RankProfile): - raise TypeError("Unsupported operand type. Must be an instance of RankProfile.") - - assert self.max_ranking_length is not None and other.max_ranking_length is not None - max_ranking_length = max([self.max_ranking_length, other.max_ranking_length]) - candidates = list(set(self.candidates).union(other.candidates)) - - df_1 = self.df.copy() - df_2 = other.df.copy() - - if self.max_ranking_length < max_ranking_length: - for i in range(self.max_ranking_length, max_ranking_length): - df_1.insert( - len(df_1.columns), - f"Ranking_{i + 1}", - pd.Series([frozenset("~")] * len(df_1), dtype=object, index=df_1.index), - ) - if other.max_ranking_length < max_ranking_length: - for i in range(other.max_ranking_length, max_ranking_length): - df_2.insert( - len(df_2.columns), - f"Ranking_{i + 1}", - pd.Series([frozenset("~")] * len(df_2), dtype=object, index=df_2.index), - ) - - new_df = pd.concat([df_1, df_2], ignore_index=True) - new_df.index.name = "Ballot Index" - ranking_cols = [c for c in new_df.columns if "Ranking_" in c] - new_df[ranking_cols] = new_df[ranking_cols].astype("object") - new_df = new_df[ - [f"Ranking_{i + 1}" for i in range(max_ranking_length)] + ["Weight", "Voter Set"] - ] - - return RankProfile( - candidates=candidates, - df=new_df, - max_ranking_length=max_ranking_length, - ) + return _sum_rank_profiles([self, other]) def group_ballots(self) -> RankProfile: """ @@ -745,6 +713,19 @@ def group_ballots(self) -> RankProfile: max_ranking_length=self.max_ranking_length, ) + def copy(self) -> RankProfile: + """ + Returns a copy of a RankProfile + + Returns: + RankProfile: New RankProfile object + """ + return RankProfile( + candidates=self.candidates, + df=self.df.copy(), + max_ranking_length=self.max_ranking_length, + ) + def __eq__(self, other): if not isinstance(other, RankProfile): return False @@ -1254,29 +1235,7 @@ def __add__(self, other): """ Add two PreferenceProfiles by combining their ballot lists. """ - if not isinstance(other, ScoreProfile): - raise TypeError("Unsupported operand type. Must be an instance of ScoreProfile.") - - df_1 = self.df.copy() - df_2 = other.df.copy() - - cand1 = set(self.candidates) - cand2 = set(other.candidates) - for cand in cand2 - cand1: - df_1[cand] = [np.nan] * len(df_1) - for cand in cand1 - cand2: - df_2[cand] = [np.nan] * len(df_2) - - new_df = pd.concat([df_1, df_2], ignore_index=True) - new_df.index.name = "Ballot Index" - - new_candidates = sorted(set(self.candidates).union(other.candidates)) - new_df = new_df[new_candidates + ["Weight", "Voter Set"]] - - return ScoreProfile( - candidates=new_candidates, - df=new_df, - ) + return _sum_score_profiles([self, other]) def group_ballots(self) -> ScoreProfile: """ @@ -1312,6 +1271,18 @@ def group_ballots(self) -> ScoreProfile: candidates=self.candidates, ) + def copy(self) -> ScoreProfile: + """ + Returns a copy of a ScoreProfile + + Returns: + ScoreProfile: New ScoreProfile object + """ + return ScoreProfile( + df=self.df.copy(), + candidates=self.candidates, + ) + def __eq__(self, other): if not isinstance(other, ScoreProfile): return False diff --git a/src/votekit/pref_profile/utils.py b/src/votekit/pref_profile/utils.py index 597be5e4..664ae580 100644 --- a/src/votekit/pref_profile/utils.py +++ b/src/votekit/pref_profile/utils.py @@ -405,7 +405,7 @@ def convert_rank_profile_to_score_profile_via_score_vector( raise ValueError("Ballots must not contain ties.") cand_to_score_list = { - c: [np.nan for _ in range(len(rank_profile.df))] for c in rank_profile.candidates + cand: [np.nan for _ in range(len(rank_profile.df))] for cand in rank_profile.candidates } for df_tuple in rank_profile.df[ranking_cols].itertuples(): @@ -427,3 +427,144 @@ def convert_rank_profile_to_score_profile_via_score_vector( df=new_df, candidates=rank_profile.candidates, ) + + +def _sum_rank_profiles(rank_profiles: Sequence[PreferenceProfile]) -> RankProfile: + """ + Helper function for sum_profiles that sums RankProfiles. + + Args: + rank_profiles (Sequence[PreferenceProfile]): List of profiles to sum. + + Raises: + TypeError: Each profile must be of RankProfile type + """ + + from votekit.pref_profile.pref_profile import RankProfile + + if len(rank_profiles) == 1 and isinstance(rank_profiles[0], RankProfile): + return rank_profiles[0].copy() + + if not (all(isinstance(p, RankProfile) for p in rank_profiles)): + invalid_profiles = [ + (i, type(p).__name__) + for i, p in enumerate(rank_profiles) + if not isinstance(p, RankProfile) + ] + invalid_profiles_str = ", ".join(f"index {i} ({t})" for i, t in invalid_profiles) + raise TypeError( + "All profiles must be of the same type, RankProfile. " + f"non-RankProfiles found at: {invalid_profiles_str}" + ) + + candidates = list(set().union(*[set(profile.candidates) for profile in rank_profiles])) + max_ranking_length = max([profile.max_ranking_length for profile in rank_profiles]) + + total_dfs = [] + for profile in rank_profiles: + assert profile.max_ranking_length is not None + curr_df = ( + profile.df.copy() if profile.max_ranking_length < max_ranking_length else profile.df + ) + for i in range(profile.max_ranking_length, max_ranking_length): + curr_df.insert( + len(curr_df.columns), + f"Ranking_{i + 1}", + pd.Series([frozenset("~")] * len(curr_df), dtype=object, index=curr_df.index), + ) + total_dfs.append(curr_df) + + new_df = pd.concat(total_dfs, ignore_index=True) + new_df.index.name = "Ballot Index" + ranking_cols = [col for col in new_df.columns if "Ranking_" in col] + new_df[ranking_cols] = new_df[ranking_cols].astype("object") + new_df = new_df[ + [f"Ranking_{i + 1}" for i in range(max_ranking_length)] + ["Weight", "Voter Set"] + ] + + return RankProfile( + candidates=candidates, + df=new_df, + max_ranking_length=max_ranking_length, + ) + + +def _sum_score_profiles(score_profiles: Sequence[PreferenceProfile]) -> ScoreProfile: + """ + Helper function for sum_profiles that sums ScoreProfiles. + + Args: + score_profiles (Sequence[PreferenceProfile]): The profiles to sum. + + Raises: + TypeError: Each profile must be of ScoreProfile type + """ + + from votekit.pref_profile.pref_profile import ScoreProfile + + if len(score_profiles) == 1 and isinstance(score_profiles[0], ScoreProfile): + return score_profiles[0].copy() + + if not (all(isinstance(p, ScoreProfile) for p in score_profiles)): + invalid_profiles = [ + (i, type(p).__name__) + for i, p in enumerate(score_profiles) + if not isinstance(p, ScoreProfile) + ] + invalid_profiles_str = ", ".join(f"index {i} ({t})" for i, t in invalid_profiles) + raise TypeError( + "All profiles must be of the same type, ScoreProfile. " + f"non-ScoreProfiles found at: {invalid_profiles_str}" + ) + + total_cand = set().union(*[set(profile.candidates) for profile in score_profiles]) + total_dfs = [] + for profile in score_profiles: + curr_cand = set(profile.candidates) + curr_df = profile.df.copy() if curr_cand < total_cand else profile.df + for cand in total_cand - curr_cand: + curr_df[cand] = [np.nan] * len(curr_df) + total_dfs.append(curr_df) + + new_df = pd.concat(total_dfs, ignore_index=True) + new_df.index.name = "Ballot Index" + new_candidates = sorted(total_cand) + new_df = new_df[new_candidates + ["Weight", "Voter Set"]] + + return ScoreProfile( + candidates=new_candidates, + df=new_df, + ) + + +def sum_profiles(profiles: Sequence[PreferenceProfile]) -> PreferenceProfile: + """ + Combines multiple PreferenceProfiles by combining their ball lists. + + Args: + profiles (Sequence[PreferenceProfile]): The profiles to sum. + + Returns: + PreferenceProfile: A new PreferenceProfile object containing the combined profiles. + + Raises: + ValueError: Cannot sum an empty list of profiles. + TypeError: Can only sum profiles of type RankProfile or ScoreProfile. + """ + + from votekit.pref_profile.pref_profile import RankProfile, ScoreProfile + + if len(profiles) == 0: + raise ValueError("Cannot sum an empty list of profiles.") + + if isinstance(profiles[0], RankProfile): + return _sum_rank_profiles(profiles) + + elif isinstance(profiles[0], ScoreProfile): + return _sum_score_profiles(profiles) + + else: + raise TypeError( + f"Cannot sum profiles of type {type(profiles[0]).__name__}. " + "List can only contain RankProfiles or ScoreProfiles." + ) diff --git a/tests/pref_profile/utils/test_sum_profiles.py b/tests/pref_profile/utils/test_sum_profiles.py new file mode 100644 index 00000000..64bdaed1 --- /dev/null +++ b/tests/pref_profile/utils/test_sum_profiles.py @@ -0,0 +1,215 @@ +import pytest + +from votekit.ballot import RankBallot, ScoreBallot +from votekit.pref_profile import RankProfile, ScoreProfile +from votekit.pref_profile.utils import sum_profiles + + +def test_sum_profiles_with_mixed_types_raises_type_error(): + score_profile = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 2, "B": 2}, weight=2), + ScoreBallot(scores={"A": 2, "C": 2}, voter_set={"Chris"}), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + ) + rank_profile = RankProfile( + ballots=[ + RankBallot(ranking=({"A"}, {"B"}, {"C"}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"D"}), voter_set={"Chris"}), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + max_ranking_length=3, + ) + with pytest.raises( + TypeError, + match="All profiles must be of the same type.", + ): + sum_profiles([score_profile, score_profile, score_profile, rank_profile]) + + with pytest.raises( + TypeError, + match="All profiles must be of the same type.", + ): + sum_profiles([rank_profile, score_profile, score_profile, rank_profile]) + + +def test_sum_empty_profile_raises_value_error(): + with pytest.raises( + ValueError, + match="Cannot sum an empty list of profiles", + ): + sum_profiles([]) + + +def test_sum_one_profile_returns_copy_of_same_profile(): + profile = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 2, "B": 2}, weight=2), + ScoreBallot(scores={"A": 2, "C": 2}, voter_set={"Chris"}), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + ) + summed_profile = sum_profiles([profile]) + assert summed_profile == profile + assert id(summed_profile) != id(profile) + + profile = RankProfile( + ballots=[ + RankBallot(ranking=({"A"}, {"B"}, {"C"}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"D"}), voter_set={"Chris"}), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + max_ranking_length=3, + ) + summed_profile = sum_profiles([profile]) + assert summed_profile == profile + assert id(summed_profile) != id(profile) + + +def test_sum_one_profile_no_list_raises_type_error(): + profile = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 2, "B": 2}, weight=2), + ScoreBallot(scores={"A": 2, "C": 2}, voter_set={"Chris"}), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + ) + with pytest.raises(TypeError, match="has no len()"): + sum_profiles(profile) # type: ignore[arg-type] + + profile = RankProfile( + ballots=[ + RankBallot(ranking=({"A"}, {"B"}, {"C"}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"D"}), voter_set={"Chris"}), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + max_ranking_length=3, + ) + with pytest.raises(TypeError, match="has no len()"): + sum_profiles(profile) # type: ignore[arg-type] + + +def test_sum_score_profiles(): + profile_1 = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 2, "B": 2}, weight=2), + ScoreBallot(scores={"A": 2, "C": 2}, voter_set={"Chris"}), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + ) + + profile_2 = ScoreProfile( + ballots=[ + ScoreBallot(scores={"D": 2, "E": 2}, weight=2), + ScoreBallot(scores={"D": 2, "E": 2, "F": 3.1}, weight=2), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["D", "E", "F"], + ) + + profile_3 = ScoreProfile( + ballots=[ + ScoreBallot(scores={"G": 2, "H": 2}, weight=2), + ScoreBallot(scores={"G": 2, "H": 2, "I": 3.1}, weight=2), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["G", "H", "I"], + ) + summed_profile = sum_profiles([profile_1, profile_2, profile_3]) + true_summed_profile = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 2, "B": 2}, weight=2), + ScoreBallot(scores={"A": 2, "C": 2}, voter_set={"Chris"}), + ScoreBallot(), + ScoreBallot(weight=0), + ScoreBallot(scores={"D": 2, "E": 2}, weight=2), + ScoreBallot(scores={"D": 2, "E": 2, "F": 3.1}, weight=2), + ScoreBallot(), + ScoreBallot(weight=0), + ScoreBallot(scores={"G": 2, "H": 2}, weight=2), + ScoreBallot(scores={"G": 2, "H": 2, "I": 3.1}, weight=2), + ScoreBallot(), + ScoreBallot(weight=0), + ], + candidates=["A", "B", "C", "D", "E", "F", "G", "H", "I"], + ) + + assert set(summed_profile.candidates) == set(["A", "B", "C", "D", "E", "F", "G", "H", "I"]) + assert isinstance(summed_profile, ScoreProfile) + assert true_summed_profile == summed_profile + + +def test_sum_rank_profiles(): + profile_1 = RankProfile( + ballots=[ + RankBallot(ranking=({"A"}, {"B"}, {"C"}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"D"}), voter_set={"Chris"}), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["A", "B", "C", "D"], + max_ranking_length=3, + ) + + profile_2 = RankProfile( + ballots=[ + RankBallot(ranking=({"E"}, {"D"}, {"F"}, {"E"}), weight=2), + RankBallot(ranking=({"D"}, {"E"}, {"F"}), weight=2), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["D", "E", "F"], + max_ranking_length=0, + ) + + profile_3 = RankProfile( + ballots=[ + RankBallot(ranking=({"G"}, {"H"}, {"I"}, {"G"}), weight=2), + RankBallot(ranking=({"G"}, {"H"}, {"I"}), weight=2), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["G", "H", "I"], + max_ranking_length=0, + ) + summed_profile = sum_profiles([profile_1, profile_2, profile_3]) + true_summed_profile = RankProfile( + ballots=[ + RankBallot(ranking=({"A"}, {"B"}, {"C"}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"D"}), voter_set={"Chris"}), + RankBallot(), + RankBallot(weight=0), + RankBallot(ranking=({"E"}, {"D"}, {"F"}, {"E"}), weight=2), + RankBallot(ranking=({"D"}, {"E"}, {"F"}), weight=2), + RankBallot(), + RankBallot(weight=0), + RankBallot(ranking=({"G"}, {"H"}, {"I"}, {"G"}), weight=2), + RankBallot(ranking=({"G"}, {"H"}, {"I"}), weight=2), + RankBallot(), + RankBallot(weight=0), + ], + candidates=["A", "B", "C", "D", "E", "F", "G", "H", "I"], + max_ranking_length=4, + ) + + assert set(summed_profile.candidates) == set(["A", "B", "C", "D", "E", "F", "G", "H", "I"]) + assert summed_profile.max_ranking_length == 4 + assert isinstance(summed_profile, RankProfile) + assert true_summed_profile == summed_profile