From ac74d1d26bbe8a6727494573216e55deeaec3430 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 12 Jun 2026 10:44:24 -0500 Subject: [PATCH 01/11] initial pass, translate cand to ids --- src/votekit/ballot.py | 78 ++++++----- src/votekit/pref_profile/pref_profile.py | 160 ++++++++++++++++------- src/votekit/types.py | 3 + 3 files changed, 160 insertions(+), 81 deletions(-) create mode 100644 src/votekit/types.py diff --git a/src/votekit/ballot.py b/src/votekit/ballot.py index 1d30ada9..700d210f 100644 --- a/src/votekit/ballot.py +++ b/src/votekit/ballot.py @@ -3,8 +3,10 @@ from numbers import Real from typing import Iterable, Optional, Sequence, TypeAlias, Union, overload -Ranking: TypeAlias = Optional[tuple[frozenset[str], ...]] -RankingLike: TypeAlias = Optional[Sequence[str | Iterable[str]]] +from votekit.types import Candidate + +Ranking: TypeAlias = Optional[tuple[frozenset[Candidate], ...]] +RankingLike: TypeAlias = Optional[Sequence[Candidate | Iterable[Candidate]]] class Ballot: @@ -12,24 +14,25 @@ class Ballot: Ballot parent class, contains voter set and assigned weight. Args: - ranking (Optional[Sequence[str | Iterable[str]]]): Candidate ranking. + ranking (Optional[Sequence[Candidate | Iterable[Candidate]]]): Candidate ranking. Entry i of the sequence is a candidate or iterable of candidates ranked in position i. - Defaults to None. Will be coerced to tuple[frozenset[str], ...]. + Candidate can be represented as a str or int. Allow mix of types in candidate set. + Defaults to None. Will be coerced to tuple[frozenset[Candidate], ...]. weight (Union[float, int]): Weight assigned to a given ballot. Defaults to 1.0 Can be input as int or float, and will be coerced to float. voter_set (Union[set[str], frozenset[str]]): Set of voters who cast the ballot. Defaults to frozenset(). Will be coerced to frozenset. - scores (Optional[dict[str, Union[int, float]]): Scores for individual candidates. 
+ scores (Optional[dict[Candidate, Union[int, float]]): Scores for individual candidates. Defaults to None. Values can be input as int or float but will be coerced to float. Only retains non-zero scores. Attributes: - ranking (Optional[tuple[frozenset[str], ...]]): Tuple of candidate ranking. + ranking (Optional[tuple[frozenset[Candidate], ...]]): Tuple of candidate ranking. Entry i of the tuple is a frozenset of candidates ranked in position i. weight (float): Weight assigned to a given ballot. voter_set (frozenset[str]): Set of voters who cast the ballot. - scores (Optional[dict[str, float]]): Scores for individual candidates. + scores (Optional[dict[Candidate, float]]): Scores for individual candidates. Raises: TypeError: Only one of ranking or scores can be provided. @@ -49,7 +52,7 @@ class Ballot: def __new__( cls, *, - ranking: Sequence[str | Iterable[str]], + ranking: Sequence[Candidate | Iterable[Candidate]], scores: None = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), @@ -60,7 +63,7 @@ def __new__( cls, *, ranking: None = None, - scores: dict[str, Union[int, float]], + scores: dict[Candidate, Union[int, float]], weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ) -> ScoreBallot: ... @@ -69,8 +72,8 @@ def __new__( def __new__( cls, *, - ranking: Optional[Sequence[str | Iterable[str]]] = None, - scores: Optional[dict[str, Union[int, float]]] = None, + ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, + scores: Optional[dict[Candidate, Union[int, float]]] = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ) -> Ballot: ... 
@@ -78,8 +81,8 @@ def __new__( def __new__( cls, *, - ranking: Optional[Sequence[str | Iterable[str]]] = None, - scores: Optional[dict[str, Union[int, float]]] = None, + ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, + scores: Optional[dict[Candidate, Union[int, float]]] = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -95,8 +98,8 @@ def __new__( def __init__( self, *, - ranking: Optional[Sequence[str | Iterable[str]]] = None, - scores: Optional[dict[str, Union[int, float]]] = None, + ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, + scores: Optional[dict[Candidate, Union[int, float]]] = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -170,7 +173,7 @@ def __init__( self, *, ranking: RankingLike = None, - scores: Optional[dict[str, Union[int, float]]] = None, + scores: Optional[dict[Candidate, Union[int, float]]] = None, weight: Union[int, float] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -194,10 +197,14 @@ def _convert_ranking_candidates_to_frozenset_strip_whitespace( normalized_ranking = [] for cand_set in ranking: - if isinstance(cand_set, str): - normalized_ranking.append(frozenset({cand_set.strip()})) + if isinstance(cand_set, Candidate): + normalized_ranking.append( + frozenset({cand_set.strip() if isinstance(cand_set, str) else cand_set}) + ) else: - normalized_ranking.append(frozenset(c.strip() for c in cand_set)) + normalized_ranking.append( + frozenset(c.strip() if isinstance(c, str) else c for c in cand_set) + ) return tuple(normalized_ranking) def _validate_ranking_candidates(self, ranking: Ranking): @@ -209,6 +216,7 @@ def _validate_ranking_candidates(self, ranking: Ranking): " '~' is a reserved character and cannot be used for" " candidate names." ) + # add a warning if candidates are of mixed type and if str and int are equivalent? 
def __eq__(self, other): if not isinstance(other, RankBallot): @@ -266,36 +274,40 @@ def __init__( self, *, ranking: RankingLike = None, - scores: Optional[dict[str, Union[int, float]]] = None, + scores: Optional[dict[Candidate, Union[int, float]]] = None, weight: Union[int, float] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): if ranking is not None: raise TypeError("Only one of ranking or scores can be provided.") + scores = self._convert_scores_to_float_strip_whitespace(scores) self._validate_scores_candidates(scores) - self.scores = self._convert_scores_to_float_strip_whitespace(scores) + self.scores = scores super().__init__(weight=weight, voter_set=voter_set) - def _validate_scores_candidates(self, scores: Optional[dict[str, Union[int, float]]]): - if scores is not None: - if "~" in scores: - raise ValueError( - f"Candidate '~' found in ballot scores {list(scores.keys())}." - " '~' is a reserved character and cannot be used for" - " candidate names." - ) - def _convert_scores_to_float_strip_whitespace( - self, scores: Optional[dict[str, float]] - ) -> Optional[dict[str, float]]: + self, scores: Optional[dict[Candidate, float]] + ) -> Optional[dict[Candidate, float]]: if scores is None: return None if any(not isinstance(s, Real) for s in scores.values()): raise TypeError("Score values must be numeric.") - return {c.strip(): float(s) for c, s in scores.items() if s != 0} + return { + c.strip() if isinstance(c, str) else c: float(s) for c, s in scores.items() if s != 0 + } + + def _validate_scores_candidates(self, scores: Optional[dict[Candidate, Union[int, float]]]): + if scores is not None: + if "~" in scores: + raise ValueError( + f"Candidate '~' found in ballot scores {list(scores.keys())}." + " '~' is a reserved character and cannot be used for" + " candidate names." 
+ ) + # add a warning if candidates are of mixed type def __eq__(self, other): if not isinstance(other, ScoreBallot): diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index 88e12d9f..6a747205 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -28,6 +28,7 @@ convert_row_to_rank_ballot, convert_row_to_score_ballot, ) +from votekit.types import Candidate class PreferenceProfile: @@ -163,10 +164,10 @@ def __init__( max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): - self.candidates_cast = candidates_cast - self.candidates = candidates + self._candidates_cast = candidates_cast + self._candidates = candidates self.max_ranking_length = max_ranking_length - self.df = df + self._df = df self.total_ballot_wt = self._find_total_ballot_wt() self.num_ballots = self._find_num_ballots() @@ -181,7 +182,7 @@ def _find_num_ballots(self) -> int: Returns: int: num ballots """ - return len(self.df) + return len(self._df) def _find_total_ballot_wt(self) -> float: """ @@ -192,8 +193,8 @@ def _find_total_ballot_wt(self) -> float: """ total_weight = 0 - if not self.df.equals(pd.DataFrame()): - total_weight = self.df["Weight"].sum() + if not self._df.equals(pd.DataFrame()): + total_weight = self._df["Weight"].sum() return total_weight @@ -206,8 +207,8 @@ def _validate_and_set_candidates(self) -> None: ProfileError: Candidate names must not be the same as "Ranking_i". ProfileError: Candidate names must be unique. 
""" - for cand in self.candidates: - if any(f"Ranking_{i}" == cand for i in range(len(self.candidates))): + for cand in self._candidates: + if any(f"Ranking_{i}" == cand for i in range(len(self._candidates))): raise ProfileError( ( f"{cand} is a name reserved for ranking columns, it cannot be used as a " @@ -215,18 +216,17 @@ def _validate_and_set_candidates(self) -> None: ) ) - if not len(set(self.candidates)) == len(self.candidates): + if not len(set(self._candidates)) == len(self._candidates): raise ProfileError("All candidates must be unique.") - - if not set(self.candidates_cast).issubset(self.candidates): + if not set(self._candidates_cast).issubset(self._candidates): raise ProfileError( "Candidates cast are not a subset of candidates list. The following " " candidates are in candidates_cast but not candidates: " - f"{set(self.candidates_cast) - set(self.candidates)}." + f"{set(self._candidates_cast) - set(self._candidates)}." ) - self.candidates = tuple([c.strip() for c in self.candidates]) - self.candidates_cast = tuple([c.strip() for c in self.candidates_cast]) + self._candidates = tuple([c.strip() for c in self._candidates]) + self._candidates_cast = tuple([c.strip() for c in self._candidates_cast]) def __setattr__(self, name, value): if getattr(self, "_is_frozen", False): @@ -321,23 +321,30 @@ def __init__( self, *, ballots: Sequence[Ballot] = tuple(), - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): - self.candidates = tuple(candidates) + candidates = tuple(candidates) + self._candidates = tuple([str(cand_id) for cand_id in range(len(candidates))]) + candidate_id_map = { + cand: cand_id for cand, cand_id in zip(candidates, self._candidates, strict=True) + } + self.max_ranking_length = 0 if max_ranking_length is None else max_ranking_length if df.equals(pd.DataFrame()): - ( - self.df, - self.candidates_cast, - ) = 
self._init_from_rank_ballots(cast(Sequence[RankBallot], ballots)) - if self.candidates == tuple(): - self.candidates = self.candidates_cast + (self._df, self._candidates_cast, candidate_id_map) = self._init_from_rank_ballots( + cast(Sequence[RankBallot], ballots), candidate_id_map + ) else: - self.df, self.candidates_cast = self._init_from_rank_df(df) + self._df, self._candidates_cast, candidate_id_map = self._init_from_rank_df( + df, candidate_id_map + ) + + if self._candidates == tuple(): + self._candidates = self._candidates_cast self.max_ranking_length = self._find_max_ranking_length() @@ -353,13 +360,27 @@ def __init__( ) raise ValueError(msg) + self.id_candidate_map = {cand_id: cand for cand, cand_id in candidate_id_map.items()} + super().__init__( - candidates=self.candidates, - candidates_cast=self.candidates_cast, - df=self.df, + candidates=self._candidates, + candidates_cast=self._candidates_cast, + df=self._df, max_ranking_length=self.max_ranking_length, ) + @cached_property + def df(self) -> pd.DataFrame: + return self._translate_df_to_candidate_ids(self._df, self.id_candidate_map) + + @cached_property + def candidates(self) -> tuple[Candidate, ...]: + return tuple(self.id_candidate_map[cand_id] for cand_id in self._candidates) + + @cached_property + def candidates_cast(self) -> tuple[Candidate, ...]: + return tuple(self.id_candidate_map[cand_id] for cand_id in self._candidates_cast) + def __update_ballot_ranking_data( self, rank_ballot_data: dict[str, list], @@ -367,6 +388,7 @@ def __update_ballot_ranking_data( rank_ballot: RankBallot, candidates_cast: list[str], num_ballots: int, + candidate_id_map: dict[Candidate, str], ): """ Update the ranking data from a ballot. @@ -377,6 +399,7 @@ def __update_ballot_ranking_data( rank_ballot (RankBallot): Ballot. candidates_cast (list[str]): List of candidates who have received votes. num_ballots (int): Total number of ballots. 
+ candidate_id_map (dict[Candidate,str]): mapping of candidate to their id (int) """ if rank_ballot.ranking is None: @@ -384,14 +407,19 @@ def __update_ballot_ranking_data( for j, cand_set in enumerate(rank_ballot.ranking): for c in cand_set: - if self.candidates != tuple(): - if c not in self.candidates: + if self._candidates != tuple(): + if c not in candidate_id_map: raise ProfileError( f"Candidate {c} found in ballot {rank_ballot} but not in " - f"candidate list {self.candidates}." + f"candidate list {candidate_id_map.keys()}." ) - if rank_ballot.weight > 0 and c not in candidates_cast: - candidates_cast.append(c) + if rank_ballot.weight > 0: + if c not in candidate_id_map: + candidate_id_map[c] = str(len(candidate_id_map)) + cand_id = candidate_id_map[c] + if cand_id not in candidates_cast: + candidates_cast.append(candidate_id_map[c]) + if f"Ranking_{j + 1}" not in rank_ballot_data: assert self.max_ranking_length is not None @@ -401,8 +429,8 @@ def __update_ballot_ranking_data( f"ballot {rank_ballot} has length at least {j + 1}." ) rank_ballot_data[f"Ranking_{j + 1}"] = [frozenset("~")] * num_ballots - - rank_ballot_data[f"Ranking_{j + 1}"][idx] = cand_set + cand_id_set = frozenset([candidate_id_map[cand] for cand in cand_set]) + rank_ballot_data[f"Ranking_{j + 1}"][idx] = cand_id_set def __update_rank_ballot_data_attrs( self, @@ -411,6 +439,7 @@ def __update_rank_ballot_data_attrs( rank_ballot: RankBallot, candidates_cast: list[str], num_ballots: int, + candidate_id_map: dict[Candidate, str], ): """ Update ballot data from a rank ballot. @@ -421,6 +450,7 @@ def __update_rank_ballot_data_attrs( rank_ballot (RankBallot): Ballot. candidates_cast (list[str]): List of candidates who have received votes. num_ballots (int): Total number of ballots. 
+ candidate_id_map (dict[Candidate, str]): Mapping of candidate name to id (integer) """ rank_ballot_data["Weight"][idx] = rank_ballot.weight @@ -434,6 +464,7 @@ def __update_rank_ballot_data_attrs( rank_ballot=rank_ballot, candidates_cast=candidates_cast, num_ballots=num_ballots, + candidate_id_map=candidate_id_map, ) def __init_rank_ballot_data( @@ -490,8 +521,8 @@ def __init_formatted_rank_df( return df def _init_from_rank_ballots( - self, ballots: Sequence[RankBallot] - ) -> tuple[pd.DataFrame, tuple[str, ...]]: + self, ballots: Sequence[RankBallot], candidate_id_map: dict[Candidate, str] + ) -> tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, str]]: """ Create the pandas dataframe representation of the profile. @@ -499,7 +530,7 @@ def _init_from_rank_ballots( ballots (Sequence[RankBallot,...]): Sequence of ballots. Returns: - tuple[pd.DataFrame, tuple[str, ...]]: df, candidates_cast + tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, str]]: df, candidates_cast, candidate_id_map """ # `rank_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -517,6 +548,7 @@ def _init_from_rank_ballots( rank_ballot=b, candidates_cast=candidates_cast, num_ballots=num_ballots, + candidate_id_map=candidate_id_map, ) df = self.__init_formatted_rank_df( @@ -526,6 +558,7 @@ def _init_from_rank_ballots( return ( df, tuple(candidates_cast), + candidate_id_map, ) def __validate_init_rank_df_params(self, df: pd.DataFrame) -> None: @@ -600,7 +633,34 @@ def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[st candidates_cast.discard("~") return tuple(candidates_cast) - def _init_from_rank_df(self, df: pd.DataFrame) -> tuple[pd.DataFrame, tuple[str, ...]]: + def _translate_df_to_candidate_ids( + self, df: pd.DataFrame, candidate_id_map: dict[Candidate, str] + ) -> pd.DataFrame: + ranking_cols = [col for col in df.columns if col.startswith("Ranking_")] + + all_cand_values = set().union(*df[ranking_cols].to_numpy().ravel()) - {"~"} + missing_cands = 
all_cand_values - set(candidate_id_map.keys()) + if missing_cands: + raise ValueError( + f"Candidates {missing_cands} are missing from the candidate_id_map. " + "Add them to the dictionary to successfully translate to candidate ids." + ) + + translated_df = df.copy() + translated_df[ranking_cols] = translated_df[ranking_cols].map( + lambda ranking: frozenset(candidate_id_map[cand] for cand in ranking) + if ranking != frozenset("~") + else ranking + ) + return translated_df + + def _init_from_rank_df( + self, df: pd.DataFrame, candidate_id_map: dict[Candidate, str] + ) -> tuple[ + pd.DataFrame, + tuple[str, ...], + dict[Candidate, str], + ]: """ Validate the dataframe and determine the candidates cast. @@ -613,11 +673,15 @@ def _init_from_rank_df(self, df: pd.DataFrame) -> tuple[pd.DataFrame, tuple[str, self.__validate_init_rank_df_params(df) self.__validate_init_rank_df(df) candidates_cast = self.__find_candidates_cast_from_init_rank_df(df) + for cand in candidates_cast: + if cand not in candidate_id_map: + candidate_id_map[cand] = str(len(candidate_id_map)) + new_df = self._translate_df_to_candidate_ids(df, candidate_id_map) - if len(df) == 0: + if len(new_df) == 0: self.max_ranking_length = 0 - return df, candidates_cast + return new_df, candidates_cast, candidate_id_map def _find_max_ranking_length(self) -> int: """ @@ -628,7 +692,7 @@ def _find_max_ranking_length(self) -> int: """ if self.max_ranking_length == 0 or self.max_ranking_length is None: - return len([c for c in self.df.columns if "Ranking_" in c]) + return len([c for c in self._df.columns if "Ranking_" in c]) return self.max_ranking_length @@ -641,13 +705,13 @@ def max_candidates_ranked(self) -> int: E.g., a ballot that ranks two candidates tied for first and ranks no other candidates has length 1, but ranks 2 candidates in total. 
""" - if self.df.empty: + if self._df.empty: return 0 tilde = frozenset("~") assert self.max_ranking_length is not None ranking_cols = [f"Ranking_{i}" for i in range(1, self.max_ranking_length + 1)] return ( - self.df[ranking_cols] + self._df[ranking_cols] .apply(lambda row: len(frozenset.union(*row) - tilde), axis=1) .max() ) @@ -770,7 +834,7 @@ def __str__(self) -> str: __repr__ = __str__ def __to_rank_csv_header( - self, candidate_mapping: dict[str, str], include_voter_set: bool + self, candidate_mapping: dict[Candidate, str], include_voter_set: bool ) -> list[list]: """ Construct the header rows for the PrefProfile a custom CSV format. @@ -782,7 +846,7 @@ def __to_rank_csv_header( header = [ ["VoteKit RankProfile"], ["Candidates"], - [f"({c}:{cand_label})" for c, cand_label in candidate_mapping.items()], + [f"({str(c)}:{cand_label})" for c, cand_label in candidate_mapping.items()], ] header += [["Max Ranking Length"], [str(self.max_ranking_length)]] header += [["Includes Voter Set"], [str(include_voter_set)]] @@ -791,7 +855,7 @@ def __to_rank_csv_header( return header def __to_rank_csv_ranking_list( - self, rank_ballot: RankBallot, candidate_mapping: dict[str, str] + self, rank_ballot: RankBallot, candidate_mapping: dict[Candidate, str] ) -> list: """ Create the list of ranking data for a ballot in the profile. @@ -819,7 +883,7 @@ def __to_rank_csv_ballot_row( self, ballot: RankBallot, include_voter_set: bool, - candidate_mapping: dict[str, str], + candidate_mapping: dict[Candidate, str], weight_precision: int, ) -> list[list]: """ @@ -844,7 +908,7 @@ def __to_rank_csv_ballot_row( return row def __to_rank_csv_data_column_names( - self, include_voter_set: bool, candidate_mapping: dict[str, str] + self, include_voter_set: bool, candidate_mapping: dict[Candidate, str] ) -> list: """ Create the data column header. 
@@ -892,7 +956,7 @@ def to_csv( if len(self.ballots) == 0: raise ProfileError("Cannot write a profile with no ballots to a csv.") - candidate_mapping = {c: str(i) for i, c in enumerate(self.candidates)} + candidate_mapping: dict[Candidate, str] = {c: str(i) for i, c in enumerate(self.candidates)} header = self.__to_rank_csv_header(candidate_mapping, include_voter_set) data_col_names = self.__to_rank_csv_data_column_names(include_voter_set, candidate_mapping) diff --git a/src/votekit/types.py b/src/votekit/types.py new file mode 100644 index 00000000..aa5d07de --- /dev/null +++ b/src/votekit/types.py @@ -0,0 +1,3 @@ +from typing import TypeAlias, Union + +Candidate: TypeAlias = Union[str, int] From da89464974c02490946ca237c3335290fec8a9c1 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 12 Jun 2026 14:41:15 -0500 Subject: [PATCH 02/11] remove cached property of cand attributes --- src/votekit/pref_profile/pref_profile.py | 90 +++++++++++------------- 1 file changed, 42 insertions(+), 48 deletions(-) diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index 6a747205..0f23cf6b 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -164,8 +164,8 @@ def __init__( max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): - self._candidates_cast = candidates_cast - self._candidates = candidates + self.candidates_cast = candidates_cast + self.candidates = candidates self.max_ranking_length = max_ranking_length self._df = df @@ -207,8 +207,8 @@ def _validate_and_set_candidates(self) -> None: ProfileError: Candidate names must not be the same as "Ranking_i". ProfileError: Candidate names must be unique. 
""" - for cand in self._candidates: - if any(f"Ranking_{i}" == cand for i in range(len(self._candidates))): + for cand in self.candidates: + if any(f"Ranking_{i}" == cand for i in range(len(self.candidates))): raise ProfileError( ( f"{cand} is a name reserved for ranking columns, it cannot be used as a " @@ -216,17 +216,17 @@ def _validate_and_set_candidates(self) -> None: ) ) - if not len(set(self._candidates)) == len(self._candidates): + if not len(set(self.candidates)) == len(self.candidates): raise ProfileError("All candidates must be unique.") - if not set(self._candidates_cast).issubset(self._candidates): + if not set(self.candidates_cast).issubset(self.candidates): raise ProfileError( "Candidates cast are not a subset of candidates list. The following " " candidates are in candidates_cast but not candidates: " - f"{set(self._candidates_cast) - set(self._candidates)}." + f"{set(self.candidates_cast) - set(self.candidates)}." ) - self._candidates = tuple([c.strip() for c in self._candidates]) - self._candidates_cast = tuple([c.strip() for c in self._candidates_cast]) + self.candidates = tuple([c for c in self.candidates]) + self.candidates_cast = tuple([c for c in self.candidates_cast]) def __setattr__(self, name, value): if getattr(self, "_is_frozen", False): @@ -325,26 +325,26 @@ def __init__( max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): - candidates = tuple(candidates) - self._candidates = tuple([str(cand_id) for cand_id in range(len(candidates))]) + self.candidates = tuple(candidates) + self._candidates = tuple([cand_id for cand_id in range(len(self.candidates))]) candidate_id_map = { - cand: cand_id for cand, cand_id in zip(candidates, self._candidates, strict=True) + cand: cand_id for cand, cand_id in zip(self.candidates, self._candidates, strict=True) } self.max_ranking_length = 0 if max_ranking_length is None else max_ranking_length if df.equals(pd.DataFrame()): - (self._df, self._candidates_cast, candidate_id_map) = 
self._init_from_rank_ballots( + (self._df, self.candidates_cast, candidate_id_map) = self._init_from_rank_ballots( cast(Sequence[RankBallot], ballots), candidate_id_map ) else: - self._df, self._candidates_cast, candidate_id_map = self._init_from_rank_df( + self._df, self.candidates_cast, candidate_id_map = self._init_from_rank_df( df, candidate_id_map ) - if self._candidates == tuple(): - self._candidates = self._candidates_cast + if self.candidates == tuple(): + self.candidates = self.candidates_cast self.max_ranking_length = self._find_max_ranking_length() @@ -361,25 +361,20 @@ def __init__( raise ValueError(msg) self.id_candidate_map = {cand_id: cand for cand, cand_id in candidate_id_map.items()} + if self._candidates == tuple(): + self._candidates = tuple(candidate_id_map[cand] for cand in self.candidates) + self._candidates_cast = tuple(candidate_id_map[cand] for cand in self.candidates_cast) super().__init__( - candidates=self._candidates, - candidates_cast=self._candidates_cast, + candidates=self.candidates, + candidates_cast=self.candidates_cast, df=self._df, max_ranking_length=self.max_ranking_length, ) @cached_property def df(self) -> pd.DataFrame: - return self._translate_df_to_candidate_ids(self._df, self.id_candidate_map) - - @cached_property - def candidates(self) -> tuple[Candidate, ...]: - return tuple(self.id_candidate_map[cand_id] for cand_id in self._candidates) - - @cached_property - def candidates_cast(self) -> tuple[Candidate, ...]: - return tuple(self.id_candidate_map[cand_id] for cand_id in self._candidates_cast) + return self._translate_df_ranking_values(self._df, self.id_candidate_map) def __update_ballot_ranking_data( self, @@ -388,7 +383,7 @@ def __update_ballot_ranking_data( rank_ballot: RankBallot, candidates_cast: list[str], num_ballots: int, - candidate_id_map: dict[Candidate, str], + candidate_id_map: dict[Candidate, int], ): """ Update the ranking data from a ballot. 
@@ -415,10 +410,9 @@ def __update_ballot_ranking_data( ) if rank_ballot.weight > 0: if c not in candidate_id_map: - candidate_id_map[c] = str(len(candidate_id_map)) - cand_id = candidate_id_map[c] - if cand_id not in candidates_cast: - candidates_cast.append(candidate_id_map[c]) + candidate_id_map[c] = len(candidate_id_map) + if c not in candidates_cast: + candidates_cast.append(c) if f"Ranking_{j + 1}" not in rank_ballot_data: assert self.max_ranking_length is not None @@ -439,7 +433,7 @@ def __update_rank_ballot_data_attrs( rank_ballot: RankBallot, candidates_cast: list[str], num_ballots: int, - candidate_id_map: dict[Candidate, str], + candidate_id_map: dict[Candidate, int], ): """ Update ballot data from a rank ballot. @@ -450,7 +444,7 @@ def __update_rank_ballot_data_attrs( rank_ballot (RankBallot): Ballot. candidates_cast (list[str]): List of candidates who have received votes. num_ballots (int): Total number of ballots. - candidate_id_map (dict[Candidate, str]): Mapping of candidate name to id (integer) + candidate_id_map (dict[Candidate, int]): Mapping of candidate name to id (integer) """ rank_ballot_data["Weight"][idx] = rank_ballot.weight @@ -521,8 +515,8 @@ def __init_formatted_rank_df( return df def _init_from_rank_ballots( - self, ballots: Sequence[RankBallot], candidate_id_map: dict[Candidate, str] - ) -> tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, str]]: + self, ballots: Sequence[RankBallot], candidate_id_map: dict[Candidate, int] + ) -> tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, int]]: """ Create the pandas dataframe representation of the profile. @@ -530,7 +524,7 @@ def _init_from_rank_ballots( ballots (Sequence[RankBallot,...]): Sequence of ballots. 
Returns: - tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, str]]: df, candidates_cast, candidate_id_map + tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, int]]: df, candidates_cast, candidate_id_map """ # `rank_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -633,8 +627,8 @@ def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[st candidates_cast.discard("~") return tuple(candidates_cast) - def _translate_df_to_candidate_ids( - self, df: pd.DataFrame, candidate_id_map: dict[Candidate, str] + def _translate_df_ranking_values( + self, df: pd.DataFrame, candidate_id_map: dict[Candidate, int] | dict[int, Candidate] ) -> pd.DataFrame: ranking_cols = [col for col in df.columns if col.startswith("Ranking_")] @@ -655,11 +649,11 @@ def _translate_df_to_candidate_ids( return translated_df def _init_from_rank_df( - self, df: pd.DataFrame, candidate_id_map: dict[Candidate, str] + self, df: pd.DataFrame, candidate_id_map: dict[Candidate, int] ) -> tuple[ pd.DataFrame, tuple[str, ...], - dict[Candidate, str], + dict[Candidate, int], ]: """ Validate the dataframe and determine the candidates cast. @@ -675,8 +669,8 @@ def _init_from_rank_df( candidates_cast = self.__find_candidates_cast_from_init_rank_df(df) for cand in candidates_cast: if cand not in candidate_id_map: - candidate_id_map[cand] = str(len(candidate_id_map)) - new_df = self._translate_df_to_candidate_ids(df, candidate_id_map) + candidate_id_map[cand] = len(candidate_id_map) + new_df = self._translate_df_ranking_values(df, candidate_id_map) if len(new_df) == 0: self.max_ranking_length = 0 @@ -834,7 +828,7 @@ def __str__(self) -> str: __repr__ = __str__ def __to_rank_csv_header( - self, candidate_mapping: dict[Candidate, str], include_voter_set: bool + self, candidate_mapping: dict[Candidate, int], include_voter_set: bool ) -> list[list]: """ Construct the header rows for the PrefProfile a custom CSV format. 
@@ -855,7 +849,7 @@ def __to_rank_csv_header( return header def __to_rank_csv_ranking_list( - self, rank_ballot: RankBallot, candidate_mapping: dict[Candidate, str] + self, rank_ballot: RankBallot, candidate_mapping: dict[Candidate, int] ) -> list: """ Create the list of ranking data for a ballot in the profile. @@ -883,7 +877,7 @@ def __to_rank_csv_ballot_row( self, ballot: RankBallot, include_voter_set: bool, - candidate_mapping: dict[Candidate, str], + candidate_mapping: dict[Candidate, int], weight_precision: int, ) -> list[list]: """ @@ -908,7 +902,7 @@ def __to_rank_csv_ballot_row( return row def __to_rank_csv_data_column_names( - self, include_voter_set: bool, candidate_mapping: dict[Candidate, str] + self, include_voter_set: bool, candidate_mapping: dict[Candidate, int] ) -> list: """ Create the data column header. @@ -956,7 +950,7 @@ def to_csv( if len(self.ballots) == 0: raise ProfileError("Cannot write a profile with no ballots to a csv.") - candidate_mapping: dict[Candidate, str] = {c: str(i) for i, c in enumerate(self.candidates)} + candidate_mapping: dict[Candidate, int] = {c: str(i) for i, c in enumerate(self.candidates)} header = self.__to_rank_csv_header(candidate_mapping, include_voter_set) data_col_names = self.__to_rank_csv_data_column_names(include_voter_set, candidate_mapping) From dbfc01798b1bbe07c28ef7e4a8f3a2163d432e06 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Wed, 17 Jun 2026 15:33:43 -0500 Subject: [PATCH 03/11] accept int or str candidates --- src/votekit/animations.py | 107 ++++--- src/votekit/ballot.py | 52 ++-- src/votekit/cleaning/rank_ballots_cleaning.py | 9 +- .../cleaning/rank_profiles_cleaning.py | 29 +- src/votekit/elections/election_state.py | 30 +- .../ranking/abstract_ranking.py | 7 +- .../election_types/ranking/alaska.py | 5 +- .../election_types/ranking/plurality_veto.py | 37 ++- .../election_types/ranking/ranked_pairs.py | 3 +- .../election_types/ranking/schulze.py | 3 +- .../ranking/simultaneous_veto.py | 91 
+++--- .../ranking/stv/numpy_stv_base.py | 39 +-- .../election_types/ranking/stv/stv.py | 44 +-- src/votekit/elections/transfers.py | 5 +- src/votekit/matrices/candidate/comentions.py | 12 +- src/votekit/models.py | 18 +- src/votekit/plots/bar_plot.py | 169 ++++++----- .../plots/profiles/multi_profile_bar_plot.py | 65 +++-- .../plots/profiles/profile_bar_plot.py | 71 +++-- .../pref_profile/csv_utils/score_csv_utils.py | 8 +- src/votekit/pref_profile/pref_profile.py | 270 ++++++++++++------ src/votekit/pref_profile/utils.py | 23 +- src/votekit/representation_scores.py | 14 +- src/votekit/types.py | 5 +- src/votekit/utils.py | 120 ++++---- .../bloc_slate_generators/test_cambridge.py | 13 +- .../bloc_slate_generators/test_slate_bt.py | 22 +- .../bloc_slate_generators/test_slate_pl.py | 22 +- .../ranking/test_random_dictator.py | 3 +- .../ranking/test_simultaneous_veto.py | 3 +- tests/elections/test_election_model.py | 3 +- 31 files changed, 791 insertions(+), 511 deletions(-) diff --git a/src/votekit/animations.py b/src/votekit/animations.py index 1a75afa7..76dd65ff 100644 --- a/src/votekit/animations.py +++ b/src/votekit/animations.py @@ -9,7 +9,7 @@ from dataclasses import dataclass from enum import IntEnum from pathlib import Path -from typing import List, Literal, Mapping, Optional, Sequence +from typing import List, Literal, Mapping, Optional, Sequence, cast import manim from manim import ( @@ -36,6 +36,7 @@ remove_cand_rank_ballot, ) from votekit.elections.election_types.ranking.stv import STV +from votekit.types import Candidate from votekit.utils import ballots_by_first_cand, mentions @@ -144,17 +145,17 @@ class _EliminationEvent(_AnimationEvent): An animation event representing a round in which a candidate was eliminated. Attributes: - candidate (str): The name of the eliminated candidate. + candidate (Candidate): The name of the eliminated candidate. display_name (str): The candidate name to use for display purposes, such as a nickname. 
- support_transferred (Mapping[str, float]): A dictionary mapping names of candidates + support_transferred (Mapping[Candidate, float]): A dictionary mapping names of candidates to the amount of support they received from the elimination. round_number (int): The round of the election process associated to this event. """ - candidate: str + candidate: Candidate display_name: str - support_transferred: Mapping[str, float] + support_transferred: Mapping[Candidate, float] round_number: int def get_message(self) -> str: @@ -168,12 +169,12 @@ class _EliminationOffscreenEvent(_AnimationEvent): were eliminated. Attributes: - support_transferred (Mapping[str, float]): A dictionary mapping names of candidates + support_transferred (Mapping[Candidate, float]): A dictionary mapping names of candidates to the total amount of support they received from the eliminations. round_numbers (List[int]): The rounds of the election process associated to this event. """ - support_transferred: Mapping[str, float] + support_transferred: Mapping[Candidate, float] round_numbers: List[int] def get_message(self) -> str: @@ -193,10 +194,10 @@ class _WinEvent(_AnimationEvent): An animation event representing a round in which some number of candidates were elected. Attributes: - candidates (Sequence[str]): The names of the elected candidates. + candidates (Sequence[Candidate]): The names of the elected candidates. display_names (Sequence[str]): The candidate names to use for display purposes, such as nicknames. - support_transferred (Mapping[str, Mapping[str, float]]): A dictionary mapping + support_transferred (Mapping[Candidate, Mapping[Candidate, float]]): A dictionary mapping pairs of candidate names to the amount of support transferred between them this round. 
For instance, if ``c1`` was elected this round, then ``support_transferred[c1][c2]`` will represent the amount of support @@ -204,9 +205,9 @@ class _WinEvent(_AnimationEvent): round_number (int): The round of the election process associated to this event. """ - candidates: Sequence[str] + candidates: Sequence[Candidate] display_names: Sequence[str] - support_transferred: Mapping[str, Mapping[str, float]] + support_transferred: Mapping[Candidate, Mapping[Candidate, float]] round_number: int def get_message(self) -> str: @@ -229,12 +230,15 @@ class STVAnimation: appearing on-screen. If ``"winners"``, focus only the elected candidates. If ``"viable"``, focus only the candidates with more mentions than the election threshold. If ``"all"``, focus all candidates. Defaults to ``"viable"``. - nicknames (Optional[dict[str,str]], optional): A dictionary mapping candidate names to - candidate "nicknames" to be used in the animation instead. The keys of ``nicknames`` - need not contain every candidate, only the ones for which the user would like to - provide a nickname. - candidate_colors (Optional[Mapping[str, ParsableManimColor]], optional): A dictionary - mapping candidate names to colors that should represent them in the animation. + nicknames (Optional[dict[Candidate,str] | dict[str, str] | dict[int, str]], optional): + A dictionary mapping candidate names to candidate "nicknames" + to be used in the animation instead. + The keys of ``nicknames`` need not contain every candidate, + only the ones for which the user would like to provide a nickname. + candidate_colors (Optional[Mapping[Candidate, ParsableManimColor] + | Mapping[str, ParsableManimColor]] | Mapping[int, ParsableManimColor], optional): + A dictionary mapping candidate names to colors + that should represent them in the animation. The colors in ``candidate_colors`` will override the bar fill colors provided by ``color_palette``. 
The keys of ``candidate_colors`` need not contain every candidate, only the ones for which the user would like to provide @@ -255,8 +259,9 @@ class STVAnimation: nicknames (dict[str,str], optional): A dictionary mapping candidate names to candidate "nicknames" to be used in the animation instead. color_palette (ColorPalette, optional): A color palette to use for the animation. - candidate_dict (dict[str, dict[str, object]]): A dictionary mapping each candidate's - name to a dictionary recording that candidate's support, display name, and color. + candidate_dict (dict[Candidate, dict[Candidate, object]]): A dictionary mapping + each candidate name to a dictionary + recording that candidate's support, display name, and color. events (List[_AnimationEvent]): A list of animation events in order of occurrence. font (str): The name of a font that the user prefers to use if available. delay_mult (float): A multiplier for the delay times between animations. @@ -273,9 +278,19 @@ def __init__( self, election: STV, title: Optional[str] = None, - focus: set[str] | List[str] | Literal["winners", "viable", "all"] = "viable", - nicknames: Optional[dict[str, str]] = None, - candidate_colors: Optional[Mapping[str, ParsableManimColor]] = None, + focus: set[Candidate] + | set[str] + | set[int] + | List[Candidate] + | List[str] + | List[int] + | Literal["winners", "viable", "all"] = "viable", + nicknames: Optional[dict[Candidate, str] | dict[str, str] | dict[int, str]] = None, + candidate_colors: Optional[ + Mapping[Candidate, ParsableManimColor] + | Mapping[str, ParsableManimColor] + | Mapping[int, ParsableManimColor] + ] = None, color_palette: ColorPalette = DARK_PALETTE, font: str = "", delay_mult: float = 1.0, @@ -340,7 +355,7 @@ def __init__( focus = focus | missing_winners self.focus = focus - self.nicknames = nicknames + self.nicknames: Mapping[Candidate, str] = cast(Mapping[Candidate, str], nicknames) self.color_palette = color_palette self.candidate_dict = 
self._make_candidate_dict(election, candidate_colors) self.events = self._make_event_list(election) @@ -355,8 +370,12 @@ def __init__( self.animation_duration = animation_duration def _make_candidate_dict( - self, election: STV, candidate_colors: Mapping[str, ParsableManimColor] - ) -> dict[str, dict[str, object]]: + self, + election: STV, + candidate_colors: Mapping[Candidate, ParsableManimColor] + | Mapping[str, ParsableManimColor] + | Mapping[int, ParsableManimColor], + ) -> dict[Candidate, dict[str, object]]: """ Create a dictionary sending candidate names to dictionaries recording that candidate's support, display name, and color. @@ -367,11 +386,11 @@ def _make_candidate_dict( names to their associated color codes in the candidate dictionary. Returns: - dict[str, dict[str,object]]: A dictionary whose keys are candidate names and whose + dict[Candidate, dict[str,object]]: A dictionary whose keys are candidate names and whose values are themselves dictionaries with details about each candidate. """ # Initialize dictionary and add "support" key for each candidate. 
- candidate_dict: dict[str, dict[str, object]] = { + candidate_dict: dict[Candidate, dict[str, object]] = { name: {"support": support} for name, support in election.election_states[0].scores.items() if name in self.focus @@ -387,9 +406,10 @@ def _make_candidate_dict( # Determine candidate color num_default_colors = len(self.color_palette.bar_fills) color_index = 0 + _candidate_colors = cast(Mapping[Candidate, ParsableManimColor], candidate_colors) for name in candidate_dict.keys(): - if name in candidate_colors.keys(): - candidate_dict[name]["color"] = candidate_colors[name] + if name in _candidate_colors.keys(): + candidate_dict[name]["color"] = _candidate_colors[name] else: candidate_dict[name]["color"] = self.color_palette.bar_fills[ color_index % num_default_colors @@ -418,7 +438,7 @@ def _make_event_list(self, election: STV) -> List[_AnimationEvent]: eliminated_candidates = [c for s in election_round.eliminated for c in s] if len(elected_candidates) > 0: # Win round - support_transferred: dict[str, dict[str, float]] = {} + support_transferred: dict[Candidate, dict[Candidate, float]] = {} if round_number == len(election): # If it's the last round, don't worry about the transferred votes support_transferred = {cand: {} for cand in elected_candidates} @@ -477,9 +497,9 @@ def _get_transferred_votes( self, election: STV, round_number: int, - cands_transferred_from: List[str], + cands_transferred_from: List[Candidate], event_type: Literal["win", "elimination"], - ) -> dict[str, dict[str, float]]: + ) -> dict[Candidate, dict[Candidate, float]]: """ Compute the number of votes transferred from elected or eliminated candidates to remaining candidates. @@ -487,14 +507,15 @@ def _get_transferred_votes( Args: election (STV): The election. round_number (int): The number of the round in question. - cands_transferred_from (List[str]): A list of the names of the elected or + cands_transferred_from (List[Candidate]): A list of the names of the elected or eliminated candidates. 
event_type (Literal["win", "elimination"]): ``"win"`` if candidates were elected this round, ``"elimination"`` otherwise. Returns: - dict[str, dict[str, float]]: A nested dictionary. If ``d`` is the return value, - ``c1`` was a candidate eliminated this round, and ``c2`` is a remaining candidate, + dict[Candidate, dict[Candidate, float]]: A nested dictionary. + If ``d`` is the return value, ``c1`` was a candidate eliminated this round, + and ``c2`` is a remaining candidate, then ``d[c1][c2]`` will be the total support transferred this round from candidate ``c1`` to candidate ``c2``. @@ -506,7 +527,7 @@ def _get_transferred_votes( prev_profile, prev_state = election.get_step(round_number - 1) current_state = election.election_states[round_number] - transfers: dict[str, dict[str, float]] = {} + transfers: dict[Candidate, dict[Candidate, float]] = {} if event_type == "elimination": assert len(cands_transferred_from) == 1, ( "Tried to compute transferred votes in a round " @@ -532,7 +553,7 @@ def _get_transferred_votes( condense_rank_ballot(remove_cand_rank_ballot(cands_transferred_from, b)) for b in new_ballots ] - transfer_weights_from_candidate: dict[str, float] = defaultdict(float) + transfer_weights_from_candidate: dict[Candidate, float] = defaultdict(float) for ballot in clean_ballots: if ballot.ranking is not None: (to_candidate,) = ballot.ranking[0] @@ -587,7 +608,7 @@ def _compose_offscreen_eliminations( _EliminationOffscreenEvent: One offscreen elimination event summarizing ``event1`` and ``event2``. """ - support_transferred: dict[str, float] = defaultdict(float) + support_transferred: dict[Candidate, float] = defaultdict(float) for key, value in event1.support_transferred.items(): support_transferred[key] += value for key, value in event2.support_transferred.items(): @@ -685,8 +706,8 @@ class ElectionScene(manim.Scene): instantiated directly. 
Args: - candidate_dict (dict[str,dict]): A dictionary mapping each candidate to a dictionary of - attributes of the candidate. + candidate_dict (dict[Candidate,dict]): A dictionary mapping each candidate + to a dictionary of attributes of the candidate. events (List[_AnimationEvent]): A list of animation events to be constructed and rendered. title (Optional[str], optional): A string to be displayed at the beginning of the animation as a title screen. If ``None``, the animation will skip the title @@ -703,7 +724,7 @@ class ElectionScene(manim.Scene): def __init__( self, - candidate_dict: dict[str, dict], + candidate_dict: dict[Candidate, dict], events: List[_AnimationEvent], title: Optional[str] = None, color_palette: ColorPalette = DARK_PALETTE, @@ -1050,12 +1071,12 @@ def _update_quota_line(self, quota: float) -> None: run_time=self.animation_duration, ) - def _animate_win(self, cands_transferred_from: dict[str, dict], event: _WinEvent) -> None: + def _animate_win(self, cands_transferred_from: dict[Candidate, dict], event: _WinEvent) -> None: """ Animate a round in which one or more candidates are elected. Args: - cands_transferred_from (dict[str,dict]): A dictionary in which the keys are the + cands_transferred_from (dict[Candidate,dict]): A dictionary in which the keys are the candidates elected this round and the values are dictionaries recording the candidate's attributes. event (_WinEvent): The event to be animated. 
diff --git a/src/votekit/ballot.py b/src/votekit/ballot.py index 700d210f..f8378a8b 100644 --- a/src/votekit/ballot.py +++ b/src/votekit/ballot.py @@ -1,12 +1,15 @@ from __future__ import annotations from numbers import Real -from typing import Iterable, Optional, Sequence, TypeAlias, Union, overload +from typing import Iterable, Mapping, Optional, Sequence, TypeAlias, Union, overload from votekit.types import Candidate Ranking: TypeAlias = Optional[tuple[frozenset[Candidate], ...]] RankingLike: TypeAlias = Optional[Sequence[Candidate | Iterable[Candidate]]] +ScoresLike: TypeAlias = Optional[ + Mapping[Candidate, float | int] | Mapping[str, float | int] | Mapping[int, float | int] +] class Ballot: @@ -14,25 +17,28 @@ class Ballot: Ballot parent class, contains voter set and assigned weight. Args: - ranking (Optional[Sequence[Candidate | Iterable[Candidate]]]): Candidate ranking. + ranking (Optional[Sequence[str | int | Iterable[str | int]]]): Candidate ranking. Entry i of the sequence is a candidate or iterable of candidates ranked in position i. Candidate can be represented as a str or int. Allow mix of types in candidate set. - Defaults to None. Will be coerced to tuple[frozenset[Candidate], ...]. + Defaults to None. Will be coerced to tuple[frozenset[str | int], ...]. weight (Union[float, int]): Weight assigned to a given ballot. Defaults to 1.0 Can be input as int or float, and will be coerced to float. voter_set (Union[set[str], frozenset[str]]): Set of voters who cast the ballot. Defaults to frozenset(). Will be coerced to frozenset. - scores (Optional[dict[Candidate, Union[int, float]]): Scores for individual candidates. - Defaults to None. Values can be input as int or float but will be coerced to float. + scores (Optional[Mapping[str | int, float | int] | Mapping[str, float | int] + | Mapping[int, float | int]]): Scores for individual candidates. Defaults to None. + Values can be input as int or float but will be coerced to float. 
+ Candidates can be strings, integers, or a mix of both. + Stored internally as a dict[str | int, float]. Only retains non-zero scores. Attributes: - ranking (Optional[tuple[frozenset[Candidate], ...]]): Tuple of candidate ranking. + ranking (Optional[tuple[frozenset[str | int], ...]]): Tuple of candidate ranking. Entry i of the tuple is a frozenset of candidates ranked in position i. weight (float): Weight assigned to a given ballot. voter_set (frozenset[str]): Set of voters who cast the ballot. - scores (Optional[dict[Candidate, float]]): Scores for individual candidates. + scores (Optional[Mapping[str | int, float | int]): Scores for individual candidates. Raises: TypeError: Only one of ranking or scores can be provided. @@ -63,7 +69,7 @@ def __new__( cls, *, ranking: None = None, - scores: dict[Candidate, Union[int, float]], + scores: ScoresLike, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ) -> ScoreBallot: ... @@ -72,8 +78,8 @@ def __new__( def __new__( cls, *, - ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, - scores: Optional[dict[Candidate, Union[int, float]]] = None, + ranking: RankingLike = None, + scores: ScoresLike = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ) -> Ballot: ... 
@@ -81,8 +87,8 @@ def __new__( def __new__( cls, *, - ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, - scores: Optional[dict[Candidate, Union[int, float]]] = None, + ranking: RankingLike = None, + scores: ScoresLike = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -98,8 +104,8 @@ def __new__( def __init__( self, *, - ranking: Optional[Sequence[Candidate | Iterable[Candidate]]] = None, - scores: Optional[dict[Candidate, Union[int, float]]] = None, + ranking: RankingLike = None, + scores: ScoresLike = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -155,12 +161,14 @@ class RankBallot(Ballot): Args: ranking (RankingLike): Ranking of candidates, defaults to None. + RankingLike = Sequence[str | int | Iterable[str | int]] | None weight (Union[int, float]): Weight of the ballot, defaults to 1.0. voter_set (Union[set[str], frozenset[str]]): Voter set of the ballot, defaults to frozenset(). Attributes: - ranking (RankingLike): Ranking of candidates. + ranking (Ranking): Ranking of candidates. + Ranking = tuple[frozenset[str | int], ...] | None weight (float): Weight of the ballot. voter_set (frozenset[str]): Voter set of the ballot. @@ -173,7 +181,7 @@ def __init__( self, *, ranking: RankingLike = None, - scores: Optional[dict[Candidate, Union[int, float]]] = None, + scores: ScoresLike = None, weight: Union[int, float] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -254,13 +262,15 @@ class ScoreBallot(Ballot): Class to handle ballots with scores. Strips whitespace from candidate names. Args: - scores (Optional[dict[str, Union[int, float]]]): Scores of candidates, defaults to None. + scores (ScoresLike): Scores of candidates, defaults to None. + ScoresLike = Mapping[str | int, int | float] | Mapping[str, int | float] + | Mapping[int, int | float] | None weight (Union[int, float]): Weight of the ballot, defaults to 1.0. 
voter_set (Union[set[str], frozenset[str]]): Voter set of the ballot, defaults to frozenset(). Attributes: - scores (Optional[dict[str, float]]): Scores of candidates. + scores (Optional[dict[str | int, float]]): Scores of candidates. weight (float): Weight of the ballot. voter_set (frozenset[str]): Voter set of the ballot. @@ -274,7 +284,7 @@ def __init__( self, *, ranking: RankingLike = None, - scores: Optional[dict[Candidate, Union[int, float]]] = None, + scores: ScoresLike = None, weight: Union[int, float] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), ): @@ -287,7 +297,7 @@ def __init__( super().__init__(weight=weight, voter_set=voter_set) def _convert_scores_to_float_strip_whitespace( - self, scores: Optional[dict[Candidate, float]] + self, scores: ScoresLike ) -> Optional[dict[Candidate, float]]: if scores is None: return None @@ -299,7 +309,7 @@ def _convert_scores_to_float_strip_whitespace( c.strip() if isinstance(c, str) else c: float(s) for c, s in scores.items() if s != 0 } - def _validate_scores_candidates(self, scores: Optional[dict[Candidate, Union[int, float]]]): + def _validate_scores_candidates(self, scores: ScoresLike): if scores is not None: if "~" in scores: raise ValueError( diff --git a/src/votekit/cleaning/rank_ballots_cleaning.py b/src/votekit/cleaning/rank_ballots_cleaning.py index ce80b1b1..8ea0a15c 100644 --- a/src/votekit/cleaning/rank_ballots_cleaning.py +++ b/src/votekit/cleaning/rank_ballots_cleaning.py @@ -1,6 +1,5 @@ -from typing import Union - from votekit.ballot import RankBallot +from votekit.types import Candidate def condense_rank_ballot( @@ -80,20 +79,20 @@ def remove_repeat_cands_rank_ballot( def remove_cand_rank_ballot( - removed: Union[str, list], + removed: Candidate | list[Candidate], ballot: RankBallot, ) -> RankBallot: """ Removes specified candidate(s) from ballot. Does not condense the resulting ballot. Args: - removed (Union[str, list]): Candidate or list of candidates to be removed. 
+ removed (str | int | list[str | int]): Candidate or list of candidates to be removed. ballot (RankBallot): Ballot to remove candidates from. Returns: RankBallot: Ballot with candidate(s) removed. """ - if isinstance(removed, str): + if isinstance(removed, Candidate): removed = [removed] new_ranking = [] diff --git a/src/votekit/cleaning/rank_profiles_cleaning.py b/src/votekit/cleaning/rank_profiles_cleaning.py index 3bd66a7f..c33be372 100644 --- a/src/votekit/cleaning/rank_profiles_cleaning.py +++ b/src/votekit/cleaning/rank_profiles_cleaning.py @@ -1,5 +1,5 @@ from functools import partial -from typing import Callable, Union +from typing import Callable import numpy as np import pandas as pd @@ -9,6 +9,7 @@ ProfileError, RankProfile, ) +from votekit.types import Candidate def _iterate_and_clean_ranking_tuples( @@ -195,20 +196,21 @@ def remove_repeat_cands_rank_profile( def remove_cand_from_ranking_row( - removed: Union[str, list], + removed: Candidate | list[Candidate] | list[str] | list[int], ranking_tup: tuple[frozenset, ...], ) -> tuple[frozenset, ...]: """ Removes specified candidate(s) from ranking. Does not condense the resulting ranking. Args: - removed (Union[str, list]): Candidate or list of candidates to be removed. + removed (str | int | list[str | int] | list[str] | list[int]): + Candidate or list of candidates to be removed. ranking_tup (tuple): Ranking to remove candidates from. Returns: tuple: Ranking with candidate(s) removed. """ - if isinstance(removed, str): + if isinstance(removed, Candidate): removed = [removed] removed_set = set(removed) @@ -223,7 +225,7 @@ def remove_cand_from_ranking_row( def remove_cand_rank_profile( - removed: Union[str, list], + removed: Candidate | list[Candidate], profile: RankProfile, remove_empty_ballots: bool = True, remove_zero_weight_ballots: bool = True, @@ -237,7 +239,7 @@ def remove_cand_rank_profile( is handled correctly. Args: - removed (Union[str, list]): Candidate or list of candidates to be removed. 
+ removed (str | int | list[str | int]): Candidate or list of candidates to be removed. profile (RankProfile): Profile to remove candidates from. remove_empty_ballots (bool, optional): Whether or not to remove ballots that have no ranking or scores as a result of cleaning. Defaults to True. @@ -254,7 +256,7 @@ def remove_cand_rank_profile( Raises: ProfileError: Profile must only contain ranked ballots. """ - if isinstance(removed, str): + if isinstance(removed, Candidate): removed = [removed] cleaned_profile = clean_rank_profile( @@ -397,7 +399,9 @@ def condense_rank_profile( ) -def _is_equiv_for_remove_and_condense(removed: list[str], ranking: pd.Series) -> bool: +def _is_equiv_for_remove_and_condense( + removed: list[Candidate] | list[str] | list[int], ranking: pd.Series +) -> bool: """ Returns True if the given ranking is equivalent to its removed and condensed form. It is equivalent if the ranking has no candidate in the removed list and either no empty @@ -405,7 +409,7 @@ def _is_equiv_for_remove_and_condense(removed: list[str], ranking: pd.Series) -> in the removed list, it is not equivalent. Args: - removed (list[str]): Candidates to be removed. + removed (list[str | int] | list[str] | list[int]): Candidates to be removed. ranking (pd.Series): Ranking to check. Returns: @@ -437,7 +441,7 @@ def _is_equiv_for_remove_and_condense(removed: list[str], ranking: pd.Series) -> def remove_and_condense_rank_profile( - removed: Union[str, list], + removed: Candidate | list[Candidate] | list[str] | list[int], profile: RankProfile, remove_empty_ballots: bool = True, remove_zero_weight_ballots: bool = True, @@ -457,7 +461,8 @@ def remove_and_condense_rank_profile( is handled correctly, and that ballot equivalence is checked. Args: - removed (Union[str, list]): Candidate or list of candidates to be removed. + removed (str | int | list[str | int] | list[str] | list[int]): + Candidate or list of candidates to be removed. 
profile (RankProfile): Profile to remove repeated candidates from. remove_empty_ballots (bool, optional): Whether or not to remove ballots that have no ranking or scores as a result of cleaning. Defaults to True. @@ -472,7 +477,7 @@ def remove_and_condense_rank_profile( CleanedRankProfile: A cleaned ``RankProfile``. """ - if isinstance(removed, str): + if isinstance(removed, Candidate): removed = [removed] cleaned_profile = clean_rank_profile( diff --git a/src/votekit/elections/election_state.py b/src/votekit/elections/election_state.py index 93b5bc28..7eec0497 100644 --- a/src/votekit/elections/election_state.py +++ b/src/votekit/elections/election_state.py @@ -1,6 +1,8 @@ from dataclasses import dataclass, field from typing import Any +from votekit.types import Candidate + @dataclass class ElectionState: @@ -11,26 +13,28 @@ class ElectionState: Attributes: round_number (int, optional): Round number, defaults to 0. - remaining (tuple[frozenset[str],...], optional): Remaining candidates, ordered to indicate - ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - elected (tuple[frozenset[str],...], optional): Elected candidates, ordered to indicate - ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - eliminated (tuple[frozenset[str],...], optional): Eliminated candidates, ordered to indicate - ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - tiebreaks (dict[frozenset[str], tuple[frozenset[str],...]], optional): Stores + remaining (tuple[frozenset[Candidate],...], optional): Remaining candidates, ordered to + indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. + elected (tuple[frozenset[Candidate],...], optional): Elected candidates, ordered to + indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. 
+ eliminated (tuple[frozenset[Candidate],...], optional): Eliminated candidates, ordered to + indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. + tiebreaks (dict[frozenset[Candidate], tuple[frozenset[Candidate],...]], optional): Stores tiebreak resolutions. Keys are frozensets of tied candidates and values are resolutions of tiebreak. Defaults to empty dictionary. - scores(dict[str, float], optional): Stores score information. + scores(dict[Candidate, float], optional): Stores score information. Keys are candidates, values are scores. Only remaining candidates should be stored. """ round_number: int = 0 - remaining: tuple[frozenset[str], ...] = (frozenset(),) - elected: tuple[frozenset[str], ...] = (frozenset(),) - eliminated: tuple[frozenset[str], ...] = (frozenset(),) - tiebreaks: dict[frozenset[str], tuple[frozenset[str], ...]] = field(default_factory=dict) - scores: dict[str, float] = field(default_factory=dict) + remaining: tuple[frozenset[Candidate], ...] = (frozenset(),) + elected: tuple[frozenset[Candidate], ...] = (frozenset(),) + eliminated: tuple[frozenset[Candidate], ...] 
= (frozenset(),) + tiebreaks: dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] = field( + default_factory=dict + ) + scores: dict[Candidate, float] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: """ diff --git a/src/votekit/elections/election_types/ranking/abstract_ranking.py b/src/votekit/elections/election_types/ranking/abstract_ranking.py index aac975a4..7a41176b 100644 --- a/src/votekit/elections/election_types/ranking/abstract_ranking.py +++ b/src/votekit/elections/election_types/ranking/abstract_ranking.py @@ -4,6 +4,7 @@ from votekit.elections.election_state import ElectionState from votekit.models import Election from votekit.pref_profile import ProfileError, RankProfile +from votekit.types import Candidate class RankingElection(Election[RankProfile]): @@ -12,7 +13,7 @@ class RankingElection(Election[RankProfile]): Args: profile (RankProfile): The initial profile of ballots. - score_function (Callable[[RankProfile], dict[str, float]], optional): + score_function (Callable[[RankProfile], dict[str | int, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects and sorting candidates in Round 0. If None, no score dictionary is saved and all candidates are tied in Round 0. @@ -24,7 +25,7 @@ class RankingElection(Election[RankProfile]): election_states (list[ElectionState]): a list of election states, one for each round of the election. The list is 0 indexed, so the initial state is stored at index 0, round 1 at 1, etc. - score_function (Callable[[RankProfile], dict[str, float]], optional): + score_function (Callable[[RankProfile], dict[str | int, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects. Defaults to None. length (int): the number of rounds of the election. 
@@ -34,7 +35,7 @@ def __init__( self, profile: RankProfile, n_seats: int = 1, - score_function: Optional[Callable[[RankProfile], dict[str, float]]] = None, + score_function: Optional[Callable[[RankProfile], dict[Candidate, float]]] = None, sort_high_low: bool = True, ): if n_seats <= 0: diff --git a/src/votekit/elections/election_types/ranking/alaska.py b/src/votekit/elections/election_types/ranking/alaska.py index 9ae58b31..5a3c60ce 100644 --- a/src/votekit/elections/election_types/ranking/alaska.py +++ b/src/votekit/elections/election_types/ranking/alaska.py @@ -12,6 +12,7 @@ from votekit.elections.election_types.ranking.stv.stv import STV from votekit.elections.transfers import fractional_transfer from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import first_place_votes from .abstract_ranking import RankingElection @@ -28,7 +29,7 @@ class Alaska(RankingElection): round. Defaults to 2. m_2 (int, optional): Number of seats to elect in STV round, i.e. number of overall winners. Defaults to 1. - transfer (Callable[[str, float], Union[tuple[Ballot], list[Ballot]], int], + transfer (Callable[[str | int, float], Union[tuple[Ballot], list[Ballot]], int], tuple[Ballot,...]], optional): Transfer method. Defaults to fractional transfer. 
Function signature is elected candidate, their number of first-place votes, the list of @@ -55,7 +56,7 @@ def __init__( m_1: int = 2, m_2: int = 1, transfer: Callable[ - [str, float, Union[tuple[RankBallot], list[RankBallot]], int], + [Candidate, float, Union[tuple[RankBallot], list[RankBallot]], int], tuple[RankBallot, ...], ] = fractional_transfer, quota: QuotaType | None = "droop", diff --git a/src/votekit/elections/election_types/ranking/plurality_veto.py b/src/votekit/elections/election_types/ranking/plurality_veto.py index 3097d2cf..89f3ca79 100644 --- a/src/votekit/elections/election_types/ranking/plurality_veto.py +++ b/src/votekit/elections/election_types/ranking/plurality_veto.py @@ -10,6 +10,7 @@ from votekit.elections.election_state import ElectionState from votekit.elections.election_types.ranking.abstract_ranking import RankingElection from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import ( first_place_votes, score_dict_to_ranking, @@ -283,14 +284,16 @@ def _reset(self): resets veto caches, and empties _eliminated. """ self._internal_round_number = 0 - self._eliminated = set("~") + self._eliminated: set[Candidate] = set("~") self._voter_order_current_index = 0 self._veto_position_cache = [None for _ in range(self._n_ballots)] if self.tiebreak != "random": self._veto_cache = ["" for _ in range(self._n_ballots)] @abstractmethod - def _veto_loop(self, scores: dict[str, float]) -> tuple[frozenset[str], frozenset[str]]: + def _veto_loop( + self, scores: dict[Candidate, float] + ) -> tuple[frozenset[Candidate], frozenset[Candidate]]: """ Abstract method for veto loop to be defined by subclasses. @@ -302,10 +305,10 @@ def _veto_loop(self, scores: dict[str, float]) -> tuple[frozenset[str], frozense Ties will be broken in _run_step. Args: - scores (dict[str, float]): Mutable score dict, modified in place. + scores (dict[str | int, float]): Mutable score dict, modified in place. 
Returns: - tuple[frozenset[str], frozenset[str]]: A tuple of (eliminated, elected), + tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), where eliminated contains candidates worthy of elimination and elected contains candidates worthy of election. """ @@ -346,7 +349,7 @@ def _run_step( remaining_set = self.candidates - self._eliminated if len(remaining_set) == self.n_seats: electable_candidates = remaining_set - eliminated_set: frozenset[str] = frozenset() + eliminated_set: frozenset[Candidate] = frozenset() else: eliminated_set, electable_candidates = self._veto_loop(new_scores) @@ -430,22 +433,24 @@ class PluralityVeto(_IterativeVetoBase): - a ballot has non-integer weight. """ - def _veto_loop(self, scores: dict[str, float]) -> tuple[frozenset[str], frozenset[str]]: + def _veto_loop( + self, scores: dict[Candidate, float] + ) -> tuple[frozenset[Candidate], frozenset[Candidate]]: """ Processes vetoes until some candidate's score reaches zero. Each voter decrements the score of their least favorite remaining candidate. Args: - scores (dict[str, float]): Mutable score dict, modified in place. + scores (dict[str | int, float]): Mutable score dict, modified in place. Returns: - tuple[frozenset[str], frozenset[str]]: A tuple of (eliminated, elected), + tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), where each is a set of candidates worthy of elimination or election, respectively. """ - eliminated: set[str] = set() - elected: frozenset[str] = frozenset() + eliminated: set[Candidate] = set() + elected: frozenset[Candidate] = frozenset() if self._internal_round_number == 0: eliminated.update(c for c, score in scores.items() if score <= 0) @@ -508,7 +513,9 @@ class SerialVeto(_IterativeVetoBase): - a ballot has non-integer weight. 
""" - def _veto_loop(self, scores: dict[str, float]) -> tuple[frozenset[str], frozenset[str]]: + def _veto_loop( + self, scores: dict[Candidate, float] + ) -> tuple[frozenset[Candidate], frozenset[Candidate]]: """ Processes vetoes until some candidate is eliminated or all vetoes have been processed. @@ -517,14 +524,14 @@ def _veto_loop(self, scores: dict[str, float]) -> tuple[frozenset[str], frozense If all vetoes are processed, elects all remaining candidates. Args: - scores (dict[str, float]): Mutable score dict, modified in place. + scores (dict[str | int, float]): Mutable score dict, modified in place. Returns: - tuple[frozenset[str], frozenset[str]]: A tuple of (eliminated, elected), + tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), where each is a set of candidates worthy of elimination or election, respectively. """ - eliminated: set[str] = set() - elected: frozenset[str] = frozenset() + eliminated: set[Candidate] = set() + elected: frozenset[Candidate] = frozenset() while self._voter_order_current_index < len(self._voter_order): voter_idx = self._voter_order[self._voter_order_current_index] ballot_idx = self._get_ballot_idx(voter_idx) diff --git a/src/votekit/elections/election_types/ranking/ranked_pairs.py b/src/votekit/elections/election_types/ranking/ranked_pairs.py index 117a9f67..d8cf9d80 100644 --- a/src/votekit/elections/election_types/ranking/ranked_pairs.py +++ b/src/votekit/elections/election_types/ranking/ranked_pairs.py @@ -8,6 +8,7 @@ pairwise_dict, ) from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import tiebreak_set @@ -45,7 +46,7 @@ def __init__( n_seats = 1 self.tiebreak = tiebreak - def quick_tiebreak_candidates(profile: RankProfile) -> dict[str, float]: + def quick_tiebreak_candidates(profile: RankProfile) -> dict[Candidate, float]: candidate_set = frozenset(profile.candidates) tiebroken_candidates = tiebreak_set(candidate_set, 
tiebreak=self.tiebreak) diff --git a/src/votekit/elections/election_types/ranking/schulze.py b/src/votekit/elections/election_types/ranking/schulze.py index 81f1b07a..15a00042 100644 --- a/src/votekit/elections/election_types/ranking/schulze.py +++ b/src/votekit/elections/election_types/ranking/schulze.py @@ -9,6 +9,7 @@ pairwise_dict, ) from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import tiebreak_set @@ -50,7 +51,7 @@ def __init__( n_seats = 1 self.tiebreak = tiebreak - def quick_tiebreak_candidates(profile: RankProfile) -> dict[str, float]: + def quick_tiebreak_candidates(profile: RankProfile) -> dict[Candidate, float]: candidate_set = frozenset(profile.candidates) tiebroken_candidates = tiebreak_set(candidate_set, tiebreak=self.tiebreak) diff --git a/src/votekit/elections/election_types/ranking/simultaneous_veto.py b/src/votekit/elections/election_types/ranking/simultaneous_veto.py index 500cbd74..549fad42 100644 --- a/src/votekit/elections/election_types/ranking/simultaneous_veto.py +++ b/src/votekit/elections/election_types/ranking/simultaneous_veto.py @@ -12,6 +12,7 @@ from votekit.elections.election_state import ElectionState from votekit.elections.election_types.ranking.abstract_ranking import RankingElection from votekit.pref_profile import RankProfile +from votekit.types import Candidate, CandidateFloatDictLike from votekit.utils import ( borda_scores, first_place_votes, @@ -29,7 +30,7 @@ class _SVState: veto_matrix: np.ndarray veto_position_cache: np.ndarray scores: np.ndarray - eliminated: frozenset[str] + eliminated: frozenset[Candidate] class SimultaneousVeto(RankingElection): @@ -49,10 +50,12 @@ class SimultaneousVeto(RankingElection): Args: profile (RankProfile): Profile to run election on. n_seats (int, optional): Number of seats to elect. Defaults to 1. - candidate_weights (Literal['first_place', 'uniform', 'borda', 'harmonic'] | dict[str, float] - | int, optional): Initial candidate scores. 
'first_place' means candidates begin with - their first-place vote count. 'uniform' means all candidates begin with the same - score. 'borda' means candidates begin with their Borda scores. If a dictionary, + candidate_weights (Literal['first_place', 'uniform', 'borda', 'harmonic'] + | dict[str | int, float] | dict[str, float] | dict[int, float] | int, optional): + Initial candidate scores. + 'first_place' means candidates begin with their first-place vote count. + 'uniform' means all candidates begin with the same score. + 'borda' means candidates begin with their Borda scores. If a dictionary, keys are candidates and values are initial scores; a score must be provided for every candidate. If an integer k, candidates begin with their top-k vote count. Defaults to "first_place". @@ -95,7 +98,7 @@ def __init__( profile: RankProfile, n_seats: int | None = None, candidate_weights: ( - Literal["first_place", "uniform", "borda", "harmonic"] | dict[str, float] | int + Literal["first_place", "uniform", "borda", "harmonic"] | CandidateFloatDictLike | int ) = "first_place", tiebreak: Literal[ "first_place", "random", "borda", "remaining_score", "veto_pressure", "lex" @@ -121,7 +124,7 @@ def __init__( grouped_profile = profile.group_ballots() self._df = grouped_profile.df.copy() self.candidates = frozenset(grouped_profile.candidates_cast) - self._eliminated: set[str] = set("~") + self._eliminated: set[Candidate] = set("~") self._sorted_candidates = tuple(sorted(self.candidates)) self._candidate_to_idx = {c: i for i, c in enumerate(self._sorted_candidates)} @@ -155,12 +158,12 @@ def __init__( super().__init__(grouped_profile, n_seats=n_seats, score_function=score_func) - def _compute_scores_dict(self) -> dict[str, float]: + def _compute_scores_dict(self) -> dict[Candidate, float]: """ Converts self._scores (np.array) to dict[str, float]. Returns: - dict[str, float]: Dictionary mapping candidates to scores. + dict[str | int, float]: Dictionary mapping candidates to scores. 
""" return { cand: self._scores[idx] @@ -213,18 +216,20 @@ def _sv_validate_input( ) def _make_score_function( - self, candidate_weights: str | dict[str, float] | int - ) -> Callable[[RankProfile], dict[str, float]]: + self, + candidate_weights: str | CandidateFloatDictLike | int, + ) -> Callable[[RankProfile], dict[Candidate, float]]: """ Converts ``candidate_weights`` into a callable function. This function is used to generate initial scores and is also passed to super().__init__. Args: - candidate_weights (str | dict[str, float] | int): - How to initialize candidate scores. 'first_place' means candidates begin with their - first-place vote count. 'uniform' means all candidates begin with the same - score. 'borda' means candidates begin with their Borda scores. If a dictionary, + candidate_weights (str | dict[str | int, float] | dict[str, float] + | dict[int, float] | int): How to initialize candidate scores. + 'first_place' means candidates begin with their first-place vote count. + 'uniform' means all candidates begin with the same score. + 'borda' means candidates begin with their Borda scores. If a dictionary, keys are candidates and values are initial scores; a score must be provided for every candidate. If an integer k, candidates begin with their top-k vote count. 
@@ -262,13 +267,13 @@ def _make_score_function( return partial(borda_scores, tie_convention=self.scoring_tie_convention) case "uniform": - def uniform_weights(profile: RankProfile) -> dict[str, float]: + def uniform_weights(profile: RankProfile) -> dict[Candidate, float]: return {c: 1.0 for c in profile.candidates} return uniform_weights case "harmonic" | "dowdall": - def harmonic_weights(profile: RankProfile) -> dict[str, float]: + def harmonic_weights(profile: RankProfile) -> dict[Candidate, float]: assert profile.max_ranking_length is not None harmonic_score_vector = [1 / (i + 1) for i in range(profile.max_ranking_length)] return score_dict_from_score_vector( @@ -295,7 +300,7 @@ def harmonic_weights(profile: RankProfile) -> dict[str, float]: f"The following candidates were missing: {missing_cands}" ) raise ValueError(msg) - custom_weight_map: dict[str, float] = {} + custom_weight_map: dict[Candidate, float] = {} for candidate in self.candidates: raw_weight = candidate_weights[candidate] if not isinstance(raw_weight, Real) or isinstance(raw_weight, bool): @@ -305,7 +310,7 @@ def harmonic_weights(profile: RankProfile) -> dict[str, float]: ) custom_weight_map[candidate] = float(raw_weight) - def custom_weights(profile: RankProfile) -> dict[str, float]: + def custom_weights(profile: RankProfile) -> dict[Candidate, float]: return {c: custom_weight_map[c] for c in profile.candidates} return custom_weights @@ -331,12 +336,12 @@ def _initialize_veto_matrix(self, n_candidates: int, n_ballots: int) -> np.ndarr veto_matrix[veto_indices, ballot_idx] = veto_weight return veto_matrix - def _update_veto_matrix(self, candidate: str): + def _update_veto_matrix(self, candidate: Candidate): """ Updates veto matrix in place by redistributing veto pressure from an eliminated candidate. Args: - candidate (str): Candidate being eliminated. + candidate (str | int): Candidate being eliminated. 
""" candidate_idx = self._candidate_to_idx[candidate] ballots_to_update = np.flatnonzero(self._veto_matrix[candidate_idx]) @@ -352,7 +357,7 @@ def _update_veto_matrix(self, candidate: str): veto_weight /= len(veto_indices) self._veto_matrix[veto_indices, ballot_idx] += veto_weight - def _get_vetoes(self, ballot_idx: np.intp) -> frozenset[str]: + def _get_vetoes(self, ballot_idx: np.intp) -> frozenset[Candidate]: """ Given a ballot index, returns the candidate(s) to veto. @@ -368,7 +373,7 @@ def _get_vetoes(self, ballot_idx: np.intp) -> frozenset[str]: self._df containing that ballot. Returns: - frozenset[str]: The candidate(s) to be vetoed. + frozenset[Candidate]: The candidate(s) to be vetoed. Raises: ValueError: If the ballot has no remaining candidates to veto. @@ -404,22 +409,23 @@ def _is_finished(self) -> bool: def _break_tie( self, - candidates: frozenset[str], + candidates: frozenset[Candidate], candidate_idx: Iterable[int], profile: RankProfile, - ) -> tuple[frozenset[str], ...]: + ) -> tuple[frozenset[Candidate], ...]: """ Takes candidate names and indices and returns a tiebroken order of names. Args: - candidates (frozenset[str]): Names of tied candidates. + candidates (frozenset[str | int]): Names of tied candidates. candidate_idx (Iterable[int]): Indices of tied candidates. profile (RankProfile): RankProfile of the current round. Passed to tiebreak_set() if ``tiebreak`` is not 'veto_pressure' or 'remaining_score'. Returns: - tuple[frozenset[str], ...]: Tiebroken ordering of candidates (each in their own set). + tuple[frozenset[str | int], ...]: Tiebroken ordering of candidates + (each in their own set). 
""" def make_singleton_ranking(indices: list[int]) -> tuple[frozenset[str], ...]: @@ -450,7 +456,7 @@ def make_singleton_ranking(indices: list[int]) -> tuple[frozenset[str], ...]: def _eliminate_one_candidate( self, profile: RankProfile - ) -> tuple[str | None, dict[frozenset[str], tuple[frozenset[str], ...]]]: + ) -> tuple[Candidate | None, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: """ Eliminate exactly one candidate whose score has hit zero, breaking a tie if necessary. @@ -458,16 +464,16 @@ def _eliminate_one_candidate( profile (RankProfile): RankProfile of the current round. Returns: - tuple[str | None, dict[frozenset[str], tuple[frozenset[str], ...]]]: + tuple[str | int | None, dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]: Returns a tuple (eliminated_candidate, tiebreaks), where eliminated_candidate - is either a str giving the name of the eliminated candidate, or ``None``, + is either a str or int giving the name of the eliminated candidate, or ``None``, signaling that no candidate was eliminated; and tiebreaks is a dict mapping a set of simultaneously-eliminated candidates to a tiebroken order; if only one candidate is eliminated, tiebreaks is empty. """ idx_to_elim = np.where((self._scores <= 0) & (self._veto_pressure > 0))[0] - tiebreaks: dict[frozenset[str], tuple[frozenset[str], ...]] = {} + tiebreaks: dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] = {} match idx_to_elim.size: case 0: return None, tiebreaks @@ -489,7 +495,7 @@ def _eliminate_one_candidate( def _handle_all_zeroed( self, profile: RankProfile - ) -> tuple[Sentinel, dict[frozenset[str], tuple[frozenset[str], ...]]]: + ) -> tuple[Sentinel, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: """ Handles the case in which all remaining candidates' scores hit zero simultaneously. @@ -500,7 +506,7 @@ def _handle_all_zeroed( profile (RankProfile): RankProfile of the current round. 
Returns: - tuple[Sentinel, dict[frozenset[str], tuple[frozenset[str], ...]]]: + tuple[Sentinel, dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]: Returns a tuple (eliminated_candidate, tiebreaks), where eliminated_candidate is a Sentinel indicating that the election is over, and tiebreaks is a dict mapping the set of remaining candidates to a tiebroken order of the same. @@ -517,7 +523,9 @@ def _handle_all_zeroed( def _veto_step( self, profile: RankProfile - ) -> tuple[str | Sentinel | None, dict[frozenset[str], tuple[frozenset[str], ...]]]: + ) -> tuple[ + Candidate | Sentinel | None, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] + ]: """ Core of the SimultaneousVeto algorithm. @@ -536,9 +544,10 @@ def _veto_step( Used for tiebreaking, if necessary. Returns: - tuple[str | Sentinel | None, dict[frozenset[str], tuple[frozenset[str], ...]]]: - A 2-tuple of (eliminated_candidate, tiebreaks). eliminated_candidate is one of: - - a str indicating the candidate to be eliminated + tuple[str | int | Sentinel | None, dict[frozenset[str | int], + tuple[frozenset[Candidate], ...]]]: A 2-tuple of (eliminated_candidate, tiebreaks). + eliminated_candidate is one of: + - a str or int indicating the candidate to be eliminated - NO_CANDIDATES_REMAINING, a Sentinel indicating the end of the election - None, an error code signaling the failure to eliminate a candidate this round and tiebreaks is a dict mapping an unordered frozenset of candidates to their @@ -552,7 +561,7 @@ def _veto_step( # handle floating point imprecision: self._scores[np.abs(self._scores) < 1e-10] = 0 - eliminated_candidate: str | Sentinel | None = None + eliminated_candidate: Candidate | Sentinel | None = None if np.any(self._scores): eliminated_candidate, tiebreaks = self._eliminate_one_candidate(profile) else: @@ -616,9 +625,9 @@ def _run_step( else: self._write_state(current_round) - eliminated: tuple[frozenset[str], ...] = (frozenset(),) - elected: tuple[frozenset[str], ...] 
= (frozenset(),) - tiebreaks: dict[frozenset[str], tuple[frozenset[str], ...]] = {} + eliminated: tuple[frozenset[Candidate], ...] = (frozenset(),) + elected: tuple[frozenset[Candidate], ...] = (frozenset(),) + tiebreaks: dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] = {} remaining_set = self.candidates - self._eliminated if len(remaining_set) <= self.n_seats: diff --git a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py index 64f87a6c..e238e3b5 100644 --- a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py +++ b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py @@ -10,6 +10,7 @@ from votekit.elections.election_state import ElectionState from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import tiebreak_set QuotaType: TypeAlias = Literal["droop", "hare"] @@ -63,7 +64,7 @@ class NumpyElectionDataTracker: initial_fpv_scores: NDArray fpv_by_round: list[NDArray] = field(default_factory=list) play_by_play: list[ElectionPlay] = field(default_factory=list) - tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]] = field( + tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]] = field( default_factory=list ) candidate_sets_by_fpv: list[set[int]] | None = None @@ -75,7 +76,7 @@ class NumpySTVBase(ABC): Abstract base class for numpy-based STV-style elections. Attributes: - candidates (list[str]): List of candidate names, indexed + candidates (list[str | int]): List of candidate names, indexed to correspond to ballot matrix entries. profile (RankProfile): The original RankProfile for reference. n_seats (int): Number of seats to be elected. @@ -89,7 +90,7 @@ class NumpySTVBase(ABC): _loser_tiebreak (TiebreakType): Tiebreak method for losers, set to "first_place" by default. 
""" - candidates: list[str] + candidates: list[Candidate] profile: RankProfile n_seats: int election_states: list[ElectionState] @@ -253,7 +254,7 @@ def _run_election( ) -> tuple[ list[NDArray], list[ElectionPlay], - list[dict[frozenset[str], tuple[frozenset[str], ...]]], + list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ]: """ Core election logic to be implemented by child classes. @@ -270,7 +271,7 @@ def _run_election( fpv_by_round (list[NDArray]): List of first-preference vote tallies by round. play_by_play (list[ElectionPlay]): List of dictionaries representing the actions taken in each round. - tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): + tiebreak_record (list[dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]): List of dictionaries representing tiebreak resolutions for each round. """ pass @@ -326,7 +327,7 @@ def get_remaining(self, round_number: int = -1) -> tuple[frozenset, ...]: else (frozenset(),) ) - def get_elected(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_elected(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the elected candidates up to the given round number. @@ -335,7 +336,7 @@ def get_elected(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str], ...]: + tuple[frozenset[str | int], ...]: Tuple of winning candidates in order of election. Candidates in the same set were elected simultaneously, i.e. in the final ranking they are tied. @@ -353,7 +354,7 @@ def get_elected(self, round_number: int = -1) -> tuple[frozenset[str], ...]: ] return tuple(frozenset([self.candidates[c] for c in w_list]) for w_list in list_of_winners) - def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the eliminated candidates up to the given round number. 
@@ -362,7 +363,7 @@ def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str], ...]: + tuple[frozenset[str | int], ...]: Tuple of eliminated candidates in reverse order of elimination. Candidates in the same set were eliminated simultaneously, i.e. in the final ranking they are tied. @@ -547,7 +548,7 @@ def get_step(self, round_number: int = -1) -> tuple[RankProfile, ElectionState]: def _fpv_tiebreak( self, tied_cands: list[int], winner_tiebreak_bool: bool - ) -> tuple[int, tuple[frozenset[str], ...]]: + ) -> tuple[int, tuple[frozenset[Candidate], ...]]: """ Break ties among tied_cands using initial_fpv tallies. @@ -579,7 +580,7 @@ def _fpv_tiebreak( if cluster & tied_cands_set ] - packaged_ranking: tuple[frozenset[str], ...] = tuple( + packaged_ranking: tuple[frozenset[Candidate], ...] = tuple( frozenset(self.candidates[i] for i in cluster) for cluster in clusters_containing_tied_cands ) @@ -632,16 +633,16 @@ def _run_winner_tiebreak( self, tied_winners: list[int], round_number: int, - mutant_tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]], - ) -> tuple[int, list[dict[frozenset[str], tuple[frozenset[str], ...]]]]: + mutant_tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], + ) -> tuple[int, list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]]: """ Handle winner tiebreaking logic. Args: tied_winners (list[int]): List of candidate indices that are tied. round_number (int): The current round number. - mutant_tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): - Tiebreak record for each round. + mutant_tiebreak_record (list[dict[frozenset[str | int], + tuple[frozenset[str | int], ...]]]): Tiebreak record for each round. 
Returns: tuple: (index of new winner, updated tiebreak record) @@ -667,16 +668,16 @@ def _run_loser_tiebreak( self, tied_losers: list[int], round_number: int, - mutant_tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]], - ) -> tuple[int, list[dict[frozenset[str], tuple[frozenset[str], ...]]]]: + mutant_tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], + ) -> tuple[int, list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]]: """ Handle loser tiebreaking logic. Args: tied_losers (list[int]): List of candidate indices that are tied. round_number (int): The current round number. - mutant_tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): - Tiebreak record for each round. + mutant_tiebreak_record (list[dict[frozenset[str | int], + tuple[frozenset[str | int], ...]]]): Tiebreak record for each round. Returns: tuple: (index of new loser, updated tiebreak record) diff --git a/src/votekit/elections/election_types/ranking/stv/stv.py b/src/votekit/elections/election_types/ranking/stv/stv.py index 7fdc22f2..122cbcfe 100644 --- a/src/votekit/elections/election_types/ranking/stv/stv.py +++ b/src/votekit/elections/election_types/ranking/stv/stv.py @@ -25,6 +25,7 @@ from votekit.elections.election_types.ranking.stv.utils import numpy_random_transfer from votekit.elections.transfers import fractional_transfer from votekit.pref_profile import ProfileError, RankProfile +from votekit.types import Candidate from votekit.utils import ( _first_place_votes_from_df_no_ties, ballots_by_first_cand, @@ -258,14 +259,14 @@ def _find_loser( mutant_bool_ballot_matrix: NDArray, mutant_winner_list: list[int], mutant_eliminated_or_exhausted: list[int], - mutant_tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]], + mutant_tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ) -> tuple[ int, tuple[ NDArray, list[int], list[int], - list[dict[frozenset[str], 
tuple[frozenset[str], ...]]], + list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ], ]: """ @@ -277,7 +278,8 @@ def _find_loser( mutant_bool_ballot_matrix (NDArray): Boolean mask for eliminated candidates. mutant_winner_list (list[int]): List of winner candidate indices so far. mutant_eliminated_or_exhausted (list[int]): List of eliminated candidate indices so far. - mutant_tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): + mutant_tiebreak_record (list[dict[frozenset[str | int], + tuple[frozenset[str | int], ...]]]): Tiebreak record for each round. Returns: @@ -316,14 +318,14 @@ def _find_winners( mutant_bool_ballot_matrix: NDArray, mutant_winner_list: list[int], mutant_eliminated_or_exhausted: list[int], - mutant_tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]], + mutant_tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ) -> tuple[ list[int], tuple[ NDArray, list[int], list[int], - list[dict[frozenset[str], tuple[frozenset[str], ...]]], + list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ], ]: """ @@ -337,7 +339,8 @@ def _find_winners( mutant_winner_list (list[int]): List of winner candidate indices so far. mutant_eliminated_or_exhausted (list[int]): List of eliminated/elected candidate indices so far. - mutant_tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): + mutant_tiebreak_record (list[dict[frozenset[str | int], + tuple[frozenset[str | int], ...]]]): Tiebreak record for each round. Returns: @@ -393,7 +396,7 @@ def _run_election( ) -> tuple[ list[NDArray], list[ElectionPlay], - list[dict[frozenset[str], tuple[frozenset[str], ...]]], + list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ]: """ Core election logic for STV. @@ -406,7 +409,7 @@ def _run_election( fpv_by_round (list[NDArray]): List of first-preference vote tallies by round. 
play_by_play (list[ElectionPlay]): List of dictionaries representing the actions taken in each round. - tiebreak_record (list[dict[frozenset[str], tuple[frozenset[str], ...]]]): + tiebreak_record (list[dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]): List of dictionaries representing tiebreak resolutions for each round. """ ballot_matrix = data.ballot_matrix @@ -422,7 +425,7 @@ def _run_election( ballot_weight_sitting_with_winners = 0.0 winner_list: list[int] = [] eliminated_or_exhausted: list[int] = [] - tiebreak_record: list[dict[frozenset[str], tuple[frozenset[str], ...]]] = [] + tiebreak_record: list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]] = [] pos_vec: NDArray = np.zeros(ballot_matrix.shape[0], dtype=np.int8) mutant_bool_ballot_matrix: NDArray = np.ones_like(ballot_matrix, dtype=bool) @@ -706,7 +709,7 @@ def __init__( profile: RankProfile, n_seats: int | None = None, transfer: Callable[ - [str, float, Union[tuple[RankBallot], list[RankBallot]], int], + [Candidate, float, Union[tuple[RankBallot], list[RankBallot]], int], tuple[RankBallot, ...], ] = fractional_transfer, quota: QuotaType | None = "droop", @@ -727,7 +730,7 @@ def __init__( Args: profile (RankProfile): RankProfile to run election on. n_seats (int): Number of seats to be elected. Defaults to 1. - transfer (Callable[[str, float, Union[tuple[RankBallot], list[RankBallot]], int], + transfer (Callable[[str | int, float, Union[tuple[RankBallot], list[RankBallot]], int], tuple[RankBallot, ...]]): Transfer method. Defaults to fractional transfer. Function signature is elected candidate, their number of first-place votes, the list of ballots with them ranked first, and the threshold value. 
Returns the list of @@ -820,7 +823,7 @@ def _is_finished(self): def _simultaneous_elect_step( self, profile: RankProfile, prev_state: ElectionState - ) -> tuple[tuple[frozenset[str], ...], RankProfile]: + ) -> tuple[tuple[frozenset[Candidate], ...], RankProfile]: """ Run one step of an election from the given profile and previous state. @@ -831,7 +834,7 @@ def _simultaneous_elect_step( prev_state (ElectionState): The previous ElectionState. Returns: - tuple[tuple[frozenset[str],...], RankProfile]: + tuple[tuple[frozenset[str | int],...], RankProfile]: A tuple whose first entry is the elected candidates, ranked by first-place votes, and whose second entry is the profile of ballots after transfers. """ @@ -893,8 +896,8 @@ def _simultaneous_elect_step( def _single_elect_step( self, profile: RankProfile, prev_state: ElectionState ) -> tuple[ - tuple[frozenset[str], ...], - dict[frozenset[str], tuple[frozenset[str], ...]], + tuple[frozenset[Candidate], ...], + dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]], RankProfile, ]: """ @@ -907,7 +910,8 @@ def _single_elect_step( prev_state (ElectionState): The previous ElectionState. Returns: - tuple[tuple[frozenset[str],...], dict[frozenset[str], tuple[frozenset[str],...]], + tuple[tuple[frozenset[str | int], ...], + dict[frozenset[str | int], tuple[frozenset[str | int], ...]], RankProfile]: A tuple whose first entry is the elected candidate, second is the tiebreak dict, and whose third entry is the profile of ballots after transfers. @@ -989,7 +993,7 @@ def _run_step( RankProfile: The profile of ballots after the round is completed. 
""" - tiebreaks: dict[frozenset[str], tuple[frozenset[str], ...]] = {} + tiebreaks: dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] = {} current_round = prev_state.round_number + 1 above_thresh_cands = [ @@ -1003,7 +1007,7 @@ def _run_step( else: elected, tiebreaks, new_profile = self._single_elect_step(profile, prev_state) # no one eliminated in elect round - eliminated: tuple[frozenset[str], ...] = (frozenset(),) + eliminated: tuple[frozenset[Candidate], ...] = (frozenset(),) # catches the possibility that we exhaust all ballots # without candidates reaching threshold @@ -1143,7 +1147,7 @@ def __init__( n_seats = 1 def _transfer( - winner: str, + winner: Candidate, _fpv: float, ballots: Union[tuple[RankBallot], list[RankBallot]], _threshold: int, @@ -1152,7 +1156,7 @@ def _transfer( Transfer ballots by removing the winner and condensing rankings. Args: - winner (str): The candidate to remove from ballots. + winner (str | int): The candidate to remove from ballots. _fpv (float): The number of first-place votes the winner had. ballots (Union[tuple[RankBallot], list[RankBallot]]): The ballots to transfer. _threshold (int): The threshold for election in this round. diff --git a/src/votekit/elections/transfers.py b/src/votekit/elections/transfers.py index bc9138a8..6e195d6a 100644 --- a/src/votekit/elections/transfers.py +++ b/src/votekit/elections/transfers.py @@ -4,10 +4,11 @@ from votekit.ballot import RankBallot from votekit.pref_profile import RankProfile +from votekit.types import Candidate def fractional_transfer( - winner: str, + winner: Candidate, fpv: float, ballots: Union[tuple[RankBallot], list[RankBallot]], threshold: int, @@ -16,7 +17,7 @@ def fractional_transfer( Calculates fractional transfer from winner, then removes winner from the list of ballots. Args: - winner (str): Candidate to transfer votes from. + winner (str | int): Candidate to transfer votes from. fpv (float): Number of first place votes for winning candidate. 
ballots (Union[tuple[RankBallot], list[RankBallot]]): List of Ballot objects. threshold (int): Value required to be elected, used to calculate transfer value. diff --git a/src/votekit/matrices/candidate/comentions.py b/src/votekit/matrices/candidate/comentions.py index 845c9a20..c3522fab 100644 --- a/src/votekit/matrices/candidate/comentions.py +++ b/src/votekit/matrices/candidate/comentions.py @@ -1,29 +1,29 @@ -from typing import Union - import numpy as np from votekit.ballot import RankBallot from votekit.matrices._utils import _convert_dict_to_matrix from votekit.pref_profile import RankProfile +from votekit.types import Candidate -def comention(cands: Union[str, list[str]], ballot: RankBallot): +def comention(cands: Candidate | list[Candidate] | list[str] | list[int], ballot: RankBallot): """ Takes cands and returns true if they all appear on the ballot in the ranking. Args: - cands (Union[str, list[str]]): Candidate name or list of candidate names. + cands (str | int | list[str | int] | list[str] | list[int]): + Candidate name or list of candidate names. ballot (RankBallot): RankBallot. Returns: bool: True if all candidates appear in ballot. """ - all_cands: set[str] = set() + all_cands: set[Candidate] = set() if ballot.ranking: all_cands = all_cands.union(c for s in ballot.ranking for c in s) - if isinstance(cands, str): + if isinstance(cands, Candidate): cands = [cands] return set(cands).issubset(all_cands) diff --git a/src/votekit/models.py b/src/votekit/models.py index fb2454eb..3c4cf4d0 100644 --- a/src/votekit/models.py +++ b/src/votekit/models.py @@ -5,6 +5,7 @@ from votekit.elections.election_state import ElectionState from votekit.pref_profile.pref_profile import PreferenceProfile +from votekit.types import Candidate from votekit.utils import ( score_dict_to_ranking, ) @@ -30,16 +31,17 @@ class Election(Generic[P]): election_states (list[ElectionState]): A list of election states, one for each round of the election.
The list is 0 indexed, so the initial state is stored at index 0, round 1 at 1, etc. - score_function (Callable[[PreferenceProfile], dict[str, float]], optional): + score_function (Callable[[PreferenceProfile], dict[str | int, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects. Defaults to None. + Candidates can be strings, integers, or a mix of both. length (int): The number of rounds of the election. """ def __init__( self, profile: P, - score_function: Optional[Callable[[P], dict[str, float]]] = None, + score_function: Optional[Callable[[P], dict[Candidate, float]]] = None, sort_high_low: bool = True, ): self._validate_params_and_profile(profile) @@ -90,7 +92,7 @@ def get_step(self, round_number: int = -1) -> tuple[P, ElectionState]: """ return (self.get_profile(round_number), self.election_states[round_number]) - def get_elected(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_elected(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the elected candidates up to the given round number. @@ -99,7 +101,7 @@ def get_elected(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str],...]: + tuple[frozenset[Candidate],...]: List of winning candidates in order of election. Candidates in the same set were elected simultaneously, i.e. in the final ranking they are tied. @@ -153,7 +155,7 @@ def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[str], ...]: ] ) - def get_remaining(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_remaining(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the remaining candidates after the given round. @@ -162,13 +164,13 @@ def get_remaining(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. 
Returns: - tuple[frozenset[str],...]: + tuple[frozenset[str | int],...]: Tuple of sets of remaining candidates. Ordering of tuple denotes ranking of remaining candidates, sets denote ties. """ return tuple(self.election_states[round_number].remaining) - def get_ranking(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_ranking(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the ranking of candidates after a given round. @@ -177,7 +179,7 @@ def get_ranking(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str],...]: Ranking of candidates. + tuple[frozenset[str | int],...]: Ranking of candidates. """ # len condition handles empty remaining candidates return tuple( diff --git a/src/votekit/plots/bar_plot.py b/src/votekit/plots/bar_plot.py index dfc34883..9f04e2cf 100644 --- a/src/votekit/plots/bar_plot.py +++ b/src/votekit/plots/bar_plot.py @@ -1,6 +1,6 @@ import warnings from collections.abc import Mapping -from typing import Any, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Tuple, TypeAlias, Union, cast import matplotlib.patches as mpatches from matplotlib import pyplot as plt @@ -11,24 +11,36 @@ from votekit.utils import COLOR_LIST DEFAULT_LINE_KWDS = {"linestyle": "-", "linewidth": 2, "color": "grey", "alpha": 0.5} -CategoryLabel = str | int - - -def add_null_keys(data: Mapping[str, Mapping[str, float]]) -> dict[str, dict[str, float]]: +CategoryLabel: TypeAlias = str | int +DataMapping: TypeAlias = ( + Mapping[str, Mapping[CategoryLabel, float]] + | Mapping[str, Mapping[str, float]] + | Mapping[str, Mapping[int, float]] +) +CategoryLabelList: TypeAlias = list[CategoryLabel] | list[str] | list[int] +CategoryLabelMapping: TypeAlias = ( + Mapping[CategoryLabel, CategoryLabel] + | Mapping[str, CategoryLabel] + | Mapping[int, CategoryLabel] +) + + +def add_null_keys(data: DataMapping) -> dict[str, 
dict[CategoryLabel, float]]: """ Prepares dictionary of dictionaries to be passed to ``multi_bar_plot()``. If a key is missing from a dictionary, this function adds the key with value 0. Args: - data (dict[str, dict[str, float]]): Categorical data to be cleaned. The value of each dict - should be the frequency of the key which is the category name. + data (dict[str, dict[str | int, float]]): Categorical data to be cleaned. + The value of each dict should be the frequency of the key which is the category name. Returns: - dict[str, dict[str, float]]: Cleaned data. + dict[str, dict[str | int, float]]: Cleaned data. """ - x_labels: list[str] = [] - clean_data = {label: dict(data_dict) for label, data_dict in data.items()} + x_labels: list[CategoryLabel] = [] + _data = cast(Mapping[str, Mapping[CategoryLabel, float]], data) + clean_data = {label: dict(data_dict) for label, data_dict in _data.items()} for data_dict in clean_data.values(): for x_label in data_dict.keys(): @@ -45,10 +57,10 @@ def add_null_keys(data: Mapping[str, Mapping[str, float]]) -> dict[str, dict[str def _set_default_bar_plot_args( *, - data: Mapping[str, Mapping[str, float]], + data: DataMapping, data_set_colors: Optional[Mapping[str, str]], bar_width: Optional[float], - category_ordering: Optional[list[str]], + category_ordering: Optional[CategoryLabelList], legend_font_size: Optional[float], threshold_values: Optional[Union[list[float], float]], threshold_kwds: Optional[Union[list[dict], dict]], @@ -129,9 +141,9 @@ def _set_default_bar_plot_args( def _validate_bar_plot_args( *, - data: Mapping[str, Mapping[str, float]], - category_ordering: list[str], - categories_legend: Optional[Mapping[str, CategoryLabel]], + data: DataMapping, + category_ordering: CategoryLabelList, + categories_legend: Optional[CategoryLabelMapping], bar_width: float, threshold_values: Optional[list[float]], threshold_kwds: Optional[list[dict[str, object]]], @@ -140,12 +152,16 @@ def _validate_bar_plot_args( Validates bar plot 
arguments. Args: - data (dict[str, dict[str, float]]): Categorical data to be plotted. Top level keys are - data set labels. Inner keys are categories, and inner values are the height of the bars. - category_ordering (list[str]): Ordering of x labels. Must match data keys. - categories_legend (dict[str, str], optional): Dictionary mapping data categories - to relabeling. If provided, generates a second legend for data - categories and relabels the x-axis accordingly. Can be a subset of the data keys. + data (dict[str, dict[str | int, float]] | dict[str, dict[str, float]] + | dict[str, dict[int, float]]): Categorical data to be plotted. + Top level keys are data set labels. + Inner keys are categories, and inner values are the height of the bars. + category_ordering (list[str | int] | list[str] | list[int]): Ordering of x labels. + Must match data keys. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping data categories to relabeling. If provided, generates a second legend + for data categories and relabels the x-axis accordingly. + Can be a subset of the data keys. bar_width (float): Width of bars. Must be between 0 and 1/len(data). threshold_values (list[float], optional): List of values to plot horizontal lines at. @@ -229,17 +245,19 @@ def _validate_bar_plot_args( return bar_width -def _normalize_data_dict(data_dict: Mapping[str, float]) -> dict[str, float]: +def _normalize_data_dict( + data_dict: Mapping[str | int, float] | Mapping[str, float] | Mapping[int, float], +) -> dict[str | int, float] | dict[str, float] | dict[int, float]: """ Normalizes data so number of total observations is 1. Raises a ValueError if the total mass is 0. Args: - data (dict[str, float]): Single data dictionary whose keys are categories and values - are bar heights. + data (dict[str | int, float] | dict[str, float] | dict[int, float]): + Single data dictionary whose keys are categories and values are bar heights. 
Returns: - dict[str, float]: Normalized data. + dict[str | int, float] | dict[str, float] | dict[int, float]: Normalized data. Raise: ValueError: If the sum of the data is 0. @@ -256,28 +274,36 @@ def _normalize_data_dict(data_dict: Mapping[str, float]) -> dict[str, float]: def _prepare_data_bar_plot( *, normalize: bool, - data: Mapping[str, Mapping[str, float]], - category_ordering: list[str], + data: DataMapping, + category_ordering: CategoryLabelList, ) -> tuple[list[list[float]], float]: """ Formats data and normalizes if required. Args: normalize (bool): Whether or not to normalize data. - data (dict[str, dict[str, float]]): Categorical data to be plotted. Top level keys are - data set labels. Inner keys are categories, and inner values are the height of the bars. - category_ordering (list[str]): Ordering of x labels. + data (dict[str, dict[str | int, float]] | dict[str, dict[str, float]] + | dict[str, dict[int, float]]): Categorical data to be plotted. + Top level keys are data set labels. + Inner keys are categories, and inner values are the height of the bars. + category_ordering (list[str | int] | list[str] | list[int]): Ordering of x labels. Returns: list[list[float]]: Height of bars, one list for each data set.
""" - plot_data: Mapping[str, Mapping[str, float]] = data + plot_data: Mapping[str, Mapping[str | int, float]] = cast( + Mapping[str, Mapping[str | int, float]], data + ) if normalize: - plot_data = {label: _normalize_data_dict(data_dict) for label, data_dict in data.items()} + plot_data = cast( + Mapping[str, Mapping[str | int, float]], + {label: _normalize_data_dict(data_dict) for label, data_dict in data.items()}, + ) + _category_ordering = cast(list[CategoryLabel], category_ordering) y_data = [ - [data_dict[x_label] for x_label in category_ordering] for data_dict in plot_data.values() + [data_dict[x_label] for x_label in _category_ordering] for data_dict in plot_data.values() ] all_data_values = [value for data_dict in data.values() for value in data_dict.values()] @@ -294,10 +320,10 @@ def _prepare_data_bar_plot( def _plot_datasets_on_bar_plot( *, ax: Axes, - category_ordering: list[str], + category_ordering: CategoryLabelList, y_data: list[list[float]], data_set_labels: list[str], - categories_legend: Optional[Mapping[str, CategoryLabel]], + categories_legend: Optional[CategoryLabelMapping], bar_width: float, data_set_to_color: Mapping[str, str], font_size: float, @@ -305,13 +331,14 @@ def _plot_datasets_on_bar_plot( """ Args: - category_ordering (list[str]): Ordering of x labels. + category_ordering (list[str | int] | list[str] | list[int]): Ordering of x labels. y_data (list[list[floats]]): List of lists where each sublist is the bar heights for a data set. data_set_labels (list[str]): List of labels for data sets. - categories_legend (dict[str, str], optional): Dictionary mapping data categories - to relabeling. If provided, generates a second legend for data - categories and relabels the x-axis accordingly. Can be a subset of the data keys. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping data categories to relabeling. 
+ If provided, generates a second legend for data categories + and relabels the x-axis accordingly. Can be a subset of the data keys. bar_width (float): Width of bars. data_set_to_color (dict): Dictionary mapping data set labels to colors. font_size (float): Font size for figure. @@ -320,7 +347,9 @@ def _plot_datasets_on_bar_plot( Axes: Matplotlib axes containing bar plot. """ if categories_legend: - tick_labels = [categories_legend.get(c, c) for c in category_ordering] + _category_ordering = cast(list[str | int], category_ordering) + _categories_legend = cast(Mapping[str | int, str | int], categories_legend) + tick_labels = [_categories_legend.get(c, c) for c in _category_ordering] else: tick_labels = category_ordering @@ -432,7 +461,7 @@ def _add_data_sets_legend_bar_plot( ax: Axes, data_set_labels: list[str], data_set_to_color: Mapping[str, str], - categories_legend: Optional[Mapping[str, CategoryLabel]], + categories_legend: Optional[CategoryLabelMapping], threshold_kwds: Optional[list[dict]], legend_font_size: float, legend_loc: str, @@ -445,9 +474,9 @@ def _add_data_sets_legend_bar_plot( ax (Axes): Matplotlib axes containing barplot. data_set_labels (list[str]): Labels for each data set. data_set_to_color (dict): Dictionary mapping data set labels to colors. - categories_legend (dict[str, str], optional): Dictionary mapping x-axis - categories to description in legend. Defaults to None, in which case legend is just - x-axis labels. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping x-axis categories to description in legend. + Defaults to None, in which case legend is just x-axis labels. threshold_kwds (list[dict], optional): List of plotting keywords for the horizontal lines. legend_font_size (float): The font size to use for the legend. 
@@ -489,7 +518,7 @@ def _add_data_sets_legend_bar_plot( def _add_categories_legend_bar_plot( *, ax: Axes, - categories_legend: Mapping[str, CategoryLabel], + categories_legend: CategoryLabelMapping, legend_font_size: float, legend_loc: str, legend_bbox_to_anchor: Tuple[float, float], @@ -500,8 +529,8 @@ def _add_categories_legend_bar_plot( Args: ax (Axes): Matplotlib axes containing barplot. - categories_legend (dict[str, str]): Dictionary mapping x-axis - categories to description in legend. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str]): + Dictionary mapping x-axis categories to description in legend. legend_font_size (float): The font size to use for the legend. legend_loc(str): The location parameter to pass to ``Axes.legend(loc=)``. legend_bbox_to_anchor (Tuple[float, float]): The bounding box to anchor the legend to. @@ -545,17 +574,19 @@ def _add_categories_legend_bar_plot( def multi_bar_plot( - data: Mapping[str, Mapping[str, float]], + data: Mapping[str, Mapping[CategoryLabel, float]] + | Mapping[str, Mapping[str, float]] + | Mapping[str, Mapping[int, float]], *, normalize: bool = False, data_set_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - category_ordering: Optional[list[str]] = None, + category_ordering: Optional[CategoryLabelList] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_data_set_legend: bool = False, - categories_legend: Optional[Mapping[str, CategoryLabel]] = None, + categories_legend: Optional[CategoryLabelMapping] = None, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, legend_font_size: Optional[float] = None, @@ -565,24 +596,27 @@ def multi_bar_plot( Plots bar plot of categorical data. Args: - data (dict[str, dict[str, float]]): Categorical data to be plotted. Top level keys are - data set labels. 
Inner keys are categories, and inner values are the height of the bars. + data (dict[str, dict[str | int, float]] | dict[str, dict[str, float]] + | dict[str, dict[int, float]]): Categorical data to be plotted. + Top level keys are data set labels. + Inner keys are categories, and inner values are the height of the bars. normalize (bool, optional): Whether or not to normalize data. Defaults to False. data_set_colors (dict[str, str], optional): Dictionary mapping data set labels to colors. Defaults to None, in which case we use a subset of ``COLOR_LIST`` from ``utils`` module. Dictionary keys can be a subset of the data sets. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - category_ordering (list[str], optional): Ordering of x-labels. Defaults to order retrieved - from data dictionary. + category_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to order retrieved from data dictionary. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_data_set_legend (bool, optional): Whether or not to plot the data set legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - categories_legend (dict[str, str], optional): Dictionary mapping data categories - to relabeling. Defaults to None. If provided, generates a second legend for data + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping data categories to relabeling. + Defaults to None. If provided, generates a second legend for data categories and relabels the x-axis accordingly. Can be a subset of the data keys. 
threshold_values (Union[list[float], float], optional): List of values to plot horizontal lines at. Can be provided as a list or a single float. @@ -689,18 +723,18 @@ def multi_bar_plot( def bar_plot( - data: Mapping[str, float], + data: Mapping[str | int, float] | Mapping[str, float] | Mapping[int, float], *, data_set_label: str = "Data set", normalize: bool = False, data_set_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - category_ordering: Optional[list[str]] = None, + category_ordering: Optional[CategoryLabelList] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_data_set_legend: bool = False, - categories_legend: Optional[Mapping[str, CategoryLabel]] = None, + categories_legend: Optional[CategoryLabelMapping] = None, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, legend_font_size: Optional[float] = None, @@ -710,25 +744,26 @@ def bar_plot( Plots bar plot of a single categorical data set. Wrapper for ``multi_bar_plot``. Args: - data (dict[str, float]): Categorical data set to be plotted. Keys are categories, and - values are the height of the bars. + data (dict[str | int, float] | dict[str, float] | dict[int, float]): + Categorical data set to be plotted. + Keys are categories, and values are the height of the bars. data_set_label(str, optional): Label for data set. Defaults to "Data set". normalize (bool, optional): Whether or not to normalize data. Defaults to False. data_set_color (str, optional): Color of data set. Defaults to the first color from ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - category_ordering (list[str], optional): Ordering of x-labels. Defaults to order retrieved - from data dictionary. 
+ category_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to order retrieved from data dictionary. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_data_set_legend (bool, optional): Whether or not to plot the data set legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - categories_legend (dict[str, str], optional): Dictionary mapping data categories - to description. Defaults to None. If provided, generates a second legend for data - categories. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping data categories to description. Defaults to None. + If provided, generates a second legend for data categories. threshold_values (Union[list[float], float], optional): List of values to plot horizontal lines at. Can be provided as a list or a single float. 
threshold_kwds (Union[list[dict], dict], optional): List of plotting @@ -750,7 +785,7 @@ def bar_plot( """ return multi_bar_plot( - {data_set_label: data}, + cast(DataMapping, {data_set_label: data}), normalize=normalize, data_set_colors={data_set_label: data_set_color}, bar_width=bar_width, diff --git a/src/votekit/plots/profiles/multi_profile_bar_plot.py b/src/votekit/plots/profiles/multi_profile_bar_plot.py index 467c52f5..b5b4d83d 100644 --- a/src/votekit/plots/profiles/multi_profile_bar_plot.py +++ b/src/votekit/plots/profiles/multi_profile_bar_plot.py @@ -1,11 +1,12 @@ from collections.abc import Callable, Mapping from functools import partial -from typing import Any, Optional, TypeVar, Union +from typing import Any, Optional, TypeAlias, TypeVar, Union from matplotlib.axes import Axes from votekit.plots.bar_plot import add_null_keys, multi_bar_plot from votekit.pref_profile import PreferenceProfile, RankProfile +from votekit.types import Candidate from votekit.utils import ( COLOR_LIST, ballot_lengths, @@ -16,12 +17,16 @@ ProfileT = TypeVar("ProfileT", bound=PreferenceProfile) PlotLabel = str | int +CandidateList: TypeAlias = list[Candidate] | list[str] | list[int] +CandidatePlotLabelMapping: TypeAlias = ( + Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] +) def _create_data_dict( profile_dict: Mapping[str, ProfileT], - stat_function: Callable[[ProfileT], dict[str, float]], -) -> dict[str, dict[str, float]]: + stat_function: Callable[[ProfileT], dict[Candidate, float]], +) -> dict[str, dict[Candidate, float]]: """ Create the correctly formatted dict to pass to ``multi_bar_plot``. Ensures each subdictionary has the same keys, and uses the default value 0 if a key is missing. @@ -29,12 +34,12 @@ def _create_data_dict( Args: profile_dict (dict[str, RankProfile | ScoreProfile]): Keys are profile labels and values are profiles to plot statistics for. 
- stat_function (Callable[[RankProfile | ScoreProfile], dict[str, float]]): Which stat + stat_function (Callable[[RankProfile | ScoreProfile], dict[str | int, float]]): Which stat to use for the bar plot. Must be a callable that takes a profile and returns - a dict with str keys and float values. + a dict with str or int keys and float values. Returns: - dict[str, dict[str, float]]: Data dictionary for ``multi_bar_plot``. + dict[str, dict[str | int, float]]: Data dictionary for ``multi_bar_plot``. """ @@ -45,7 +50,7 @@ def _create_data_dict( def multi_profile_bar_plot( profile_dict: Mapping[str, ProfileT], - stat_function: Callable[[ProfileT], dict[str, float]], + stat_function: Callable[[ProfileT], dict[Candidate, float]], normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, @@ -67,9 +72,9 @@ def multi_profile_bar_plot( Args: profile_dict (dict[str, RankProfile | ScoreProfile]): Keys are profile labels and values are profiles to plot statistics for. - stat_function (Callable[[RankProfile | ScoreProfile], dict[str, float]]): Which stat + stat_function (Callable[[RankProfile | ScoreProfile], dict[str | int, float]]): Which stat to use for the bar plot. Must be a callable that takes a profile and returns - a dict with str keys and float values. + a dict with str or int keys and float values. normalize (bool, optional): Whether or not to normalize data. Defaults to False. profile_colors (dict[str, str], optional): Dictionary mapping profile labels to colors.
Defaults to None, in which case we use a subset of ``COLOR_LIST`` @@ -149,12 +154,12 @@ def multi_profile_borda_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[CandidateList] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[CandidatePlotLabelMapping] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -176,17 +181,17 @@ def multi_profile_borda_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to decreasing - borda scores from the first profile. + candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to decreasing borda scores from the first profile. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to relableing. Defaults to None. If provided, generates a second legend for data - categories. 
+ candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to relabeling. Defaults to None. + If provided, generates a second legend for data categories. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -263,12 +268,12 @@ def multi_profile_mentions_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[CandidateList] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[CandidatePlotLabelMapping] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -290,17 +295,17 @@ def multi_profile_mentions_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to order retrieved - from score dictionary. + candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to order retrieved from score dictionary. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure.
Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to relableing. Defaults to None. If provided, generates a second legend for data - categories. + candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to relabeling. Defaults to None. + If provided, generates a second legend for data categories. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -376,12 +381,12 @@ def multi_profile_fpv_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[CandidateList] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[CandidatePlotLabelMapping] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -403,17 +408,17 @@ def multi_profile_fpv_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to order retrieved - from score dictionary. 
+ candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to order retrieved from score dictionary. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to relableing. Defaults to None. If provided, generates a second legend for data - categories. + candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to relabeling. Defaults to None. + If provided, generates a second legend for data categories. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. 
threshold_values (Union[list[float], float], optional): List of values to plot horizontal diff --git a/src/votekit/plots/profiles/profile_bar_plot.py b/src/votekit/plots/profiles/profile_bar_plot.py index 423f9dcf..f0bd44e3 100644 --- a/src/votekit/plots/profiles/profile_bar_plot.py +++ b/src/votekit/plots/profiles/profile_bar_plot.py @@ -5,6 +5,7 @@ from votekit.plots.bar_plot import bar_plot from votekit.pref_profile import PreferenceProfile, RankProfile +from votekit.types import Candidate, CandidateFloatDictLike from votekit.utils import ( COLOR_LIST, ballot_lengths, @@ -19,18 +20,20 @@ def profile_bar_plot( profile: ProfileT, - stat_function: Callable[[ProfileT], dict[str, float]], + stat_function: Callable[[ProfileT], CandidateFloatDictLike], *, profile_label: str = "Profile", normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - category_ordering: Optional[list[str]] = None, + category_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - categories_legend: Optional[Mapping[str, PlotLabel]] = None, + categories_legend: Optional[ + Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] + ] = None, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, legend_font_size: Optional[float] = None, @@ -41,26 +44,27 @@ def profile_bar_plot( Args: profile (RankProfile): Profile to plot statistics for. - stat_function (Callable[[RankProfile], dict[str, float]]): Which stat - to use for the bar plot. Must be a callable that takes a profile and returns - a dict with str keys and float values. + stat_function (Callable[[RankProfile], dict[str | int, float] | dict[str, float] + | dict[int, float]]): Which stat to use for the bar plot. 
+ Must be a callable that takes a profile and returns a dict + with str and/or int keys and float values. profile_label (str, optional): Label for profile. Defaults to "Profile". normalize (bool, optional): Whether or not to normalize data. Defaults to False. profile_color (str, optional): Color to plot. Defaults to the first color from ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - category_ordering (list[str], optional): Ordering of x-labels. Defaults to order retrieved - from data dictionary. + category_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to order retrieved from data dictionary. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - categories_legend (dict[str, str], optional): Dictionary mapping data categories - to description. Defaults to None. If provided, generates a second legend for data - categories. + categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping data categories to description. Defaults to None. + If provided, generates a second legend for data categories. threshold_values (Union[list[float], float], optional): List of values to plot horizontal lines at. Can be provided as a list or a single float. 
threshold_kwds (Union[list[dict], dict], optional): List of plotting @@ -108,12 +112,14 @@ def profile_borda_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[ + Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] + ] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -134,16 +140,17 @@ def profile_borda_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to decreasing - order of Borda scores. + candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to decreasing order of Borda scores. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to alternate label. Defaults to None. If provided, generates a second legend. 
+ candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to alternate label. Defaults to None. + If provided, generates a second legend. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -200,12 +207,14 @@ def profile_mentions_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[ + Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] + ] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -226,16 +235,17 @@ def profile_mentions_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to decreasing - order of mentions. + candidate_ordering (list[str | int] | list[str] | list[int], optional): + Ordering of x-labels. Defaults to decreasing order of mentions. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. 
show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to alternate label. Defaults to None. If provided, generates a second legend. + candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to alternate label. Defaults to None. + If provided, generates a second legend. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -292,12 +302,14 @@ def profile_fpv_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[str]] = None, + candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, show_profile_legend: bool = False, - candidate_legend: Optional[Mapping[str, PlotLabel]] = None, + candidate_legend: Optional[ + Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] + ] = None, relabel_candidates_with_int: bool = False, threshold_values: Optional[Union[list[float], float]] = None, threshold_kwds: Optional[Union[list[dict], dict]] = None, @@ -318,16 +330,17 @@ def profile_fpv_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str], optional): Ordering of x-labels. Defaults to decreasing - order of first place votes. 
+ candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to decreasing order of first place votes. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str], optional): Dictionary mapping candidates - to alternate label. Defaults to None. If provided, generates a second legend. + candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + Dictionary mapping candidates to alternate label. + Defaults to None. If provided, generates a second legend. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal diff --git a/src/votekit/pref_profile/csv_utils/score_csv_utils.py b/src/votekit/pref_profile/csv_utils/score_csv_utils.py index 61dce22a..0b8966ea 100644 --- a/src/votekit/pref_profile/csv_utils/score_csv_utils.py +++ b/src/votekit/pref_profile/csv_utils/score_csv_utils.py @@ -2,6 +2,8 @@ from typing import Tuple +from votekit.types import Candidate + from ...ballot import ScoreBallot from .csv_utils import ( _validate_csv_ballot_row_break_idxs, @@ -61,7 +63,7 @@ def _parse_ballot_from_score_csv( includes_voter_set (bool): Whether or not the csv contains voter sets. break_indices (list[int]): Where the columns of the csv change from one data type to another. 
- inv_candidate_mapping (dict[str, str]): The iverted candidate mapping of prefix + inv_candidate_mapping (dict[str, str]): The inverted candidate mapping of prefix to the cand. Returns: @@ -78,7 +80,9 @@ def _parse_ballot_from_score_csv( "must be float." ) - scores = {c: float(ballot_row[i]) for i, c in enumerate(candidates) if ballot_row[i]} + scores: dict[Candidate, int | float] = { + c: float(ballot_row[i]) for i, c in enumerate(candidates) if ballot_row[i] + } if includes_voter_set: voter_set = set(v.strip() for v in ballot_row[break_indices[-1] + 1 :]) diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index 0f23cf6b..da00b5dc 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -39,7 +39,8 @@ class PreferenceProfile: Args: ballots (Sequence[Ballot], optional): Tuple of ``Ballot`` objects. Defaults to empty tuple. - candidates (tuple[str], optional): Tuple of candidate strings. Defaults to empty tuple. + candidates (tuple[str | int], optional): Tuple of candidates. + Candidate can be a str or int. Defaults to empty tuple. If empty, computes this from any candidate listed on a ballot with positive weight. max_ranking_length (int, optional): The length of the longest allowable ballot, i.e., how many candidates are allowed to be ranked in an election. Defaults to longest observed @@ -55,12 +56,12 @@ class PreferenceProfile: Parameters: ballots (Sequence[Ballot]): Tuple of ``Ballot`` objects. - candidates (tuple[str]): Tuple of candidate strings. + candidates (tuple[str | int]): Tuple of candidates. A candidate can be a str or int. max_ranking_length (int): The length of the longest allowable ballot, i.e., how many candidates are allowed to be ranked in an election. df (pandas.DataFrame): Data frame view of the ballots. - candidates_cast (tuple[str]): Tuple of candidates who appear on any ballot with positive - weight, either in the ranking or in the score dictionary. 
+ candidates_cast (tuple[str | int]): Tuple of candidates who appear on any ballot with + positive weight, either in the ranking or in the score dictionary. total_ballot_wt (float): Sum of ballot weights. num_ballots (int): Length of ballot list. contains_rankings (bool): Whether or not the profile contains ballots with @@ -89,7 +90,7 @@ def __new__( cls, *, ballots: Sequence[RankBallot], - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: int = 0, df: pd.DataFrame = pd.DataFrame(), **kwargs, @@ -100,7 +101,7 @@ def __new__( cls, *, ballots: Sequence[ScoreBallot], - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: int = 0, df: pd.DataFrame = pd.DataFrame(), **kwargs, @@ -111,7 +112,7 @@ def __new__( cls, *, ballots: Sequence[Ballot] = tuple(), - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: int = 0, df: pd.DataFrame = pd.DataFrame(), **kwargs, @@ -121,7 +122,7 @@ def __new__( cls, *, ballots: Sequence[Ballot] = tuple(), - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: int = 0, df: pd.DataFrame = pd.DataFrame(), **kwargs, @@ -150,7 +151,7 @@ def __new__( f"{len(score_idxs)} ScoreBallots and {len(rank_idxs)} RankBallots." 
) - if any(c.startswith("Ranking_") for c in df.columns): + if any(c.startswith("Ranking_") for c in df.columns if isinstance(c, str)): return super().__new__(RankProfile) return super().__new__(ScoreProfile) @@ -159,8 +160,8 @@ def __init__( self, *, ballots: Sequence[Ballot] = tuple(), - candidates_cast: Sequence[str] = tuple(), - candidates: Sequence[str] = tuple(), + candidates_cast: Sequence[Candidate] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): @@ -175,6 +176,10 @@ def __init__( self._is_frozen = True + @cached_property + def df(self) -> pd.DataFrame: + raise NotImplementedError + def _find_num_ballots(self) -> int: """ Compute and set the number of ballots. @@ -225,8 +230,10 @@ def _validate_and_set_candidates(self) -> None: f"{set(self.candidates_cast) - set(self.candidates)}." ) - self.candidates = tuple([c for c in self.candidates]) - self.candidates_cast = tuple([c for c in self.candidates_cast]) + self.candidates = tuple([c.strip() if isinstance(c, str) else c for c in self.candidates]) + self.candidates_cast = tuple( + [c.strip() if isinstance(c, str) else c for c in self.candidates_cast] + ) def __setattr__(self, name, value): if getattr(self, "_is_frozen", False): @@ -337,15 +344,14 @@ def __init__( (self._df, self.candidates_cast, candidate_id_map) = self._init_from_rank_ballots( cast(Sequence[RankBallot], ballots), candidate_id_map ) + if self.candidates == tuple(): + self.candidates = self.candidates_cast else: self._df, self.candidates_cast, candidate_id_map = self._init_from_rank_df( df, candidate_id_map ) - if self.candidates == tuple(): - self.candidates = self.candidates_cast - self.max_ranking_length = self._find_max_ranking_length() if self.max_ranking_length > 0: @@ -361,6 +367,7 @@ def __init__( raise ValueError(msg) self.id_candidate_map = {cand_id: cand for cand, cand_id in candidate_id_map.items()} + self.candidate_id_map = candidate_id_map if 
self._candidates == tuple(): self._candidates = tuple(candidate_id_map[cand] for cand in self.candidates) self._candidates_cast = tuple(candidate_id_map[cand] for cand in self.candidates_cast) @@ -374,6 +381,11 @@ def __init__( @cached_property def df(self) -> pd.DataFrame: + """ + Compute the dataframe as a cached property. + The dataframe is internally stored with candidate ids. + The dataframe will be translated to original candidate names. + """ return self._translate_df_ranking_values(self._df, self.id_candidate_map) def __update_ballot_ranking_data( @@ -381,7 +393,7 @@ def __update_ballot_ranking_data( rank_ballot_data: dict[str, list], idx: int, rank_ballot: RankBallot, - candidates_cast: list[str], + candidates_cast: list[Candidate], num_ballots: int, candidate_id_map: dict[Candidate, int], ): @@ -392,9 +404,10 @@ def __update_ballot_ranking_data( rank_ballot_data (dict[str, list]): Dictionary storing ballot data. idx (int): Index of ballot. rank_ballot (RankBallot): Ballot. - candidates_cast (list[str]): List of candidates who have received votes. + candidates_cast (list[str | int]): List of candidates who have received votes. num_ballots (int): Total number of ballots. - candidate_id_map (dict[Candidate,str]): mapping of candidate to their id (int) + candidate_id_map (dict[str | int, int]): Mapping of candidate names to integer IDs. + """ if rank_ballot.ranking is None: @@ -431,7 +444,7 @@ def __update_rank_ballot_data_attrs( rank_ballot_data: dict[str, list], idx: int, rank_ballot: RankBallot, - candidates_cast: list[str], + candidates_cast: list[Candidate], num_ballots: int, candidate_id_map: dict[Candidate, int], ): @@ -442,9 +455,9 @@ def __update_rank_ballot_data_attrs( rank_ballot_data (dict[str, list]): Dictionary storing ballot data. idx (int): Index of ballot. rank_ballot (RankBallot): Ballot. - candidates_cast (list[str]): List of candidates who have received votes. + candidates_cast (list[str | int]): List of candidates who have received votes. 
num_ballots (int): Total number of ballots. - candidate_id_map (dict[Candidate, int]): Mapping of candidate name to id (integer) + candidate_id_map (dict[str | int, int]): Mapping of candidate names to integer IDs. """ rank_ballot_data["Weight"][idx] = rank_ballot.weight @@ -516,7 +529,7 @@ def __init_formatted_rank_df( def _init_from_rank_ballots( self, ballots: Sequence[RankBallot], candidate_id_map: dict[Candidate, int] - ) -> tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, int]]: + ) -> tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: """ Create the pandas dataframe representation of the profile. @@ -524,7 +537,8 @@ def _init_from_rank_ballots( ballots (Sequence[RankBallot,...]): Sequence of ballots. Returns: - tuple[pd.DataFrame, tuple[str, ...], dict[Candidate, int]]: df, candidates_cast, candidate_id_map + tuple[pd.DataFrame, tuple[str | int, ...], dict[str | int, int]]: + df, candidates_cast, candidate_id_map """ # `rank_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -533,7 +547,7 @@ def _init_from_rank_ballots( # the ballot at index in the df. num_ballots, rank_ballot_data = self.__init_rank_ballot_data(ballots) - candidates_cast: list[str] = [] + candidates_cast: list[Candidate] = [] for i, b in enumerate(ballots): self.__update_rank_ballot_data_attrs( @@ -604,7 +618,7 @@ def __validate_init_rank_df(self, df: pd.DataFrame) -> None: f"Ranking column 'Ranking_{i + 1}' not in dataframe: {df.columns}" ) - def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[str, ...]: + def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[Candidate, ...]: """ Compute which candidates received votes from the df and set the candidates_cast and candidates attr. @@ -613,7 +627,7 @@ def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[st df (pd.DataFrame): Dataframe representation of ballots. Returns: - tuple[str]: Candidates cast. + tuple[str | int]: Candidates cast. 
""" mask = df["Weight"] > 0 @@ -628,21 +642,28 @@ def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[st return tuple(candidates_cast) def _translate_df_ranking_values( - self, df: pd.DataFrame, candidate_id_map: dict[Candidate, int] | dict[int, Candidate] + self, df: pd.DataFrame, candidate_mapping: dict[Candidate, int] | dict[int, Candidate] ) -> pd.DataFrame: - ranking_cols = [col for col in df.columns if col.startswith("Ranking_")] + """ + Translate candidate values in ranking columns using a candidate mapping. + Maps either candidate names to integer IDs for internal storage or + integer IDs to candidate names for the public-facing df. - all_cand_values = set().union(*df[ranking_cols].to_numpy().ravel()) - {"~"} - missing_cands = all_cand_values - set(candidate_id_map.keys()) - if missing_cands: - raise ValueError( - f"Candidates {missing_cands} are missing from the candidate_id_map. " - "Add them to the dictionary to successfully translate to candidate ids." - ) + Args: + df (pd.DataFrame): Dataframe representation of ballots. + candidate_mapping (dict[str | int, int] | dict[int, str | int]): + Mapping from candidate names to integer IDs, or vice versa. + Returns: + pd.DataFrame: Copy of df with ranking values translated. + + Raises: + KeyError: If a ranked candidate is missing from candidate_mapping. + """ ranking_cols = [col for col in df.columns if col.startswith("Ranking_")] translated_df = df.copy() translated_df[ranking_cols] = translated_df[ranking_cols].map( - lambda ranking: frozenset(candidate_id_map[cand] for cand in ranking) + lambda ranking: frozenset(candidate_mapping[cand] for cand in ranking) if ranking != frozenset("~") else ranking ) @@ -652,7 +673,7 @@ def _init_from_rank_df( self, df: pd.DataFrame, candidate_id_map: dict[Candidate, int] ) -> tuple[ pd.DataFrame, - tuple[str, ...], + tuple[Candidate, ...], dict[Candidate, int], ]: """ @@ -660,9 +681,10 @@ def _init_from_rank_df( Args: df (pd.DataFrame): Dataframe representation of ballots. 
+ candidate_id_map (dict[Candidate, int]): Mapping of candidate names to integer IDs. Returns - tuple[pd.DataFrame, tuple[str]]: df, candidates_cast + tuple[pd.DataFrame, tuple[str | int]]: df, candidates_cast """ self.__validate_init_rank_df_params(df) self.__validate_init_rank_df(df) @@ -828,7 +850,7 @@ def __str__(self) -> str: __repr__ = __str__ def __to_rank_csv_header( - self, candidate_mapping: dict[Candidate, int], include_voter_set: bool + self, candidate_mapping: dict[Candidate, str], include_voter_set: bool ) -> list[list]: """ Construct the header rows for the PrefProfile a custom CSV format. @@ -849,7 +871,7 @@ def __to_rank_csv_header( return header def __to_rank_csv_ranking_list( - self, rank_ballot: RankBallot, candidate_mapping: dict[Candidate, int] + self, rank_ballot: RankBallot, candidate_mapping: dict[Candidate, str] ) -> list: """ Create the list of ranking data for a ballot in the profile. @@ -877,7 +899,7 @@ def __to_rank_csv_ballot_row( self, ballot: RankBallot, include_voter_set: bool, - candidate_mapping: dict[Candidate, int], + candidate_mapping: dict[Candidate, str], weight_precision: int, ) -> list[list]: """ @@ -902,7 +924,7 @@ def __to_rank_csv_ballot_row( return row def __to_rank_csv_data_column_names( - self, include_voter_set: bool, candidate_mapping: dict[Candidate, int] + self, include_voter_set: bool, candidate_mapping: dict[Candidate, str] ) -> list: """ Create the data column header. 
@@ -950,7 +972,7 @@ def to_csv( if len(self.ballots) == 0: raise ProfileError("Cannot write a profile with no ballots to a csv.") - candidate_mapping: dict[Candidate, int] = {c: str(i) for i, c in enumerate(self.candidates)} + candidate_mapping = {c: str(i) for i, c in enumerate(self.candidates)} header = self.__to_rank_csv_header(candidate_mapping, include_voter_set) data_col_names = self.__to_rank_csv_data_column_names(include_voter_set, candidate_mapping) @@ -1024,81 +1046,119 @@ def __init__( self, *, ballots: Sequence[Ballot] = tuple(), - candidates: Sequence[str] = tuple(), + candidates: Sequence[Candidate] = tuple(), max_ranking_length: Optional[int] = None, df: pd.DataFrame = pd.DataFrame(), ): self.candidates = tuple(candidates) + self._candidates = tuple([cand_id for cand_id in range(len(self.candidates))]) + candidate_id_map = { + cand: cand_id for cand, cand_id in zip(self.candidates, self._candidates, strict=True) + } if df.equals(pd.DataFrame()): ( - self.df, + self._df, self.candidates_cast, - ) = self._init_from_score_ballots(cast(Sequence[ScoreBallot], ballots)) + candidate_id_map, + ) = self._init_from_score_ballots( + cast( + Sequence[ScoreBallot], + ballots, + ), + candidate_id_map, + ) if self.candidates == tuple(): self.candidates = self.candidates_cast else: - self.df, self.candidates_cast = self._init_from_score_df(df) + self._df, self.candidates_cast, candidate_id_map = self._init_from_score_df( + df, candidate_id_map + ) + + self.id_candidate_map = {cand_id: cand for cand, cand_id in candidate_id_map.items()} + self.candidate_id_map = candidate_id_map + if self._candidates == tuple(): + self._candidates = tuple(candidate_id_map[cand] for cand in self.candidates) + self._candidates_cast = tuple(candidate_id_map[cand] for cand in self.candidates_cast) super().__init__( candidates=self.candidates, candidates_cast=self.candidates_cast, - df=self.df, + df=self._df, ) + @cached_property + def df(self) -> pd.DataFrame: + """ + Compute the 
dataframe as a cached property. + The dataframe is internally stored with candidate ids. + The dataframe will be translated to original candidate names. + """ + return self._translate_df_score_values(self._df, self.id_candidate_map) + def __update_ballot_scores_data( self, - score_ballot_data: dict[str, list], + score_ballot_data: dict[str | int, list], idx: int, ballot: ScoreBallot, - candidates_cast: list[str], + candidates_cast: list[Candidate], num_ballots: int, + candidate_id_map: dict[Candidate, int], ) -> None: """ Update the score data from a ballot. Args: - ballot_data (dict[str, list]): Dictionary storing ballot data. + ballot_data (dict[str | int, list]): Dictionary storing ballot data. + Dictionary keys represent the column names. + The candidate columns will be their integer ids. idx (int): Index of ballot. ballot (ScoreBallot): Ballot. candidates_cast (list[str]): List of candidates who have received votes. num_ballots (int): Total number of ballots. + candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. """ if ballot.scores is None: return for c, score in ballot.scores.items(): - if ballot.weight > 0 and c not in candidates_cast: - candidates_cast.append(c) - - if c not in score_ballot_data: + if c not in candidate_id_map: if self.candidates: raise ProfileError( f"Candidate {c} found in ballot {ballot} but not in " f"candidate list {self.candidates}." 
) - score_ballot_data[c] = [np.nan] * num_ballots - score_ballot_data[c][idx] = score + candidate_id_map[c] = len(candidate_id_map) + if ballot.weight > 0 and c not in candidates_cast: + candidates_cast.append(c) + cand_col = candidate_id_map[c] + if cand_col not in score_ballot_data: + score_ballot_data[cand_col] = [np.nan] * num_ballots + score_ballot_data[cand_col][idx] = score def __update_score_ballot_data_attrs( self, - score_ballot_data: dict[str, list], + score_ballot_data: dict[str | int, list], idx: int, ballot: ScoreBallot, - candidates_cast: list[str], + candidates_cast: list[Candidate], num_ballots: int, + candidate_id_map: dict[Candidate, int], ) -> None: """ Update all ballot data from a ballot. Args: - ballot_data (dict[str, list]): Dictionary storing ballot data. + ballot_data (dict[str | int, list]): Dictionary storing ballot data. + Dictionary keys represent the column names. + The candidate column names will be their integer IDs. idx (int): Index of ballot. ballot (ScoreBallot): Ballot. candidates_cast (list[str]): List of candidates who have received votes. num_ballots (int): Total number of ballots. + candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. """ score_ballot_data["Weight"][idx] = ballot.weight @@ -1112,11 +1172,12 @@ def __update_score_ballot_data_attrs( ballot=ballot, candidates_cast=candidates_cast, num_ballots=num_ballots, + candidate_id_map=candidate_id_map, ) def __init_score_ballot_data( self, ballots: Sequence[ScoreBallot] - ) -> Tuple[int, dict[str, list]]: + ) -> Tuple[int, dict[str | int, list]]: """ Create the ballot data objects. @@ -1124,32 +1185,38 @@ def __init_score_ballot_data( ballots (Sequence[ScoreBallot,...]): Tuple of ballots. 
Returns: - Tuple[int, dict[str, list]]: num_ballots, score_ballot_data + Tuple[int, dict[str | int, list]]: num_ballots, score_ballot_data """ num_ballots = len(ballots) - score_ballot_data: dict[str, list] = { + score_ballot_data: dict[str | int, list] = { "Weight": [np.nan] * num_ballots, "Voter Set": [set()] * num_ballots, } - if self.candidates != tuple(): - score_ballot_data.update({c: [np.nan] * num_ballots for c in self.candidates}) + if self._candidates != tuple(): + score_ballot_data.update( + {cand_id: [np.nan] * num_ballots for cand_id in self._candidates} + ) return num_ballots, score_ballot_data def __init_formatted_score_df( self, - score_ballot_data: dict[str, list], - candidates_cast: list[str], + score_ballot_data: dict[str | int, list], + candidates_cast: list[Candidate], + candidate_id_map: dict[Candidate, int], ) -> pd.DataFrame: """ Create a pandas dataframe from the ballot data. Args: - score_ballot_data (dict[str, list]): Dictionary storing ballot data. + score_ballot_data (dict[str | int, list]): Dictionary storing ballot data. + Dictionary keys represent the column names. + The candidate columns will be their integer ids. candidates_cast (list[str]): List of candidates who received votes. + candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. Returns: pd.DataFrame: Dataframe of profile. 
@@ -1160,30 +1227,35 @@ def __init_formatted_score_df( "Weight", ] - col_order = list(self.candidates) + temp_col_order + col_order = list(self._candidates) + temp_col_order - if self.candidates == tuple(): - remaining_cands = set(candidates_cast) - set(df.columns) + if self._candidates == tuple(): + cand_ids = [candidate_id_map[cand] for cand in candidates_cast] + remaining_cands = set(cand_ids) - set(df.columns) empty_df_cols = np.full((len(df), len(remaining_cands)), np.nan) df[list(remaining_cands)] = empty_df_cols - - col_order = sorted([c for c in df.columns if c not in temp_col_order]) + temp_col_order + col_order = [ + candidate_id_map[cand] + for cand in sorted(candidate_id_map.keys(), key=lambda cand: str(cand)) + ] + temp_col_order df = df[col_order] df.index.name = "Ballot Index" return df def _init_from_score_ballots( - self, ballots: Sequence[ScoreBallot] - ) -> tuple[pd.DataFrame, tuple[str, ...]]: + self, ballots: Sequence[ScoreBallot], candidate_id_map: dict[Candidate, int] + ) -> tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: """ Create the pandas dataframe representation of the profile. Args: ballots (Sequence[ScoreBallot,...]): Tuple of ballots. + candidate_id_map (dict[str | int, int]): Mapping of candidate names to integer IDs. Returns: - tuple[pd.DataFrame, tuple[str, ...]]: df, candidates_cast + tuple[pd.DataFrame, tuple[str | int, ...], dict[str | int, int]]: + df, candidates_cast, candidate_id_map """ # `score_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -1192,7 +1264,7 @@ def _init_from_score_ballots( # the ballot at index in the df. 
num_ballots, score_ballot_data = self.__init_score_ballot_data(ballots) - candidates_cast: list[str] = [] + candidates_cast: list[Candidate] = [] for i, b in enumerate(ballots): self.__update_score_ballot_data_attrs( @@ -1201,15 +1273,18 @@ def _init_from_score_ballots( ballot=b, candidates_cast=candidates_cast, num_ballots=num_ballots, + candidate_id_map=candidate_id_map, ) df = self.__init_formatted_score_df( score_ballot_data=score_ballot_data, candidates_cast=candidates_cast, + candidate_id_map=candidate_id_map, ) return ( df, tuple(candidates_cast), + candidate_id_map, ) def __validate_init_score_df_params(self, df: pd.DataFrame) -> None: @@ -1258,7 +1333,7 @@ def __validate_init_score_df(self, df: pd.DataFrame) -> None: if c not in df.columns: raise ProfileError(f"Candidate column '{c}' not in dataframe: {df.columns}") - def __find_candidates_cast_from_init_score_df(self, df: pd.DataFrame) -> tuple[str, ...]: + def __find_candidates_cast_from_init_score_df(self, df: pd.DataFrame) -> tuple[Candidate, ...]: """ Compute which candidates received votes from the df and set the candidates_cast and candidates attr. @@ -1267,12 +1342,12 @@ def __find_candidates_cast_from_init_score_df(self, df: pd.DataFrame) -> tuple[s df (pd.DataFrame): Dataframe representation of ballots. Returns: - tuple[str]: Candidates cast. + tuple[str | int]: Candidates cast. 
""" mask = df["Weight"] > 0 - candidates_cast: set[str] = set() + candidates_cast: set[Candidate] = set() positive = df.loc[mask, list(self.candidates)].gt(0).any() # .any() applies along the columns, so we get a boolean series where the @@ -1281,7 +1356,9 @@ def __find_candidates_cast_from_init_score_df(self, df: pd.DataFrame) -> tuple[s return tuple(candidates_cast) - def _init_from_score_df(self, df: pd.DataFrame) -> tuple[pd.DataFrame, tuple[str, ...]]: + def _init_from_score_df( + self, df: pd.DataFrame, candidate_id_map: dict[Candidate, int] + ) -> tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: """ Validate the dataframe and determine the candidates cast. @@ -1289,13 +1366,38 @@ def _init_from_score_df(self, df: pd.DataFrame) -> tuple[pd.DataFrame, tuple[str df (pd.DataFrame): Dataframe representation of ballots. Returns - tuple[pd.DataFrame, tuple[str]]: df, candidates_cast + tuple[pd.DataFrame, tuple[str], dict[Candidate, int]]: + df, candidates_cast, candidate_id_map """ self.__validate_init_score_df_params(df) self.__validate_init_score_df(df) candidates_cast = self.__find_candidates_cast_from_init_score_df(df) + for cand in candidates_cast: + if cand not in candidate_id_map: + candidate_id_map[cand] = len(candidate_id_map) + new_df = self._translate_df_score_values(df, candidate_id_map) + return new_df, candidates_cast, candidate_id_map - return df, candidates_cast + def _translate_df_score_values( + self, df: pd.DataFrame, candidate_mapping: dict[Candidate, int] | dict[int, Candidate] + ) -> pd.DataFrame: + """ + Rename candidate columns using a candidate mapping. + Maps either candidate names to integer IDs for internal storage or + integer IDs to candidate names for the public-facing df. + + Args: + df (pd.DataFrame): Dataframe representation of ballots. + candidate_mapping (dict[str | int, int] | dict[int, str | int]): + Mapping from candidates names to integer IDs, or vice versa. 
+ + Returns: + pd.DataFrame: Copy of df with columns renamed. + + """ + translated_df = df.copy() + translated_df.rename(columns=candidate_mapping, inplace=True) + return translated_df @cached_property def ballots(self: ScoreProfile) -> tuple[ScoreBallot, ...]: diff --git a/src/votekit/pref_profile/utils.py b/src/votekit/pref_profile/utils.py index 597be5e4..6723c8e3 100644 --- a/src/votekit/pref_profile/utils.py +++ b/src/votekit/pref_profile/utils.py @@ -12,6 +12,7 @@ import pandas as pd from votekit.ballot import Ballot, RankBallot, ScoreBallot +from votekit.types import Candidate def _convert_ranking_cols_to_ranking( @@ -76,13 +77,13 @@ def convert_row_to_rank_ballot(row: pd.Series, max_ranking_length: int = 0) -> R ) -def convert_row_to_score_ballot(row: pd.Series, candidates: tuple[str, ...]) -> ScoreBallot: +def convert_row_to_score_ballot(row: pd.Series, candidates: tuple[Candidate, ...]) -> ScoreBallot: """ Convert a row of a properly formatted profile.df to a Ballot. Args: row (pd.Series): Row of a profile.df. - candidates (tuple[str,...]): The name of the candidates. + candidates (tuple[str | int,...]): The name of the candidates. Returns: ScoreBallot: Ballot corresponding to the row of the df. @@ -99,14 +100,14 @@ def convert_row_to_score_ballot(row: pd.Series, candidates: tuple[str, ...]) -> def _df_to_rank_ballot_tuple( - df: pd.DataFrame, candidates: tuple[str, ...], max_ranking_length: int = 0 + df: pd.DataFrame, candidates: tuple[Candidate, ...], max_ranking_length: int = 0 ) -> tuple[RankBallot, ...]: """ Convert a properly formatted profile.df into a list of ballots. Args: df (pd.DataFrame): A profile.df. - candidates (tuple[str,...]): The candidates. + candidates (tuple[str | int,...]): The candidates. max_ranking_length (int, optional): The maximum length of a ranking. Defaults to 0, which is used for ballots with no ranking. 
@@ -197,7 +198,7 @@ def score_profile_to_ballot_dict( def rank_profile_to_ranking_dict( rank_profile: RankProfile, standardize: bool = False -) -> dict[tuple[frozenset[str], ...], float]: +) -> dict[tuple[frozenset[Candidate], ...], float]: """ Converts profile to dictionary with keys = rankings and values = corresponding total weights. @@ -208,8 +209,8 @@ def rank_profile_to_ranking_dict( weight. Defaults to False. Returns: - dict[tuple[frozenset[str],...], float]: - A dictionary with rankings (keys) and corresponding total weights (values). + dict[tuple[frozenset[str | int],...], float]: + A dictionary with candidate rankings (keys) and corresponding total weights (values). Raises: TypeError: Profile must be a RankProfile. @@ -232,7 +233,7 @@ def rank_profile_to_ranking_dict( def score_profile_to_scores_dict( score_profile: ScoreProfile, standardize: bool = False -) -> dict[tuple[tuple[str, float], ...] | None, float]: +) -> dict[tuple[tuple[Candidate, float], ...] | None, float]: """ Converts profile to dictionary with keys = scores and values = corresponding total weights. @@ -243,8 +244,8 @@ def score_profile_to_scores_dict( weight. Defaults to False. Returns: - dict[tuple[tuple[str, float], ...] | None, float]: - A dictionary with scores (keys) and corresponding total weights (values). + dict[tuple[tuple[str | int, float], ...] | None, float]: + A dictionary with candidate scores (keys) and corresponding total weights (values). Raises: TypeError: Profile must be a ScoreProfile. @@ -255,7 +256,7 @@ def score_profile_to_scores_dict( raise TypeError(("Profile must be a ScoreProfile.")) tot_weight = score_profile.total_ballot_wt - di: dict[tuple[tuple[str, float], ...] | None, float] = {} + di: dict[tuple[tuple[Candidate, float], ...] 
| None, float] = {} for ballot in score_profile.ballots: scores = tuple(ballot.scores.items()) if ballot.scores else None weight = ballot.weight diff --git a/src/votekit/representation_scores.py b/src/votekit/representation_scores.py index 77938794..8af517f5 100644 --- a/src/votekit/representation_scores.py +++ b/src/votekit/representation_scores.py @@ -1,14 +1,15 @@ import warnings from itertools import combinations -from typing import Optional +from typing import Optional, Sequence from votekit.pref_profile import RankProfile +from votekit.types import Candidate def r_representation_score( profile: RankProfile, r: int, - candidate_list: list[str], + candidate_list: Sequence[Candidate], ) -> float: """ Compute the r-representation score for the given candidate set. This computes the share @@ -20,7 +21,8 @@ def r_representation_score( r (int): Consider a voter represented if a member of the candidate_list is in one of the top r positions of their ballot. Typical choices are 1, the number of seats, or the max ballot length. - candidate_list (list[str]): List of candidates to consider. + candidate_list (Sequence[str | int]): List of candidates to consider. + Candidates can be strings, integers, or mix of both. Returns: float: r-representation score for candidate_list in profile. @@ -64,7 +66,7 @@ def winner_sets_r_representation_scores( profile: RankProfile, n_seats: int, r: int, - candidate_list: Optional[list[str]] = None, + candidate_list: Optional[Sequence[Candidate]] = None, ) -> dict[frozenset, float]: """ Return r-representation score for all possible winner sets. This computes the share @@ -77,7 +79,9 @@ def winner_sets_r_representation_scores( r (int): Consider a voter represented if a member of the candidate_set is in one of the top r positions of their ballot. Typical choices are 1, the number of seats, or the max ballot length. - candidate_list (list[str], optional): List of candidates to consider as possible winners. 
+ candidate_list (Sequence[str | int], optional): + List of candidates to consider as possible winners. + Candidates can be either strings, integers, or a mix of both. Defaults to None, in which case all candidates who received at least one vote are used. Returns: diff --git a/src/votekit/types.py b/src/votekit/types.py index aa5d07de..2ccf1710 100644 --- a/src/votekit/types.py +++ b/src/votekit/types.py @@ -1,3 +1,4 @@ -from typing import TypeAlias, Union +from typing import TypeAlias -Candidate: TypeAlias = Union[str, int] +Candidate: TypeAlias = str | int +CandidateFloatDictLike: TypeAlias = dict[Candidate, float] | dict[str, float] | dict[int, float] diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 6badce3b..a0b4f32a 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -1,7 +1,7 @@ import math import random from itertools import permutations -from typing import Literal, Optional, Sequence, Union +from typing import Literal, Optional, Sequence import numpy as np import pandas as pd @@ -9,6 +9,7 @@ from votekit.ballot import Ballot, RankBallot from votekit.pref_profile import RankProfile, ScoreProfile +from votekit.types import Candidate, CandidateFloatDictLike COLOR_LIST = [ "#0099cd", @@ -53,7 +54,7 @@ ] -def ballots_by_first_cand(profile: RankProfile) -> dict[str, list[RankBallot]]: +def ballots_by_first_cand(profile: RankProfile) -> dict[Candidate, list[RankBallot]]: """ Partitions the profile by first place candidate. Assumes there are no ties within first place positions of ballots. @@ -62,7 +63,7 @@ def ballots_by_first_cand(profile: RankProfile) -> dict[str, list[RankBallot]]: profile (RankProfile): Profile to partititon. Returns: - dict[str, list[RankBallot]]: + dict[str | int, list[RankBallot]]: A dictionary whose keys are candidates and values are lists of ballots that have that candidate first. 
""" @@ -77,7 +78,7 @@ def ballots_by_first_cand(profile: RankProfile) -> dict[str, list[RankBallot]]: weights = df["Weight"].to_numpy() voter_sets = df["Voter Set"].to_numpy().astype(object) - cand_dict: dict[str, list[RankBallot]] = {c: [] for c in profile.candidates} + cand_dict: dict[Candidate, list[RankBallot]] = {c: [] for c in profile.candidates} tilde = frozenset({"~"}) for row, w, voter_set in zip(rank_arr, weights, voter_sets): @@ -169,7 +170,7 @@ def validate_score_vector(score_vector: Sequence[float]): def _score_dict_from_rankings_df_no_ties( profile: RankProfile, score_vector: Sequence[float], -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Score the candidates based on a score vector. For example, the vector (1,0,...) would return the first place votes for each candidate. Vectors should be non-increasing and @@ -188,7 +189,7 @@ def _score_dict_from_rankings_df_no_ties( the profile. If it is shorter, we add 0s. Returns: - dict[str, float]: + dict[str | int, float]: Dictionary mapping candidates to scores. 
""" @@ -202,10 +203,10 @@ def _score_dict_from_rankings_df_no_ties( if len(score_vector) < max_len: score_vector = list(score_vector) + [0] * (max_len - len(score_vector)) - df = profile.df + df = profile._df - cand_frznst = [frozenset({c}) for c in profile.candidates_cast] - all_frznst = cand_frznst + [frozenset({"~"}), frozenset()] + cand_id_frznst = [frozenset({cand_id}) for cand_id in profile._candidates_cast] + all_frznst = cand_id_frznst + [frozenset({"~"}), frozenset()] n_buckets = len(all_frznst) idx_of_empty = all_frznst.index(frozenset()) @@ -231,14 +232,17 @@ def _score_dict_from_rankings_df_no_ties( weights_flat = weight_matrix.ravel() bucket_sums = np.bincount(codes_flat, weights=weights_flat, minlength=n_buckets) - return {next(iter(k)): round(bucket_sums[idx], 10) for idx, k in enumerate(cand_frznst)} + return { + profile.id_candidate_map[cand_id]: round(bucket_sums[idx], 10) + for idx, cand_id in enumerate(profile._candidates_cast) + } def score_dict_from_score_vector( profile: RankProfile, score_vector: Sequence[float], tie_convention: Literal["high", "average", "low"] = "low", -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Score the candidates based on a score vector. For example, the vector (1,0,...) would return the first place votes for each candidate. Vectors should be non-increasing and @@ -262,7 +266,7 @@ def score_dict_from_score_vector( receive the points for 4th place. Returns: - dict[str, float]: + dict[str | int, float]: Dictionary mapping candidates to scores. 
""" validate_score_vector(score_vector) @@ -274,15 +278,15 @@ def score_dict_from_score_vector( if len(score_vector) < max_length: score_vector = list(score_vector) + [0] * (max_length - len(score_vector)) - scores = {c: 0.0 for c in profile.candidates_cast} + scores = {c: 0.0 for c in profile._candidates_cast} try: ranking_cols = [f"Ranking_{i}" for i in range(1, max_length + 1)] - ranking_mat = profile.df[ranking_cols].to_numpy() + ranking_mat = profile._df[ranking_cols].to_numpy() except KeyError as e: raise TypeError("Ballots must have rankings.") from e - weights = profile.df["Weight"].to_numpy(dtype=float) + weights = profile._df["Weight"].to_numpy(dtype=float) if tie_convention not in ["high", "average", "low"]: raise ValueError( @@ -319,12 +323,12 @@ def score_dict_from_score_vector( scores[c] += round(allocation * wt, 10) current_ind += position_size - return scores + return {profile.id_candidate_map[cand_id]: score for cand_id, score in scores.items()} def _first_place_votes_from_df_no_ties( profile: RankProfile, -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Computes first place votes for all candidates_cast in a ``RankProfile``. Intended to be much faster than first_place_votes, but does not handle ties in ballots. @@ -333,7 +337,7 @@ def _first_place_votes_from_df_no_ties( profile (RankProfile): The profile to compute first place votes for. Returns: - dict[str, float]: + dict[str | int, float]: Dictionary mapping candidates to number of first place votes. """ # equiv to score vector of (1,0,0,...) @@ -346,7 +350,7 @@ def _first_place_votes_from_df_no_ties( def first_place_votes( profile: RankProfile, tie_convention: Literal["high", "average", "low"] = "average", -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Computes first place votes for all candidates_cast in a ``RankProfile``. @@ -357,8 +361,7 @@ def first_place_votes( first, each receives 1/n points. "high" would award them each one point, and "low" 0. 
Returns: - dict[str, float]: - Dictionary mapping candidates to number of first place votes. + dict[int, float]: Dictionary mapping candidate ids to number of first place votes. """ # equiv to score vector of (1,0,0,...) if not isinstance(profile, RankProfile): @@ -371,7 +374,7 @@ def first_place_votes( def mentions( profile: RankProfile, -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Calculates total mentions for all candidates in a ``RankProfile``. @@ -379,7 +382,7 @@ def mentions( profile (RankProfile): RankProfile of ballots. Returns: - dict[str, float]: + dict[Candidate, float]: Dictionary mapping candidates to mention totals (values). """ mentions = {c: 0.0 for c in profile.candidates} @@ -399,7 +402,7 @@ def borda_scores( profile: RankProfile, borda_max: Optional[int] = None, tie_convention: Literal["high", "average", "low"] = "low", -) -> dict[str, float]: +) -> dict[Candidate, float]: r""" Calculates Borda scores for candidates_cast in a ``RankProfile``. The Borda vector is :math:`(n,n-1,\dots,1)` where :math:`n` is the ``borda_max`. @@ -416,7 +419,7 @@ def borda_scores( receive the points for 4th place. Returns: - dict[str, float]: + dict[str | int, float]: Dictionary mapping candidates to Borda scores. """ if not isinstance(profile, RankProfile): @@ -431,12 +434,12 @@ def borda_scores( def tiebreak_set( - r_set: frozenset[str], + r_set: frozenset[Candidate], profile: Optional[RankProfile] = None, tiebreak: str = "random", scoring_tie_convention: Literal["high", "average", "low"] = "low", backup_tiebreak_convention: Optional[str] = None, -) -> tuple[frozenset[str], ...]: +) -> tuple[frozenset[Candidate], ...]: """ Break a single set of candidates into multiple sets each with a single candidate according to a tiebreak rule. Rule 1: random. Rule 2: first-place votes; break the tie based on @@ -444,7 +447,7 @@ def tiebreak_set( profile. Rule 4: lex/lexicographic/alph/alphabetical; break the tie alphabetically. 
Args: - r_set (frozenset[str]): Set of candidates on which to break tie. + r_set (frozenset[str | int]): Set of candidates on which to break tie. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str): Tiebreak method to use. Options are "random", "first_place", and @@ -462,9 +465,15 @@ def tiebreak_set( "lex" if the initial tiebreak is alphabetical, and "random" otherwise. Returns: - tuple[frozenset[str],...]: tiebroken ranking + tuple[frozenset[Candidate],...]: tiebroken ranking """ if tiebreak in ["alphabetical", "lexicographic", "alph", "lex"]: + if any(isinstance(cand, int) for cand in r_set): + int_cands = [cand for cand in r_set if isinstance(cand, int)] + raise TypeError( + "Alphabetical/Lexicographic tie breaks are not possible with integer candidates. " + f"{int_cands} are integer candidates." + ) sorted_cands = sorted([c for c in r_set]) new_ranking = tuple(map(lambda c: frozenset({c}), sorted_cands)) @@ -513,27 +522,30 @@ def tiebreak_set( def tiebroken_ranking( - ranking: tuple[frozenset[str], ...], + ranking: tuple[frozenset[Candidate], ...], profile: Optional[RankProfile] = None, tiebreak: str = "random", -) -> tuple[tuple[frozenset[str], ...], dict[frozenset[str], tuple[frozenset[str], ...]]]: +) -> tuple[ + tuple[frozenset[Candidate], ...], dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]] +]: """ Breaks ties in a list-of-sets ranking according to a given scheme. Args: - ranking (list[set[str]]): A list-of-set ranking of candidates. + ranking (tuple[frozenset[str | int]]): A list-of-set ranking of candidates. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str, optional): Method of tiebreak, currently supports 'random', 'borda', 'first_place'. Defaults to random. 
Returns: - tuple[tuple[frozenset[str], ...], dict[frozenset[str], tuple[frozenset[str],...]]]: + tuple[tuple[frozenset[str | int], ...], dict[frozenset[str | int] + tuple[frozenset[str | int],...]]]: The first entry of the tuple is a list-of-set ranking of candidates (broken down to one candidate sets). The second entry is a dictionary that maps tied sets to their resolution. """ - new_ranking: list[frozenset[str]] = [frozenset()] * len([c for s in ranking for c in s]) + new_ranking: list[frozenset[Candidate]] = [frozenset()] * len([c for s in ranking for c in s]) i = 0 tied_dict = {} @@ -550,23 +562,24 @@ def tiebroken_ranking( def score_dict_to_ranking( - score_dict: dict[str, float], sort_high_low: bool = True -) -> tuple[frozenset[str], ...]: + score_dict: CandidateFloatDictLike, + sort_high_low: bool = True, +) -> tuple[frozenset[Candidate], ...]: """ Sorts candidates into a tuple of frozensets ranking based on a scoring dictionary. Args: - score_dict (dict[str, float]): Dictionary between candidates - and their score. + score_dict (dict[str | int, float] | dict[str, float] | dict[int, float]): + Dictionary between candidates and their score. sort_high_low (bool, optional): How to sort candidates based on scores. True sorts from high to low. Defaults to True. Returns: - tuple[frozenset[str],...]: Candidate rankings in a list-of-sets form. + tuple[frozenset[str | int],...]: Candidate rankings in a list-of-sets form. 
""" - score_to_cand: dict[float, list[str]] = {s: [] for s in score_dict.values()} + score_to_cand: dict[float, list[Candidate]] = {s: [] for s in score_dict.values()} for c, score in score_dict.items(): score_to_cand[score].append(c) @@ -584,14 +597,14 @@ def score_dict_to_ranking( def elect_cands_from_set_ranking( - ranking: Sequence[Union[frozenset[str], set[str]]], + ranking: Sequence[frozenset[Candidate] | set[Candidate] | set[str] | set[int]], n_seats: int, profile: Optional[RankProfile] = None, tiebreak: Optional[str] = None, ) -> tuple[ - tuple[frozenset[str], ...], - tuple[frozenset[str], ...], - Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]], + tuple[frozenset[Candidate], ...], + tuple[frozenset[Candidate], ...], + Optional[tuple[frozenset[Candidate], tuple[frozenset[Candidate], ...]]], ]: """ Given a ranking, elect the top n_seats candidates in the ranking. @@ -601,7 +614,8 @@ def elect_cands_from_set_ranking( is a tie set and whose second entry is the resolution of the tie. Args: - ranking (tuple[frozenset[str],...]): A list-of-set ranking of candidates. + ranking (Sequence[frozenset[str | int] | set[str | int] | set[str] | set[int]]): + A list-of-set ranking of candidates. n_seats (int): Number of seats to elect. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. @@ -609,8 +623,8 @@ def elect_cands_from_set_ranking( 'first_place'. Defaults to None, which does not break ties. Returns: - tuple[tuple[frozenset[str]]], list[tuple[frozenset[str]], - Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]]: + tuple[tuple[frozenset[str | int]]], list[tuple[frozenset[str | int]], + Optional[tuple[frozenset[str | int], tuple[frozenset[str | int], ...]]]: A list-of-sets of elected candidates, a list-of-sets of remaining candidates, and a tuple whose first entry is a tie set and whose second entry is the resolution of the tie. 
If no ties were broken, the tuple returns None. @@ -620,14 +634,14 @@ def elect_cands_from_set_ranking( if n_seats > len([c for s in ranking for c in s]): raise ValueError("n_seats must be no more than the number of candidates.") - ranking_fs: tuple[frozenset[str], ...] = tuple( + ranking_fs: tuple[frozenset[Candidate], ...] = tuple( s if isinstance(s, frozenset) else frozenset(s) for s in ranking ) num_elected = 0 - elected: list[frozenset[str]] = [] + elected: list[frozenset[Candidate]] = [] i = 0 - tiebreak_ranking: Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]] = None + tiebreak_ranking: Optional[tuple[frozenset[Candidate], tuple[frozenset[Candidate], ...]]] = None while num_elected < n_seats: elected.append(ranking_fs[i]) @@ -642,7 +656,7 @@ def elect_cands_from_set_ranking( tiebroken = tiebreak_set(frozenset(ranking_fs[i]), profile, tiebreak) elected += tiebroken[: (n_seats - num_elected)] - remaining: list[frozenset[str]] = list(tiebroken[(n_seats - num_elected) :]) + remaining: list[frozenset[Candidate]] = list(tiebroken[(n_seats - num_elected) :]) if i < len(ranking_fs): remaining += list(ranking_fs[(i + 1) :]) @@ -713,7 +727,7 @@ def resolve_profile_ties(profile: RankProfile) -> RankProfile: def score_profile_from_ballot_scores( profile: ScoreProfile, -) -> dict[str, float]: +) -> dict[Candidate, float]: """ Score the candidates based on the ``scores`` parameter of the ballots. All ballots must have a ``scores`` parameter; note that a ``scores`` dictionary @@ -723,7 +737,7 @@ def score_profile_from_ballot_scores( profile (ScoreProfile): Profile to score. Returns: - dict[str, float]: + dict[str | int, float]: Dictionary mapping candidates to scores. 
""" scores = {c: 0.0 for c in profile.candidates} diff --git a/tests/ballot_generators/bloc_slate_generators/test_cambridge.py b/tests/ballot_generators/bloc_slate_generators/test_cambridge.py index 0a3a5c3e..2622723d 100644 --- a/tests/ballot_generators/bloc_slate_generators/test_cambridge.py +++ b/tests/ballot_generators/bloc_slate_generators/test_cambridge.py @@ -14,11 +14,12 @@ from votekit.ballot_generator.bloc_slate_generator.config import BlocSlateConfig from votekit.pref_interval import PreferenceInterval from votekit.pref_profile import RankProfile +from votekit.types import Candidate PROB_THRESHOLD = 0.01 -def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[str], ...]: +def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[Candidate], ...]: assert ballot.ranking is not None return ballot.ranking @@ -297,7 +298,7 @@ def test_two_bloc_two_slate_cambridge_distribution_matches_name_ballot_dist( cand for cand_set in ballot_ranking(ballot) for cand in cand_set - if cand[0] == slate + if isinstance(cand, str) and cand[0] == slate ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -343,4 +344,10 @@ def test_cambridge_zero_support_slates(): profile_dict = cambridge_profiles_by_bloc_generator(config) profile = profile_dict["A"] - assert all("A" in list(ballot_ranking(ballot)[0])[0] for ballot in profile.ballots) + assert all( + [ + isinstance(first_place_slate := list(ballot_ranking(ballot)[0])[0], str) + and "A" in first_place_slate + for ballot in profile.ballots + ] + ) diff --git a/tests/ballot_generators/bloc_slate_generators/test_slate_bt.py b/tests/ballot_generators/bloc_slate_generators/test_slate_bt.py index 0e16f1e7..2b13fbb8 100644 --- a/tests/ballot_generators/bloc_slate_generators/test_slate_bt.py +++ b/tests/ballot_generators/bloc_slate_generators/test_slate_bt.py @@ -13,11 +13,12 @@ ) from votekit.pref_interval import PreferenceInterval from votekit.pref_profile import RankProfile +from votekit.types import Candidate 
PROB_THRESHOLD = 0.01 -def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[str], ...]: +def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[Candidate], ...]: assert ballot.ranking is not None return ballot.ranking @@ -187,7 +188,10 @@ def test_two_bloc_two_slate_sbt_distribution_matches_name_ballot_dist( a_comparisons_profile = [ tuple( - cand for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] == "A" + cand + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and cand[0] == "A" ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -195,7 +199,10 @@ def test_two_bloc_two_slate_sbt_distribution_matches_name_ballot_dist( b_comparisons_profile = [ tuple( - cand for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] == "B" + cand + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and cand[0] == "B" ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -264,7 +271,7 @@ def test_one_bloc_three_slate_sbt_distribution_matches_name_ballot_dist( cand for cand_set in ballot_ranking(ballot) for cand in cand_set - if cand[0] == slate and cand[-1] in ["1", "2"] + if isinstance(cand, str) and cand[0] == slate and cand[-1] in ["1", "2"] ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -301,7 +308,12 @@ def test_sbt_zero_support_slates(): profile = slate_bt_profile_generator(config) zero_support_slate_perms = [ - tuple(cand[0] for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] != "A") + tuple( + cand[0] + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and cand[0] != "A" + ) for ballot in profile.ballots for _ in range(int(ballot.weight)) ] diff --git a/tests/ballot_generators/bloc_slate_generators/test_slate_pl.py b/tests/ballot_generators/bloc_slate_generators/test_slate_pl.py index 5783296a..ba888f5a 100644 --- 
a/tests/ballot_generators/bloc_slate_generators/test_slate_pl.py +++ b/tests/ballot_generators/bloc_slate_generators/test_slate_pl.py @@ -11,11 +11,12 @@ ) from votekit.pref_interval import PreferenceInterval from votekit.pref_profile import RankProfile +from votekit.types import Candidate PROB_THRESHOLD = 0.01 -def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[str], ...]: +def ballot_ranking(ballot: RankBallot) -> tuple[frozenset[Candidate], ...]: assert ballot.ranking is not None return ballot.ranking @@ -156,7 +157,10 @@ def test_two_bloc_two_slate_spl_distribution_matches_name_ballot_dist( a_comparisons_profile = [ tuple( - cand for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] == "A" + cand + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and cand[0] == "A" ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -164,7 +168,10 @@ def test_two_bloc_two_slate_spl_distribution_matches_name_ballot_dist( b_comparisons_profile = [ tuple( - cand for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] == "B" + cand + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and cand[0] == "B" ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -236,7 +243,7 @@ def test_one_bloc_three_slate_spl_distribution_matches_name_ballot_dist( cand for cand_set in ballot_ranking(ballot) for cand in cand_set - if cand[0] == slate and cand[-1] in ["1", "2"] + if isinstance(cand, str) and cand[0] == slate and cand[-1] in ["1", "2"] ) for ballot in profile.ballots for _ in range(int(ballot.weight)) @@ -273,7 +280,12 @@ def test_spl_zero_support_slates(): profile = slate_pl_profile_generator(config) zero_support_slate_perms = [ - tuple(cand[0] for cand_set in ballot_ranking(ballot) for cand in cand_set if cand[0] != "A") + tuple( + cand[0] + for cand_set in ballot_ranking(ballot) + for cand in cand_set + if isinstance(cand, str) and 
cand[0] != "A" + ) for ballot in profile.ballots for _ in range(int(ballot.weight)) ] diff --git a/tests/elections/election_types/ranking/test_random_dictator.py b/tests/elections/election_types/ranking/test_random_dictator.py index 0da7d9ab..9cc01ad9 100644 --- a/tests/elections/election_types/ranking/test_random_dictator.py +++ b/tests/elections/election_types/ranking/test_random_dictator.py @@ -8,10 +8,11 @@ from votekit.ballot import RankBallot from votekit.elections import RandomDictator from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import first_place_votes -def run_election_once(test_profile: RankProfile, seed: int) -> str: +def run_election_once(test_profile: RankProfile, seed: int) -> Candidate: """Run one election and return the winner.""" random.seed(seed) np.random.seed(seed) diff --git a/tests/elections/election_types/ranking/test_simultaneous_veto.py b/tests/elections/election_types/ranking/test_simultaneous_veto.py index 43e2d398..7749c68e 100644 --- a/tests/elections/election_types/ranking/test_simultaneous_veto.py +++ b/tests/elections/election_types/ranking/test_simultaneous_veto.py @@ -6,6 +6,7 @@ from votekit.ballot import RankBallot from votekit.elections import SimultaneousVeto from votekit.pref_profile import RankProfile +from votekit.types import Candidate # --------------------------------------------------------------------------- # Helpers @@ -24,7 +25,7 @@ def make_profile( return RankProfile(ballots=ballots, max_ranking_length=max_ranking_length) -def elected_set(election: SimultaneousVeto) -> frozenset[str]: +def elected_set(election: SimultaneousVeto) -> frozenset[Candidate]: """Flatten all elected candidates into a single frozenset.""" return frozenset(c for s in election.get_elected() for c in s) diff --git a/tests/elections/test_election_model.py b/tests/elections/test_election_model.py index 99628235..774f24ba 100644 --- a/tests/elections/test_election_model.py +++ 
b/tests/elections/test_election_model.py @@ -22,7 +22,8 @@ def score(profile: RankProfile) -> dict: assert ballot.ranking is not None for s in ballot.ranking: for c in s: - score_dict[c] += "ABCDEF".index(c) + if isinstance(c, str): + score_dict[c] += "ABCDEF".index(c) return score_dict super().__init__(profile, score_function=score, sort_high_low=sort_high_low) From 610f73c40799b934d0b95031de287ad5894d74ef Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 08:01:31 -0500 Subject: [PATCH 04/11] fix pyright errors --- src/votekit/animations.py | 11 +- .../name_bradley_terry.py | 6 +- src/votekit/elections/election_state.py | 10 +- .../election_types/ranking/plurality_veto.py | 24 +-- .../ranking/simultaneous_veto.py | 17 +- .../ranking/stv/numpy_stv_base.py | 8 +- .../graphs/pairwise_comparison_graph.py | 58 ++++--- src/votekit/models.py | 6 +- src/votekit/pref_profile/pref_profile.py | 13 +- .../std_generators/test_iac.py | 3 +- tests/elections/test_election_model.py | 4 +- .../pref_profile/rank_profile/test_rank_pp.py | 30 ++++ .../rank_profile/test_rank_pp_df.py | 36 ++++ .../score_profile/test_score_pp.py | 162 ++++++++++-------- .../score_profile/test_score_pp_df.py | 34 ++++ 15 files changed, 292 insertions(+), 130 deletions(-) diff --git a/src/votekit/animations.py b/src/votekit/animations.py index 76dd65ff..4bcd0304 100644 --- a/src/votekit/animations.py +++ b/src/votekit/animations.py @@ -1188,15 +1188,18 @@ def _animate_win(self, cands_transferred_from: dict[Candidate, dict], event: _Wi self.play(*transformations, run_time=self.animation_duration) def _animate_elimination( - self, cands_transferred_from: dict[str, dict], event: _EliminationEvent + self, + cands_transferred_from: dict[Candidate, dict] | dict[str, dict] | dict[int, dict], + event: _EliminationEvent, ) -> None: """ Animate a round in which a candidate was eliminated. 
Args: - cands_transferred_from (dict[str,dict]): A dictionary in which the keys are the - candidates eliminated this round and the values are dictionaries recording - the candidate's attributes. + cands_transferred_from (dict[Candidate, dict] | dict[str,dict] | dict[int, dict]): + A dictionary in which the keys are the candidates eliminated this round + and the values are dictionaries recording the candidate's attributes. + Candidates can be strings, integers, or a mix of both. event (_EliminationEvent): The event to be animated. Notes: diff --git a/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py b/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py index 59126c4b..142407b8 100644 --- a/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py +++ b/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py @@ -27,6 +27,7 @@ from votekit.ballot_generator.bloc_slate_generator.config import BlocSlateConfig from votekit.ballot_generator.utils import system_memory from votekit.pref_profile import RankProfile +from votekit.types import Candidate # ==================================================== # ================= Helper Functions ================= @@ -227,7 +228,7 @@ def _inner_name_bradley_terry(config: BlocSlateConfig) -> dict[str, RankProfile] # - Other speed improvements def _bradley_terry_mcmc( n_ballots: int, - pref_interval: Mapping[str, float], + pref_interval: Mapping[Candidate, float], seed_ballot: RankBallot, verbose: bool = False, burn_in_time: int = 0, @@ -239,7 +240,8 @@ def _bradley_terry_mcmc( Args: n_ballots (int): the number of ballots to sample - pref_interval (Mapping[str, float]): the preference interval to determine BT distribution + pref_interval (Mapping[Candidate, float]): the preference interval + to determine BT distribution. Candidate can be a str or int. 
seed_ballot (RankBallot): the seed ballot for the Markov chain verbose (bool): If True, print the acceptance ratio of the chain. Defaults to False. burn_in_time (int): the number of ballots discarded in the beginning of the chain diff --git a/src/votekit/elections/election_state.py b/src/votekit/elections/election_state.py index 7eec0497..45f62627 100644 --- a/src/votekit/elections/election_state.py +++ b/src/votekit/elections/election_state.py @@ -13,16 +13,16 @@ class ElectionState: Attributes: round_number (int, optional): Round number, defaults to 0. - remaining (tuple[frozenset[Candidate],...], optional): Remaining candidates, ordered to + remaining (tuple[frozenset[str | int],...], optional): Remaining candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - elected (tuple[frozenset[Candidate],...], optional): Elected candidates, ordered to + elected (tuple[frozenset[str | int],...], optional): Elected candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - eliminated (tuple[frozenset[Candidate],...], optional): Eliminated candidates, ordered to + eliminated (tuple[frozenset[str | int],...], optional): Eliminated candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - tiebreaks (dict[frozenset[Candidate], tuple[frozenset[Candidate],...]], optional): Stores + tiebreaks (dict[frozenset[str | int], tuple[frozenset[str | int],...]], optional): Stores tiebreak resolutions. Keys are frozensets of tied candidates and values are resolutions of tiebreak. Defaults to empty dictionary. - scores(dict[Candidate, float], optional): Stores score information. + scores(dict[str | int, float], optional): Stores score information. Keys are candidates, values are scores. Only remaining candidates should be stored. 
""" diff --git a/src/votekit/elections/election_types/ranking/plurality_veto.py b/src/votekit/elections/election_types/ranking/plurality_veto.py index 89f3ca79..d2c4c6ba 100644 --- a/src/votekit/elections/election_types/ranking/plurality_veto.py +++ b/src/votekit/elections/election_types/ranking/plurality_veto.py @@ -114,7 +114,7 @@ def __init__( self.tiebreak_order = None if self.tiebreak != "random": # stores the most recent veto each ballot gave - self._veto_cache = ["" for _ in range(self._n_ballots)] + self._veto_cache: list[Candidate] = ["" for _ in range(self._n_ballots)] self.tiebreak_order = tiebreak_set( self.candidates, @@ -177,7 +177,7 @@ def _get_ballot_idx(self, voter_idx: int) -> np.intp: ) return ballot_idx - def _break_tie(self, candidate_set: frozenset[str]) -> str: + def _break_tie(self, candidate_set: frozenset[Candidate]) -> Candidate: """ Chooses a veto from a set of last-place candidates. @@ -185,25 +185,26 @@ def _break_tie(self, candidate_set: frozenset[str]) -> str: ``tiebreak_order``, which is defined at instantiation. Args: - candidate_set (frozenset[str]): The set of tied candidates. + candidate_set (frozenset[Candidate]): The set of tied candidates. + Candidates are strings or integers. Returns: - str: The candidate to be vetoed. + Candidate: The candidate to be vetoed. Candidate is a str or int. """ if self.tiebreak == "random": - def rank(c: str) -> float: + def rank(c: Candidate) -> float: return random.random() else: - def rank(c: str) -> float: + def rank(c: Candidate) -> float: return self._tiebreak_ranks[c] # in _tiebreak_order, higher position is worse; veto the worst remaining return max(candidate_set, key=rank) - def _find_potential_vetoes(self, ballot_idx: np.intp) -> frozenset[str]: + def _find_potential_vetoes(self, ballot_idx: np.intp) -> frozenset[Candidate]: """ Given a ballot index, returns the set of last-place candidates (before tiebreaking). 
@@ -214,7 +215,8 @@ def _find_potential_vetoes(self, ballot_idx: np.intp) -> frozenset[str]: ballot_idx (np.intp): A ballot index in [0, n_ballots). Returns: - frozenset[str]: The candidate(s) tied for last place on this ballot. + frozenset[Candidate]: The candidate(s) tied for last place on this ballot. + Candidates can be integers, strings, or a mix of both. """ cached_pos = self._veto_position_cache[ballot_idx] @@ -236,7 +238,7 @@ def _find_potential_vetoes(self, ballot_idx: np.intp) -> frozenset[str]: return potential_vetoes - def _get_veto(self, ballot_idx: np.intp) -> str: + def _get_veto(self, ballot_idx: np.intp) -> Candidate: """ Given a ballot index, returns the candidate to veto. @@ -248,7 +250,7 @@ def _get_veto(self, ballot_idx: np.intp) -> str: ballot_idx (np.intp): A ballot index in [0, n_ballots). Returns: - str: The candidate to be vetoed. + Candidate: The candidate to be vetoed. Candidate is a str or int. Raises: RuntimeError: If the ballot contains no remaining candidates. 
@@ -288,7 +290,7 @@ def _reset(self): self._voter_order_current_index = 0 self._veto_position_cache = [None for _ in range(self._n_ballots)] if self.tiebreak != "random": - self._veto_cache = ["" for _ in range(self._n_ballots)] + self._veto_cache: list[Candidate] = ["" for _ in range(self._n_ballots)] @abstractmethod def _veto_loop( diff --git a/src/votekit/elections/election_types/ranking/simultaneous_veto.py b/src/votekit/elections/election_types/ranking/simultaneous_veto.py index 549fad42..8efce835 100644 --- a/src/votekit/elections/election_types/ranking/simultaneous_veto.py +++ b/src/votekit/elections/election_types/ranking/simultaneous_veto.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from functools import partial from numbers import Real -from typing import Literal +from typing import Literal, cast import numpy as np from typing_extensions import Sentinel @@ -143,7 +143,8 @@ def __init__( # scores are stored as a vector where position indicates the candidate self._scores = np.zeros(n_candidates) - score_func = self._make_score_function(candidate_weights) + _candidate_weights: dict[Candidate, float] = cast(dict[Candidate, float], candidate_weights) + score_func = self._make_score_function(_candidate_weights) for candidate, score in score_func(grouped_profile).items(): candidate_idx = self._candidate_to_idx[candidate] self._scores[candidate_idx] = score @@ -217,7 +218,7 @@ def _sv_validate_input( def _make_score_function( self, - candidate_weights: str | CandidateFloatDictLike | int, + candidate_weights: str | dict[Candidate, float] | int, ) -> Callable[[RankProfile], dict[Candidate, float]]: """ Converts ``candidate_weights`` into a callable function. @@ -225,8 +226,8 @@ def _make_score_function( This function is used to generate initial scores and is also passed to super().__init__. Args: - candidate_weights (str | dict[str | int, float] | dict[str, float] - | dict[int, float | int): How to initialize candidate scores. 
+ candidate_weights (str | dict[Candidate, float] | int): How to initialize + candidate scores. Candidates can be strings or integers. 'first_place' means candidates begin with their first-place vote count. 'uniform' means all candidates begin with the same score. 'borda' means candidates begin with their Borda scores. If a dictionary, @@ -234,8 +235,8 @@ def _make_score_function( for every candidate. If an integer k, candidates begin with their top-k vote count. Returns: - Callable[[RankProfile], dict[str, float]]: Score function that takes a RankProfile and - returns a dict mapping candidates to scores. + Callable[[RankProfile], dict[Candidate, float]]: Score function that takes a RankProfile + and returns a dict mapping candidates to scores. Candidates can be strs or ints. Raises: ValueError: If any of the following: @@ -428,7 +429,7 @@ def _break_tie( (each in their own set). """ - def make_singleton_ranking(indices: list[int]) -> tuple[frozenset[str], ...]: + def make_singleton_ranking(indices: list[int]) -> tuple[frozenset[Candidate], ...]: """Convert sorted candidate indices to a tuple of singleton frozensets.""" return tuple(frozenset((self._sorted_candidates[i],)) for i in indices) diff --git a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py index e238e3b5..ac820134 100644 --- a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py +++ b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py @@ -197,7 +197,7 @@ def _make_election_states(self): round_number=0, remaining=self.get_remaining(0), scores={ - self.candidates[c]: self._data.fpv_by_round[0][c] + self.candidates[int(c)]: float(self._data.fpv_by_round[0][c]) for c in self._data.fpv_by_round[0].nonzero()[0] }, ) @@ -215,7 +215,7 @@ def _make_election_states(self): else (frozenset(),) ) packaged_scores = { - self.candidates[c]: self._data.fpv_by_round[i + 1][c] + self.candidates[int(c)]: 
float(self._data.fpv_by_round[i + 1][c]) for c in self._data.fpv_by_round[i + 1].nonzero()[0] } e_states.append( @@ -383,7 +383,7 @@ def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[Candidate], ] return tuple(frozenset([self.candidates[c] for c in l_list]) for l_list in list_of_losers) - def get_ranking(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_ranking(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the ranking of candidates after a given round. @@ -392,7 +392,7 @@ def get_ranking(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str],...]: Ranking of candidates. + tuple[frozenset[Candidate],...]: Ranking of candidates. Candidate can be str or int. """ return tuple( [ diff --git a/src/votekit/graphs/pairwise_comparison_graph.py b/src/votekit/graphs/pairwise_comparison_graph.py index c47facc5..6cfc8a9a 100644 --- a/src/votekit/graphs/pairwise_comparison_graph.py +++ b/src/votekit/graphs/pairwise_comparison_graph.py @@ -1,6 +1,6 @@ from functools import cache from itertools import combinations -from typing import Optional +from typing import Optional, cast import matplotlib.patches as mpatches import matplotlib.pyplot as plt @@ -11,9 +11,10 @@ from numpy.typing import NDArray from votekit.pref_profile import RankProfile +from votekit.types import Candidate -def __rows_to_indices(profile: RankProfile, cand_name_to_idx: dict[str, int]) -> NDArray: +def __rows_to_indices(profile: RankProfile, cand_name_to_idx: dict[Candidate, int]) -> NDArray: """ Converts the ranking columns of a RankProfile to integer indices. Each singleton candidate set is converted to an index based on the provided candidates list. @@ -22,8 +23,8 @@ def __rows_to_indices(profile: RankProfile, cand_name_to_idx: dict[str, int]) -> Args: profile (RankProfile): The preference profile containing rankings. 
- cand_name_to_idx (dict[str, int]): A mapping from candidate names to their integer index - representations. + cand_name_to_idx (dict[Candidate, int]): A mapping from candidate names to their + integer index representations. Candidate can be str or int. Returns: NDArray: A tuple containing: An NDArray of integer indices representing the rankings. @@ -83,7 +84,7 @@ def __tally_and_mutate_head_to_head( def pairwise_dict( profile: RankProfile, *, sort_candidate_pairs: bool = True -) -> dict[tuple[str, str], tuple[float, float]]: +) -> dict[tuple[Candidate, Candidate], tuple[float, float]]: """ Computes a dictionary whose keys are candidate pairs (A,B) and whose values are lists [a,b] where 'a' denotes the number of times A beats B head to head, and 'b' is the reverse. @@ -94,7 +95,8 @@ def pairwise_dict( will be sorted lexicographically. Defaults to True. Returns: - dict[tuple[str, str], tuple[float, float]]: Pairwise comparison dictionary. + dict[tuple[Candidate, Candidate], tuple[float, float]]: Pairwise comparison dictionary. + Candidate can be str or int. """ if not isinstance(profile, RankProfile): raise ValueError("Profile must be of type RankProfile.") @@ -191,22 +193,34 @@ def get_dominating_tiers_digraph(graph: nx.DiGraph) -> list[set[str]]: def restrict_pairwise_dict_to_subset( - cand_subset: list[str] | tuple[str] | set[str], - pairwise_dict: dict[tuple[str, str], tuple[float, float]], -) -> dict[tuple[str, str], tuple[float, float]]: + cand_subset: list[Candidate] + | tuple[Candidate] + | set[Candidate] + | list[str] + | set[str] + | list[int] + | set[int], + pairwise_dict: dict[tuple[Candidate, Candidate], tuple[float, float]] + | dict[tuple[str, str], tuple[float, float]] + | dict[tuple[int, int], tuple[float, float]], +) -> dict[tuple[Candidate, Candidate], tuple[float, float]]: """ Restricts the full pairwise dictionary to a subset of candidates. 
The pairwise dictionary is a dictionary whose keys are candidate pairs (A,B) and whose values are lists [a,b] where 'a' denotes the number of times A beats B head to head, and 'b' is the reverse. Args: - cands (list[str] | tuple[str] | set[str]): Candidate subset to restrict to. - pairwise_dict (dict[tuple[str, str], tuple[float, float]): Full pairwise comparison - dictionary. + cand_subset (list[Candidate] | tuple[Candidate] | set[Candidate] + | list[str] | set[str] | list[int] | set[int]): Candidate subset to restrict to. + Candidates can be strings, integers, or a mix of both. + pairwise_dict (dict[tuple[Candidate, Candidate], tuple[float, float]] + | dict[tuple[str, str], tuple[float, float]] + | dict[tuple[int, int], tuple[float, float]]): Full pairwise + comparison dictionary. Candidates can be strings, integers, or a mix of both. Returns: - dict[tuple[str, str], tuple[float, float]]: Pairwise dict restricted to the provided - candidates. + dict[tuple[Candidate, Candidate], tuple[float, float]]: Pairwise dict restricted + to the provided candidates. Candidates can be strings or integers. Raises: ValueError: cand_subset must be at least length 2. 
@@ -215,7 +229,11 @@ def restrict_pairwise_dict_to_subset( if len(cand_subset) < 2: raise ValueError(f"Must be at least two candidates in cand_subset: {cand_subset}") - candidates = [c for s in pairwise_dict.keys() for c in s] + _pairwise_dict: dict[tuple[Candidate, Candidate], tuple[float, float]] = cast( + dict[tuple[Candidate, Candidate], tuple[float, float]], pairwise_dict + ) + + candidates = [c for s in _pairwise_dict.keys() for c in s] extra_cands = set(cand_subset).difference(candidates) if extra_cands != set(): @@ -226,13 +244,13 @@ def restrict_pairwise_dict_to_subset( ) ) - new_pairwise_dict = {} + new_pairwise_dict: dict[tuple[Candidate, Candidate], tuple[float, float]] = {} for tup in combinations(cand_subset, 2): - if tup in pairwise_dict: - new_pairwise_dict[tup] = pairwise_dict[tup] + if tup in _pairwise_dict: + new_pairwise_dict[tup] = _pairwise_dict[tup] rev_tup = (tup[1], tup[0]) - if rev_tup in pairwise_dict: - new_pairwise_dict[rev_tup] = pairwise_dict[rev_tup] + if rev_tup in _pairwise_dict: + new_pairwise_dict[rev_tup] = _pairwise_dict[rev_tup] return new_pairwise_dict diff --git a/src/votekit/models.py b/src/votekit/models.py index 3c4cf4d0..a7e5e4d8 100644 --- a/src/votekit/models.py +++ b/src/votekit/models.py @@ -123,7 +123,7 @@ def get_elected(self, round_number: int = -1) -> tuple[frozenset[Candidate], ... ] ) - def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[str], ...]: + def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[Candidate], ...]: """ Fetch the eliminated candidates up to the given round number. @@ -132,10 +132,10 @@ def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[str], ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str],...]: + tuple[frozenset[Candidate],...]: Tuple of eliminated candidates in reverse order of elimination. Candidates in the same set were eliminated simultaneously, i.e. in the final ranking - they are tied. + they are tied. 
Candidates can be strings or integers. """ if ( round_number < -len(self.election_states) diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index da00b5dc..fbac9cd5 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -287,6 +287,9 @@ def group_ballots(self) -> Self: def ballots(self) -> tuple[Ballot, ...]: raise NotImplementedError + _candidates: tuple[int, ...] + _candidates_cast: tuple[int, ...] + def to_pickle(self, fpath: Union[str, PathLike, Path]): """ Saves profile to pickle file. @@ -1493,12 +1496,15 @@ def __str__(self) -> str: __repr__ = __str__ def __to_score_csv_header( - self, candidate_mapping: dict[str, str], include_voter_set: bool + self, candidate_mapping: dict[Candidate, str], include_voter_set: bool ) -> list[list]: """ Construct the header rows for the PrefProfile a custom CSV format. Args: + candidate_mapping (dict[Candidate, str]): Candidate name mapped to integer IDs. + integer IDs are cast to strings for csv. + Candidate can be str or int. include_voter_set (bool): Whether or not to include the voter set of each ballot. """ @@ -1553,7 +1559,7 @@ def __to_score_csv_ballot_row( return row def __to_score_csv_data_column_names( - self, include_voter_set: bool, candidate_mapping: dict[str, str] + self, include_voter_set: bool, candidate_mapping: dict[Candidate, str] ) -> list: """ Create the data column header. @@ -1561,7 +1567,8 @@ def __to_score_csv_data_column_names( Args: include_voter_set (bool): Whether or not to include the voter set of each ballot. - candidate_mapping (dict[str, str]): Maps candidate names to prefixes. + candidate_mapping (dict[Candidate, str]): Maps candidate names to IDs. + Candidate can be str or int. 
""" data_col_names = [f"{cand_label}" for cand_label in candidate_mapping.values()] data_col_names += ["&", "Weight", "&"] diff --git a/tests/ballot_generators/std_generators/test_iac.py b/tests/ballot_generators/std_generators/test_iac.py index 690d5f59..80f10d3e 100644 --- a/tests/ballot_generators/std_generators/test_iac.py +++ b/tests/ballot_generators/std_generators/test_iac.py @@ -8,9 +8,10 @@ from votekit.ballot_generator import iac_profile_generator from votekit.ballot_generator.std_generator import impartial_anon_culture as iac_module from votekit.pref_profile import RankProfile +from votekit.types import Candidate -def _ballot_counter(profile: RankProfile) -> Counter[tuple[str, ...]]: +def _ballot_counter(profile: RankProfile) -> Counter[tuple[Candidate, ...]]: return Counter( { tuple(next(iter(rank)) for rank in ballot.ranking): ballot.weight diff --git a/tests/elections/test_election_model.py b/tests/elections/test_election_model.py index 774f24ba..720c6f1c 100644 --- a/tests/elections/test_election_model.py +++ b/tests/elections/test_election_model.py @@ -71,8 +71,8 @@ def _run_step( new_state = ElectionState( round_number=(prev_state.round_number + 1), remaining=score_dict_to_ranking(scores, self.sort_high_low), - elected=tuple([frozenset(elected)]), - eliminated=tuple([frozenset(eliminated)]), + elected=tuple([frozenset({elected})]), + eliminated=tuple([frozenset({eliminated})]), scores=scores, ) diff --git a/tests/pref_profile/rank_profile/test_rank_pp.py b/tests/pref_profile/rank_profile/test_rank_pp.py index 91594d06..d99fa44f 100644 --- a/tests/pref_profile/rank_profile/test_rank_pp.py +++ b/tests/pref_profile/rank_profile/test_rank_pp.py @@ -79,3 +79,33 @@ def test_get_candidates_received_votes(): "B", "C", } + + +def test_int_only_candidates(): + profile_w_int_cands = RankProfile( + ballots=( + RankBallot(ranking=[{2}, {1}]), + RankBallot(ranking=[{3}, {4}]), + RankBallot(ranking=[{1, 2}]), + ), + ) + vote_cands = 
profile_w_int_cands.candidates_cast + all_cands = profile_w_int_cands.candidates + + assert set(vote_cands) == {1, 2, 3, 4} + assert set(all_cands) == {1, 2, 3, 4} + + +def test_str_int_mix_candidates(): + profile_w_mix_cands = RankProfile( + ballots=( + RankBallot(ranking=[{"A"}, {1}]), + RankBallot(ranking=[{"C"}, {"B"}]), + RankBallot(ranking=[{1, 2}]), + ), + ) + vote_cands = profile_w_mix_cands.candidates_cast + all_cands = profile_w_mix_cands.candidates + + assert set(vote_cands) == {"A", "B", "C", 1, 2} + assert set(all_cands) == {"A", "B", "C", 1, 2} diff --git a/tests/pref_profile/rank_profile/test_rank_pp_df.py b/tests/pref_profile/rank_profile/test_rank_pp_df.py index 42656605..e259dc5d 100644 --- a/tests/pref_profile/rank_profile/test_rank_pp_df.py +++ b/tests/pref_profile/rank_profile/test_rank_pp_df.py @@ -62,3 +62,39 @@ def test_pp_df_rankings_args(): true_df = pd.DataFrame(data) true_df.index.name = "Ballot Index" assert pp.df.equals(true_df) + + +def test_df_with_cand_ids_as_ranking_values(): + rank_profile = RankProfile( + ballots=ballots_rankings, + candidates=["A", "B", "C", "D", "E"], + max_ranking_length=4, + ) + candidate_ids = set([i for i in range(len(rank_profile.candidates))]) + candidate_id_map = dict(zip(rank_profile.candidates, candidate_ids)) + + id_A = candidate_id_map["A"] + id_B = candidate_id_map["B"] + id_C = candidate_id_map["C"] + id_D = candidate_id_map["D"] + cand_id_data = { + "Ranking_1": [ + frozenset({id_A}), + frozenset({id_A, id_B}), + frozenset("~"), + frozenset("~"), + ], + "Ranking_2": [frozenset({id_B}), frozenset(), frozenset("~"), frozenset("~")], + "Ranking_3": [ + frozenset({id_C}), + frozenset({id_D}), + frozenset("~"), + frozenset("~"), + ], + "Ranking_4": [frozenset("~"), frozenset("~"), frozenset("~"), frozenset("~")], + "Voter Set": [set(), {"Chris"}, set(), set()], + "Weight": [2.0, 1.0, 1.0, 0.0], + } + true_id_df = pd.DataFrame(cand_id_data) + true_id_df.index.name = "Ballot Index" + assert 
rank_profile._df.equals(true_id_df) diff --git a/tests/pref_profile/score_profile/test_score_pp.py b/tests/pref_profile/score_profile/test_score_pp.py index a945432c..85f92b0e 100644 --- a/tests/pref_profile/score_profile/test_score_pp.py +++ b/tests/pref_profile/score_profile/test_score_pp.py @@ -1,3 +1,7 @@ +import pytest + +from votekit.ballot import ScoreBallot +from votekit.exceptions import ProfileError from votekit.pref_profile import ScoreProfile @@ -11,70 +15,94 @@ def test_init(): assert not empty_profile.num_ballots -# def test_unique_cands_validator(): -# with pytest.raises(ValueError, match="All candidates must be unique."): -# ScoreProfile(candidates=("A", "A", "B")) - -# ScoreProfile(candidates=("A", "B")) - - -# def test_strip_whitespace(): -# pp = ScoreProfile(candidates=("A ", " B", " C ")) - -# assert pp.candidates == ("A", "B", "C") - - -# def test_ballots_frozen(): -# p = ScoreProfile(ballots=[ScoreBallot()]) -# b_list = p.ballots - -# assert b_list == (ScoreBallot(),) - -# with pytest.raises( -# AttributeError, -# match="Cannot modify frozen instance: tried to set 'ballots'", -# ): -# p.ballots = (ScoreBallot(weight=5),) - - -# def test_candidates_frozen(): -# profile_no_cands = ScoreProfile( -# ballots=[ -# ScoreBallot(scores={"A": 4}), -# ScoreBallot(scores={"B": 4}), -# ScoreBallot(scores={"C": 4}), -# ] -# ) -# assert set(profile_no_cands.candidates) == set(["A", "B", "C"]) -# assert set(profile_no_cands.candidates_cast) == set(["A", "B", "C"]) - -# with pytest.raises( -# AttributeError, match="Cannot modify frozen instance: tried to set 'candidates'" -# ): -# profile_no_cands.candidates = tuple() - -# with pytest.raises( -# AttributeError, -# match="Cannot modify frozen instance: tried to set 'candidates_cast'", -# ): -# profile_no_cands.candidates_cast = tuple() - - -# def test_get_candidates_received_votes(): -# profile_w_cands = ScoreProfile( -# ballots=[ -# ScoreBallot(scores={"A": 4}), -# ScoreBallot(scores={"B": 4}), -# 
ScoreBallot(scores={"C": 4}), -# ], -# candidates=("A", "B", "C", "D", "E"), -# ) -# vote_cands = profile_w_cands.candidates_cast -# all_cands = profile_w_cands.candidates - -# assert set(all_cands) == {"A", "B", "C", "D", "E"} -# assert set(vote_cands) == { -# "A", -# "B", -# "C", -# } +def test_unique_cands_validator(): + with pytest.raises(ProfileError, match="All candidates must be unique."): + ScoreProfile(candidates=("A", "A", "B")) + + +def test_strip_whitespace(): + pp = ScoreProfile(candidates=("A ", " B", " C ")) + + assert pp.candidates == ("A", "B", "C") + + +def test_ballots_frozen(): + p = ScoreProfile(ballots=[ScoreBallot()]) + b_list = p.ballots + + assert b_list == (ScoreBallot(),) + + with pytest.raises( + AttributeError, + match="Cannot modify frozen instance: tried to set 'ballots'", + ): + p.ballots = (ScoreBallot(weight=5),) + + +def test_candidates_frozen(): + profile_no_cands = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 4}), + ScoreBallot(scores={"B": 4}), + ScoreBallot(scores={"C": 4}), + ] + ) + assert set(profile_no_cands.candidates) == set(["A", "B", "C"]) + assert set(profile_no_cands.candidates_cast) == set(["A", "B", "C"]) + + with pytest.raises( + AttributeError, match="Cannot modify frozen instance: tried to set 'candidates'" + ): + profile_no_cands.candidates = tuple() + + with pytest.raises( + AttributeError, + match="Cannot modify frozen instance: tried to set 'candidates_cast'", + ): + profile_no_cands.candidates_cast = tuple() + + +def test_get_candidates_received_votes(): + profile_w_cands = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 4}), + ScoreBallot(scores={"B": 4}), + ScoreBallot(scores={"C": 4}), + ], + candidates=("A", "B", "C", "D", "E"), + ) + vote_cands = profile_w_cands.candidates_cast + all_cands = profile_w_cands.candidates + + assert set(all_cands) == {"A", "B", "C", "D", "E"} + assert set(vote_cands) == { + "A", + "B", + "C", + } + + +def test_int_only_candidates(): + profile_int_cands = 
ScoreProfile( + ballots=[ + ScoreBallot(scores={1: 4}), + ScoreBallot(scores={2: 4}), + ScoreBallot(scores={3: 4}), + ] + ) + + assert set(profile_int_cands.candidates) == set([1, 2, 3]) + assert set(profile_int_cands.candidates_cast) == set([1, 2, 3]) + + +def test_str_int_mix_candidates(): + profile_int_cands = ScoreProfile( + ballots=[ + ScoreBallot(scores={"A": 4}), + ScoreBallot(scores={2: 4}), + ScoreBallot(scores={3: 4}), + ] + ) + + assert set(profile_int_cands.candidates) == set(["A", 2, 3]) + assert set(profile_int_cands.candidates_cast) == set(["A", 2, 3]) diff --git a/tests/pref_profile/score_profile/test_score_pp_df.py b/tests/pref_profile/score_profile/test_score_pp_df.py index e1749b8a..3fbfaa18 100644 --- a/tests/pref_profile/score_profile/test_score_pp_df.py +++ b/tests/pref_profile/score_profile/test_score_pp_df.py @@ -75,3 +75,37 @@ def test_pp_df_scores_args(): true_df = pd.DataFrame(data) true_df.index.name = "Ballot Index" assert pp.df.equals(true_df) + + +def test_df_with_cand_ids_as_score_cols(): + pp = ScoreProfile( + ballots=ballots_scores, + candidates=["A", "B", "C", "D", "E"], + ) + data = { + "A": [ + 1, + np.nan, + np.nan, + np.nan, + ], + "B": [ + 2, + np.nan, + np.nan, + np.nan, + ], + "C": [ + np.nan, + np.nan, + np.nan, + np.nan, + ], + "D": [np.nan, 2, np.nan, np.nan], + "E": [np.nan, 1, np.nan, np.nan], + "Voter Set": [set(), {"Chris"}, set(), set()], + "Weight": [2.0, 1.0, 1.0, 0.0], + } + true_df = pd.DataFrame(data) + true_df.index.name = "Ballot Index" + assert pp.df.equals(true_df) From 43a0e5fdb26acefb49d46adbb4ad5b87482115ed Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 09:08:25 -0500 Subject: [PATCH 05/11] add mixed cand fuzz test, update std ballot generators to accept mixed candidates --- .../std_generator/impartial_anon_culture.py | 6 +- .../std_generator/impartial_culture.py | 16 +++-- .../ballot_generator/std_generator/spacial.py | 35 ++++++---- src/votekit/utils.py | 2 +- 
tests/test_mixed_candidates.py | 70 +++++++++++++++++++ 5 files changed, 106 insertions(+), 23 deletions(-) create mode 100644 tests/test_mixed_candidates.py diff --git a/src/votekit/ballot_generator/std_generator/impartial_anon_culture.py b/src/votekit/ballot_generator/std_generator/impartial_anon_culture.py index 0dd75dad..30850159 100644 --- a/src/votekit/ballot_generator/std_generator/impartial_anon_culture.py +++ b/src/votekit/ballot_generator/std_generator/impartial_anon_culture.py @@ -13,6 +13,7 @@ from typing import Optional, Sequence from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import build_df_from_ballot_samples, index_to_lexicographic_ballot # ==================================================== @@ -154,7 +155,7 @@ def _sample_anonymous_profile_ballot_counts( def iac_profile_generator( - candidates: Sequence[str], + candidates: Sequence[Candidate], number_of_ballots: int, max_ballot_length: Optional[int] = None, ) -> RankProfile: @@ -163,7 +164,8 @@ def iac_profile_generator( is equally likely. Args: - candidates (Sequence[str]): List of candidate strings. + candidates (Sequence[Candidate]): List of candidate strings. + Candidates can be strings, integers, or mix of both. number_of_ballots (int): Number of ballots to generate. max_ballot_length (Optional[int]): Maximum length of each ballot. If None, defaults to the number of candidates. 
diff --git a/src/votekit/ballot_generator/std_generator/impartial_culture.py b/src/votekit/ballot_generator/std_generator/impartial_culture.py index c92d3cd7..a7701047 100644 --- a/src/votekit/ballot_generator/std_generator/impartial_culture.py +++ b/src/votekit/ballot_generator/std_generator/impartial_culture.py @@ -13,6 +13,7 @@ import numpy as np from votekit.pref_profile import RankProfile +from votekit.types import Candidate from votekit.utils import ( build_df_from_ballot_samples, fixed_zero_index_lex_block_size, @@ -30,7 +31,7 @@ def _generate_profile_optimized_non_short( - candidates: Sequence[str], + candidates: Sequence[Candidate], number_of_ballots: int, max_ballot_length: Optional[int] = None, ) -> RankProfile: @@ -40,7 +41,8 @@ def _generate_profile_optimized_non_short( short ballots are disallowed Args: - candidates (Sequence[str]): the list of candidates in the election + candidates (Sequence[Candidate]): the list of candidates in the election. + Candidates can be strings or integers. number_of_ballots (int): the number of ballots to generate max_ballot_length (Optional[int]): the maximum length allowed in the profile. If None, defaults to the number of candidates. Defaults to None. @@ -66,7 +68,7 @@ def _generate_profile_optimized_non_short( def _generate_profile_optimized_with_short( - candidates: Sequence[str], + candidates: Sequence[Candidate], number_of_ballots: int, max_ballot_length: Optional[int] = None, ) -> RankProfile: @@ -77,7 +79,8 @@ def _generate_profile_optimized_with_short( the indices to ballots using a help function Args: - candidates (Sequence[str]): the list of candidates in the election + candidates (Sequence[Candidate]): the list of candidates in the election + Candidates can be strings or integers. number_of_ballots (int): the number of ballots to generate for the profile max_ballot_length (Optional[int]): the maximum length allowed in the profile. 
If None, @@ -117,7 +120,7 @@ def _generate_profile_optimized_with_short( def ic_profile_generator( - candidates: Sequence[str], + candidates: Sequence[Candidate], number_of_ballots: int, max_ballot_length: Optional[int] = None, allow_short_ballots: bool = False, @@ -127,7 +130,8 @@ def ic_profile_generator( Equivalent to the ballot simplex with an alpha value of infinity. Args: - candidates (Sequence[str]): The list of candidates in the election. + candidates (Sequence[Candidate]): The list of candidates in the election. + Candidates can be strings or integers. number_of_ballots (int): The number of ballots to generate for the profile. max_ballot_length (Optional[int]): Maximum length of each ballot. If None, defaults to the number of candidates. diff --git a/src/votekit/ballot_generator/std_generator/spacial.py b/src/votekit/ballot_generator/std_generator/spacial.py index 1f23f346..c51dbd94 100644 --- a/src/votekit/ballot_generator/std_generator/spacial.py +++ b/src/votekit/ballot_generator/std_generator/spacial.py @@ -12,7 +12,7 @@ clustered multi-dimensional spacial model where voters are clustered around candidates. 
""" -from typing import Any, Callable, Dict, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, Optional, Sequence, Tuple, cast import numpy as np import pandas as pd @@ -20,6 +20,7 @@ from votekit.metrics import euclidean_dist from votekit.pref_profile import RankProfile +from votekit.types import Candidate # ================================================= # ================= API Functions ================= @@ -193,14 +194,14 @@ def spacial_profile_and_positions_generator( def clustered_spacial_profile_and_positions_generator( - number_of_ballots: dict[str, int], - candidates: list[str], + number_of_ballots: dict[Candidate, int] | dict[str, int] | dict[int, int], + candidates: list[Candidate] | list[str] | list[int], voter_dist: Callable[..., np.ndarray] = np.random.normal, voter_dist_kwargs: Optional[Dict[str, Any]] = None, candidate_dist: Callable[..., np.ndarray] = np.random.uniform, candidate_dist_kwargs: Optional[Dict[str, Any]] = None, distance: Callable[[np.ndarray, np.ndarray], float] = euclidean_dist, -) -> Tuple[RankProfile, dict[str, np.ndarray], np.ndarray]: +) -> Tuple[RankProfile, dict[Candidate, np.ndarray], np.ndarray]: """ Generate a clustered spatial rank profile and sampled positions. @@ -218,9 +219,11 @@ def clustered_spacial_profile_and_positions_generator( ) Args: - number_of_ballots (dict[str, int]): The number of voters attributed - to each candidate {candidate string: # voters}. - candidates (list[str]): Candidate names used when building rankings. + number_of_ballots (dict[Candidate, int] | dict[str, int] | dict[int, int]): + The number of voters attributed to each candidate + {candidate string or integer: # voters}. + candidates (list[Candidate] | list[str] | list[int]): Candidate names + used when building rankings. voter_dist (Callable[..., np.ndarray], optional): Distribution sampler used to draw voter positions centered at each candidate location. Defaults to ``np.random.normal``. 
@@ -241,12 +244,16 @@ def clustered_spacial_profile_and_positions_generator( ``euclidean_dist``. Returns: - Tuple[RankProfile, dict[str, numpy.ndarray], numpy.ndarray]: + Tuple[RankProfile, dict[Candidate, numpy.ndarray], numpy.ndarray]: A tuple containing the preference profile object, a dictionary with each candidate's position in the metric space, and a matrix where each row is a single voter's position in the metric space. """ + + _number_of_ballots: dict[Candidate, int] = cast(dict[Candidate, int], number_of_ballots) + _candidates: list[Candidate] = cast(list[Candidate], candidates) + if voter_dist_kwargs is None: if voter_dist is np.random.normal: voter_dist_kwargs = { @@ -287,20 +294,20 @@ def clustered_spacial_profile_and_positions_generator( "Distance function is invalid or incompatible with voter/candidate distributions." ) - candidate_position_dict: dict[str, NDArray] = { - c: candidate_dist(**candidate_dist_kwargs) for c in candidates + candidate_position_dict: dict[Candidate, NDArray] = { + c: candidate_dist(**candidate_dist_kwargs) for c in _candidates } - n_voters = sum(number_of_ballots.values()) + n_voters = sum(_number_of_ballots.values()) voter_positions = [np.zeros(2) for _ in range(n_voters)] vidx = 0 for c, c_position in candidate_position_dict.items(): - for _ in range(number_of_ballots[c]): + for _ in range(_number_of_ballots[c]): voter_dist_kwargs["loc"] = c_position voter_positions[vidx] = voter_dist(**voter_dist_kwargs) vidx += 1 - n_candidates = len(candidates) + n_candidates = len(_candidates) ballot_pool = np.full((n_voters, n_candidates), frozenset("~"), dtype=object) for i in range(len(voter_positions)): v_position = voter_positions[i] @@ -326,7 +333,7 @@ def clustered_spacial_profile_and_positions_generator( ) return ( RankProfile( - candidates=candidates, + candidates=_candidates, df=df, max_ranking_length=n_candidates, ), diff --git a/src/votekit/utils.py b/src/votekit/utils.py index a0b4f32a..c2d3fe10 100644 --- 
a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -911,7 +911,7 @@ def index_to_lexicographic_ballot( def build_df_from_ballot_samples( - ballots_freq_dict: dict[tuple[int, ...], int], candidates: Sequence[str] + ballots_freq_dict: dict[tuple[int, ...], int], candidates: Sequence[Candidate] ): """ Helper function which creates a pandas df to instantiate a diff --git a/tests/test_mixed_candidates.py b/tests/test_mixed_candidates.py new file mode 100644 index 00000000..32206417 --- /dev/null +++ b/tests/test_mixed_candidates.py @@ -0,0 +1,70 @@ +from functools import partial + +import pytest + +from votekit import utils +from votekit.ballot_generator import ic_profile_generator +from votekit.elections import STV, Borda, Plurality + +MIXED_CANDS = ["A", "B", "1", 1, 2, 3] +N_SEATS = 2 + + +@pytest.fixture(params=[10, 1000, 10000]) +def ic_mixed_profile(request): + """ + IC profile over mixed str/int candidates. + """ + return ic_profile_generator(candidates=MIXED_CANDS, number_of_ballots=request.param) + + +@pytest.mark.parametrize( + "make_election", + [ + pytest.param( + lambda profile: Borda(profile, n_seats=N_SEATS, tiebreak="random"), id="borda" + ), + pytest.param( + lambda profile: Plurality(profile, n_seats=N_SEATS, tiebreak="random"), id="plurality" + ), + pytest.param(lambda profile: STV(profile, n_seats=N_SEATS, tiebreak="random"), id="stv"), + ], +) +def test_election_runs_with_mixed_candidates(ic_mixed_profile, make_election): + """ + Election Fuzz Test: Run Borda, STV, and Plurality elections on profiles + with mixed candidate types. IC ballot generator will generate + ballots with mix of string and integer candidates + with varying number of ballots (10, 1000, 10000). 
+ """ + election = make_election(ic_mixed_profile) + elected = election.get_elected() + n_elected = sum(len(seat) for seat in elected) + assert n_elected == N_SEATS + + +SCORE_VECTOR = [5, 4, 3, 2, 1, 0] + +UTILS_FXNS = [ + pytest.param(utils.first_place_votes, id="first_place_votes"), + pytest.param(utils.mentions, id="mentions"), + pytest.param(utils.borda_scores, id="borda_scores"), + pytest.param(utils.ballots_by_first_cand, id="ballots_by_first_cand"), + pytest.param(utils.ballot_lengths, id="ballot_lengths"), + pytest.param(utils.add_missing_cands, id="add_missing_cands"), + pytest.param( + partial(utils.score_dict_from_score_vector, score_vector=SCORE_VECTOR), + id="score_dict_from_score_vector", + ), +] + + +@pytest.mark.parametrize("utils_fxns", UTILS_FXNS) +def test_utils_fxns_accept_mixed_candidates(ic_mixed_profile, utils_fxns): + """ + utils.py Functions Fuzz Test: Run all utils.py functions with + profiles of mixed candidate types. + The same profiles from the Election Fuzz Test. 
+ """ + result = utils_fxns(ic_mixed_profile) + assert result is not None From f8976dd58146ab15fae3dcd657d6b6ad38b7131d Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 10:01:09 -0500 Subject: [PATCH 06/11] add internal df tests, mixed cands ballots, warning 1, 1 cands --- src/votekit/ballot.py | 32 +++++++++++++++-- tests/ballot/test_RankBallot.py | 17 ++++++++++ tests/ballot/test_ScoreBallot.py | 16 +++++++++ .../rank_profile/test_rank_pp_df.py | 6 +++- .../score_profile/test_score_pp_df.py | 34 +++++++++++++------ 5 files changed, 90 insertions(+), 15 deletions(-) diff --git a/src/votekit/ballot.py b/src/votekit/ballot.py index f8378a8b..ba654181 100644 --- a/src/votekit/ballot.py +++ b/src/votekit/ballot.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from numbers import Real from typing import Iterable, Mapping, Optional, Sequence, TypeAlias, Union, overload @@ -218,13 +219,26 @@ def _convert_ranking_candidates_to_frozenset_strip_whitespace( def _validate_ranking_candidates(self, ranking: Ranking): if ranking is None: return - if any(c == "~" for cand_set in ranking for c in cand_set): + if any(cand == "~" for cand_set in ranking for cand in cand_set): raise ValueError( f"Candidate '~' found in ballot ranking {ranking}." " '~' is a reserved character and cannot be used for" " candidate names." ) - # add a warning if candidates are of mixed type and if str and int are equivalent? + + str_cands = {cand for cand_set in ranking for cand in cand_set if isinstance(cand, str)} + int_cands = {cand for cand_set in ranking for cand in cand_set if isinstance(cand, int)} + collisions = { + str_cand + for str_cand in str_cands + if str_cand.lstrip("-").isdigit() and int(str_cand) in int_cands + } + if collisions: + warnings.warn( + f"Candidates {collisions} appear as both str and int (e.g. '1' and 1)." 
+ " These will be treated as separate candidates.", + UserWarning, + ) def __eq__(self, other): if not isinstance(other, RankBallot): @@ -317,7 +331,19 @@ def _validate_scores_candidates(self, scores: ScoresLike): " '~' is a reserved character and cannot be used for" " candidate names." ) - # add a warning if candidates are of mixed type + str_cands = {cand for cand in scores.keys() if isinstance(cand, str)} + int_cands = {cand for cand in scores.keys() if isinstance(cand, int)} + collisions = { + str_cand + for str_cand in str_cands + if str_cand.lstrip("-").isdigit() and int(str_cand) in int_cands + } + if collisions: + warnings.warn( + f"Candidates {collisions} appear as both str and int (e.g. '1' and 1)." + " These will be treated as separate candidates.", + UserWarning, + ) def __eq__(self, other): if not isinstance(other, ScoreBallot): diff --git a/tests/ballot/test_RankBallot.py b/tests/ballot/test_RankBallot.py index e5bb0c48..48182824 100644 --- a/tests/ballot/test_RankBallot.py +++ b/tests/ballot/test_RankBallot.py @@ -182,3 +182,20 @@ def test_mixed_str_and_iterable_ranking_elements(): frozenset({"D"}), frozenset({"E"}), ) + + +def test_mixed_str_int_candidates_ballot(): + b = RankBallot(ranking=["A", {"B", 1}, "D", {2}, 3], weight=1, voter_set={"A"}) + assert b.ranking == ( + frozenset({"A"}), + frozenset({"B", 1}), + frozenset({"D"}), + frozenset({2}), + frozenset({3}), + ) + + +def test_equivalent_str_int_candidates_gives_warning(): + with pytest.warns(UserWarning, match="will be treated as separate candidates"): + b = RankBallot(ranking=[1, "1"]) + assert b.ranking == (frozenset({1}), frozenset({"1"})) diff --git a/tests/ballot/test_ScoreBallot.py b/tests/ballot/test_ScoreBallot.py index 62844733..cbfd6aaa 100644 --- a/tests/ballot/test_ScoreBallot.py +++ b/tests/ballot/test_ScoreBallot.py @@ -142,3 +142,19 @@ def test_rank_sub_ballot(): def test_rank_and_score(): with pytest.raises(TypeError, match="Only one of ranking or scores can be provided."): 
ScoreBallot(ranking=[{"A"}], scores={"A": 1}) + + +def test_mixed_str_int_candidates_ballot(): + b = ScoreBallot( + scores={"A": 2, 1: 1}, + weight=3, + voter_set={"Chris"}, + ) + + assert b.scores == {"A": 2, 1: 1} + + +def test_equivalent_str_int_candidates_gives_warning(): + with pytest.warns(UserWarning, match="will be treated as separate candidates"): + b = ScoreBallot(scores={"1": 2, 1: 1}) + assert b.scores == {"1": 2, 1: 1} diff --git a/tests/pref_profile/rank_profile/test_rank_pp_df.py b/tests/pref_profile/rank_profile/test_rank_pp_df.py index e259dc5d..8d359c08 100644 --- a/tests/pref_profile/rank_profile/test_rank_pp_df.py +++ b/tests/pref_profile/rank_profile/test_rank_pp_df.py @@ -64,7 +64,7 @@ def test_pp_df_rankings_args(): assert pp.df.equals(true_df) -def test_df_with_cand_ids_as_ranking_values(): +def test_internal_df_with_cand_ids_as_ranking_values(): rank_profile = RankProfile( ballots=ballots_rankings, candidates=["A", "B", "C", "D", "E"], @@ -72,6 +72,7 @@ def test_df_with_cand_ids_as_ranking_values(): ) candidate_ids = set([i for i in range(len(rank_profile.candidates))]) candidate_id_map = dict(zip(rank_profile.candidates, candidate_ids)) + candidates_cast_ids = set([candidate_id_map[cand] for cand in rank_profile.candidates_cast]) id_A = candidate_id_map["A"] id_B = candidate_id_map["B"] @@ -98,3 +99,6 @@ def test_df_with_cand_ids_as_ranking_values(): true_id_df = pd.DataFrame(cand_id_data) true_id_df.index.name = "Ballot Index" assert rank_profile._df.equals(true_id_df) + assert rank_profile._candidates == tuple(candidate_ids) + assert rank_profile._candidates_cast == tuple(candidates_cast_ids) + assert rank_profile.candidate_id_map == candidate_id_map diff --git a/tests/pref_profile/score_profile/test_score_pp_df.py b/tests/pref_profile/score_profile/test_score_pp_df.py index 3fbfaa18..96b5a31b 100644 --- a/tests/pref_profile/score_profile/test_score_pp_df.py +++ b/tests/pref_profile/score_profile/test_score_pp_df.py @@ -77,35 +77,47 @@ 
def test_pp_df_scores_args(): assert pp.df.equals(true_df) -def test_df_with_cand_ids_as_score_cols(): - pp = ScoreProfile( +def test_internal_df_with_cand_ids_as_score_cols(): + score_profile = ScoreProfile( ballots=ballots_scores, candidates=["A", "B", "C", "D", "E"], ) - data = { - "A": [ + candidate_ids = set([i for i in range(len(score_profile.candidates))]) + candidate_id_map = dict(zip(score_profile.candidates, candidate_ids)) + candidates_cast_ids = set([candidate_id_map[cand] for cand in score_profile.candidates_cast]) + + id_A = candidate_id_map["A"] + id_B = candidate_id_map["B"] + id_C = candidate_id_map["C"] + id_D = candidate_id_map["D"] + id_E = candidate_id_map["E"] + cand_id_data = { + id_A: [ 1, np.nan, np.nan, np.nan, ], - "B": [ + id_B: [ 2, np.nan, np.nan, np.nan, ], - "C": [ + id_C: [ np.nan, np.nan, np.nan, np.nan, ], - "D": [np.nan, 2, np.nan, np.nan], - "E": [np.nan, 1, np.nan, np.nan], + id_D: [np.nan, 2, np.nan, np.nan], + id_E: [np.nan, 1, np.nan, np.nan], "Voter Set": [set(), {"Chris"}, set(), set()], "Weight": [2.0, 1.0, 1.0, 0.0], } - true_df = pd.DataFrame(data) - true_df.index.name = "Ballot Index" - assert pp.df.equals(true_df) + true_id_df = pd.DataFrame(cand_id_data) + true_id_df.index.name = "Ballot Index" + assert score_profile._df.equals(true_id_df) + assert score_profile._candidates == tuple(candidate_ids) + assert score_profile._candidates_cast == tuple(candidates_cast_ids) + assert score_profile.candidate_id_map == candidate_id_map From e2d4ceccc9b969fe973bcdc05189d4b8ce58146f Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 10:21:00 -0500 Subject: [PATCH 07/11] add df with mixed cands tests --- .../rank_profile/test_rank_pp_df.py | 45 +++++++++++++++++ .../score_profile/test_score_pp_df.py | 50 +++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/tests/pref_profile/rank_profile/test_rank_pp_df.py b/tests/pref_profile/rank_profile/test_rank_pp_df.py index 8d359c08..ab714865 100644 --- 
a/tests/pref_profile/rank_profile/test_rank_pp_df.py +++ b/tests/pref_profile/rank_profile/test_rank_pp_df.py @@ -9,6 +9,13 @@ RankBallot(), RankBallot(weight=0), ] +mixed_ballot_rankings = [ + RankBallot(ranking=({"A"}, {1}, {2}), weight=2), + RankBallot(ranking=({"A", "B"}, frozenset(), {"C"}), voter_set={"Chris"}), + RankBallot(ranking=(2, 1)), + RankBallot(), + RankBallot(weight=0), +] def test_pp_df_rankings(): @@ -64,6 +71,44 @@ def test_pp_df_rankings_args(): assert pp.df.equals(true_df) +def test_df_with_mixed_cand_types_as_ranking_values(): + rank_profile = RankProfile( + ballots=mixed_ballot_rankings, + candidates=["A", "B", "C", 1, 2, 3], + max_ranking_length=4, + ) + + data = { + "Ranking_1": [ + frozenset({"A"}), + frozenset({"A", "B"}), + frozenset({2}), + frozenset("~"), + frozenset("~"), + ], + "Ranking_2": [frozenset({1}), frozenset(), frozenset({1}), frozenset("~"), frozenset("~")], + "Ranking_3": [ + frozenset({2}), + frozenset({"C"}), + frozenset("~"), + frozenset("~"), + frozenset("~"), + ], + "Ranking_4": [ + frozenset("~"), + frozenset("~"), + frozenset("~"), + frozenset("~"), + frozenset("~"), + ], + "Voter Set": [set(), {"Chris"}, set(), set(), set()], + "Weight": [2.0, 1.0, 1.0, 1.0, 0.0], + } + true_df = pd.DataFrame(data) + true_df.index.name = "Ballot Index" + assert rank_profile.df.equals(true_df) + + def test_internal_df_with_cand_ids_as_ranking_values(): rank_profile = RankProfile( ballots=ballots_rankings, diff --git a/tests/pref_profile/score_profile/test_score_pp_df.py b/tests/pref_profile/score_profile/test_score_pp_df.py index 96b5a31b..77289c27 100644 --- a/tests/pref_profile/score_profile/test_score_pp_df.py +++ b/tests/pref_profile/score_profile/test_score_pp_df.py @@ -17,6 +17,20 @@ ScoreBallot(weight=0), ] +mixed_ballots_scores = [ + ScoreBallot( + weight=2, + scores={ + "A": 1, + 1: 2, + }, + ), + ScoreBallot(scores={"A": 2, "B": 1}, voter_set={"Chris"}), + ScoreBallot(scores={1: 2, 2: 1}), + ScoreBallot(), + 
ScoreBallot(weight=0), +] + def test_pp_df_scores(): pp = ScoreProfile(ballots=ballots_scores) @@ -77,6 +91,42 @@ def test_pp_df_scores_args(): assert pp.df.equals(true_df) +def test_df_with_mixed_cand_types_as_score_cols(): + score_profile = ScoreProfile(ballots=mixed_ballots_scores, candidates=["A", "B", "C", 1, 2, 3]) + + data = { + "A": [ + 1, + 2, + np.nan, + np.nan, + np.nan, + ], + "B": [ + np.nan, + 1, + np.nan, + np.nan, + np.nan, + ], + "C": [ + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ], + 1: [2, np.nan, 2, np.nan, np.nan], + 2: [np.nan, np.nan, 1, np.nan, np.nan], + 3: [np.nan, np.nan, np.nan, np.nan, np.nan], + "Voter Set": [set(), {"Chris"}, set(), set(), set()], + "Weight": [2.0, 1.0, 1.0, 1.0, 0.0], + } + true_df = pd.DataFrame(data) + true_df.index.name = "Ballot Index" + assert score_profile.df.equals(true_df) + + def test_internal_df_with_cand_ids_as_score_cols(): score_profile = ScoreProfile( ballots=ballots_scores, From c9308cae58f7667c0d8059e8a5f2732e98e3e5b5 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 11:10:19 -0500 Subject: [PATCH 08/11] update docstring to candidate, cand=int|str --- src/votekit/animations.py | 3 +- src/votekit/ballot.py | 24 +++--- src/votekit/cleaning/rank_ballots_cleaning.py | 3 +- src/votekit/elections/election_state.py | 15 ++-- .../ranking/abstract_ranking.py | 3 +- .../election_types/ranking/alaska.py | 4 +- .../election_types/ranking/plurality_veto.py | 27 ++++--- .../ranking/simultaneous_veto.py | 29 ++++--- .../ranking/stv/numpy_stv_base.py | 25 +++--- src/votekit/elections/transfers.py | 3 +- .../graphs/pairwise_comparison_graph.py | 4 +- src/votekit/matrices/_utils.py | 8 +- src/votekit/matrices/candidate/comentions.py | 17 ++-- src/votekit/models.py | 10 ++- src/votekit/pref_profile/pref_profile.py | 77 +++++++++++++------ src/votekit/pref_profile/utils.py | 12 ++- src/votekit/representation_scores.py | 6 +- src/votekit/utils.py | 43 +++++++---- 18 files changed, 202 
insertions(+), 111 deletions(-) diff --git a/src/votekit/animations.py b/src/votekit/animations.py index 4bcd0304..81f91143 100644 --- a/src/votekit/animations.py +++ b/src/votekit/animations.py @@ -513,11 +513,12 @@ def _get_transferred_votes( were elected this round, ``"elimination"`` otherwise. Returns: - dict[Candidaate, dict[Candidate, float]]: A nested dictionary. + dict[Candidate, dict[Candidate, float]]: A nested dictionary. If ``d`` is the return value, ``c1`` was a candidate eliminated this round, and ``c2`` is a remaining candidate, then ``d[c1][c2]`` will be the total support transferred this round from candidate ``c1`` to candidate ``c2``. + Candidates can be strings, integers, or mix of both. Notes: This function supports the election, but not the elimination, of multiple candidates diff --git a/src/votekit/ballot.py b/src/votekit/ballot.py index ba654181..1e3d1632 100644 --- a/src/votekit/ballot.py +++ b/src/votekit/ballot.py @@ -18,28 +18,28 @@ class Ballot: Ballot parent class, contains voter set and assigned weight. Args: - ranking (Optional[Sequence[str | int | Iterable[str | int]]]): Candidate ranking. + ranking (Optional[Sequence[Candidate | Iterable[Candidate]]]): Candidate ranking. Entry i of the sequence is a candidate or iterable of candidates ranked in position i. - Candidate can be represented as a str or int. Allow mix of types in candidate set. + Candidates can be strings, integers, or mix of both. Defaults to None. Will be coerced to tuple[frozenset[str | int], ...]. weight (Union[float, int]): Weight assigned to a given ballot. Defaults to 1.0 Can be input as int or float, and will be coerced to float. voter_set (Union[set[str], frozenset[str]]): Set of voters who cast the ballot. Defaults to frozenset(). Will be coerced to frozenset. 
- scores (Optional[Mapping[str | int, float | int] | Mapping[str, float | int] + scores (Optional[Mapping[Candidate, float | int] | Mapping[str, float | int] | Mapping[int, float | int]]): Scores for individual candidates. Defaults to None. Values can be input as int or float but will be coerced to float. - Candidates can be strings, integers, or a mix of both. + Candidates can be strings, integers, or mix of both. Stored internally as a dict[str | int, float]. Only retains non-zero scores. Attributes: - ranking (Optional[tuple[frozenset[str | int], ...]]): Tuple of candidate ranking. + ranking (Optional[tuple[frozenset[Candidate], ...]]): Tuple of candidate ranking. Entry i of the tuple is a frozenset of candidates ranked in position i. weight (float): Weight assigned to a given ballot. voter_set (frozenset[str]): Set of voters who cast the ballot. - scores (Optional[Mapping[str | int, float | int]): Scores for individual candidates. + scores (Optional[Mapping[Candidate, float | int]): Scores for individual candidates. Raises: TypeError: Only one of ranking or scores can be provided. @@ -162,20 +162,22 @@ class RankBallot(Ballot): Args: ranking (RankingLike): Ranking of candidates, defaults to None. - RankingLike = Sequence[str | int | Iterable[str | int]] | None + RankingLike = Sequence[Candidate | Iterable[Candidate]] | None + Candidates can be strings, integers, or mix of both. weight (Union[int, float]): Weight of the ballot, defaults to 1.0. voter_set (Union[set[str], frozenset[str]]): Voter set of the ballot, defaults to frozenset(). Attributes: ranking (Ranking): Ranking of candidates. - Ranking = tuple[frozenset[str | int], ...] | None + Ranking = tuple[frozenset[Candidate], ...] | None weight (float): Weight of the ballot. voter_set (frozenset[str]): Voter set of the ballot. Raises: ValueError: Candidate '~' found in ballot ranking. ValueError: Ballot weight cannot be negative. + UserWarning: '1' and 1 candidates are treated as separate candidates.
""" def __init__( @@ -277,14 +279,15 @@ class ScoreBallot(Ballot): Args: scores (ScoresLike): Scores of candidates, defaults to None. - ScoresLike = Mapping[str | int, int | float] | Mapping[str, int | float] + ScoresLike = Mapping[Candidate, int | float] | Mapping[str, int | float] | Mapping[int, int | float] | None + Candidates can be strings, integers, or mix of both. weight (Union[int, float]): Weight of the ballot, defaults to 1.0. voter_set (Union[set[str], frozenset[str]]): Voter set of the ballot, defaults to frozenset(). Attributes: - scores (Optional[dict[str | int, float]]): Scores of candidates. + scores (Optional[dict[Candidate, float]]): Scores of candidates. weight (float): Weight of the ballot. voter_set (frozenset[str]): Voter set of the ballot. @@ -292,6 +295,7 @@ class ScoreBallot(Ballot): ValueError: Candidate '~' found in ballot scores. ValueError: Ballot weight cannot be negative. TypeError: Score values must be numeric. + UserWarning: '1' and 1 candidates are treated as separate candidates. """ def __init__( diff --git a/src/votekit/cleaning/rank_ballots_cleaning.py b/src/votekit/cleaning/rank_ballots_cleaning.py index 8ea0a15c..a11bdf84 100644 --- a/src/votekit/cleaning/rank_ballots_cleaning.py +++ b/src/votekit/cleaning/rank_ballots_cleaning.py @@ -86,7 +86,8 @@ def remove_cand_rank_ballot( Removes specified candidate(s) from ballot. Does not condense the resulting ballot. Args: - removed (str | int | list[str | int]): Candidate or list of candidates to be removed. + removed (Candidate | list[Candidate]): Candidate or list of candidates + to be removed. Candidates can be strings, integers, or mix of both. ballot (RankBallot): Ballot to remove candidates from. 
Returns: diff --git a/src/votekit/elections/election_state.py b/src/votekit/elections/election_state.py index 45f62627..210f70de 100644 --- a/src/votekit/elections/election_state.py +++ b/src/votekit/elections/election_state.py @@ -13,17 +13,22 @@ class ElectionState: Attributes: round_number (int, optional): Round number, defaults to 0. - remaining (tuple[frozenset[str | int],...], optional): Remaining candidates, ordered to + remaining (tuple[frozenset[Candidate],...], optional): Remaining candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - elected (tuple[frozenset[str | int],...], optional): Elected candidates, ordered to + Candidates can be strings, integers, or mix of both. + elected (tuple[frozenset[Candidate],...], optional): Elected candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - eliminated (tuple[frozenset[str | int],...], optional): Eliminated candidates, ordered to + Candidates can be strings, integers, or mix of both. + eliminated (tuple[frozenset[Candidate],...], optional): Eliminated candidates, ordered to indicate ranking, frozensets to indicate ties. Defaults to tuple with one empty set. - tiebreaks (dict[frozenset[str | int], tuple[frozenset[str | int],...]], optional): Stores + Candidates can be strings, integers, or mix of both. + tiebreaks (dict[frozenset[Candidate], tuple[frozenset[Candidate],...]], optional): Stores tiebreak resolutions. Keys are frozensets of tied candidates and values are resolutions of tiebreak. Defaults to empty dictionary. - scores(dict[str | int, float], optional): Stores score information. + Candidates can be strings, integers, or mix of both. + scores(dict[Candidate, float], optional): Stores score information. Keys are candidates, values are scores. Only remaining candidates should be stored. + Candidates can be strings, integers, or mix of both. 
""" diff --git a/src/votekit/elections/election_types/ranking/abstract_ranking.py b/src/votekit/elections/election_types/ranking/abstract_ranking.py index 7a41176b..cb37703d 100644 --- a/src/votekit/elections/election_types/ranking/abstract_ranking.py +++ b/src/votekit/elections/election_types/ranking/abstract_ranking.py @@ -25,9 +25,10 @@ class RankingElection(Election[RankProfile]): election_states (list[ElectionState]): a list of election states, one for each round of the election. The list is 0 indexed, so the initial state is stored at index 0, round 1 at 1, etc. - score_function (Callable[[RankProfile], dict[str | int, float]], optional): + score_function (Callable[[RankProfile], dict[Candidate, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects. Defaults to None. + Candidates can be strings, integers, or mix of both. length (int): the number of rounds of the election. """ diff --git a/src/votekit/elections/election_types/ranking/alaska.py b/src/votekit/elections/election_types/ranking/alaska.py index 5a3c60ce..9de609f6 100644 --- a/src/votekit/elections/election_types/ranking/alaska.py +++ b/src/votekit/elections/election_types/ranking/alaska.py @@ -29,12 +29,12 @@ class Alaska(RankingElection): round. Defaults to 2. m_2 (int, optional): Number of seats to elect in STV round, i.e. number of overall winners. Defaults to 1. - transfer (Callable[[str | int, float], Union[tuple[Ballot], list[Ballot]], int], + transfer (Callable[[Candidate, float], Union[tuple[Ballot], list[Ballot]], int], tuple[Ballot,...]], optional): Transfer method. Defaults to fractional transfer. Function signature is elected candidate, their number of first-place votes, the list of ballots with them ranked first, and the threshold value. Returns the list of ballots - after transfer. + after transfer. Candidates can be strings, integers, or mix of both. 
quota (str, optional): Formula to calculate quota. Accepts "droop" or "hare". Defaults to "droop". simultaneous (bool, optional): True if all candidates who cross threshold in a round are diff --git a/src/votekit/elections/election_types/ranking/plurality_veto.py b/src/votekit/elections/election_types/ranking/plurality_veto.py index d2c4c6ba..c974060a 100644 --- a/src/votekit/elections/election_types/ranking/plurality_veto.py +++ b/src/votekit/elections/election_types/ranking/plurality_veto.py @@ -52,7 +52,8 @@ class _IterativeVetoBase(RankingElection, ABC): Attributes: n_seats (int): The number of seats to be filled in the election. - candidates (frozenset[str]): The set of candidates in the election. + candidates (frozenset[Candidate]): The set of candidates in the election. + Candidates can be strings, integers, or mix of both. tiebreak_order (Optional[tuple[frozenset[str]]]): The candidate ordering used to break last-place ties when processing vetoes. ``None`` if ``tiebreak`` = 'random'. @@ -307,12 +308,14 @@ def _veto_loop( Ties will be broken in _run_step. Args: - scores (dict[str | int, float]): Mutable score dict, modified in place. + scores (dict[Candidate, float]): Mutable score dict, modified in place. + Candidates can be strings, integers, or mix of both. Returns: - tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), + tuple[frozenset[Candidate], frozenset[Candidate]]: A tuple of (eliminated, elected), where eliminated contains candidates worthy of elimination and elected contains candidates worthy of election. + Candidates can be strings, integers, or mix of both. """ raise NotImplementedError @@ -423,9 +426,11 @@ class PluralityVeto(_IterativeVetoBase): Attributes: n_seats (int): The number of seats to be filled in the election. - candidates (frozenset[str]): The set of candidates in the election. 
- tiebreak_order (Optional[tuple[frozenset[str]]]): The candidate ordering used to break + candidates (frozenset[Candidate]): The set of candidates in the election. + Candidates can be strings, integers, or mix of both. + tiebreak_order (Optional[tuple[frozenset[Candidate]]]): The candidate ordering used to break last-place ties when processing vetoes. ``None`` if ``tiebreak`` = 'random'. + Candidates can be strings, integers, or mix of both. Raises: ValueError: If any of the following: @@ -503,9 +508,11 @@ class SerialVeto(_IterativeVetoBase): Attributes: n_seats (int): The number of seats to be filled in the election. - candidates (frozenset[str]): The set of candidates in the election. - tiebreak_order (Optional[tuple[frozenset[str]]]): The candidate ordering used to break + candidates (frozenset[Candidate]): The set of candidates in the election. + Candidates can be strings, integers, or mix of both. + tiebreak_order (Optional[tuple[frozenset[Candidate]]]): The candidate ordering used to break last-place ties when processing vetoes. ``None`` if ``tiebreak`` = 'random'. + Candidates can be strings, integers, or mix of both. Raises: ValueError: If any of the following: @@ -526,11 +533,13 @@ def _veto_loop( If all vetoes are processed, elects all remaining candidates. Args: - scores (dict[str | int, float]): Mutable score dict, modified in place. + scores (dict[Candidate, float]): Mutable score dict, modified in place. + Candidates can be strings, integers, or mix of both. Returns: - tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), + tuple[frozenset[Candidate], frozenset[Candidate]]: A tuple of (eliminated, elected), where each is a set of candidates worthy of elimination or election, respectively. + Candidates can be strings, integers, or mix of both. 
""" eliminated: set[Candidate] = set() elected: frozenset[Candidate] = frozenset() diff --git a/src/votekit/elections/election_types/ranking/simultaneous_veto.py b/src/votekit/elections/election_types/ranking/simultaneous_veto.py index 8efce835..efaaba1f 100644 --- a/src/votekit/elections/election_types/ranking/simultaneous_veto.py +++ b/src/votekit/elections/election_types/ranking/simultaneous_veto.py @@ -51,13 +51,14 @@ class SimultaneousVeto(RankingElection): profile (RankProfile): Profile to run election on. n_seats (int, optional): Number of seats to elect. Defaults to 1. candidate_weights (Literal['first_place', 'uniform', 'borda', 'harmonic'] - | dict[str | int, float] | dict[str, float] | dict[int, float] | int, optional): + | dict[Candidate, float] | dict[str, float] | dict[int, float] | int, optional): Initial candidate scores. 'first_place' means candidates begin with their first-place vote count. 'uniform' means all candidates begin with the same score. 'borda' means candidates begin with their Borda scores. If a dictionary, keys are candidates and values are initial scores; a score must be provided - for every candidate. If an integer k, candidates begin with their top-k vote count. + for every candidate. Candidates can be strings, integers, or mix of both. + If an integer k, candidates begin with their top-k vote count. Defaults to "first_place". tiebreak (Literal['first_place', 'random', 'borda', 'remaining_score', 'veto_pressure', 'lex'], optional): Method for breaking ties when multiple candidates @@ -70,8 +71,10 @@ class SimultaneousVeto(RankingElection): even if it is larger than ``n_seats``. Defaults to False. Attributes: - candidates (frozenset[str]): Candidates in the initial profile. - initial_scores (dict[str, float]): Initial scores of each candidate before veto process. + candidates (frozenset[Candidate]): Candidates in the initial profile. + Candidates can be strings, integers, or mix of both. 
+ initial_scores (dict[Candidate, float]): Initial scores of each candidate before veto + process. Candidates can be strings, integers, or mix of both. Raises: ValueError: If any of the following: @@ -227,7 +230,7 @@ def _make_score_function( Args: candidate_weights (str | dict[Candidate, float] | int): How to initialize - candidate scores. Candidates can be strings or integers. + candidate scores. Candidates can be strings, integers, or mix of both. 'first_place' means candidates begin with their first-place vote count. 'uniform' means all candidates begin with the same score. 'borda' means candidates begin with their Borda scores. If a dictionary, @@ -375,6 +378,7 @@ def _get_vetoes(self, ballot_idx: np.intp) -> frozenset[Candidate]: Returns: frozenset[Candidate]: The candidate(s) to be vetoed. + Candidates can be strings, integers, or mix of both. Raises: ValueError: If the ballot has no remaining candidates to veto. @@ -418,15 +422,17 @@ def _break_tie( Takes candidate names and indices and returns a tiebroken order of names. Args: - candidates (frozenset[str | int]): Names of tied candidates. + candidates (frozenset[Candidate]): Names of tied candidates. + Candidates can be strings, integers, or mix of both. candidate_idx (Iterable[int]): Indices of tied candidates. profile (RankProfile): RankProfile of the current round. Passed to tiebreak_set() if ``tiebreak`` is not 'veto_pressure' or 'remaining_score'. Returns: - tuple[frozenset[str | int], ...]: Tiebroken ordering of candidates + tuple[frozenset[Candidate], ...]: Tiebroken ordering of candidates (each in their own set). + Candidates can be strings, integers, or mix of both. """ def make_singleton_ranking(indices: list[int]) -> tuple[frozenset[Candidate], ...]: @@ -465,12 +471,13 @@ def _eliminate_one_candidate( profile (RankProfile): RankProfile of the current round. 
Returns: - tuple[str | int | None, dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]: + tuple[Candidate | None, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: Returns a tuple (eliminated_candidate, tiebreaks), where eliminated_candidate is either a str or int giving the name of the eliminated candidate, or ``None``, signaling that no candidate was eliminated; and tiebreaks is a dict mapping a set of simultaneously-eliminated candidates to a tiebroken order; if only one candidate is eliminated, tiebreaks is empty. + Candidates can be strings, integers, or mix of both. """ idx_to_elim = np.where((self._scores <= 0) & (self._veto_pressure > 0))[0] @@ -507,10 +514,11 @@ def _handle_all_zeroed( profile (RankProfile): RankProfile of the current round. Returns: - tuple[Sentinel, dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]: + tuple[Sentinel, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: Returns a tuple (eliminated_candidate, tiebreaks), where eliminated_candidate is a Sentinel indicating that the election is over, and tiebreaks is a dict mapping the set of remaining candidates to a tiebroken order of the same. + Candidates can be strings, integers, or mix of both. """ tiebreaks = {} if not self.return_all_tied_winners: @@ -545,7 +553,7 @@ def _veto_step( Used for tiebreaking, if necessary. Returns: - tuple[str | int | Sentinel | None, dict[frozenset[str | int], + tuple[str | int | Sentinel | None, dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: A 2-tuple of (eliminated_candidate, tiebreaks). eliminated_candidate is one of: - a str or int indicating the candidate to be eliminated @@ -553,6 +561,7 @@ def _veto_step( - None, an error code signaling the failure to eliminate a candidate this round and tiebreaks is a dict mapping an unordered frozenset of candidates to their tiebroken order (a tuple of singleton frozensets). + Candidates can be strings, integers, or mix of both. 
""" self._veto_pressure = self._veto_matrix.sum(axis=1) diff --git a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py index ac820134..a0c4a179 100644 --- a/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py +++ b/src/votekit/elections/election_types/ranking/stv/numpy_stv_base.py @@ -76,8 +76,9 @@ class NumpySTVBase(ABC): Abstract base class for numpy-based STV-style elections. Attributes: - candidates (list[str | int]): List of candidate names, indexed + candidates (list[Candidate]): List of candidate names, indexed to correspond to ballot matrix entries. + Candidates can be strings, integers, or mix of both. profile (RankProfile): The original RankProfile for reference. n_seats (int): Number of seats to be elected. election_states (list[ElectionState]): List of ElectionState objects representing @@ -271,8 +272,9 @@ def _run_election( fpv_by_round (list[NDArray]): List of first-preference vote tallies by round. play_by_play (list[ElectionPlay]): List of dictionaries representing the actions taken in each round. - tiebreak_record (list[dict[frozenset[str | int], tuple[frozenset[str | int], ...]]]): + tiebreak_record (list[dict[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]): List of dictionaries representing tiebreak resolutions for each round. + Candidates can be strings, integers, or mix of both. """ pass @@ -299,9 +301,10 @@ def get_remaining(self, round_number: int = -1) -> tuple[frozenset, ...]: -1, which accesses the final profile. Returns: - tuple[frozenset[str], ...]: + tuple[frozenset[Candidate], ...]: Tuple of sets of remaining candidates. Ordering of tuple denotes ranking of remaining candidates, sets denote ties. + Candidates can be strings, integers, or mix of both. 
""" tallies = self._data.fpv_by_round[round_number].copy() elected_cands_as_list_of_str = [ @@ -336,10 +339,10 @@ def get_elected(self, round_number: int = -1) -> tuple[frozenset[Candidate], ... -1, which accesses the final profile. Returns: - tuple[frozenset[str | int], ...]: + tuple[frozenset[Candidate], ...]: Tuple of winning candidates in order of election. Candidates in the same set were elected simultaneously, i.e. in the final ranking - they are tied. + they are tied. Candidates can be strings, integers, or mix of both. """ if ( round_number < -len(self._data.fpv_by_round) @@ -363,10 +366,10 @@ def get_eliminated(self, round_number: int = -1) -> tuple[frozenset[Candidate], -1, which accesses the final profile. Returns: - tuple[frozenset[str | int], ...]: + tuple[frozenset[Candidate], ...]: Tuple of eliminated candidates in reverse order of elimination. Candidates in the same set were eliminated simultaneously, i.e. in the final ranking - they are tied. + they are tied. Candidates can be strings, integers, or mix of both. """ if ( round_number < -len(self._data.fpv_by_round) @@ -392,7 +395,8 @@ def get_ranking(self, round_number: int = -1) -> tuple[frozenset[Candidate], ... -1, which accesses the final profile. Returns: - tuple[frozenset[Candidate],...]: Ranking of candidates. Candidate can be str or int. + tuple[frozenset[Candidate],...]: Ranking of candidates. + Candidates can be strings, integers, or mix of both. """ return tuple( [ @@ -676,8 +680,9 @@ def _run_loser_tiebreak( Args: tied_losers (list[int]): List of candidate indices that are tied. round_number (int): The current round number. - mutant_tiebreak_record (list[dict[frozenset[str | int], - tuple[frozenset[str | int], ...]]]): Tiebreak record for each round. + mutant_tiebreak_record (list[dict[frozenset[Candidate], + tuple[frozenset[Candidate], ...]]]): Tiebreak record for each round. + Candidates can be strings, integers, or mix of both. 
Returns: tuple: (index of new loser, updated tiebreak record) diff --git a/src/votekit/elections/transfers.py b/src/votekit/elections/transfers.py index 6e195d6a..0df465ef 100644 --- a/src/votekit/elections/transfers.py +++ b/src/votekit/elections/transfers.py @@ -17,7 +17,8 @@ def fractional_transfer( Calculates fractional transfer from winner, then removes winner from the list of ballots. Args: - winner (str | int): Candidate to transfer votes from. + winner (Candidate): Candidate to transfer votes from. + Candidate can be a string or integer. fpv (float): Number of first place votes for winning candidate. ballots (Union[tuple[RankBallot], list[RankBallot]]): List of Ballot objects. threshold (int): Value required to be elected, used to calculate transfer value. diff --git a/src/votekit/graphs/pairwise_comparison_graph.py b/src/votekit/graphs/pairwise_comparison_graph.py index 6cfc8a9a..bbd681b4 100644 --- a/src/votekit/graphs/pairwise_comparison_graph.py +++ b/src/votekit/graphs/pairwise_comparison_graph.py @@ -24,7 +24,7 @@ def __rows_to_indices(profile: RankProfile, cand_name_to_idx: dict[Candidate, in Args: profile (RankProfile): The preference profile containing rankings. cand_name_to_idx (dict[Candidate, int]): A mapping from candidate names to their - integer index representations. Candidate can be str or int. + integer index representations. Candidates can be strings, integers, or mix of both. Returns: NDArray: A tuple containing: An NDArray of integer indices representing the rankings. @@ -96,7 +96,7 @@ def pairwise_dict( Returns: dict[tuple[Candidate, Candidate], tuple[float, float]]: Pairwise comparison dictionary. - Candidate can be str or int. + Candidates can be strings, integers, or mix of both. 
""" if not isinstance(profile, RankProfile): raise ValueError("Profile must be of type RankProfile.") diff --git a/src/votekit/matrices/_utils.py b/src/votekit/matrices/_utils.py index f36c1a0e..14aba82f 100644 --- a/src/votekit/matrices/_utils.py +++ b/src/votekit/matrices/_utils.py @@ -3,8 +3,14 @@ import numpy as np import pandas as pd +from votekit.types import Candidate -def _convert_dict_to_matrix(data_dict: dict[str, dict[str, Any]]) -> np.ndarray: + +def _convert_dict_to_matrix( + data_dict: dict[Candidate, dict[Candidate, Any]] + | dict[str, dict[str, Any]] + | dict[int, dict[int, Any]], +) -> np.ndarray: """ Convert a nested dictionary to a numpy matrix with float entries. Will respect the order of the dictionaries. diff --git a/src/votekit/matrices/candidate/comentions.py b/src/votekit/matrices/candidate/comentions.py index c3522fab..7f63038e 100644 --- a/src/votekit/matrices/candidate/comentions.py +++ b/src/votekit/matrices/candidate/comentions.py @@ -11,8 +11,9 @@ def comention(cands: Candidate | list[Candidate] | list[str] | list[int], ballot Takes cands and returns true if they all appear on the ballot in the ranking. Args: - cands (str | int, list[str | int] | list[str] | list[int]): + cands (Candidate, list[Candidate] | list[str] | list[int]): Candidate name or list of candidate names. + Candidates can be strings, integers, or mix of both. ballot (RankBallot): RankBallot. Returns: @@ -29,15 +30,17 @@ def comention(cands: Candidate | list[Candidate] | list[str] | list[int], ballot return set(cands).issubset(all_cands) -def comention_above(i: str, j: str, ballot: RankBallot) -> bool: +def comention_above(i: Candidate, j: Candidate, ballot: RankBallot) -> bool: """ Takes candidates i,j and returns True if i >= j in the ranking. Requires that the ballot has a ranking. Args: - i (str): Candidate name. - j (str): Candidate name. + i (Candidate): Candidate name. + Candidates can be strings, integers, or mix of both. + j (Candidate): Candidate name. 
+ Candidates can be strings, integers, or mix of both. ballot (RankBallot): RankBallot. Returns: @@ -59,7 +62,7 @@ def comention_above(i: str, j: str, ballot: RankBallot) -> bool: def comentions_matrix( - pref_profile: RankProfile, candidates: list[str], symmetric: bool = False + pref_profile: RankProfile, candidates: list[Candidate], symmetric: bool = False ) -> np.ndarray: """ Takes a preference profile and converts to a matrix @@ -69,8 +72,8 @@ def comentions_matrix( Args: pref_profile (RankProfile): Profile. - candidates (list[str]): List of candidates to use. Indexing of this list matches indexing of - output array. + candidates (list[Candidate]): List of candidates to use. Indexing of this list matches + indexing of output array. Candidates can be strings, integers, or mix of both. symmetric (bool, optional): Whether or not to make the matrix symmetric. Defaults to False in which case the i,j entry is comentions where i >= j. True means the i,j entry is comentions of i,j. diff --git a/src/votekit/models.py b/src/votekit/models.py index a7e5e4d8..66bdc5c8 100644 --- a/src/votekit/models.py +++ b/src/votekit/models.py @@ -31,10 +31,10 @@ class Election(Generic[P]): election_states (list[ElectionState]): A list of election states, one for each round of the election. The list is 0 indexed, so the initial state is stored at index 0, round 1 at 1, etc. - score_function (Callable[[PreferenceProfile], dict[str | int, float]], optional): + score_function (Callable[[PreferenceProfile], dict[Candidate, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects. Defaults to None. - Candidates can be strings, integers, or a mix of both. + Candidates can be strings, integers, or mix of both. length (int): The number of rounds of the election. """ @@ -164,9 +164,10 @@ def get_remaining(self, round_number: int = -1) -> tuple[frozenset[Candidate], . 
-1, which accesses the final profile. Returns: - tuple[frozenset[str | int],...]: + tuple[frozenset[Candidate],...]: Tuple of sets of remaining candidates. Ordering of tuple denotes ranking of remaining candidates, sets denote ties. + Candidate can be strings, integers, or mix of both. """ return tuple(self.election_states[round_number].remaining) @@ -179,7 +180,8 @@ def get_ranking(self, round_number: int = -1) -> tuple[frozenset[Candidate], ... -1, which accesses the final profile. Returns: - tuple[frozenset[str | int],...]: Ranking of candidates. + tuple[frozenset[Candidate],...]: Ranking of candidates. + Candidates can be strings, integers, or mix of both. """ # len condition handles empty remaining candidates return tuple( diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index fbac9cd5..a0d4ca43 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -56,12 +56,14 @@ class PreferenceProfile: Parameters: ballots (Sequence[Ballot]): Tuple of ``Ballot`` objects. - candidates (tuple[str | int]): Tuple of candidates. A candidate can be a str or int. + candidates (tuple[Candidate]): Tuple of candidates. + Candidates can be strings, integers, or mix of both. max_ranking_length (int): The length of the longest allowable ballot, i.e., how many candidates are allowed to be ranked in an election. df (pandas.DataFrame): Data frame view of the ballots. - candidates_cast (tuple[str | int]): Tuple of candidates who appear on any ballot with + candidates_cast (tuple[Candidate]): Tuple of candidates who appear on any ballot with positive weight, either in the ranking or in the score dictionary. + Candidates can be strings, integers, or mix of both. total_ballot_wt (float): Sum of ballot weights. num_ballots (int): Length of ballot list. 
contains_rankings (bool): Whether or not the profile contains ballots with @@ -407,9 +409,11 @@ def __update_ballot_ranking_data( rank_ballot_data (dict[str, list]): Dictionary storing ballot data. idx (int): Index of ballot. rank_ballot (RankBallot): Ballot. - candidates_cast (list[str | int]): List of candidates who have received votes. + candidates_cast (list[Candidate]): List of candidates who have received votes. + Candidates can be strings, integers, or mix of both. num_ballots (int): Total number of ballots. - candidate_id_map (dict[str | int,int]): mapping of candidate names to integer IDs. + candidate_id_map (dict[Candidate,int]): mapping of candidate names to integer IDs. + Candidates can be strings, integers, or mix of both. """ @@ -458,9 +462,11 @@ def __update_rank_ballot_data_attrs( rank_ballot_data (dict[str, list]): Dictionary storing ballot data. idx (int): Index of ballot. rank_ballot (RankBallot): Ballot. - candidates_cast (list[str | int]): List of candidates who have received votes. + candidates_cast (list[Candidate]): List of candidates who have received votes. + Candidates can be strings, integers, or mix of both. num_ballots (int): Total number of ballots. - candidate_id_map (dict[str | int, int]): Mapping of candidate names to integer IDs. + candidate_id_map (dict[Candidate, int]): Mapping of candidate names to integer IDs. + Candidates can be strings, integers, or mix of both. """ rank_ballot_data["Weight"][idx] = rank_ballot.weight @@ -540,8 +546,9 @@ def _init_from_rank_ballots( ballots (Sequence[RankBallot,...]): Sequence of ballots. Returns: - tuple[pd.DataFrame, tuple[str | int, ...], dict[str | int, int]]: + tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: df, candidates_cast, candidate_id_map + Candidates can be strings, integers, or mix of both. 
""" # `rank_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -630,7 +637,8 @@ def __find_candidates_cast_from_init_rank_df(self, df: pd.DataFrame) -> tuple[Ca df (pd.DataFrame): Dataframe representation of ballots. Returns: - tuple[str | int]: Candidates cast. + tuple[Candidate]: Candidates cast. + Candidates can be strings, integers, or mix of both. """ mask = df["Weight"] > 0 @@ -654,8 +662,9 @@ def _translate_df_ranking_values( Args: df (pd.DataFrame): Dataframe representation of ballots. - candidate_mapping (dict[str | int, int] | dict[int, str | int]): + candidate_mapping (dict[Candidate, int] | dict[int, str | int]): Mapping from candidates names to integer IDs, or vice versa. + Candidates can be strings, integers, or mix of both. Returns: pd.DataFrame: Copy of df with ranking values translated @@ -685,9 +694,11 @@ def _init_from_rank_df( Args: df (pd.DataFrame): Dataframe representation of ballots. candidate_id_map (dict[Candidate, int]): Mapping of candidate names to integer IDs. + Candidates can be strings, integers, or mix of both. Returns - tuple[pd.DataFrame, tuple[str | int]]: df, candidates_cast + tuple[pd.DataFrame, tuple[Candidate]]: df, candidates_cast + Candidates can be strings, integers, or mix of both. """ self.__validate_init_rank_df_params(df) self.__validate_init_rank_df(df) @@ -912,7 +923,8 @@ def __to_rank_csv_ballot_row( ballot (Ballot): Ballot. include_voter_set (bool): Whether or not to include the voter set of each ballot. - candidate_mapping (dict[str, int]): Mapping candidate names to integers. + candidate_mapping (dict[Candidate, int]): Mapping candidate names to + integers. Candidates can be strings, integers, or mix of both. weight_precision (int): Number of decimals to round float weights to. """ @@ -935,7 +947,8 @@ def __to_rank_csv_data_column_names( Args: include_voter_set (bool): Whether or not to include the voter set of each ballot. - candidate_mapping (dict[str, str]): Maps candidate names to prefixes. 
+ candidate_mapping (dict[Candidate, str]): Maps candidate names to + prefixes. Candidates can be strings, integers, or mix of both. """ assert self.max_ranking_length is not None data_col_names = [f"Ranking_{i + 1}" for i in range(self.max_ranking_length)] @@ -1114,14 +1127,17 @@ def __update_ballot_scores_data( Update the score data from a ballot. Args: - ballot_data (dict[str | int, list]): Dictionary storing ballot data. + ballot_data (dict[Candidate, list]): Dictionary storing ballot data. Dictionary keys represent the column names. The candidate columns will be their integer ids. + Candidates can be strings, integers, or mix of both. idx (int): Index of ballot. ballot (ScoreBallot): Ballot. - candidates_cast (list[str]): List of candidates who have received votes. + candidates_cast (list[Candidate]): List of candidates who have received votes. + Candidates can be strings, integers, or mix of both. num_ballots (int): Total number of ballots. - candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. + candidate_id_map (dict[Candidate, int]): Mapping of candidates to integer IDs. + Candidates can be strings, integers, or mix of both. """ if ballot.scores is None: return @@ -1154,14 +1170,17 @@ def __update_score_ballot_data_attrs( Update all ballot data from a ballot. Args: - ballot_data (dict[str | int, list]): Dictionary storing ballot data. + ballot_data (dict[Candidate, list]): Dictionary storing ballot data. Dictionary keys represent the column names. The candidate column names will be their integer IDs. + Candidates can be strings, integers, or mix of both. idx (int): Index of ballot. ballot (ScoreBallot): Ballot. - candidates_cast (list[str]): List of candidates who have received votes. + candidates_cast (list[Candidate]): List of candidates who have received votes. + Candidates can be strings, integers, or mix of both. num_ballots (int): Total number of ballots. 
- candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. + candidate_id_map (dict[Candidate, int]): Mapping of candidates to integer IDs. + Candidates can be strings, integers, or mix of both. """ score_ballot_data["Weight"][idx] = ballot.weight @@ -1189,6 +1208,7 @@ def __init_score_ballot_data( Returns: Tuple[int, dict[str | int, list]]: num_ballots, score_ballot_data + Dictionary keys include the candidates integer IDs. """ num_ballots = len(ballots) @@ -1217,9 +1237,11 @@ def __init_formatted_score_df( Args: score_ballot_data (dict[str | int, list]): Dictionary storing ballot data. Dictionary keys represent the column names. - The candidate columns will be their integer ids. - candidates_cast (list[str]): List of candidates who received votes. - candidate_id_map (dict[str | int, int]): Mapping of candidates to integer IDs. + The candidate columns will be their integer IDs. + candidates_cast (list[Candidate]): List of candidates who received votes. + Candidates can be strings, integers, or mix of both. + candidate_id_map (dict[Candidate, int]): Mapping of candidates to integer IDs. + Candidates can be strings, integers, or mix of both. Returns: pd.DataFrame: Dataframe of profile. @@ -1254,11 +1276,13 @@ def _init_from_score_ballots( Args: ballots (Sequence[ScoreBallot,...]): Tuple of ballots. - candidate_id_map (dict[str | int, int]): Mapping of candidate names to integer IDs. + candidate_id_map (dict[Candidate, int]): Mapping of candidate names to integer IDs. + Candidates can be strings, integers, or mix of both. Returns: - tuple[pd.DataFrame, tuple[str | int, ...], dict[str | int, int]]: + tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: df, candidates_cast, candidate_id_map + Candidates can be strings, integers, or mix of both. """ # `score_ballot_data` sends {Weight, Voter Set} keys to a list to be @@ -1367,10 +1391,13 @@ def _init_from_score_df( Args: df (pd.DataFrame): Dataframe representation of ballots. 
+ candidate_id_map (dict[Candidate, int]): Mapping of candidate names to integer IDs. + Candidates can be strings, integers, or mix of both. Returns - tuple[pd.DataFrame, tuple[str], dict[Candidate, int]]: + tuple[pd.DataFrame, tuple[Candidate], dict[Candidate, int]]: df, candidates_cast, candidate_id_map + Candidates can be strings, integers, or mix of both. """ self.__validate_init_score_df_params(df) self.__validate_init_score_df(df) @@ -1504,7 +1531,7 @@ def __to_score_csv_header( Args: candidate_mapping (dict[Candidate, str]): Candidate name mapped to integer IDs. integer IDs are cast to strings for csv. - Candidate can be str or int. + Candidates can be strings, integers, or mix of both. include_voter_set (bool): Whether or not to include the voter set of each ballot. """ diff --git a/src/votekit/pref_profile/utils.py b/src/votekit/pref_profile/utils.py index 6723c8e3..9a3c65eb 100644 --- a/src/votekit/pref_profile/utils.py +++ b/src/votekit/pref_profile/utils.py @@ -83,7 +83,8 @@ def convert_row_to_score_ballot(row: pd.Series, candidates: tuple[Candidate, ... Args: row (pd.Series): Row of a profile.df. - candidates (tuple[str | int,...]): The name of the candidates. + candidates (tuple[Candidate,...]): The name of the candidates. + Candidates can be strings, integers, or mix of both. Returns: ScoreBallot: Ballot corresponding to the row of the df. @@ -107,7 +108,8 @@ def _df_to_rank_ballot_tuple( Args: df (pd.DataFrame): A profile.df. - candidates (tuple[str | int,...]): The candidates. + candidates (tuple[Candidate,...]): The candidates. + Candidates can be strings, integers, or mix of both. max_ranking_length (int, optional): The maximum length of a ranking. Defaults to 0, which is used for ballots with no ranking. @@ -209,8 +211,9 @@ def rank_profile_to_ranking_dict( weight. Defaults to False. 
Returns: - dict[tuple[frozenset[str | int],...], float]: + dict[tuple[frozenset[Candidate],...], float]: A dictionary with candidate rankings (keys) and corresponding total weights (values). + Candidates can be strings, integers, or mix of both. Raises: TypeError: Profile must be a RankProfile. @@ -244,8 +247,9 @@ def score_profile_to_scores_dict( weight. Defaults to False. Returns: - dict[tuple[tuple[str | int, float], ...] | None, float]: + dict[tuple[tuple[Candidate, float], ...] | None, float]: A dictionary with candidate scores (keys) and corresponding total weights (values). + Candidates can be strings, integers, or mix of both. Raises: TypeError: Profile must be a ScoreProfile. diff --git a/src/votekit/representation_scores.py b/src/votekit/representation_scores.py index 8af517f5..54e0c0f6 100644 --- a/src/votekit/representation_scores.py +++ b/src/votekit/representation_scores.py @@ -21,7 +21,7 @@ def r_representation_score( r (int): Consider a voter represented if a member of the candidate_list is in one of the top r positions of their ballot. Typical choices are 1, the number of seats, or the max ballot length. - candidate_list (Sequence[str | int]): List of candidates to consider. + candidate_list (Sequence[Candidate]): List of candidates to consider. Candidates can be strings, integers, or mix of both. Returns: @@ -79,9 +79,9 @@ def winner_sets_r_representation_scores( r (int): Consider a voter represented if a member of the candidate_set is in one of the top r positions of their ballot. Typical choices are 1, the number of seats, or the max ballot length. - candidate_list (Sequence[str | int], optional): + candidate_list (Sequence[Candidate], optional): List of candidates to consider as possible winners. - Candidates can be either strings, integers, or a mix of both. + Candidates can be either strings, integers, or mix of both. Defaults to None, in which case all candidates who received at least one vote are used. 
Returns: diff --git a/src/votekit/utils.py b/src/votekit/utils.py index c2d3fe10..2826f265 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -63,9 +63,10 @@ def ballots_by_first_cand(profile: RankProfile) -> dict[Candidate, list[RankBall profile (RankProfile): Profile to partititon. Returns: - dict[str | int, list[RankBallot]]: + dict[Candidate, list[RankBallot]]: A dictionary whose keys are candidates and values are lists of ballots that have that candidate first. + Candidates can be strings, integers, or mix of both. """ if not isinstance(profile, RankProfile): raise TypeError("Ballots must have rankings.") @@ -189,8 +190,9 @@ def _score_dict_from_rankings_df_no_ties( the profile. If it is shorter, we add 0s. Returns: - dict[str | int, float]: + dict[Candidate, float]: Dictionary mapping candidates to scores. + Candidates can be strings, integers, or mix of both. """ validate_score_vector(score_vector) @@ -266,8 +268,9 @@ def score_dict_from_score_vector( receive the points for 4th place. Returns: - dict[str | int, float]: + dict[Candidate, float]: Dictionary mapping candidates to scores. + Candidates can be strings, integers, or mix of both. """ validate_score_vector(score_vector) @@ -337,8 +340,9 @@ def _first_place_votes_from_df_no_ties( profile (RankProfile): The profile to compute first place votes for. Returns: - dict[str | int, float]: + dict[Candidate, float]: Dictionary mapping candidates to number of first place votes. + Candidates can be strings, integers, or mix of both. """ # equiv to score vector of (1,0,0,...) assert profile.max_ranking_length is not None @@ -419,8 +423,9 @@ def borda_scores( receive the points for 4th place. Returns: - dict[str | int, float]: + dict[Candidate, float]: Dictionary mapping candidates to Borda scores. + Candidates can be strings, integers, or mix of both. """ if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") @@ -447,7 +452,8 @@ def tiebreak_set( profile. 
Rule 4: lex/lexicographic/alph/alphabetical; break the tie alphabetically. Args: - r_set (frozenset[str | int]): Set of candidates on which to break tie. + r_set (frozenset[Candidate]): Set of candidates on which to break tie. + Candidates can be strings, integers, or mix of both. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str): Tiebreak method to use. Options are "random", "first_place", and @@ -466,6 +472,7 @@ def tiebreak_set( Returns: tuple[frozenset[Candidate],...]: tiebroken ranking + Candidates can be strings, integers, or mix of both. """ if tiebreak in ["alphabetical", "lexicographic", "alph", "lex"]: if any(isinstance(cand, int) for cand in r_set): @@ -532,18 +539,19 @@ def tiebroken_ranking( Breaks ties in a list-of-sets ranking according to a given scheme. Args: - ranking (tuple[frozenset[str | int]]): A list-of-set ranking of candidates. + ranking (tuple[frozenset[Candidate]]): A list-of-set ranking of candidates. + Candidates can be strings, integers, or mix of both. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str, optional): Method of tiebreak, currently supports 'random', 'borda', 'first_place'. Defaults to random. Returns: - tuple[tuple[frozenset[str | int], ...], dict[frozenset[str | int] - tuple[frozenset[str | int],...]]]: + tuple[tuple[frozenset[Candidate], ...], dict[frozenset[Candidate] + tuple[frozenset[Candidate],...]]]: The first entry of the tuple is a list-of-set ranking of candidates (broken down to one candidate sets). The second entry is a dictionary that maps tied sets to their - resolution. + resolution. Candidates can be strings, integers, or mix of both. 
""" new_ranking: list[frozenset[Candidate]] = [frozenset()] * len([c for s in ranking for c in s]) @@ -569,14 +577,16 @@ def score_dict_to_ranking( Sorts candidates into a tuple of frozensets ranking based on a scoring dictionary. Args: - score_dict (dict[str | int, float] | dict[str, float] | dict[int, float]): + score_dict (dict[Candidate, float] | dict[str, float] | dict[int, float]): Dictionary between candidates and their score. + Candidates can be strings, integers, or mix of both. sort_high_low (bool, optional): How to sort candidates based on scores. True sorts from high to low. Defaults to True. Returns: - tuple[frozenset[str | int],...]: Candidate rankings in a list-of-sets form. + tuple[frozenset[Candidate],...]: Candidate rankings in a list-of-sets form. + Candidates can be strings, integers, or mix of both. """ score_to_cand: dict[float, list[Candidate]] = {s: [] for s in score_dict.values()} @@ -614,8 +624,9 @@ def elect_cands_from_set_ranking( is a tie set and whose second entry is the resolution of the tie. Args: - ranking (Sequence[frozenset[str | int] | set[str | int] | set[str] | set[int]]): + ranking (Sequence[frozenset[Candidate] | set[Candidate] | set[str] | set[int]]): A list-of-set ranking of candidates. + Candidates can be strings, integers, or mix of both. n_seats (int): Number of seats to elect. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. @@ -623,11 +634,12 @@ def elect_cands_from_set_ranking( 'first_place'. Defaults to None, which does not break ties. 
Returns: - tuple[tuple[frozenset[str | int]]], list[tuple[frozenset[str | int]], - Optional[tuple[frozenset[str | int], tuple[frozenset[str | int], ...]]]: + tuple[tuple[frozenset[Candidate]]], list[tuple[frozenset[Candidate]], + Optional[tuple[frozenset[Candidate], tuple[frozenset[Candidate], ...]]]: A list-of-sets of elected candidates, a list-of-sets of remaining candidates, and a tuple whose first entry is a tie set and whose second entry is the resolution of the tie. If no ties were broken, the tuple returns None. + Candidates can be strings, integers, or mix of both. """ if n_seats < 1: raise ValueError("n_seats must be strictly positive") @@ -921,6 +933,7 @@ def build_df_from_ballot_samples( sampled frequency. The keys should be in candidate id form candidates : list of candidates in the profile + Candidates can be strings, integers, or mix of both. returns: pandas df """ From 9622fea9e12034d75ad88e4e973fd50880ba110b Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 19 Jun 2026 11:12:18 -0500 Subject: [PATCH 09/11] update docstring for _convert_dict_to_matrix --- src/votekit/matrices/_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/votekit/matrices/_utils.py b/src/votekit/matrices/_utils.py index 14aba82f..24419260 100644 --- a/src/votekit/matrices/_utils.py +++ b/src/votekit/matrices/_utils.py @@ -16,8 +16,10 @@ def _convert_dict_to_matrix( Will respect the order of the dictionaries. Args: - data_dict (dict[str, dict[str, Any]]): Top level keys are rows, bottom level keys are columns. + data_dict (dict[Candidate, dict[Candidate, Any]] | dict[str, dict[str, Any]] + | dict[int, dict[int, Any]]): Top level keys are rows, bottom level keys are columns. Values must be convertable to float. + Candidates can be strings, integers, or mix of both. Returns np.ndarray: Matrix representing data in dictionary. 
From 0be9799e6d3056105540d9eef96e36b8af4806c4 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Thu, 25 Jun 2026 14:46:35 -0500 Subject: [PATCH 10/11] fix candidate docstrings, add lexico sort for mixed candidates, add simultaneous veto and ranked pairs to mixed cand testing --- src/votekit/animations.py | 51 ++++++++++--------- src/votekit/ballot.py | 12 ++--- .../ballot_generator/std_generator/spacial.py | 12 ++--- .../cleaning/rank_profiles_cleaning.py | 19 +++---- .../cleaning/score_profiles_cleaning.py | 10 ++-- .../election_types/ranking/plurality_veto.py | 11 ++-- .../election_types/ranking/ranked_pairs.py | 5 +- .../election_types/ranking/schulze.py | 5 +- .../ranking/simultaneous_veto.py | 5 +- .../election_types/ranking/stv/stv.py | 4 +- .../graphs/pairwise_comparison_graph.py | 36 ++++++------- src/votekit/matrices/candidate/comentions.py | 28 +++++----- src/votekit/metrics/distances.py | 7 ++- src/votekit/models.py | 4 +- .../plots/profiles/multi_profile_bar_plot.py | 27 ++++++---- .../plots/profiles/profile_bar_plot.py | 49 ++++++++++-------- .../pref_profile/cleaned_pref_profile.py | 5 +- src/votekit/pref_profile/pref_profile.py | 29 +++++++---- src/votekit/sorting.py | 48 +++++++++++++++++ src/votekit/types.py | 1 + src/votekit/utils.py | 21 +++----- tests/ballot/test_ScoreBallot.py | 2 +- tests/test_mixed_candidates.py | 12 ++++- 23 files changed, 245 insertions(+), 158 deletions(-) create mode 100644 src/votekit/sorting.py diff --git a/src/votekit/animations.py b/src/votekit/animations.py index 81f91143..ac930a98 100644 --- a/src/votekit/animations.py +++ b/src/votekit/animations.py @@ -223,22 +223,24 @@ class STVAnimation: election (STV): An STV election to animate. title (str, optional): Text to be displayed at the beginning of the animation as a title screen. If ``None``, the title screen will be skipped. Defaults to ``None``. 
- focus (set[str], list[str], "winners", "viable", or "all", optional): A set or list of - names of candidates that should appear on-screen. This is useful for elections - with many candidates. Note that any candidates that won the election are on-screen - automatically, so passing an empty set will result in only elected candidates - appearing on-screen. If ``"winners"``, focus only the elected candidates. - If ``"viable"``, focus only the candidates with more mentions than the election - threshold. If ``"all"``, focus all candidates. Defaults to ``"viable"``. + focus (list[Candidate] | list[str] | list[int] | set[Candidate] | set[str] | set[int], + "winners", "viable", or "all", optional): An iterable of names of candidates that should + appear on-screen. Candidates can be strings, integers, or mix of both. + This is useful for elections with many candidates. Note that any candidates that won the + election are on-screen automatically, so passing an empty set will result in only + elected candidates appearing on-screen. If ``"winners"``, focus only the elected + candidates. If ``"viable"``, focus only the candidates with more mentions than the + election threshold. If ``"all"``, focus all candidates. Defaults to ``"viable"``. nicknames (Optional[dict[Candidate,str] | dict[str, str] | dict[int, str]], optional): - A dictionary mapping candidate names to candidate "nicknames" - to be used in the animation instead. + A dictionary mapping candidate names to candidate "nicknames" to be used in the + animation instead. Candidates can be strings, integers, or mix of both. The keys of ``nicknames`` need not contain every candidate, only the ones for which the user would like to provide a nickname. + Candidates can be strings, integers, or mix of both. 
candidate_colors (Optional[Mapping[Candidate, ParsableManimColor] | Mapping[str, ParsableManimColor]] | Mapping[int, ParsableManimColor], optional): - A dictionary mapping candidate names to colors - that should represent them in the animation. + A dictionary mapping candidate names to colors that should represent them in the + animation. Candidates can be strings, integers, or mix of both. The colors in ``candidate_colors`` will override the bar fill colors provided by ``color_palette``. The keys of ``candidate_colors`` need not contain every candidate, only the ones for which the user would like to provide @@ -255,13 +257,13 @@ class STVAnimation: Attributes: title (str, optional): Text to be displayed at the beginning of the animation as a title screen. - focus (set[str]): A set of names of candidates that should appear on-screen. + focus (set[Candidate]): A set of names of candidates that should appear on-screen. nicknames (dict[str,str], optional): A dictionary mapping candidate names to candidate "nicknames" to be used in the animation instead. color_palette (ColorPalette, optional): A color palette to use for the animation. candidate_dict (dict[Candidate, dict[Candidate, object]]): A dictionary mapping - each candidate name to a dictionary - recording that candidate's support, display name, and color. + each candidate name to a dictionary recording that candidate's support, display name, + and color. Candidates can be strings, integers, or mix of both. events (List[_AnimationEvent]): A list of animation events in order of occurrence. font (str): The name of a font that the user prefers to use if available. delay_mult (float): A multiplier for the delay times between animations. 
@@ -278,12 +280,12 @@ def __init__( self, election: STV, title: Optional[str] = None, - focus: set[Candidate] + focus: list[Candidate] + | list[str] + | list[int] + | set[Candidate] | set[str] | set[int] - | List[Candidate] - | List[str] - | List[int] | Literal["winners", "viable", "all"] = "viable", nicknames: Optional[dict[Candidate, str] | dict[str, str] | dict[int, str]] = None, candidate_colors: Optional[ @@ -382,12 +384,14 @@ def _make_candidate_dict( Args: election (STV): An STV election from which to extract the candidates. - candidate_colors (Mapping[str, ParsableManimColor]): A dictionary mapping candidate - names to their associated color codes in the candidate dictionary. - + candidate_colors (Mapping[Candidate, ParsableManimColor] | + Mapping[str, ParsableManimColor] | Mapping[int, ParsableManimColor]): A dictionary + mapping candidate names to their associated color codes in the candidate dictionary. + Candidates can be strings, integers, or mix of both. Returns: dict[Candidate, dict[str,object]]: A dictionary whose keys are candidate names and whose values are themselves dictionaries with details about each candidate. + Candidates can be strings, integers, or mix of both. """ # Initialize dictionary and add "support" key for each candidate. candidate_dict: dict[Candidate, dict[str, object]] = { @@ -508,7 +512,7 @@ def _get_transferred_votes( election (STV): The election. round_number (int): The number of the round in question. cands_transferred_from (List[Candidate]): A list of the names of the elected or - eliminated candidates. + eliminated candidates. Candidates can be strings, integers, or mix of both. event_type (Literal["win", "elimination"]): ``"win"`` if candidates were elected this round, ``"elimination"`` otherwise. @@ -709,6 +713,7 @@ class ElectionScene(manim.Scene): Args: candidate_dict (dict[Candidate,dict]): A dictionary mapping each candidate to a dictionary of attributes of the candidate. 
+ Candidates can be strings, integers, or mix of both. events (List[_AnimationEvent]): A list of animation events to be constructed and rendered. title (Optional[str], optional): A string to be displayed at the beginning of the animation as a title screen. If ``None``, the animation will skip the title @@ -1079,7 +1084,7 @@ def _animate_win(self, cands_transferred_from: dict[Candidate, dict], event: _Wi Args: cands_transferred_from (dict[Candidate,dict]): A dictionary in which the keys are the candidates elected this round and the values are dictionaries recording - the candidate's attributes. + the candidate's attributes. Candidates can be strings, integers, or mix of both. event (_WinEvent): The event to be animated. """ # Box the winners' names diff --git a/src/votekit/ballot.py b/src/votekit/ballot.py index 1e3d1632..497375b3 100644 --- a/src/votekit/ballot.py +++ b/src/votekit/ballot.py @@ -35,8 +35,8 @@ class Ballot: Attributes: ranking (Optional[tuple[frozenset[Candidate], ...]]): Tuple of candidate ranking. - Entry i of the tuple is a - frozenset of candidates ranked in position i. + Entry i of the tuple is a frozenset of candidates ranked in position i. + Candidates can be strings, integers, or mix of both. weight (float): Weight assigned to a given ballot. voter_set (frozenset[str]): Set of voters who cast the ballot. scores (Optional[Mapping[Candidate, float | int]): Scores for individual candidates. 
@@ -59,7 +59,7 @@ class Ballot: def __new__( cls, *, - ranking: Sequence[Candidate | Iterable[Candidate]], + ranking: RankingLike, scores: None = None, weight: Union[float, int] = 1.0, voter_set: Union[set[str], frozenset[str]] = frozenset(), @@ -358,11 +358,7 @@ def __eq__(self, other): def __hash__(self): return ( - hash( - tuple(sorted((c, s) for c, s in self.scores.items())) - if self.scores is not None - else self.scores - ) + hash(frozenset(self.scores.items()) if self.scores is not None else self.scores) + super().__hash__() ) diff --git a/src/votekit/ballot_generator/std_generator/spacial.py b/src/votekit/ballot_generator/std_generator/spacial.py index c51dbd94..81c23872 100644 --- a/src/votekit/ballot_generator/std_generator/spacial.py +++ b/src/votekit/ballot_generator/std_generator/spacial.py @@ -20,7 +20,7 @@ from votekit.metrics import euclidean_dist from votekit.pref_profile import RankProfile -from votekit.types import Candidate +from votekit.types import Candidate, CandidateListLike # ================================================= # ================= API Functions ================= @@ -195,7 +195,7 @@ def spacial_profile_and_positions_generator( def clustered_spacial_profile_and_positions_generator( number_of_ballots: dict[Candidate, int] | dict[str, int] | dict[int, int], - candidates: list[Candidate] | list[str] | list[int], + candidates: CandidateListLike, voter_dist: Callable[..., np.ndarray] = np.random.normal, voter_dist_kwargs: Optional[Dict[str, Any]] = None, candidate_dist: Callable[..., np.ndarray] = np.random.uniform, @@ -220,10 +220,10 @@ def clustered_spacial_profile_and_positions_generator( Args: number_of_ballots (dict[Candidate, int] | dict[str, int] | dict[int, int]): - The number of voters attributed to each candidate - {candidate string or integer: # voters}. + The number of voters attributed to each candidate {candidate: # voters}. + Candidates can be strings, integers, or mix of both. 
candidates (list[Candidate] | list[str] | list[int]): Candidate names - used when building rankings. + used when building rankings. Candidates can be strings, integers, or mix of both. voter_dist (Callable[..., np.ndarray], optional): Distribution sampler used to draw voter positions centered at each candidate location. Defaults to ``np.random.normal``. @@ -248,7 +248,7 @@ def clustered_spacial_profile_and_positions_generator( A tuple containing the preference profile object, a dictionary with each candidate's position in the metric space, and a matrix where each row is a single voter's position - in the metric space. + in the metric space. Candidates can be strings, integers, or mix of both. """ _number_of_ballots: dict[Candidate, int] = cast(dict[Candidate, int], number_of_ballots) diff --git a/src/votekit/cleaning/rank_profiles_cleaning.py b/src/votekit/cleaning/rank_profiles_cleaning.py index c33be372..66d95638 100644 --- a/src/votekit/cleaning/rank_profiles_cleaning.py +++ b/src/votekit/cleaning/rank_profiles_cleaning.py @@ -9,7 +9,7 @@ ProfileError, RankProfile, ) -from votekit.types import Candidate +from votekit.types import Candidate, CandidateListLike def _iterate_and_clean_ranking_tuples( @@ -196,15 +196,16 @@ def remove_repeat_cands_rank_profile( def remove_cand_from_ranking_row( - removed: Candidate | list[Candidate] | list[str] | list[int], + removed: Candidate | CandidateListLike, ranking_tup: tuple[frozenset, ...], ) -> tuple[frozenset, ...]: """ Removes specified candidate(s) from ranking. Does not condense the resulting ranking. Args: - removed (str | int | list[str | int] | list[str] | list[int]): + removed (Candidate | list[Candidate] | list[str] | list[int]): Candidate or list of candidates to be removed. + Candidates can be strings, integers, or mix of both. ranking_tup (tuple): Ranking to remove candidates from. 
Returns: @@ -399,9 +400,7 @@ def condense_rank_profile( ) -def _is_equiv_for_remove_and_condense( - removed: list[Candidate] | list[str] | list[int], ranking: pd.Series -) -> bool: +def _is_equiv_for_remove_and_condense(removed: CandidateListLike, ranking: pd.Series) -> bool: """ Returns True if the given ranking is equivalent to its removed and condensed form. It is equivalent if the ranking has no candidate in the removed list and either no empty @@ -409,7 +408,8 @@ def _is_equiv_for_remove_and_condense( in the removed list, it is not equivalent. Args: - removed (list[str | int] | list[str] | list[int]): Candidates to be removed. + removed (list[Candidate] | list[str] | list[int]): Candidates to be removed. + Candidates can be strings, integers, or mix of both. ranking (pd.Series): Ranking to check. Returns: @@ -441,7 +441,7 @@ def _is_equiv_for_remove_and_condense( def remove_and_condense_rank_profile( - removed: Candidate | list[Candidate] | list[str] | list[int], + removed: Candidate | CandidateListLike, profile: RankProfile, remove_empty_ballots: bool = True, remove_zero_weight_ballots: bool = True, @@ -461,8 +461,9 @@ def remove_and_condense_rank_profile( is handled correctly, and that ballot equivalence is checked. Args: - removed (str | int | list[str | int] | list[str] | list[int]): + removed (Candidate | list[Candidate] | list[str] | list[int]): Candidate or list of candidates to be removed. + Candidates can be strings, integers, or mix of both. profile (RankProfile): Profile to remove repeated candidates from. remove_empty_ballots (bool, optional): Whether or not to remove ballots that have no ranking or scores as a result of cleaning. Defaults to True. 
diff --git a/src/votekit/cleaning/score_profiles_cleaning.py b/src/votekit/cleaning/score_profiles_cleaning.py index 8226e37b..56de9742 100644 --- a/src/votekit/cleaning/score_profiles_cleaning.py +++ b/src/votekit/cleaning/score_profiles_cleaning.py @@ -1,10 +1,11 @@ from functools import partial -from typing import Callable, Union +from typing import Callable import numpy as np import pandas as pd from votekit.pref_profile import CleanedScoreProfile, ProfileError, ScoreProfile +from votekit.types import Candidate, CandidateListLike def _iterate_and_clean_score_tuples( @@ -139,7 +140,7 @@ def remove_cand_from_score_tuple( def remove_cand_score_profile( - removed: Union[str, list], + removed: Candidate | CandidateListLike, profile: ScoreProfile, remove_empty_ballots: bool = True, remove_zero_weight_ballots: bool = True, @@ -155,7 +156,8 @@ def remove_cand_score_profile( about which ballots were adjusted. Args: - removed (Union[str, list]): Candidate or list of candidates to be removed. + removed (Candidate | list[Candidate] | list[str] | list[int]): Candidate or list of + candidates to be removed. Candidates can be strings, integers, or mix of both. profile (ScoreProfile): Profile to remove candidates from. remove_empty_ballots (bool, optional): Whether or not to remove ballots that have no ranking or scores as a result of cleaning. Defaults to True. @@ -172,7 +174,7 @@ def remove_cand_score_profile( Raises: ProfileError: Profile must only contain score ballots. 
""" - if isinstance(removed, str): + if isinstance(removed, Candidate): removed = [removed] removed_idxs = [i for i, c in enumerate(profile.df.columns) if c in removed] diff --git a/src/votekit/elections/election_types/ranking/plurality_veto.py b/src/votekit/elections/election_types/ranking/plurality_veto.py index c974060a..e7d4a12f 100644 --- a/src/votekit/elections/election_types/ranking/plurality_veto.py +++ b/src/votekit/elections/election_types/ranking/plurality_veto.py @@ -313,9 +313,8 @@ def _veto_loop( Returns: tuple[frozenset[Candidate], frozenset[Candidate]]: A tuple of (eliminated, elected), - where eliminated contains candidates worthy of elimination - and elected contains candidates worthy of election. - Candidates can be strings, integers, or mix of both. + where eliminated contains candidates worthy of elimination and elected contains + candidates worthy of election. Candidates can be strings, integers, or mix of both. """ raise NotImplementedError @@ -449,11 +448,13 @@ def _veto_loop( Each voter decrements the score of their least favorite remaining candidate. Args: - scores (dict[str | int, float]): Mutable score dict, modified in place. + scores (dict[Candidate, float]): Mutable score dict, modified in place. + Candidates can be strings, integers, or mix of both. Returns: - tuple[frozenset[str | int], frozenset[str | int]]: A tuple of (eliminated, elected), + tuple[frozenset[Candidate], frozenset[Candidate]]: A tuple of (eliminated, elected), where each is a set of candidates worthy of elimination or election, respectively. + Candidates can be strings, integers, or mix of both. 
""" eliminated: set[Candidate] = set() diff --git a/src/votekit/elections/election_types/ranking/ranked_pairs.py b/src/votekit/elections/election_types/ranking/ranked_pairs.py index d8cf9d80..7d589f77 100644 --- a/src/votekit/elections/election_types/ranking/ranked_pairs.py +++ b/src/votekit/elections/election_types/ranking/ranked_pairs.py @@ -8,6 +8,7 @@ pairwise_dict, ) from votekit.pref_profile import RankProfile +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate from votekit.utils import tiebreak_set @@ -121,7 +122,9 @@ def _run_step( ) ordered_candidates = [ - candidate for candidate_set in dominating_tiers for candidate in sorted(candidate_set) + candidate + for candidate_set in dominating_tiers + for candidate in sort_candidates_lexicographically(candidate_set) ] elected = tuple(frozenset({c}) for c in ordered_candidates[: self.n_seats]) diff --git a/src/votekit/elections/election_types/ranking/schulze.py b/src/votekit/elections/election_types/ranking/schulze.py index 15a00042..c4e16b61 100644 --- a/src/votekit/elections/election_types/ranking/schulze.py +++ b/src/votekit/elections/election_types/ranking/schulze.py @@ -9,6 +9,7 @@ pairwise_dict, ) from votekit.pref_profile import RankProfile +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate from votekit.utils import tiebreak_set @@ -155,8 +156,8 @@ def _run_step( ordered_candidates.extend(candidate_set) else: tier_key = frozenset(candidate_set) - for s in tiebreak_resolutions[tier_key]: - ordered_candidates.extend(sorted(s)) + for cand_set in tiebreak_resolutions[tier_key]: + ordered_candidates.extend(sort_candidates_lexicographically(cand_set)) elected = tuple(frozenset({c}) for c in ordered_candidates[: self.n_seats]) remaining = tuple(frozenset({c}) for c in ordered_candidates[self.n_seats :]) diff --git a/src/votekit/elections/election_types/ranking/simultaneous_veto.py 
b/src/votekit/elections/election_types/ranking/simultaneous_veto.py index efaaba1f..1a813b9a 100644 --- a/src/votekit/elections/election_types/ranking/simultaneous_veto.py +++ b/src/votekit/elections/election_types/ranking/simultaneous_veto.py @@ -12,6 +12,7 @@ from votekit.elections.election_state import ElectionState from votekit.elections.election_types.ranking.abstract_ranking import RankingElection from votekit.pref_profile import RankProfile +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate, CandidateFloatDictLike from votekit.utils import ( borda_scores, @@ -129,7 +130,7 @@ def __init__( self.candidates = frozenset(grouped_profile.candidates_cast) self._eliminated: set[Candidate] = set("~") - self._sorted_candidates = tuple(sorted(self.candidates)) + self._sorted_candidates = tuple(sort_candidates_lexicographically(self.candidates)) self._candidate_to_idx = {c: i for i, c in enumerate(self._sorted_candidates)} # unmentioned candidates are considered tied for last place @@ -658,7 +659,7 @@ def _run_step( else: elected = tiebreaks[remaining_set][: self.n_seats] # elect top n_seats else: - assert isinstance(eliminated_candidate, str) + assert isinstance(eliminated_candidate, Candidate) eliminated = (frozenset((eliminated_candidate,)),) new_profile = remove_and_condense_rank_profile( removed=eliminated_candidate, diff --git a/src/votekit/elections/election_types/ranking/stv/stv.py b/src/votekit/elections/election_types/ranking/stv/stv.py index 122cbcfe..a16826b6 100644 --- a/src/votekit/elections/election_types/ranking/stv/stv.py +++ b/src/votekit/elections/election_types/ranking/stv/stv.py @@ -730,11 +730,11 @@ def __init__( Args: profile (RankProfile): RankProfile to run election on. n_seats (int): Number of seats to be elected. Defaults to 1. 
- transfer (Callable[[str | int, float, Union[tuple[RankBallot], list[RankBallot]], int], + transfer (Callable[[Candidate, float, Union[tuple[RankBallot], list[RankBallot]], int], tuple[RankBallot, ...]]): Transfer method. Defaults to fractional transfer. Function signature is elected candidate, their number of first-place votes, the list of ballots with them ranked first, and the threshold value. Returns the list of - ballots after transfer. + ballots after transfer. Candidates can be strings, integers, or mix of both. quota (QuotaType, optional): Formula to calculate quota. Accepts "droop" or "hare". Defaults to "droop". simultaneous (bool, optional): True if all candidates who cross threshold in a round are diff --git a/src/votekit/graphs/pairwise_comparison_graph.py b/src/votekit/graphs/pairwise_comparison_graph.py index bbd681b4..8a9baa60 100644 --- a/src/votekit/graphs/pairwise_comparison_graph.py +++ b/src/votekit/graphs/pairwise_comparison_graph.py @@ -1,6 +1,6 @@ from functools import cache from itertools import combinations -from typing import Optional, cast +from typing import Optional, Sequence, cast import matplotlib.patches as mpatches import matplotlib.pyplot as plt @@ -11,6 +11,7 @@ from numpy.typing import NDArray from votekit.pref_profile import RankProfile +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate @@ -106,7 +107,7 @@ def pairwise_dict( candidates_lst = list(profile.candidates_cast) if sort_candidate_pairs: - candidates_lst.sort() + candidates_lst = sort_candidates_lexicographically(candidates_lst) n_cands = len(candidates_lst) @@ -125,12 +126,12 @@ def pairwise_dict( head_to_head_matrix[cand_to_idx[a], cand_to_idx[b]], head_to_head_matrix[cand_to_idx[b], cand_to_idx[a]], ) - for a, b in combinations(sorted(candidates_lst), 2) + for a, b in combinations(sort_candidates_lexicographically(candidates_lst), 2) } return pairwise -def get_dominating_tiers_digraph(graph: nx.DiGraph) -> 
list[set[str]]: +def get_dominating_tiers_digraph(graph: nx.DiGraph) -> list[set[Candidate]]: """ Compute the dominating tiers of the pairwise comparison graph. Candidates in a tier beat all other candidates in lower tiers in head to head comparisons. @@ -141,7 +142,8 @@ def get_dominating_tiers_digraph(graph: nx.DiGraph) -> list[set[str]]: graph (nx.DiGraph): A directed graph representing pairwise comparisons. Returns: - list[set[str]]: Dominating tiers, where the first entry of the list is the highest tier. + list[set[Candidate]]: Dominating tiers, where the first entry of the list is the highest + tier. Candidates can be strings, integers, or mix of both. """ # Condense the head-to-head cycles so we have a directed acyclic graph (DAG) condensed_acyclic_graph = nx.condensation(graph) @@ -193,13 +195,7 @@ def get_dominating_tiers_digraph(graph: nx.DiGraph) -> list[set[str]]: def restrict_pairwise_dict_to_subset( - cand_subset: list[Candidate] - | tuple[Candidate] - | set[Candidate] - | list[str] - | set[str] - | list[int] - | set[int], + cand_subset: Sequence[Candidate], pairwise_dict: dict[tuple[Candidate, Candidate], tuple[float, float]] | dict[tuple[str, str], tuple[float, float]] | dict[tuple[int, int], tuple[float, float]], @@ -210,17 +206,16 @@ def restrict_pairwise_dict_to_subset( where 'a' denotes the number of times A beats B head to head, and 'b' is the reverse. Args: - cands (list[Candidate] | tuple[Candidate] | set[Candidate] - | list[str] | set[str] | list[int] | set[int]): Candidate subset to restrict to. + cand_subset (Sequence[Candidate]): Candidate subset to restrict to. Candidates can be strings, integers, or mix of both. pairwise_dict (dict[tuple[Candidate, Candidate], tuple[float, float]] | dict[tuple[str, str], tuple[float, float]] | dict[tuple[int, int], tuple[float, float]]): Full pairwise - comparison dictionary. Candidates can be strings, integers, or mix of both + comparison dictionary. Candidates can be strings, integers, or mix of both.
Returns: dict[dict[tuple[Candidate, Candidate], tuple[float, float]] : Pairwise dict restricted - to the provided candidates. Candidates can be strings or integers. + to the provided candidates. Candidates can be strings, integers, or mix of both. Raises: ValueError: cand_subset must be at least length 2. @@ -320,13 +315,14 @@ def ties_or_beats(self, candidate: str) -> set[str]: return set(self.pairwise_graph.predecessors(candidate)) @cache - def get_dominating_tiers(self) -> list[set[str]]: + def get_dominating_tiers(self) -> list[set[Candidate]]: """ Compute the dominating tiers of the pairwise comparison graph. Candidates in a tier beat all other candidates in lower tiers in head to head comparisons. Returns: - list[set[str]]: Dominating tiers, where the first entry of the list is the highest tier. + list[set[Candidate]]: Dominating tiers, where the first entry of the list is the highest + tier. Candidates can be strings, integers, or mix of both. """ return get_dominating_tiers_digraph(self.pairwise_graph) @@ -341,12 +337,12 @@ def has_condorcet_winner(self) -> bool: dominating_tiers = self.get_dominating_tiers() return len(dominating_tiers[0]) == 1 - def get_condorcet_winner(self) -> str: + def get_condorcet_winner(self) -> Candidate: """ Returns the condorcet winner. Raises a ValueError if no condorcet winner. Returns: - str: The condorcet winner. + Candidate: The condorcet winner. Candidate can be a string or integer. Raises: ValueError: There is no condorcet winner. 
diff --git a/src/votekit/matrices/candidate/comentions.py b/src/votekit/matrices/candidate/comentions.py index 7f63038e..948421a6 100644 --- a/src/votekit/matrices/candidate/comentions.py +++ b/src/votekit/matrices/candidate/comentions.py @@ -3,15 +3,15 @@ from votekit.ballot import RankBallot from votekit.matrices._utils import _convert_dict_to_matrix from votekit.pref_profile import RankProfile -from votekit.types import Candidate +from votekit.types import Candidate, CandidateListLike -def comention(cands: Candidate | list[Candidate] | list[str] | list[int], ballot: RankBallot): +def comention(cands: Candidate | CandidateListLike, ballot: RankBallot): """ Takes cands and returns true if they all appear on the ballot in the ranking. Args: - cands (Candidate, list[Candidate] | list[str] | list[int]): + cands (Candidate | list[Candidate] | list[str] | list[int]): Candidate name or list of candidate names. Candidates can be strings, integers, or mix of both. ballot (RankBallot): RankBallot. @@ -30,35 +30,35 @@ def comention(cands: Candidate | list[Candidate] | list[str] | list[int], ballot return set(cands).issubset(all_cands) -def comention_above(i: Candidate, j: Candidate, ballot: RankBallot) -> bool: +def comention_above(cand_a: Candidate, cand_b: Candidate, ballot: RankBallot) -> bool: """ - Takes candidates i,j and returns True if i >= j in the ranking. + Takes two candidates and returns True if cand_a >= cand_b in the ranking. Requires that the ballot has a ranking. Args: - i (Candidate): Candidate name. + cand_a (Candidate): Candidate to check as ranked at or above cand_b. Candidates can be strings, integers, or mix of both. - j (Candidate): Candidate name. + cand_b (Candidate): Candidate to check as ranked at or below cand_a. Candidates can be strings, integers, or mix of both. ballot (RankBallot): RankBallot. Returns: - bool: True if both i and j appear in ballot and i >= j.
+ bool: True if both cand_a and cand_b appear in ballot and cand_a >= cand_b. """ if not isinstance(ballot, RankBallot): raise TypeError("Ballot must be of type RankBallot.") if ballot.ranking is None: raise TypeError(f"RankBallot must have a ranking: {ballot}") - i_index, j_index = (-1, -1) + cand_a_index, cand_b_index = (-1, -1) for rank, s in enumerate(ballot.ranking): - if i in s: - i_index = rank - if j in s: - j_index = rank + if cand_a in s: + cand_a_index = rank + if cand_b in s: + cand_b_index = rank - return (i_index >= 0 and j_index >= 0) and (i_index <= j_index) + return (cand_a_index >= 0 and cand_b_index >= 0) and (cand_a_index <= cand_b_index) def comentions_matrix( diff --git a/src/votekit/metrics/distances.py b/src/votekit/metrics/distances.py index ece8564a..5375668a 100644 --- a/src/votekit/metrics/distances.py +++ b/src/votekit/metrics/distances.py @@ -6,6 +6,7 @@ from scipy.stats import kendalltau from votekit.pref_profile import RankProfile, rank_profile_to_ranking_dict +from votekit.sorting import sort_candidates_lexicographically def emd_via_scipy_linear_program( @@ -286,7 +287,9 @@ def __build_simultaneous_profile_distribution( profile1 = pp1.group_ballots() profile2 = pp2.group_ballots() - cand_to_index_mapping = {cand: i for i, cand in enumerate(sorted(profile1.candidates))} + cand_to_index_mapping = { + cand: i for i, cand in enumerate(sort_candidates_lexicographically(profile1.candidates)) + } profile_distribution_dict = dict() @@ -463,7 +466,7 @@ def profiles_to_ndarrys(profiles: list[RankProfile]): for i in range(len(profile_dicts)): election = combined_dict | profile_dicts[i] - elect_distr = [float(election[key]) for key in sorted(election.keys())] + elect_distr = [float(election[key]) for key in election.keys()] electn_ndarry[:, i] = elect_distr return electn_ndarry diff --git a/src/votekit/models.py b/src/votekit/models.py index 66bdc5c8..ef4d31b2 100644 --- a/src/votekit/models.py +++ b/src/votekit/models.py @@ -19,11 +19,11 @@ 
class Election(Generic[P]): Args: profile (PreferenceProfile): The initial profile of ballots. - score_function (Callable[[PreferenceProfile], dict[str, float]], optional): + score_function (Callable[[PreferenceProfile], dict[Candidate, float]], optional): A function that converts profiles to a score dictionary mapping candidates to their current score. Used in creating ElectionState objects and sorting candidates in Round 0. If None, no score dictionary is saved and all candidates are tied in Round 0. - Defaults to None. + Defaults to None. Candidates can be strings, integers, or mix of both. sort_high_low (bool, optional): How to sort candidates based on `score_function`. True sorts from high to low. Defaults to True. diff --git a/src/votekit/plots/profiles/multi_profile_bar_plot.py b/src/votekit/plots/profiles/multi_profile_bar_plot.py index b5b4d83d..24b19c46 100644 --- a/src/votekit/plots/profiles/multi_profile_bar_plot.py +++ b/src/votekit/plots/profiles/multi_profile_bar_plot.py @@ -6,7 +6,7 @@ from votekit.plots.bar_plot import add_null_keys, multi_bar_plot from votekit.pref_profile import PreferenceProfile, RankProfile -from votekit.types import Candidate +from votekit.types import Candidate, CandidateListLike from votekit.utils import ( COLOR_LIST, ballot_lengths, @@ -17,7 +17,6 @@ ProfileT = TypeVar("ProfileT", bound=PreferenceProfile) PlotLabel = str | int -CandidateList: TypeAlias = list[Candidate] | list[str] | list[int] CandidatePlotLabelMapping: TypeAlias = ( Mapping[Candidate, PlotLabel] | Mapping[str, PlotLabel] | Mapping[int, PlotLabel] ) @@ -154,7 +153,7 @@ def multi_profile_borda_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[CandidateList] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -181,17 +180,19 @@ def 
multi_profile_borda_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to decreasing borda scores from the first profile. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str, str] | dict[str | int, str] | dict[int, str], optional): + candidate_legend (dict[Candidate, str] | dict[str, str] | dict[int, str], optional): Dictionary mapping candidates to relableing. Defaults to None. If provided, generates a second legend for data categories. + Candidates can be strings, integers, or mix of both. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. 
threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -268,7 +269,7 @@ def multi_profile_mentions_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[CandidateList] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -295,17 +296,19 @@ def multi_profile_mentions_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to order retrieved from score dictionary. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + candidate_legend (dict[Candidate, str] | dict[str, str] | dict[int, str] | None): Dictionary mapping candidates to relabeling. Defaults to None. If provided, generates a second legend for data categories. + Candidates can be strings, integers, or mix of both. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. 
If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -381,7 +384,7 @@ def multi_profile_fpv_plot( normalize: bool = False, profile_colors: Optional[Mapping[str, str]] = None, bar_width: Optional[float] = None, - candidate_ordering: Optional[CandidateList] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -408,17 +411,19 @@ def multi_profile_fpv_plot( from ``utils`` module. Dictionary keys can be a subset of the profiles. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to order retrieved from score dictionary. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): + candidate_legend (dict[Candidate, str] | dict[str, str] | dict[int, str] | None): Dictionary mapping candidates to relabeling. Defaults to None. If provided, generates a second legend for data categories. + Candidates can be strings, integers, or mix of both. 
relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal diff --git a/src/votekit/plots/profiles/profile_bar_plot.py b/src/votekit/plots/profiles/profile_bar_plot.py index f0bd44e3..b1beab2a 100644 --- a/src/votekit/plots/profiles/profile_bar_plot.py +++ b/src/votekit/plots/profiles/profile_bar_plot.py @@ -5,7 +5,7 @@ from votekit.plots.bar_plot import bar_plot from votekit.pref_profile import PreferenceProfile, RankProfile -from votekit.types import Candidate, CandidateFloatDictLike +from votekit.types import Candidate, CandidateFloatDictLike, CandidateListLike from votekit.utils import ( COLOR_LIST, ballot_lengths, @@ -26,7 +26,7 @@ def profile_bar_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - category_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, + category_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -44,27 +44,30 @@ def profile_bar_plot( Args: profile (RankProfile): Profile to plot statistics for. - stat_function (Callable[[RankProfile], dict[str | int, float] | dict[str, float] + stat_function (Callable[[RankProfile], dict[Candidate, float] | dict[str, float] | dict[int, float]]): Which stat to use for the bar plot. Must be a callable that takes a profile and returns a dict - with str and/or int keys and float values. + with Candidate keys and float values. + Candidates can be strings, integers, or mix of both. profile_label (str, optional): Label for profile. Defaults to "Profile". normalize (bool, optional): Whether or not to normalize data. Defaults to False. profile_color (str, optional): Color to plot. Defaults to the first color from ``COLOR_LIST`` from ``utils`` module. 
bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - category_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + category_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to order retrieved from data dictionary. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - categories_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): - Dictionary mapping data categories to description. Defaults to None. + categories_legend (dict[Candidate, str | int] | dict[str, str | int] | dict[int, str | int] + | None): Dictionary mapping data categories to description. Defaults to None. If provided, generates a second legend for data categories. + Candidates can be strings, integers, or mix of both. threshold_values (Union[list[float], float], optional): List of values to plot horizontal lines at. Can be provided as a list or a single float. 
threshold_kwds (Union[list[dict], dict], optional): List of plotting @@ -112,7 +115,7 @@ def profile_borda_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -140,17 +143,19 @@ def profile_borda_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to decreasing order of Borda scores. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): - Dictionary mapping candidates to alternate label. Defaults to None. + candidate_legend (dict[Candidate, str | int] | dict[str, str | int] | dict[int, str | int] + | None): Dictionary mapping candidates to alternate label. Defaults to None. If provided, generates a second legend. + Candidates can be strings, integers, or mix of both. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. 
If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -207,7 +212,7 @@ def profile_mentions_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -235,17 +240,19 @@ def profile_mentions_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int], optional): - Ordering of x-labels. Defaults to decreasing order of mentions. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. + Defaults to decreasing order of mentions. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str | int, str] | dict[str, str] |dict[int, str] | None): - Dictionary mapping candidates to alternate label. Defaults to None. + candidate_legend (dict[Candidate, str | int] | dict[str, str | int] |dict[int, str | int] + | None): Dictionary mapping candidates to alternate label. Defaults to None. If provided, generates a second legend. 
+ Candidates can be strings, integers, or mix of both. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal @@ -302,7 +309,7 @@ def profile_fpv_plot( normalize: bool = False, profile_color: str = COLOR_LIST[0], bar_width: Optional[float] = None, - candidate_ordering: Optional[list[Candidate] | list[str] | list[int]] = None, + candidate_ordering: Optional[CandidateListLike] = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, title: Optional[str] = None, @@ -330,17 +337,19 @@ def profile_fpv_plot( ``COLOR_LIST`` from ``utils`` module. bar_width (float, optional): Width of bars. Defaults to None which computes the bar width as 0.7 divided by the number of data sets. Must be in the interval :math:`(0,1]`. - candidate_ordering (list[str | int] | list[str] | list[int] | None): Ordering of x-labels. + candidate_ordering (list[Candidate] | list[str] | list[int] | None): Ordering of x-labels. Defaults to decreasing order of first place votes. + Candidates can be strings, integers, or mix of both. x_axis_name (str, optional): Name of x-axis. Defaults to None, which does not plot a name. y_axis_name (str, optional): Name of y-axis. Defaults to None, which does not plot a name. title (str, optional): Title for the figure. Defaults to None, which does not plot a title. show_profile_legend (bool, optional): Whether or not to plot the profile legend. Defaults to False. Is automatically shown if any threshold lines have the keyword "label" passed through ``threshold_kwds``. - candidate_legend (dict[str | int, str] | dict[str, str] | dict[int, str] | None): - Dictionary mapping candidates to alternate label. 
+ candidate_legend (dict[Candidate, str | int] | dict[str, str | int] | dict[int, str | int] + | None): Dictionary mapping candidates to alternate label. Defaults to None. If provided, generates a second legend. + Candidates can be strings, integers, or mix of both. relabel_candidates_with_int (bool, optional): Relabel the candidates with integer labels. Defaults to False. If ``candidate_legend`` is passed, those labels supercede. threshold_values (Union[list[float], float], optional): List of values to plot horizontal diff --git a/src/votekit/pref_profile/cleaned_pref_profile.py b/src/votekit/pref_profile/cleaned_pref_profile.py index ea4f475a..d80a0e18 100644 --- a/src/votekit/pref_profile/cleaned_pref_profile.py +++ b/src/votekit/pref_profile/cleaned_pref_profile.py @@ -177,8 +177,9 @@ class CleanedScoreProfile(ScoreProfile): Args: ballots (tuple[Ballot], optional): Tuple of ``Ballot`` objects. Defaults to empty tuple. - candidates (tuple[str], optional): Tuple of candidate strings. Defaults to empty tuple. - If empty, computes this from any candidate listed on a ballot with positive weight. + candidates (tuple[Candidate], optional): Tuple of candidates. Defaults to empty tuple. + Candidates can be strings, integers, or mix of both. If empty, computes this from any + candidate listed on a ballot with positive weight. parent_profile (ScoreProfile | CleanedScoreProfile): The profile that was altered. If you apply multiple cleaning functions, the parent is always the profile immediately before cleaning, so you need to recurse to get the original, uncleaned profile. 
diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index a0d4ca43..e898d592 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -28,6 +28,7 @@ convert_row_to_rank_ballot, convert_row_to_score_ballot, ) +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate @@ -39,9 +40,10 @@ class PreferenceProfile: Args: ballots (Sequence[Ballot], optional): Tuple of ``Ballot`` objects. Defaults to empty tuple. - candidates (tuple[str | int], optional): Tuple of candidates. + candidates (tuple[Candidate], optional): Tuple of candidates. Candidate can be a str or int. Defaults to empty tuple. If empty, computes this from any candidate listed on a ballot with positive weight. + Candidates can be strings, integers, or mix of both. max_ranking_length (int, optional): The length of the longest allowable ballot, i.e., how many candidates are allowed to be ranked in an election. Defaults to longest observed ballot. @@ -387,10 +389,10 @@ def __init__( @cached_property def df(self) -> pd.DataFrame: """ - Compute the dataframe as a cached property. - The dataframe is internally stored with candidate ids. - The dataframe will be translated to original candidate names. + The dataframe of all ballots cast within a profile. """ + # NOTE: The dataframe is internally stored with candidate integer IDs. The dataframe will be + # translated to original candidate names as a cached property. return self._translate_df_ranking_values(self._df, self.id_candidate_map) def __update_ballot_ranking_data( @@ -544,6 +546,8 @@ def _init_from_rank_ballots( Args: ballots (Sequence[RankBallot,...]): Sequence of ballots. + candidate_id_map (dict[Candidate, int]): Mapping of candidates to integer IDs. + Candidates can be strings, integers, or mix of both. 
Returns: tuple[pd.DataFrame, tuple[Candidate, ...], dict[Candidate, int]]: @@ -870,6 +874,8 @@ def __to_rank_csv_header( Construct the header rows for the PrefProfile a custom CSV format. Args: + candidate_mapping (dict[Candidate, str]): Mapping of candidate to ID. + Candidates can be strings, integers, or mix of both. include_voter_set (bool): Whether or not to include the voter set of each ballot. """ @@ -892,7 +898,8 @@ def __to_rank_csv_ranking_list( Args: rank_ballot (RankBallot): Ballot. - candidate_mapping (dict[str, int]): Mapping candidate names to integers. + candidate_mapping (dict[Candidate, str]): Mapping candidate names to IDs. + Candidates can be strings, integers, or mix of both. """ assert self.max_ranking_length is not None @@ -1108,10 +1115,10 @@ def __init__( @cached_property def df(self) -> pd.DataFrame: """ - Compute the dataframe as a cached property. - The dataframe is internally stored with candidate ids. - The dataframe will be translated to original candidate names. + The dataframe of all ballots cast within a profile. """ + # NOTE: The dataframe is internally stored with candidate integer IDs. The dataframe will be + # translated to original candidate names as a cached property. 
return self._translate_df_score_values(self._df, self.id_candidate_map) def __update_ballot_scores_data( @@ -1261,7 +1268,7 @@ def __init_formatted_score_df( df[list(remaining_cands)] = empty_df_cols col_order = [ candidate_id_map[cand] - for cand in sorted(candidate_id_map.keys(), key=lambda cand: str(cand)) + for cand in sort_candidates_lexicographically(candidate_id_map.keys()) ] + temp_col_order df = df[col_order] @@ -1460,7 +1467,9 @@ def __add__(self, other): new_df = pd.concat([df_1, df_2], ignore_index=True) new_df.index.name = "Ballot Index" - new_candidates = sorted(set(self.candidates).union(other.candidates)) + new_candidates = sort_candidates_lexicographically( + set(self.candidates).union(other.candidates) + ) new_df = new_df[new_candidates + ["Weight", "Voter Set"]] return ScoreProfile( diff --git a/src/votekit/sorting.py b/src/votekit/sorting.py new file mode 100644 index 00000000..f9ef02e4 --- /dev/null +++ b/src/votekit/sorting.py @@ -0,0 +1,48 @@ +from typing import Iterable + +from votekit.types import Candidate + + +def sort_candidates_lexicographically(candidates: Iterable[Candidate]) -> list[Candidate]: + """ + Sort candidates in lexicographical/alphabetical order. + + If candidates are of mixed type (i.e. strings and integers), integer candidates will be ordered + before string candidates with the exception of string candidates that can be cast to an integer. + String candidates that can be cast to a corresponding integer candidate will follow directly + after that integer in lexicographical order. String candidates that can be cast to an integer + but have no corresponding integer candidate will follow all integer candidates, ordered by their + integer value. + + Example: + If we have candidates = ["1", "01", 1, "1.0", 2, "20", "3"], + the sorted candidates will be [1, "01", "1", 2, "3", "20", "1.0"] + "01" and "1" string candidates are equivalent to the 1 integer candidate.
+ "1.0" cannot be converted into an integer and is treated as a non-integer string candidate. + "3" and "20" are string candidates that can be cast to integers but do not have a + corresponding integer candidate. + + Args: + candidates (Iterable[Candidate]): Iterable of candidates to sort + + Returns: + list[Candidate]: sorted candidates + + """ + candidates = list(candidates) + try: + return sorted(candidates) + except TypeError: + int_candidates = [cand for cand in candidates if isinstance(cand, int)] + + def sort_mixed_cands(cand): + if isinstance(cand, int): + return (0, cand, "") + elif isinstance(cand, str) and cand.isdigit(): + str_as_int_cand = int(cand) + if str_as_int_cand in int_candidates: + return (0, str_as_int_cand, cand) + return (1, 0, str_as_int_cand) + return (2, 0, cand) + + return sorted(candidates, key=sort_mixed_cands) diff --git a/src/votekit/types.py b/src/votekit/types.py index 2ccf1710..a1c33cc8 100644 --- a/src/votekit/types.py +++ b/src/votekit/types.py @@ -2,3 +2,4 @@ Candidate: TypeAlias = str | int CandidateFloatDictLike: TypeAlias = dict[Candidate, float] | dict[str, float] | dict[int, float] +CandidateListLike: TypeAlias = list[Candidate] | list[str] | list[int] diff --git a/src/votekit/utils.py b/src/votekit/utils.py index 2826f265..0dd241c1 100644 --- a/src/votekit/utils.py +++ b/src/votekit/utils.py @@ -9,6 +9,7 @@ from votekit.ballot import Ballot, RankBallot from votekit.pref_profile import RankProfile, ScoreProfile +from votekit.sorting import sort_candidates_lexicographically from votekit.types import Candidate, CandidateFloatDictLike COLOR_LIST = [ @@ -64,9 +65,8 @@ def ballots_by_first_cand(profile: RankProfile) -> dict[Candidate, list[RankBall Returns: dict[Candidate, list[RankBallot]]: - A dictionary whose keys are candidates and values are lists of ballots that - have that candidate first. - Candidates can be strings, integers, or mix of both.
+ A dictionary whose keys are candidates and values are lists of ballots that have that + candidate first. Candidates can be strings, integers, or mix of both. """ if not isinstance(profile, RankProfile): raise TypeError("Ballots must have rankings.") @@ -472,16 +472,10 @@ def tiebreak_set( Returns: tuple[frozenset[Candidate],...]: tiebroken ranking - Candidates can be strings, integers, or mix of both. + Candidates can be strings, integers, or mix of both. """ if tiebreak in ["alphabetical", "lexicographic", "alph", "lex"]: - if any(isinstance(cand, int) for cand in r_set): - int_cands = [cand for cand in r_set if isinstance(cand, int)] - raise TypeError( - "Alphabetical/Lexicographic tie breaks are not possible with integer candidates. " - f"{int_cands} are integer candidates." - ) - sorted_cands = sorted([c for c in r_set]) + sorted_cands = sort_candidates_lexicographically([c for c in r_set]) new_ranking = tuple(map(lambda c: frozenset({c}), sorted_cands)) elif tiebreak == "random": @@ -749,8 +743,9 @@ def score_profile_from_ballot_scores( profile (ScoreProfile): Profile to score. Returns: - dict[str | int, float]: + dict[Candidate, float]: Dictionary mapping candidates to scores. + Candidates can be strings, integers, or mix of both. """ scores = {c: 0.0 for c in profile.candidates} if not isinstance(profile, ScoreProfile): @@ -932,7 +927,7 @@ def build_df_from_ballot_samples( ballots_freq_dict: dictionary mapping ballots to sampled frequency. The keys should be in candidate id form - candidates : list of candidates in the profile + candidates (Candidate): list of candidates in the profile. Candidates can be strings, integers, or mix of both. 
returns: pandas df diff --git a/tests/ballot/test_ScoreBallot.py b/tests/ballot/test_ScoreBallot.py index cbfd6aaa..f4784d44 100644 --- a/tests/ballot/test_ScoreBallot.py +++ b/tests/ballot/test_ScoreBallot.py @@ -52,7 +52,7 @@ def test_ballot_is_frozen_del(): def test_ballot_hash(): b1 = ScoreBallot(scores={"A": 1, "B": 2}, weight=2, voter_set={"A"}) - b2 = ScoreBallot(scores={"A": 1, "B": 2}, weight=2, voter_set={"A"}) + b2 = ScoreBallot(scores={"B": 2, "A": 1}, weight=2, voter_set={"A"}) b3 = ScoreBallot(scores={"A": 2, "B": 2}, weight=2, voter_set={"B"}) assert b1 == b2 and hash(b1) == hash(b2) diff --git a/tests/test_mixed_candidates.py b/tests/test_mixed_candidates.py index 32206417..febc8e03 100644 --- a/tests/test_mixed_candidates.py +++ b/tests/test_mixed_candidates.py @@ -4,11 +4,13 @@ from votekit import utils from votekit.ballot_generator import ic_profile_generator -from votekit.elections import STV, Borda, Plurality +from votekit.elections import STV, Borda, Plurality, RankedPairs, SimultaneousVeto MIXED_CANDS = ["A", "B", "1", 1, 2, 3] N_SEATS = 2 +pytestmark = [pytest.mark.filterwarnings("ignore:.*appear as both str and int.*:UserWarning")] + @pytest.fixture(params=[10, 1000, 10000]) def ic_mixed_profile(request): @@ -28,6 +30,14 @@ def ic_mixed_profile(request): lambda profile: Plurality(profile, n_seats=N_SEATS, tiebreak="random"), id="plurality" ), pytest.param(lambda profile: STV(profile, n_seats=N_SEATS, tiebreak="random"), id="stv"), + pytest.param( + lambda profile: RankedPairs(profile, n_seats=N_SEATS, tiebreak="lexicographic"), + id="ranked_pairs", + ), + pytest.param( + lambda profile: SimultaneousVeto(profile, n_seats=N_SEATS, tiebreak="random"), + id="simultaneous_veto", + ), ], ) def test_election_runs_with_mixed_candidates(ic_mixed_profile, make_election): From 61fe8eb79d5ac341acef4a364268058fae91c6a4 Mon Sep 17 00:00:00 2001 From: Grace Gibson Date: Fri, 26 Jun 2026 12:07:13 -0500 Subject: [PATCH 11/11] update to/from_csv to handle 
mixed type cands --- .../pref_profile/csv_utils/rank_csv_utils.py | 42 ++++++++++++---- .../pref_profile/csv_utils/score_csv_utils.py | 41 ++++++++++++---- src/votekit/pref_profile/pref_profile.py | 10 +++- ...p_misformat_header_value_2_non_tuples.csv} | 0 ...isformat_header_value_2_non_valid_type.csv | 34 +++++++++++++ ...p_misformat_header_value_2_non_tuples.csv} | 0 ...isformat_header_value_2_non_valid_type.csv | 32 +++++++++++++ .../rank_profile/test_rank_pp_csv.py | 48 ++++++++++++++++++- .../score_profile/test_score_pp_csv.py | 29 ++++++++++- 9 files changed, 214 insertions(+), 22 deletions(-) rename tests/pref_profile/data/rank_profile/{test_csv_pp_misformat_header_value_2.csv => test_csv_pp_misformat_header_value_2_non_tuples.csv} (100%) create mode 100644 tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv rename tests/pref_profile/data/score_profile/{test_csv_pp_misformat_header_value_2.csv => test_csv_pp_misformat_header_value_2_non_tuples.csv} (100%) create mode 100644 tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv diff --git a/src/votekit/pref_profile/csv_utils/rank_csv_utils.py b/src/votekit/pref_profile/csv_utils/rank_csv_utils.py index b0fab927..d347d099 100644 --- a/src/votekit/pref_profile/csv_utils/rank_csv_utils.py +++ b/src/votekit/pref_profile/csv_utils/rank_csv_utils.py @@ -2,6 +2,8 @@ from typing import Tuple +from votekit.types import Candidate + from ...ballot import RankBallot from .csv_utils import ( _validate_csv_ballot_row_break_idxs, @@ -9,10 +11,12 @@ _validate_csv_ballot_weight, ) +VALID_TYPE_MAP = {"str": str, "int": int} + def _parse_profile_data_from_rank_csv( csv_data: list[list[str]], -) -> Tuple[dict[str, str], int, bool, list[int]]: +) -> Tuple[dict[str, str] | dict[str, Candidate], int, bool, list[int]]: """ Parse the profile data from a PreferenceProfile csv. 
@@ -20,12 +24,18 @@ def _parse_profile_data_from_rank_csv( csv_data (list[list[str]]): Data from csv. Returns: - Tuple[dict[str, str], int, bool, list[int]]: + Tuple[dict[str, str] | dict[str, Candidate], int, bool, list[int]]: inv_candidate_mapping, max_ranking_length, includes_voter_set, break_indices + Candidates can be strings, integers, or mix of both. """ candidate_row = [c_tuple.strip("()").split(":") for c_tuple in csv_data[2]] - inv_candidate_mapping = {prefix: cand for cand, prefix in candidate_row} + if len(candidate_row[0]) == 3: + inv_candidate_mapping = { + prefix: VALID_TYPE_MAP[cand_type](cand) for cand, cand_type, prefix in candidate_row + } + else: + inv_candidate_mapping = {prefix: cand for cand, prefix in candidate_row} max_ranking_length = int(csv_data[4][0]) @@ -46,7 +56,7 @@ def _parse_ballot_from_rank_csv( ballot_row: list[str], includes_voter_set: bool, break_indices: list[int], - inv_candidate_mapping: dict[str, str], + inv_candidate_mapping: dict[str, str] | dict[str, Candidate], ) -> RankBallot: """ Parse a ballot from a PreferenceProfile csv row. @@ -56,8 +66,8 @@ def _parse_ballot_from_rank_csv( includes_voter_set (bool): Whether or not the csv contains voter sets. break_indices (list[int]): Where the columns of the csv change from one data type to another. - inv_candidate_mapping (dict[str, str]): The inverted candidate mapping of prefix - to the cand. + inv_candidate_mapping (dict[str, str] | dict[str, Candidate]): The inverted candidate + mapping of prefix to the cand. Candidates can be strings, integers, or mix of both. Returns: RankBallot: Ballot formatted from row of csv. @@ -143,11 +153,21 @@ def _validate_rank_csv_header_values(header_data: list[list[str]]): raise ValueError( ( "csv file is improperly formatted. Row 2 should contain tuples mapping candidates " - "to their unique prefixes. For example, (Chris:Ch), (Colleen: Co). " - f"Not {header_data[2]}. " + boiler_plate + "to their unique prefixes. 
For example, (Chris:0), (Colleen: 1) or (1:int:0)," + f" (A:str:1). Not {header_data[2]}. " + boiler_plate ) ) + candidate_tuples = [c_tuple.strip("()").split(":") for c_tuple in header_data[2]] + if len(candidate_tuples[0]) == 3: + _, candidate_types, _ = zip(*candidate_tuples) + if any(cand_type not in VALID_TYPE_MAP.keys() for cand_type in candidate_types): + raise ValueError( + "csv file is improperly formatted. Row 2 should contain candidate types of str or" + f" int, not {set(candidate_types) - set(VALID_TYPE_MAP)} within" + f" {header_data[2]}. " + boiler_plate + ) + if len(header_data[4]) != 1: raise ValueError( ( @@ -365,13 +385,17 @@ def _validate_rank_csv_ballot_rows(csv_data: list[list[str]]): Raises: ValueError: If a row of the csv is improperly formatted for VoteKit. + """ candidate_row = csv_data[2] max_ranking_row = csv_data[4] include_voter_set_row = csv_data[6] candidate_tuples = [c_tuple.strip("()").split(":") for c_tuple in candidate_row] - candidates, candidate_prefixes = zip(*candidate_tuples) + if len(candidate_tuples[0]) == 3: + candidates, candidate_types, candidate_prefixes = zip(*candidate_tuples) + else: + candidates, candidate_prefixes = zip(*candidate_tuples) max_ranking_length = int(max_ranking_row[0]) include_voter_set = include_voter_set_row[0] == "True" diff --git a/src/votekit/pref_profile/csv_utils/score_csv_utils.py b/src/votekit/pref_profile/csv_utils/score_csv_utils.py index 0b8966ea..8006824a 100644 --- a/src/votekit/pref_profile/csv_utils/score_csv_utils.py +++ b/src/votekit/pref_profile/csv_utils/score_csv_utils.py @@ -18,11 +18,12 @@ EQ_SIGN_ROW = 5 COLUMN_NAMES_ROW = 6 DATA_START_ROW = COLUMN_NAMES_ROW + 1 +VALID_TYPE_MAP = {"str": str, "int": int} def _parse_profile_data_from_score_csv( csv_data: list[list[str]], -) -> Tuple[dict[str, str], bool, list[int]]: +) -> Tuple[dict[str, str] | dict[str, Candidate], bool, list[int]]: """ Parse the profile data from a ScoreProfile csv.
@@ -30,12 +31,18 @@ def _parse_profile_data_from_score_csv( csv_data (list[list[str]]): Data from csv. Returns: - Tuple[dict[str, str], bool, list[int]]: + Tuple[dict[str, str] | dict[str, Candidate], bool, list[int]]: inv_candidate_mapping, max_ranking_length, includes_voter_set, break_indices + Candidates can be strings, integers, or mix of both. """ candidate_row = [c_tuple.strip("()").split(":") for c_tuple in csv_data[2]] - inv_candidate_mapping = {prefix: cand for cand, prefix in candidate_row} + if len(candidate_row[0]) == 3: + inv_candidate_mapping = { + prefix: VALID_TYPE_MAP[cand_type](cand) for cand, cand_type, prefix in candidate_row + } + else: + inv_candidate_mapping = {prefix: cand for cand, prefix in candidate_row} includes_voter_set = csv_data[VOTER_SET_VALUE_ROW][0] == "True" @@ -53,7 +60,7 @@ def _parse_ballot_from_score_csv( ballot_row: list[str], includes_voter_set: bool, break_indices: list[int], - inv_candidate_mapping: dict[str, str], + inv_candidate_mapping: dict[str, str] | dict[str, Candidate], ) -> ScoreBallot: """ Parse a ballot from a ScoreProfile csv row. @@ -63,8 +70,8 @@ def _parse_ballot_from_score_csv( includes_voter_set (bool): Whether or not the csv contains voter sets. break_indices (list[int]): Where the columns of the csv change from one data type to another. - inv_candidate_mapping (dict[str, str]): The inverted candidate mapping of prefix - to the cand. + inv_candidate_mapping (dict[str, str] | dict[str, Candidate]): The inverted candidate + mapping of prefix to the cand. Candidates can be strings, integers, or mix of both. Returns: ScoreBallot: Ballot formatted from row of csv. @@ -105,16 +112,29 @@ def _validate_score_csv_header_values(header_data: list[list[str]]): "PreferenceProfile.to_csv()." 
) - if any(char not in c_tuple for c_tuple in header_data[2] for char in "(:)"): + if any( + char not in c_tuple for c_tuple in header_data[CANDIDATES_MAPPING_ROW] for char in "(:)" + ): raise ValueError( ( f"csv file is improperly formatted. Row {CANDIDATES_MAPPING_ROW} should contain " "tuples mapping candidates " - "to their unique prefixes. For example, (Chris:Ch), (Colleen: Co). " + "to their unique prefixes. For example, (Chris:0), (Colleen: 1) or (1:int:0)," + " (A:str:1) " f"Not {header_data[2]}. " + boiler_plate ) ) + candidate_tuples = [c_tuple.strip("()").split(":") for c_tuple in header_data[2]] + if len(candidate_tuples[0]) == 3: + _, candidate_types, _ = zip(*candidate_tuples) + if any(cand_type not in VALID_TYPE_MAP.keys() for cand_type in candidate_types): + raise ValueError( + "csv file is improperly formatted. Row 2 should contain candidate types of str or" + f" int, not {set(candidate_types) - set(VALID_TYPE_MAP)}" + f" within {header_data[2]}. " + boiler_plate + ) + if len(header_data[VOTER_SET_VALUE_ROW]) != 1 or header_data[VOTER_SET_VALUE_ROW][0] not in [ "True", "False", @@ -307,7 +327,10 @@ def _validate_score_csv_ballot_rows(csv_data: list[list[str]]): include_voter_set_row = csv_data[VOTER_SET_VALUE_ROW] candidate_tuples = [c_tuple.strip("()").split(":") for c_tuple in candidate_row] - candidates, candidate_prefixes = zip(*candidate_tuples) + if len(candidate_tuples[0]) == 3: + candidates, candidate_types, candidate_prefixes = zip(*candidate_tuples) + else: + candidates, candidate_prefixes = zip(*candidate_tuples) include_voter_set = include_voter_set_row[0] == "True" diff --git a/src/votekit/pref_profile/pref_profile.py b/src/votekit/pref_profile/pref_profile.py index e898d592..af14e464 100644 --- a/src/votekit/pref_profile/pref_profile.py +++ b/src/votekit/pref_profile/pref_profile.py @@ -882,7 +882,10 @@ def __to_rank_csv_header( header = [ ["VoteKit RankProfile"], ["Candidates"], - [f"({str(c)}:{cand_label})" for c, cand_label
in candidate_mapping.items()], + [ + f"({str(c)}:{type(c).__name__}:{cand_label})" + for c, cand_label in candidate_mapping.items() + ], ] header += [["Max Ranking Length"], [str(self.max_ranking_length)]] header += [["Includes Voter Set"], [str(include_voter_set)]] @@ -1547,7 +1550,10 @@ def __to_score_csv_header( header = [ ["VoteKit ScoreProfile"], ["Candidates"], - [f"({c}:{cand_label})" for c, cand_label in candidate_mapping.items()], + [ + f"({c}:{type(c).__name__}:{cand_label})" + for c, cand_label in candidate_mapping.items() + ], ] header += [["Includes Voter Set"], [str(include_voter_set)]] header += [["="] * 10] diff --git a/tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2.csv b/tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_tuples.csv similarity index 100% rename from tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2.csv rename to tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_tuples.csv diff --git a/tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv b/tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv new file mode 100644 index 00000000..d903c306 --- /dev/null +++ b/tests/pref_profile/data/rank_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv @@ -0,0 +1,34 @@ +VoteKit RankProfile +Candidates +(Aleine:str:Alei),(Alex:str:Alex),(1:int:C),(1.0:float:D),(E:str:E) +Max Ranking Length +3 +Includes Voter Set +True +=,=,=,=,=,=,=,=,=,= +Ranking_1,Ranking_2,Ranking_3,&,Weight,&,Voter Set +"{'Alex', 'Alei'}",{},{'C'},&,1.5,&,Chris,Peter +"{'Alex', 'Alei'}",{},{'C'},&,0.5,&,Moon +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +"{'Alex', 'Alei'}",{},{'C'},&,1.5,&,Chris,Peter +"{'Alex', 'Alei'}",{},{'C'},&,0.5,&,Moon +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +"{'Alex', 
'Alei'}",{},{'C'},&,1.5,&,Chris,Peter +"{'Alex', 'Alei'}",{},{'C'},&,0.5,&,Moon +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +"{'Alex', 'Alei'}",{},{'C'},&,1.5,&,Chris,Peter +"{'Alex', 'Alei'}",{},{'C'},&,0.5,&,Moon +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +"{'Alex', 'Alei'}",{},{'C'},&,1.5,&,Chris,Peter +"{'Alex', 'Alei'}",{},{'C'},&,0.5,&,Moon +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& +{'Alei'},{'Alex'},,&,1.0,& diff --git a/tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2.csv b/tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_tuples.csv similarity index 100% rename from tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2.csv rename to tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_tuples.csv diff --git a/tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv b/tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv new file mode 100644 index 00000000..640e1b34 --- /dev/null +++ b/tests/pref_profile/data/score_profile/test_csv_pp_misformat_header_value_2_non_valid_type.csv @@ -0,0 +1,32 @@ +VoteKit ScoreProfile +Candidates +(Alex:str:0),(Allen:str:1),(1:int:2),(1.0:float:3),(D:str:4) +Includes Voter Set +True +=,=,=,=,=,=,=,=,=,= +0,1,2,3,4,&,Weight,&,Voter Set +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +5.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +5.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +5.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& 
+2.0,4.0,1.0,0.0,0.0,&,1.0,& +5.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,0.0,1.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +2.0,4.0,1.0,0.0,0.0,&,1.0,& +5.0,4.0,1.0,0.0,0.0,&,1.0,& diff --git a/tests/pref_profile/rank_profile/test_rank_pp_csv.py b/tests/pref_profile/rank_profile/test_rank_pp_csv.py index 6042a9aa..1d3385d0 100644 --- a/tests/pref_profile/rank_profile/test_rank_pp_csv.py +++ b/tests/pref_profile/rank_profile/test_rank_pp_csv.py @@ -49,6 +49,49 @@ def test_csv_bijection_rankings(tmp_path): assert profile_rankings == read_profile +def test_csv_mixed_cand_rankings(tmp_path): + profile_rankings = RankProfile( + ballots=( + RankBallot( + ranking=({"A", "B"}, frozenset(), {"1"}), + voter_set={"Chris", "Peter"}, + weight=1.5, + ), + RankBallot( + ranking=({1, 2}, frozenset(), {3}), + voter_set={"Moon"}, + weight=0.5, + ), + RankBallot( + ranking=( + {"A"}, + {1}, + ), + ), + RankBallot( + ranking=( + {2}, + {"1"}, + ), + ), + RankBallot( + ranking=( + {"B"}, + {"A"}, + ), + ), + ) + * 5, + max_ranking_length=3, + candidates=["A", "B", "1", 1, 2, 3], + ) + + out = str(tmp_path / "test_csv_pp_mixed_cand_rankings.csv") + profile_rankings.to_csv(out, include_voter_set=True) + read_profile = RankProfile.from_csv(out) + assert profile_rankings == read_profile + + def test_csv_filepath_error(): with pytest.raises(ValueError, match="File path must be provided."): RankProfile().to_csv("") @@ -88,7 +131,10 @@ def test_csv_misformatted_header_rows_error(): def test_csv_misformatted_header_values_error(): with pytest.raises(ValueError, match="Row 2 should contain tuples mapping candidates"): - RankProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2.csv") + RankProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2_non_tuples.csv") + + with pytest.raises(ValueError, match="Row 2 should contain candidate types of str or int"): + RankProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2_non_valid_type.csv") 
with pytest.raises( ValueError, diff --git a/tests/pref_profile/score_profile/test_score_pp_csv.py b/tests/pref_profile/score_profile/test_score_pp_csv.py index 8c1f59cf..5f395793 100644 --- a/tests/pref_profile/score_profile/test_score_pp_csv.py +++ b/tests/pref_profile/score_profile/test_score_pp_csv.py @@ -30,6 +30,30 @@ def test_csv_bijection_scores(tmp_path): assert profile == read_profile +def test_csv_mixed_cand_scores(tmp_path): + profile = ScoreProfile( + ballots=( + ScoreBallot(scores={"1": 2, "A": 4, "B": 1}, voter_set={"Chris"}), + ScoreBallot(scores={1: 2, 2: 4, 3: 1}, voter_set={"Peter", "Moon"}), + ScoreBallot( + scores={"A": 2, "B": 4, 1: 1}, + ), + ScoreBallot( + scores={3: 2, 2: 4, "1": 1}, + ), + ScoreBallot( + scores={"B": 5, 1: 4, "A": 1}, + ), + ) + * 5, + candidates=["A", "B", "1", 1, 2, 3], + ) + out = str(tmp_path / "test_csv_pp_mixed_cand_scores.csv") + profile.to_csv(out, include_voter_set=True) + read_profile = ScoreProfile.from_csv(out) + assert profile == read_profile + + def test_csv_filepath_error(): with pytest.raises(ValueError, match="File path must be provided."): ScoreProfile().to_csv("") @@ -63,7 +87,10 @@ def test_csv_misformatted_header_rows_error(): def test_csv_misformatted_header_values_error(): with pytest.raises(ValueError, match="Row 2 should contain tuples mapping candidates"): - ScoreProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2.csv") + ScoreProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2_non_tuples.csv") + + with pytest.raises(ValueError, match="Row 2 should contain candidate types of str or int"): + ScoreProfile.from_csv(f"{filepath}/test_csv_pp_misformat_header_value_2_non_valid_type.csv") with pytest.raises( ValueError,