diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py
index 20be9ca..eacde5f 100644
--- a/scikit_posthocs/_plotting.py
+++ b/scikit_posthocs/_plotting.py
@@ -1,5 +1,5 @@
-from copy import deepcopy
 from typing import Dict, List, Optional, Set, Tuple, Union
+from itertools import combinations
 
 import numpy as np
 from matplotlib import colors, pyplot
@@ -42,13 +42,12 @@ def sign_array(p_values: Union[List, np.ndarray, DataFrame], alpha: float = 0.05
            [ 1, -1,  0],
            [ 1,  0, -1]])
     """
-    sig_array = deepcopy(np.array(p_values))
-    sig_array[sig_array == 0] = 1e-10
-    sig_array[sig_array > alpha] = 0
-    sig_array[(sig_array < alpha) & (sig_array > 0)] = 1
-    np.fill_diagonal(sig_array, -1)
-
-    return sig_array
+    p_values = np.asarray(p_values)
+    if (p_values < 0).any():
+        raise ValueError("P values matrix must be non-negative")
+    result = (p_values <= alpha).astype(np.int8)  # Returns a copy
+    np.fill_diagonal(result, -1)
+    return result
 
 
 def sign_table(
@@ -500,7 +499,6 @@ def critical_difference_diagram(
     markers = []
     elbows = []
     labels = []
-    crossbars = []
 
     # True if pairwise comparison is NOT significant
     adj_matrix = DataFrame(
@@ -518,45 +516,42 @@ def critical_difference_diagram(
         ranks.iloc[: len(ranks) // 2],
         ranks.iloc[len(ranks) // 2 :],
     )
-    # points_left, points_right = np.array_split(ranks.sort_values(), 2)
-
-    # Sets of points under the same crossbar
-    crossbar_sets = _find_maximal_cliques(adj_matrix)
 
-    # Sort by lowest rank and filter single-valued sets
-    crossbar_sets = sorted(
-        (x for x in crossbar_sets if len(x) > 1), key=lambda x: ranks[list(x)].min()
+    # Arrays of ranks for each crossbar (each crossbar is a maximal clique)
+    crossbar_ranks = (
+        ranks.reindex(bar).sort_values().values
+        for bar in _find_maximal_cliques(adj_matrix)
+        if len(bar) > 1
     )
 
-    # Create stacking of crossbars: for each level, try to fit the crossbar,
+    # Create stacking of crossbars: for each level, try to fit the widest crossbar,
     # so that it does not intersect with any other in the level. If it does not
     # fit in any level, create a new level for it.
-    crossbar_levels: list[list[set]] = []
-    for bar in crossbar_sets:
-        for level, bars_in_level in enumerate(crossbar_levels):
-            if not any(bool(bar & bar_in_lvl) for bar_in_lvl in bars_in_level):
-                ypos = -level - 1
-                bars_in_level.append(bar)
+    crossbar_levels: list[list[np.ndarray]] = []
+    for bar_i in sorted(crossbar_ranks, key=lambda x: x[0] - x[-1]):
+        for bars_in_level in crossbar_levels:
+            if all(
+                (bar_i[-1] < bar_j[0]) or (bar_i[0] > bar_j[-1])  # True if no intersection
+                for bar_j in bars_in_level
+            ):
+                bars_in_level.append(bar_i)
                 break
         else:
-            ypos = -len(crossbar_levels) - 1
-            crossbar_levels.append([bar])
-
-        crossbars.append(
-            ax.plot(
-                # Adding a separate line between each pair enables showing a
-                # marker over each elbow with crossbar_props={'marker': 'o'}.
-                [ranks[i] for i in bar],
-                [ypos] * len(bar),
-                **crossbar_props,
-            )
-        )
+            crossbar_levels.append([bar_i])  # Create a new level
+
+    # Plot crossbars.
+    # We add a separate segment between each elbow, enabling the display of a
+    # marker over each elbow, e.g. crossbar_props={'marker': 'o'}.
+    crossbars = [
+        [ax.plot(bar, [-i] * len(bar), **crossbar_props) for bar in level]
+        for i, level in enumerate(crossbar_levels)
+    ]
 
-    lowest_crossbar_ypos = -len(crossbar_levels)
+    elbow_start_y = -len(crossbars)
 
     def plot_items(points, xpos, label_fmt, color_palette, label_props):
         """Plot each marker + elbow + label."""
-        ypos = lowest_crossbar_ypos - 1
+        ypos = elbow_start_y
         for idx, (label, rank) in enumerate(points.items()):
             if not color_palette or len(color_palette) == 0:
                 elbow, *_ = ax.plot(
diff --git a/tests/test_posthocs.py b/tests/test_posthocs.py
index ef0965c..8ba1bdb 100644
--- a/tests/test_posthocs.py
+++ b/tests/test_posthocs.py
@@ -160,6 +160,20 @@ def test_find_maximal_cliques_6x6(self):
             set(map(frozenset, expected)),
         )
 
+    def test_cd_diagram_single_bar(self):
+        index = list("abcdef")
+        ranks = Series([2.1, 1.2, 4.5, 3.2, 5.7, 6.5], index=index)
+        sig_matrix = DataFrame(
+            1,  # No significant differences
+            index=index,
+            columns=index,
+        )
+        output = splt.critical_difference_diagram(ranks, sig_matrix)
+        self.assertEqual(len(output["markers"]), len(ranks))
+        self.assertEqual(len(output["elbows"]), len(ranks))
+        self.assertEqual(len(output["labels"]), len(ranks))
+        self.assertEqual(len(output["crossbars"]), 1)
+
     def test_cd_diagram_number_of_artists(self):
         index = list("abcdef")
         ranks = Series([2.1, 1.2, 4.5, 3.2, 5.7, 6.5], index=index)
@@ -182,6 +196,60 @@ def test_cd_diagram_number_of_artists(self):
         self.assertEqual(len(output["labels"]), len(ranks))
         self.assertEqual(len(output["crossbars"]), 2)
 
+    def test_cd_diagram_all_significant(self):
+        index = list("abcdef")
+        ranks = Series(np.arange(len(index)), index=index)
+        sig_matrix = DataFrame(
+            np.eye(len(index)),  # All significant
+            index=index,
+            columns=index,
+        )
+        output = splt.critical_difference_diagram(ranks, sig_matrix)
+        self.assertEqual(len(output["markers"]), len(ranks))
+        self.assertEqual(len(output["elbows"]), len(ranks))
+        self.assertEqual(len(output["labels"]), len(ranks))
+        self.assertEqual(len(output["crossbars"]), 0)
+
+    def test_cd_diagram_non_intersecting_crossbars(self):
+        index = list("abcdef")
+        # Swap the ranks of 'c' and 'd'
+        ranks = Series([0, 1, 3, 2, 4, 5], index=index)
+        sig_matrix = DataFrame(
+            [
+                [1, 1, 1, 0, 0, 0],
+                [1, 1, 1, 0, 0, 0],
+                [1, 1, 1, 0, 0, 0],
+                [0, 0, 0, 1, 1, 1],
+                [0, 0, 0, 1, 1, 1],
+                [0, 0, 0, 1, 1, 1],
+            ],
+            index=index,
+            columns=index,
+        )
+        output = splt.critical_difference_diagram(ranks, sig_matrix)
+        crossbars = output["crossbars"]
+        y_positions = {bar[0].get_ydata()[0] for level in crossbars for bar in level}
+        self.assertEqual(len(crossbars), len(y_positions))
+
+    def test_cd_diagram_normal_distributions(self):
+        rng = np.random.default_rng(0)
+        experiment_values = rng.normal(
+            loc=[-5.2, -6, -2.1, -1.7, -6.4],
+            scale=np.full(fill_value=.1, shape=(10, 1)),
+        )
+        df = DataFrame(experiment_values, columns=["A", "B", "C", "D", "E"])
+
+        test_result = sp.posthoc_conover_friedman(df.to_numpy())
+        average_ranks = df.rank(ascending=False, axis=1).mean(axis=0)
+
+        output = splt.critical_difference_diagram(
+            ranks=average_ranks, sig_matrix=test_result
+        )
+        self.assertEqual(len(output["markers"]), df.shape[1])
+        self.assertEqual(len(output["elbows"]), df.shape[1])
+        self.assertEqual(len(output["labels"]), df.shape[1])
+        self.assertEqual(len(output["crossbars"]), 0)
+
     # Outliers tests
     def test_outliers_iqr(self):
         x = np.array([4, 5, 6, 10, 12, 4, 3, 1, 2, 3, 23, 5, 3])