From 2a7af8685b40a035f5121318ba4b4ea95b0f9217 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 23 May 2021 21:14:54 +0200 Subject: [PATCH 1/8] Add `simpleunicodehistogram` Co-authored-by: C. Brenhin Keller --- src/plot_hist.jl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 src/plot_hist.jl diff --git a/src/plot_hist.jl b/src/plot_hist.jl new file mode 100644 index 0000000..1aa34ba --- /dev/null +++ b/src/plot_hist.jl @@ -0,0 +1,35 @@ +function simpleunicodehistogram(x::AbstractArray; nbins::Integer=10, plotwidth::Integer=30, showcounts::Bool=true, xlabel="", ylabel="") + # Find bounds, round them nicely + l, u = extrema(x) + digitsneeded = ceil(Int, -log10(u-l))+1 + l = floor(l, digits=digitsneeded) + u = ceil(u, digits=digitsneeded) + + # Fill histogram + dx = (u - l) / nbins + histcounts = fill(0, nbins) + @inbounds for i ∈ 1:length(x) + index = ceil(Int, (x[i] - l) / dx) + if 1 <= index <= nbins + histcounts[index] += 1 + end + end + binedges = range(l,u,length=nbins+1) + + # Print the histogram + blocks = [" ","▏","▎","▍","▌","▋","▊","▉","█","█"] + scale = plotwidth/maximum(histcounts) + lowerlabels = string.(round.(binedges[1:end-1], digits=digitsneeded+ceil(Int,log10(nbins)-1))) + upperlabels = string.(round.(binedges[2:end], digits=digitsneeded+ceil(Int,log10(nbins)-1))) + longestlower = maximum(length.(lowerlabels)) + longestupper = maximum(length.(upperlabels)) + println(ylabel*"\n") + for i=1:nbins + nblocks = histcounts[i] * scale + blockstring = repeat("█", floor(Int, nblocks)) * blocks[ceil(Int,(nblocks - floor(nblocks))*8)+1] + println(" (" * lowerlabels[i] * " "^(longestlower - length(lowerlabels[i])) * + " - " * upperlabels[i] * " "^(longestupper - length(upperlabels[i])) * + "] " * blockstring * (showcounts ? 
" $(histcounts[i])" : "")) + end + println("\n" * " "^max(plotwidth÷2 + 6 - length(xlabel)÷2, 0) * xlabel) +end From 03ccac4fe640d191220f82391eecda194bb840a2 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 23 May 2021 22:09:58 +0200 Subject: [PATCH 2/8] tweak `simpleunicodehistogram` for outliers --- Project.toml | 2 -- src/BenchmarkHistograms.jl | 8 +++--- src/plot_hist.jl | 55 +++++++++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index 6f46ec3..36ab0b0 100644 --- a/Project.toml +++ b/Project.toml @@ -7,11 +7,9 @@ version = "0.1.1" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" [compat] BenchmarkTools = "0.7, 1.0" -UnicodePlots = "1.3" julia = "1" [extras] diff --git a/src/BenchmarkHistograms.jl b/src/BenchmarkHistograms.jl index 8535156..dce7564 100644 --- a/src/BenchmarkHistograms.jl +++ b/src/BenchmarkHistograms.jl @@ -1,6 +1,5 @@ module BenchmarkHistograms -using UnicodePlots using Statistics using Printf using BenchmarkTools: BenchmarkTools @@ -20,7 +19,7 @@ export @benchmark const NBINS = Ref(0) Controls the number of histogram bins used. -When `NBINS[] <= 0`, the number is chosen automatically by UnicodePlots. +When `NBINS[] <= 0`, the number is chosen automatically by Sturge's rule (i.e. `log2(length(data))+1`). """ const NBINS = Ref(0) @@ -53,7 +52,7 @@ function Base.show(io::IO, ::MIME"text/plain", bp::BenchmarkHistogram; nbins=NBI println(io, "samples: ", length(t), "; evals/sample: ", t.params.evals, "; memory estimate: ", memorystr, "; allocs estimate: ", allocsstr) if length(t) > 0 bin_arg = nbins <= 0 ? 
NamedTuple() : (; nbins=nbins) - show(io, histogram(t.times; ylabel="ns", xlabel="Counts", bin_arg...)) + simple_unicode_histogram(io, t.times; ylabel="ns", xlabel="Counts", bin_arg...) println(io) end print(io, "min: ", minstr, "; mean: ", meanstr, "; median: ", medstr, "; max: ", maxstr, ".") @@ -70,4 +69,7 @@ end # so that we don't have to rely on internals. include("vendor.jl") +# The code to draw the histograms +include("plot_hist.jl") + end diff --git a/src/plot_hist.jl b/src/plot_hist.jl index 1aa34ba..0876d19 100644 --- a/src/plot_hist.jl +++ b/src/plot_hist.jl @@ -1,35 +1,60 @@ -function simpleunicodehistogram(x::AbstractArray; nbins::Integer=10, plotwidth::Integer=30, showcounts::Bool=true, xlabel="", ylabel="") - # Find bounds, round them nicely - l, u = extrema(x) - digitsneeded = ceil(Int, -log10(u-l))+1 - l = floor(l, digits=digitsneeded) - u = ceil(u, digits=digitsneeded) +# Modified from https://github.com/JuliaCI/BenchmarkTools.jl/pull/180#issuecomment-711128281 +function simple_unicode_histogram(io::IO, x::AbstractArray; nbins::Integer=ceil(Int, log2(length(x))+1), plotwidth::Integer=30, showcounts::Bool=true, xlabel="", ylabel="") + # Find bounds. Our naive attempt is to use equal width + # bins from the minimum to the maximum. + l, M = extrema(x) + initial_dx = (M - l) / nbins + + # Now, we check: if we don't have some big outliers, we'd expect + # the 99.9 percentile, `Q`, to be within a few bins of the maximum. + # Here, we choose 2. If it is not, then we decide that indeed + # there are outliers. We will instead divide the range from + # the minimum to `Q` equally with `nbins-1` bins, and then reserve + # the last bin to hold everything greater than `Q`. + Q = quantile(x, 0.999) + truncate = M - Q > 2*initial_dx + + # our "upper bound" + u = truncate ? Q : M # Fill histogram - dx = (u - l) / nbins histcounts = fill(0, nbins) - @inbounds for i ∈ 1:length(x) - index = ceil(Int, (x[i] - l) / dx) + dx = truncate ? 
(u - l) / (nbins - 1) : initial_dx + for xi in x + index = ceil(Int, (xi - l) / dx) if 1 <= index <= nbins histcounts[index] += 1 + else + histcounts[end] += 1 end end - binedges = range(l,u,length=nbins+1) + + if truncate + binedges = [range(l,u,length=nbins); M] + else + binedges = range(l,u,length=nbins+1) + end # Print the histogram + digitsneeded = ceil(Int, -log10(u-l))+1 blocks = [" ","▏","▎","▍","▌","▋","▊","▉","█","█"] scale = plotwidth/maximum(histcounts) lowerlabels = string.(round.(binedges[1:end-1], digits=digitsneeded+ceil(Int,log10(nbins)-1))) upperlabels = string.(round.(binedges[2:end], digits=digitsneeded+ceil(Int,log10(nbins)-1))) longestlower = maximum(length.(lowerlabels)) longestupper = maximum(length.(upperlabels)) - println(ylabel*"\n") + !isempty(ylabel) && println(io, ylabel, "\n") for i=1:nbins nblocks = histcounts[i] * scale blockstring = repeat("█", floor(Int, nblocks)) * blocks[ceil(Int,(nblocks - floor(nblocks))*8)+1] - println(" (" * lowerlabels[i] * " "^(longestlower - length(lowerlabels[i])) * - " - " * upperlabels[i] * " "^(longestupper - length(upperlabels[i])) * - "] " * blockstring * (showcounts ? 
" $(histcounts[i])" : "")) + print(io, " (", lowerlabels[i], " "^(longestlower - length(lowerlabels[i]))) + print(io, " - ", upperlabels[i], " "^(longestupper - length(upperlabels[i])), "] ") + printstyled(io, blockstring; color=:green) + if showcounts + print(io, histcounts[i]) + end + println(io) end - println("\n" * " "^max(plotwidth÷2 + 6 - length(xlabel)÷2, 0) * xlabel) + isempty(xlabel) || println(io, "\n", " "^max(plotwidth÷2 + 6 - length(xlabel)÷2, 0), xlabel) + return nothing end From 9d70bbf19c44a4948dfc585e8df41598288252ca Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 23 May 2021 23:17:13 +0200 Subject: [PATCH 3/8] rename things, add configuration, fix tests, update readme --- Project.toml | 2 +- README.md | 156 +++++++++++++++++++------------- generate_readme/README.jl | 8 +- src/BenchmarkHistograms.jl | 13 ++- src/plot_hist.jl | 60 ------------ src/simple_unicode_histogram.jl | 66 ++++++++++++++ test/runtests.jl | 32 ++++--- 7 files changed, 194 insertions(+), 143 deletions(-) delete mode 100644 src/plot_hist.jl create mode 100644 src/simple_unicode_histogram.jl diff --git a/Project.toml b/Project.toml index 36ab0b0..0cc3af4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BenchmarkHistograms" uuid = "a80a1652-aad8-438d-b80b-ecb1a674e33b" authors = ["Eric Hanson <5846501+ericphanson@users.noreply.github.com> and contributors"] -version = "0.1.1" +version = "0.1.2" [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" diff --git a/README.md b/README.md index 42f5098..0ca8699 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,19 @@ # BenchmarkHistograms -Wraps [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl/) to provide a UnicodePlots.jl-powered `show` method for `@benchmark`. This is accomplished by a custom `@benchmark` method which wraps the output in a `BenchmarkPlot` struct with a custom show method. 
+Wraps [BenchmarkTools.jl](https://github.com/JuliaCI/BenchmarkTools.jl/) to provide a unicode histogram `show` method for `@benchmark`. This is accomplished by a custom `@benchmark` method which wraps the output in a `BenchmarkHistogram` struct with a custom show method. This means one should not call `using` on both BenchmarkHistograms and BenchmarkTools in the same namespace, or else these `@benchmark` macros will conflict ("WARNING: using `BenchmarkTools.@benchmark` in module Main conflicts with an existing identifier.") -However, BenchmarkHistograms re-exports all of BenchmarkTools (including the module `BenchmarkTools` itself), so you can simply call `using BenchmarkHistograms` instead. +However, BenchmarkHistograms re-exports all the exports of BenchmarkTools, so you can simply call `using BenchmarkHistograms`. Providing this functionality in BenchmarkTools itself was discussed in [JuliaCI/BenchmarkTools.jl#180](https://github.com/JuliaCI/BenchmarkTools.jl/pull/180). +Thanks to @brenhinkeller for providing the initial plotting code there. -Use the setting `BenchmarkHistograms.NBINS[]` to change the number of histogram bins used, e.g. -```julia -BenchmarkHistograms.NBINS[] = 10 -``` -to use 10 bins. +Use the setting `BenchmarkHistograms.NBINS` to change the number of histogram bins used, e.g. `BenchmarkHistograms.NBINS[] = 10` for 10 bins. + +Likewise use the setting `BenchmarkHistograms.OUTLIER_QUANTILE` to tweak which values count as outliers and may be grouped into a single bin. +For example, `BenchmarkHistograms.OUTLIER_QUANTILE[] = 0.99` counts any values past the 99 percentile as possible outliers. This value defaults to `0.999` and is disabled by setting it to `1.0`. 
## Example @@ -29,22 +29,27 @@ using BenchmarkHistograms ``` samples: 10000; evals/sample: 1000; memory estimate: 0 bytes; allocs estimate: 0 - ┌ ┐ - [ 4.0, 6.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 7823 - [ 6.0, 8.0) ┤▇▇▇▇▇▇▇ 1643 - [ 8.0, 10.0) ┤▇▇ 529 - [10.0, 12.0) ┤ 2 - [12.0, 14.0) ┤ 2 - ns [14.0, 16.0) ┤ 0 - [16.0, 18.0) ┤ 0 - [18.0, 20.0) ┤ 0 - [20.0, 22.0) ┤ 0 - [22.0, 24.0) ┤ 0 - [24.0, 26.0) ┤ 0 - [26.0, 28.0) ┤ 1 - └ ┘ - Counts -min: 4.916 ns (0.00% GC); mean: 5.724 ns (0.00% GC); median: 5.208 ns (0.00% GC); max: 27.458 ns (0.00% GC). +ns + + (8.04 - 8.53 ] ██████████████████████████████▏7673 + (8.53 - 9.02 ] ▌109 + (9.02 - 9.51 ] ▏3 + (9.51 - 10.01] 0 + (10.01 - 10.5 ] 0 + (10.5 - 10.99] █████▋1431 + (10.99 - 11.48] ██▌624 + (11.48 - 11.97] ▍70 + (11.97 - 12.46] ▎38 + (12.46 - 12.95] ▏4 + (12.95 - 13.44] ▏1 + (13.44 - 13.93] ▏2 + (13.93 - 14.42] ▏7 + (14.42 - 14.92] ▏22 + (14.92 - 21.88] ▏16 + + Counts + +min: 8.041 ns (0.00% GC); mean: 8.812 ns (0.00% GC); median: 8.166 ns (0.00% GC); max: 21.875 ns (0.00% GC). ``` That benchmark does not have a very interesting distribution, but it's not hard to find more interesting cases. @@ -54,18 +59,26 @@ That benchmark does not have a very interesting distribution, but it's not hard ``` ``` -samples: 3192; evals/sample: 1000; memory estimate: 0 bytes; allocs estimate: 0 - ┌ ┐ - [ 0.0, 500.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2036 - [ 500.0, 1000.0) ┤ 0 - [1000.0, 1500.0) ┤ 0 - ns [1500.0, 2000.0) ┤ 0 - [2000.0, 2500.0) ┤ 0 - [2500.0, 3000.0) ┤ 0 - [3000.0, 3500.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1156 - └ ┘ - Counts -min: 1.875 ns (0.00% GC); mean: 1.141 μs (0.00% GC); median: 4.521 ns (0.00% GC); max: 3.315 μs (0.00% GC). 
+samples: 3110; evals/sample: 1000; memory estimate: 0 bytes; allocs estimate: 0 +ns + + (0.0 - 280.0 ] ██████████████████████████████ 1964 + (280.0 - 570.0 ] 0 + (570.0 - 850.0 ] 0 + (850.0 - 1130.0] 0 + (1130.0 - 1410.0] 0 + (1410.0 - 1690.0] 0 + (1690.0 - 1970.0] 0 + (1970.0 - 2250.0] 0 + (2250.0 - 2540.0] 0 + (2540.0 - 2820.0] 0 + (2820.0 - 3100.0] 0 + (3100.0 - 3380.0] █████████████████1105 + (3380.0 - 3660.0] ▊41 + + Counts + +min: 2.500 ns (0.00% GC); mean: 1.181 μs (0.00% GC); median: 5.334 ns (0.00% GC); max: 3.663 μs (0.00% GC). ``` Here, we see a bimodal distribution; in the case `5` is indeed in the vector, we find it very quickly, in the 0-1000 ns range (thanks to `sort` which places it at the front). In the case 5 is not present, we need to check every entry to be sure, and we end up in the 3000-4000 ns range. @@ -77,18 +90,26 @@ Without the `sort`, we end up with more of a uniform distribution: ``` ``` -samples: 2461; evals/sample: 999; memory estimate: 0 bytes; allocs estimate: 0 - ┌ ┐ - [ 0.0, 500.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 364 - [ 500.0, 1000.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇ 327 - [1000.0, 1500.0) ┤▇▇▇▇▇▇▇▇▇▇ 266 - ns [1500.0, 2000.0) ┤▇▇▇▇▇▇▇▇ 214 - [2000.0, 2500.0) ┤▇▇▇▇▇▇▇▇ 213 - [2500.0, 3000.0) ┤▇▇▇▇▇ 146 - [3000.0, 3500.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 931 - └ ┘ - Counts -min: 8.842 ns (0.00% GC); mean: 1.972 μs (0.00% GC); median: 2.154 μs (0.00% GC); max: 3.364 μs (0.00% GC). 
+samples: 2393; evals/sample: 1000; memory estimate: 0 bytes; allocs estimate: 0 +ns + + (0.0 - 310.0 ] ███████▏214 + (310.0 - 610.0 ] ██████▍191 + (610.0 - 910.0 ] █████▊173 + (910.0 - 1220.0] █████▊174 + (1220.0 - 1520.0] █████▏155 + (1520.0 - 1830.0] ████▍133 + (1830.0 - 2130.0] ████119 + (2130.0 - 2430.0] ███▍100 + (2430.0 - 2740.0] ██▉86 + (2740.0 - 3040.0] ███▍102 + (3040.0 - 3350.0] ██████████████████████████████ 912 + (3350.0 - 3650.0] █30 + (3650.0 - 5870.0] ▎4 + + Counts + +min: 2.334 ns (0.00% GC); mean: 2.037 μs (0.00% GC); median: 2.236 μs (0.00% GC); max: 5.869 μs (0.00% GC). ``` This function gives a somewhat more Gaussian distribution of times, kindly supplied by Mason Protter: @@ -100,24 +121,28 @@ f() = sum((sin(i) for i in 1:round(Int, 1000 + 100*randn()))) ``` ``` -samples: 10000; evals/sample: 1; memory estimate: 0 bytes; allocs estimate: 0 - ┌ ┐ - [ 8000.0, 9000.0) ┤ 12 - [ 9000.0, 10000.0) ┤▇ 117 - [10000.0, 11000.0) ┤▇▇▇▇▇▇▇ 635 - [11000.0, 12000.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1810 - [12000.0, 13000.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2959 - [13000.0, 14000.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2460 - ns [14000.0, 15000.0) ┤▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1451 - [15000.0, 16000.0) ┤▇▇▇▇▇ 456 - [16000.0, 17000.0) ┤▇ 89 - [17000.0, 18000.0) ┤ 9 - [18000.0, 19000.0) ┤ 1 - [19000.0, 20000.0) ┤ 0 - [20000.0, 21000.0) ┤ 1 - └ ┘ - Counts -min: 8.109 μs (0.00% GC); mean: 12.865 μs (0.00% GC); median: 12.820 μs (0.00% GC); max: 20.459 μs (0.00% GC). 
+samples: 10000; evals/sample: 3; memory estimate: 0 bytes; allocs estimate: 0 +ns + + (7030.0 - 7480.0 ] ▏11 + (7480.0 - 7930.0 ] █▍128 + (7930.0 - 8380.0 ] ████████▏788 + (8380.0 - 8830.0 ] █████████████████████▏2044 + (8830.0 - 9280.0 ] ██████████████████████████████ 2916 + (9280.0 - 9730.0 ] ███████████████████████▉2309 + (9730.0 - 10180.0] ████████████▎1182 + (10180.0 - 10630.0] ████▎413 + (10630.0 - 11080.0] █▌140 + (11080.0 - 11530.0] ▌44 + (11530.0 - 11980.0] ▏6 + (11980.0 - 12430.0] ▏3 + (12430.0 - 12880.0] 0 + (12880.0 - 13330.0] ▏5 + (13330.0 - 18330.0] ▏11 + + Counts + +min: 7.028 μs (0.00% GC); mean: 9.184 μs (0.00% GC); median: 9.153 μs (0.00% GC); max: 18.333 μs (0.00% GC). ``` See also for another example of where looking at the whole histogram can be useful in benchmarking. @@ -125,3 +150,4 @@ See also . +# Thanks to @brenhinkeller for providing the initial plotting code there. -# Use the setting `BenchmarkHistograms.NBINS[] = 10` to change the number of histogram bins used. +# Use the setting `BenchmarkHistograms.NBINS` to change the number of histogram bins used, e.g. `BenchmarkHistograms.NBINS[] = 10` for 10 bins. + +# Likewise use the setting `BenchmarkHistograms.OUTLIER_QUANTILE` to tweak which values count as outliers and may be grouped into a single bin. +# For example, `BenchmarkHistograms.OUTLIER_QUANTILE[] = 0.99` counts any values past the 99 percentile as possible outliers. This value defaults to `0.999` and is disabled by setting it to `1.0`. # ## Example diff --git a/src/BenchmarkHistograms.jl b/src/BenchmarkHistograms.jl index dce7564..b0cd462 100644 --- a/src/BenchmarkHistograms.jl +++ b/src/BenchmarkHistograms.jl @@ -23,6 +23,14 @@ When `NBINS[] <= 0`, the number is chosen automatically by Sturge's rule (i.e. ` """ const NBINS = Ref(0) +""" + OUTLIER_QUANTILE = Ref(0.999) + +Controls which benchmarking times count as outliers and may be grouped into a single bin. +Set `OUTLIER_QUANTILE[] = 1.0` to avoid this behavior. 
+""" +const OUTLIER_QUANTILE = Ref(0.999) + struct BenchmarkHistogram trial::BenchmarkTools.Trial end @@ -52,7 +60,8 @@ function Base.show(io::IO, ::MIME"text/plain", bp::BenchmarkHistogram; nbins=NBI println(io, "samples: ", length(t), "; evals/sample: ", t.params.evals, "; memory estimate: ", memorystr, "; allocs estimate: ", allocsstr) if length(t) > 0 bin_arg = nbins <= 0 ? NamedTuple() : (; nbins=nbins) - simple_unicode_histogram(io, t.times; ylabel="ns", xlabel="Counts", bin_arg...) + simple_unicode_histogram(io, t.times; ylabel="ns", xlabel="Counts", + outlier_quantile=OUTLIER_QUANTILE[], bin_arg...) println(io) end print(io, "min: ", minstr, "; mean: ", meanstr, "; median: ", medstr, "; max: ", maxstr, ".") @@ -70,6 +79,6 @@ end include("vendor.jl") # The code to draw the histograms -include("plot_hist.jl") +include("simple_unicode_histogram.jl") end diff --git a/src/plot_hist.jl b/src/plot_hist.jl deleted file mode 100644 index 0876d19..0000000 --- a/src/plot_hist.jl +++ /dev/null @@ -1,60 +0,0 @@ -# Modified from https://github.com/JuliaCI/BenchmarkTools.jl/pull/180#issuecomment-711128281 -function simple_unicode_histogram(io::IO, x::AbstractArray; nbins::Integer=ceil(Int, log2(length(x))+1), plotwidth::Integer=30, showcounts::Bool=true, xlabel="", ylabel="") - # Find bounds. Our naive attempt is to use equal width - # bins from the minimum to the maximum. - l, M = extrema(x) - initial_dx = (M - l) / nbins - - # Now, we check: if we don't have some big outliers, we'd expect - # the 99.9 percentile, `Q`, to be within a few bins of the maximum. - # Here, we choose 2. If it is not, then we decide that indeed - # there are outliers. We will instead divide the range from - # the minimum to `Q` equally with `nbins-1` bins, and then reserve - # the last bin to hold everything greater than `Q`. - Q = quantile(x, 0.999) - truncate = M - Q > 2*initial_dx - - # our "upper bound" - u = truncate ? 
Q : M - - # Fill histogram - histcounts = fill(0, nbins) - dx = truncate ? (u - l) / (nbins - 1) : initial_dx - for xi in x - index = ceil(Int, (xi - l) / dx) - if 1 <= index <= nbins - histcounts[index] += 1 - else - histcounts[end] += 1 - end - end - - if truncate - binedges = [range(l,u,length=nbins); M] - else - binedges = range(l,u,length=nbins+1) - end - - # Print the histogram - digitsneeded = ceil(Int, -log10(u-l))+1 - blocks = [" ","▏","▎","▍","▌","▋","▊","▉","█","█"] - scale = plotwidth/maximum(histcounts) - lowerlabels = string.(round.(binedges[1:end-1], digits=digitsneeded+ceil(Int,log10(nbins)-1))) - upperlabels = string.(round.(binedges[2:end], digits=digitsneeded+ceil(Int,log10(nbins)-1))) - longestlower = maximum(length.(lowerlabels)) - longestupper = maximum(length.(upperlabels)) - !isempty(ylabel) && println(io, ylabel, "\n") - for i=1:nbins - nblocks = histcounts[i] * scale - blockstring = repeat("█", floor(Int, nblocks)) * blocks[ceil(Int,(nblocks - floor(nblocks))*8)+1] - print(io, " (", lowerlabels[i], " "^(longestlower - length(lowerlabels[i]))) - print(io, " - ", upperlabels[i], " "^(longestupper - length(upperlabels[i])), "] ") - printstyled(io, blockstring; color=:green) - if showcounts - print(io, histcounts[i]) - end - println(io) - end - isempty(xlabel) || println(io, "\n", " "^max(plotwidth÷2 + 6 - length(xlabel)÷2, 0), xlabel) - return nothing -end diff --git a/src/simple_unicode_histogram.jl b/src/simple_unicode_histogram.jl new file mode 100644 index 0000000..e57aa4d --- /dev/null +++ b/src/simple_unicode_histogram.jl @@ -0,0 +1,66 @@ +# Modified from https://github.com/JuliaCI/BenchmarkTools.jl/pull/180#issuecomment-711128281 by @brenhinkeller + +const BLOCKS = [" ","▏","▎","▍","▌","▋","▊","▉","█","█"] + +function simple_unicode_histogram(io::IO, x::AbstractArray; + nbins::Integer=ceil(Int, log2(length(x))+1), + plot_width::Integer=30, show_counts::Bool=true, + outlier_quantile = 0.999, + xlabel="", ylabel="") + # Find bounds. 
Our naive attempt is to use equal width + # bins from the minimum to the maximum. + l, M = extrema(x) + initial_dx = (M - l) / nbins + + # Now, we check: if we don't have some big outliers, we'd expect + # the 99.9 percentile, `Q`, to be within a few bins of the maximum. + # Here, we choose 2. If it is not, then we decide that indeed + # there are outliers. We will instead divide the range from + # the minimum to `Q` equally with `nbins-1` bins, and then reserve + # the last bin to hold everything greater than `Q`. + Q = quantile(x, outlier_quantile) + truncate = M - Q > 2*initial_dx + + # our "upper bound" + u = truncate ? Q : M + + # Fill histogram + hist_counts = fill(0, nbins) + dx = truncate ? (u - l) / (nbins - 1) : initial_dx + for xi in x + index = ceil(Int, (xi - l) / dx) + if 1 <= index <= nbins + hist_counts[index] += 1 + else + hist_counts[end] += 1 + end + end + + if truncate + bin_edges = [range(l,u,length=nbins); M] + else + bin_edges = range(l,u,length=nbins+1) + end + + # Print the histogram + d = ceil(Int, -log10(u-l))+1 + scale = plot_width/maximum(hist_counts) + lower_labels = string.(round.(bin_edges[1:end-1], digits=d+ceil(Int,log10(nbins)-1))) + upper_labels = string.(round.(bin_edges[2:end], digits=d+ceil(Int,log10(nbins)-1))) + longest_lower = maximum(length.(lower_labels)) + longest_upper = maximum(length.(upper_labels)) + !isempty(ylabel) && println(io, ylabel, "\n") + for i=1:nbins + nblocks = hist_counts[i] * scale + block_string = repeat("█", floor(Int, nblocks)) * BLOCKS[ceil(Int,(nblocks - floor(nblocks))*8)+1] + print(io, " (", lower_labels[i], " "^(longest_lower - length(lower_labels[i]))) + print(io, " - ", upper_labels[i], " "^(longest_upper - length(upper_labels[i])), "] ") + printstyled(io, block_string; color=:green) + if show_counts + print(io, hist_counts[i]) + end + println(io) + end + isempty(xlabel) || println(io, "\n", " "^max(plot_width ÷2 + 6 - length(xlabel)÷2, 0), xlabel) + return nothing +end diff --git 
a/test/runtests.jl b/test/runtests.jl index 309cdb7..6412908 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,9 @@ using BenchmarkHistograms using Test import BenchmarkTools +block_regex = Regex(string("(", join(BenchmarkHistograms.BLOCKS[2:end], "|"), string(")"))) -function counting_tests(nbins=nothing) +function counting_tests(nbins=nothing, outlier_quantile=nothing) bh = @benchmark 1+1 output = sprint(show, MIME"text/plain"(), bh) @@ -23,8 +24,9 @@ function counting_tests(nbins=nothing) # Summary stats @test n_matches(r"min") == n_matches(r"mean") == n_matches(r"median") == n_matches(r"max") == 1 @test n_matches(r"% GC") == 4 - # Corners of the plot - @test n_matches(r"┌") == n_matches(r"┐") == n_matches(r"└") == n_matches(r"┘") == 1 + + # Bars of the plot + @test n_matches(block_regex) > 1 return nothing end @@ -34,9 +36,6 @@ function empty_test() empty!(bh.trial.times) output = sprint(show, MIME"text/plain"(), bh) - # Don't want to test the exact string since the stats will - # fluctuate. So let's just test that it contains the right - # number of the right things, and assume they're arranged properly. 
n_matches = r -> length(collect(eachmatch(r, output))) @test n_matches(r"samples:") == 1 @@ -47,17 +46,21 @@ function empty_test() @test n_matches(r"Counts") == 0 @test n_matches(r"min") == n_matches(r"mean") == n_matches(r"median") == n_matches(r"max") == 1 @test n_matches(r"% GC") == 0 - @test n_matches(r"┌") == n_matches(r"┐") == n_matches(r"└") == n_matches(r"┘") == 0 + # Bars of the plot + @test n_matches(block_regex) == 0 return nothing end -function with_bins(f, nbins) - pre = BenchmarkHistograms.NBINS[] +function with_params(f, nbins, outlier_quantile) + pre_bins = BenchmarkHistograms.NBINS[] + pre_q = BenchmarkHistograms.OUTLIER_QUANTILE[] BenchmarkHistograms.NBINS[] = nbins + BenchmarkHistograms.OUTLIER_QUANTILE[] = outlier_quantile try - f(nbins) + f(nbins, outlier_quantile) finally - BenchmarkHistograms.NBINS[] = pre + BenchmarkHistograms.NBINS[] = pre_bins + BenchmarkHistograms.OUTLIER_QUANTILE[] = pre_q end return nothing end @@ -69,8 +72,11 @@ end @testset "Counting tests" begin counting_tests() - with_bins(counting_tests, 10) - with_bins(counting_tests, -1) + # we don't actually test that changing the parameters + # does something, but we at least test that we can + # change them to some different values without getting errors. 
+ with_params(counting_tests, 10, 0.99) + with_params(counting_tests, -1, 1.0) empty_test() end end From aff3f2f05ae9109f45d43225335ac47e646daae3 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 24 May 2021 00:18:38 +0200 Subject: [PATCH 4/8] refactor `simple_unicode_histogram` --- src/simple_unicode_histogram.jl | 44 ++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/simple_unicode_histogram.jl b/src/simple_unicode_histogram.jl index e57aa4d..78e16d5 100644 --- a/src/simple_unicode_histogram.jl +++ b/src/simple_unicode_histogram.jl @@ -2,11 +2,7 @@ const BLOCKS = [" ","▏","▎","▍","▌","▋","▊","▉","█","█"] -function simple_unicode_histogram(io::IO, x::AbstractArray; - nbins::Integer=ceil(Int, log2(length(x))+1), - plot_width::Integer=30, show_counts::Bool=true, - outlier_quantile = 0.999, - xlabel="", ylabel="") +function get_edges(x; nbins, outlier_quantile) # Find bounds. Our naive attempt is to use equal width # bins from the minimum to the maximum. l, M = extrema(x) @@ -24,9 +20,21 @@ function simple_unicode_histogram(io::IO, x::AbstractArray; # our "upper bound" u = truncate ? Q : M + if truncate + bin_edges = [range(l,u,length=nbins); M] + else + bin_edges = range(l,u,length=nbins+1) + end + return bin_edges, truncate +end + +function get_counts(x, bin_edges; nbins, truncate) + u = truncate ? bin_edges[end-1] : bin_edges[end] + l = bin_edges[1] + # Fill histogram hist_counts = fill(0, nbins) - dx = truncate ? (u - l) / (nbins - 1) : initial_dx + dx = truncate ? 
(u - l) / (nbins - 1) : (u - l) / nbins for xi in x index = ceil(Int, (xi - l) / dx) if 1 <= index <= nbins @@ -35,14 +43,26 @@ function simple_unicode_histogram(io::IO, x::AbstractArray; hist_counts[end] += 1 end end + return hist_counts +end - if truncate - bin_edges = [range(l,u,length=nbins); M] - else - bin_edges = range(l,u,length=nbins+1) - end +function simple_unicode_histogram(io::IO, x::AbstractVector; + nbins::Integer=ceil(Int, log2(length(x))+1), + plot_width::Integer=30, show_counts::Bool=true, + outlier_quantile = 0.999, + xlabel="", ylabel="") + + bin_edges, truncate = get_edges(x; nbins, outlier_quantile) + hist_counts = get_counts(x, bin_edges; nbins, truncate) + return simple_unicode_histogram(io, bin_edges, hist_counts; plot_width, show_counts, xlabel, ylabel, truncate) +end - # Print the histogram +function simple_unicode_histogram(io::IO, bin_edges::AbstractVector, hist_counts::AbstractVector; + plot_width::Integer=30, show_counts::Bool=true, + xlabel="", ylabel="", truncate=true) + nbins = length(bin_edges) - 1 + l = first(bin_edges) + u = truncate ? 
bin_edges[end-1] : bin_edges[end] d = ceil(Int, -log10(u-l))+1 scale = plot_width/maximum(hist_counts) lower_labels = string.(round.(bin_edges[1:end-1], digits=d+ceil(Int,log10(nbins)-1))) From b01918be2889624b0e9c6bf7263630394fcbdb61 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 24 May 2021 00:20:18 +0200 Subject: [PATCH 5/8] add `comparison` (WIP) --- src/BenchmarkHistograms.jl | 5 +++++ src/comparison.jl | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 src/comparison.jl diff --git a/src/BenchmarkHistograms.jl b/src/BenchmarkHistograms.jl index b0cd462..03fce78 100644 --- a/src/BenchmarkHistograms.jl +++ b/src/BenchmarkHistograms.jl @@ -15,6 +15,8 @@ end # Export our own `@benchmark` export @benchmark +export comparison + """ const NBINS = Ref(0) @@ -81,4 +83,7 @@ include("vendor.jl") # The code to draw the histograms include("simple_unicode_histogram.jl") +# Comparison plots +include("comparison.jl") + end diff --git a/src/comparison.jl b/src/comparison.jl new file mode 100644 index 0000000..49d42d8 --- /dev/null +++ b/src/comparison.jl @@ -0,0 +1,29 @@ +function comparison_histogram(io::IO, x::AbstractVector, y::AbstractVector; + nbins::Integer=ceil(Int, log2(min(length(x), length(y)))+1), + plot_width::Integer=30, show_counts::Bool=true, + outlier_quantile = 0.999, + xlabel="", ylabel="") + + bin_edges, truncate = get_edges([x; y]; nbins, outlier_quantile) + hist_counts_x = get_counts(x, bin_edges; nbins, truncate) + + simple_unicode_histogram(io, bin_edges, hist_counts_x; plot_width, show_counts, xlabel="", ylabel, truncate) + hist_counts_y = get_counts(y, bin_edges; nbins, truncate) + println(io) + simple_unicode_histogram(io, bin_edges, hist_counts_y; plot_width, show_counts, xlabel, ylabel, truncate) + return nothing +end + +comparison(bench1::BenchmarkHistogram, bench2::BenchmarkHistogram; kwargs...) = comparison(stdout, bench1, bench2; kwargs...) 
+ +function comparison(io::IO, bench1::BenchmarkHistogram, bench2::BenchmarkHistogram; nbins::Integer=NBINS[], + plot_width::Integer=30, show_counts::Bool=true, + outlier_quantile = OUTLIER_QUANTILE[], + xlabel="Counts", ylabel="") + x = bench1.trial.times + y = bench2.trial.times + if nbins <= 0 + nbins = ceil(Int, log2(min(length(x), length(y)))+1) + end + return comparison_histogram(io, x, y; nbins, plot_width, show_counts, outlier_quantile, xlabel, ylabel) +end From e1cca729ce93b895c816b510046f1c08bd0e321c Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 24 May 2021 00:26:13 +0200 Subject: [PATCH 6/8] use Julia 1.0 compatible syntax --- src/simple_unicode_histogram.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simple_unicode_histogram.jl b/src/simple_unicode_histogram.jl index e57aa4d..e2d3476 100644 --- a/src/simple_unicode_histogram.jl +++ b/src/simple_unicode_histogram.jl @@ -37,9 +37,9 @@ function simple_unicode_histogram(io::IO, x::AbstractArray; end if truncate - bin_edges = [range(l,u,length=nbins); M] + bin_edges = [range(l;stop=u,length=nbins); M] else - bin_edges = range(l,u,length=nbins+1) + bin_edges = range(l;stop=u,length=nbins+1) end # Print the histogram From adf88340dbca748ef692e81e2b1ec1675750e9cc Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 24 May 2021 00:46:25 +0200 Subject: [PATCH 7/8] use ugly pre-1.5 keyword argument syntax --- src/comparison.jl | 12 ++++++------ src/simple_unicode_histogram.jl | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/comparison.jl b/src/comparison.jl index 49d42d8..da1220e 100644 --- a/src/comparison.jl +++ b/src/comparison.jl @@ -4,13 +4,13 @@ function comparison_histogram(io::IO, x::AbstractVector, y::AbstractVector; outlier_quantile = 0.999, xlabel="", ylabel="") - bin_edges, truncate = get_edges([x; y]; nbins, outlier_quantile) - hist_counts_x = 
get_counts(x, bin_edges; nbins, truncate) + bin_edges, truncate = get_edges([x; y]; nbins=nbins, outlier_quantile=outlier_quantile) + hist_counts_x = get_counts(x, bin_edges; nbins=nbins, truncate=truncate) - simple_unicode_histogram(io, bin_edges, hist_counts_x; plot_width, show_counts, xlabel="", ylabel, truncate) - hist_counts_y = get_counts(y, bin_edges; nbins, truncate) + simple_unicode_histogram(io, bin_edges, hist_counts_x; plot_width=plot_width, show_counts=show_counts, xlabel="", ylabel=ylabel, truncate=truncate) + hist_counts_y = get_counts(y, bin_edges; nbins=nbins, truncate=truncate) println(io) - simple_unicode_histogram(io, bin_edges, hist_counts_y; plot_width, show_counts, xlabel, ylabel, truncate) + simple_unicode_histogram(io, bin_edges, hist_counts_y; plot_width=plot_width, show_counts=show_counts, xlabel=xlabel, ylabel=ylabel, truncate=truncate) return nothing end @@ -25,5 +25,5 @@ function comparison(io::IO, bench1::BenchmarkHistogram, bench2::BenchmarkHistogr if nbins <= 0 nbins = ceil(Int, log2(min(length(x), length(y)))+1) end - return comparison_histogram(io, x, y; nbins, plot_width, show_counts, outlier_quantile, xlabel, ylabel) + return comparison_histogram(io, x, y; nbins=nbins, plot_width=plot_width, show_counts=show_counts, outlier_quantile=outlier_quantile, xlabel=xlabel, ylabel=ylabel) end diff --git a/src/simple_unicode_histogram.jl b/src/simple_unicode_histogram.jl index 1e3c27c..64c80e1 100644 --- a/src/simple_unicode_histogram.jl +++ b/src/simple_unicode_histogram.jl @@ -52,9 +52,9 @@ function simple_unicode_histogram(io::IO, x::AbstractVector; outlier_quantile = 0.999, xlabel="", ylabel="") - bin_edges, truncate = get_edges(x; nbins, outlier_quantile) - hist_counts = get_counts(x, bin_edges; nbins, truncate) - return simple_unicode_histogram(io, bin_edges, hist_counts; plot_width, show_counts, xlabel, ylabel, truncate) + bin_edges, truncate = get_edges(x; nbins=nbins, outlier_quantile=outlier_quantile) + hist_counts = 
get_counts(x, bin_edges; nbins=nbins, truncate=truncate) + return simple_unicode_histogram(io, bin_edges, hist_counts; plot_width=plot_width, show_counts=show_counts, xlabel=xlabel, ylabel=ylabel, truncate=truncate) end function simple_unicode_histogram(io::IO, bin_edges::AbstractVector, hist_counts::AbstractVector; From 075e407ad6b5a4784e984a1b8d0bfae397b69c95 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 24 May 2021 00:51:51 +0200 Subject: [PATCH 8/8] add export to tests --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 6412908..9bba374 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -67,7 +67,7 @@ end @testset "BenchmarkHistograms.jl" begin @testset "Exports" begin - @test symdiff(names(BenchmarkTools), names(BenchmarkHistograms)) == [:BenchmarkHistograms] + @test symdiff(names(BenchmarkTools), names(BenchmarkHistograms)) == [:BenchmarkHistograms, :comparison] end @testset "Counting tests" begin