-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdensity.py
More file actions
3435 lines (2892 loc) · 136 KB
/
Copy pathdensity.py
File metadata and controls
3435 lines (2892 loc) · 136 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# density.py - Corrected Version
from __future__ import annotations
"""
Module for calculating spectral density metrics for musical audio analysis.
Implements weight functions, density calculations, and combined metrics for
harmonic and inharmonic components.
Improvements:
- Expanded and standardized documentation
- Reinforced parameter validation
- More robust error handling
- Performance optimization in critical functions
- Consistent naming in English
"""
import numpy as np
import pandas as pd
from typing import Callable, Union, Optional, Dict, Tuple, List, Any, Literal
import logging
import warnings
logger = logging.getLogger(__name__)
# PHASE 3: Import constants
try:
from constants import (
SPARSITY_THRESHOLD_RELATIVE, SPECTRAL_CONCENTRATION_DEFAULT_PEAKS,
PERCEPTUAL_DENSITY_POWER_EXPONENT, PERCEPTUAL_DENSITY_OCCUPANCY_WEIGHT,
PERCEPTUAL_DENSITY_UNIFORMITY_WEIGHT, PERCEPTUAL_DENSITY_COMPLETENESS_WEIGHT,
PERCEPTUAL_DENSITY_LOG_SCALE_FACTOR,
HARMONIC_DETECTION_THRESHOLD_DB, HARMONIC_TOLERANCE_BASE,
HARMONIC_TOLERANCE_ADAPTIVE_FACTOR, HARMONIC_MAX_CHECK,
NUM_CRITICAL_BANDS, CRITICAL_BAND_MASKING_STRONG_THRESHOLD,
CRITICAL_BAND_MASKING_MODERATE_THRESHOLD, CRITICAL_BAND_MASKING_WEAK_THRESHOLD,
MASKING_WITHIN_BAND_OFFSET_DB, MASKING_ADJACENT_BAND_OFFSET_DB,
MASKING_ADJACENT_BAND_SLOPE_DB, MASKING_NEARBY_BAND_OFFSET_DB,
MASKING_NEARBY_BAND_SLOPE_DB, MASKING_FAR_BAND_OFFSET_DB,
MASKING_FAR_BAND_SLOPE_DB, MASKING_ABSOLUTE_THRESHOLD_DB,
# REMOVIDO: Equal loudness constants (não mais usadas - densidade física)
# EQUAL_LOUDNESS_LOW_WEIGHT_MIN, EQUAL_LOUDNESS_HIGH_WEIGHT_MAX,
# EQUAL_LOUDNESS_HIGH_WEIGHT_DECAY, EQUAL_LOUDNESS_HIGH_FREQ_RANGE,
FREQ_MID_LOW_HZ, FREQ_MID_HIGH_HZ, # Ainda usadas em outras funções
SMOOTHING_WINDOW_PERCENTAGE, SMOOTHING_MIN_WINDOW_LENGTH,
SMOOTHING_POLYORDER, SMOOTHING_NOISE_FLOOR_PERCENTILE, SMOOTHING_NOISE_FLOOR_MULTIPLIER,
EPSILON, EPSILON_POWER, EPSILON_AMPLITUDE, EPSILON_FREQUENCY,
HARMONIC_COMPLETENESS_WEIGHT_BASE
)
except ImportError:
# Fallback if constants.py not available
SPARSITY_THRESHOLD_RELATIVE = 0.01
SPECTRAL_CONCENTRATION_DEFAULT_PEAKS = 5
PERCEPTUAL_DENSITY_POWER_EXPONENT = 0.3
PERCEPTUAL_DENSITY_OCCUPANCY_WEIGHT = 0.5
PERCEPTUAL_DENSITY_UNIFORMITY_WEIGHT = 0.3
PERCEPTUAL_DENSITY_COMPLETENESS_WEIGHT = 0.2
PERCEPTUAL_DENSITY_LOG_SCALE_FACTOR = 3.0
HARMONIC_DETECTION_THRESHOLD_DB = -60.0
HARMONIC_TOLERANCE_BASE = 0.1
HARMONIC_TOLERANCE_ADAPTIVE_FACTOR = 0.1
HARMONIC_MAX_CHECK = 100
NUM_CRITICAL_BANDS = 24
CRITICAL_BAND_MASKING_STRONG_THRESHOLD = 0.5
CRITICAL_BAND_MASKING_MODERATE_THRESHOLD = 1.0
CRITICAL_BAND_MASKING_WEAK_THRESHOLD = 2.0
MASKING_WITHIN_BAND_OFFSET_DB = -10.0
MASKING_ADJACENT_BAND_OFFSET_DB = -15.0
MASKING_ADJACENT_BAND_SLOPE_DB = -10.0
MASKING_NEARBY_BAND_OFFSET_DB = -20.0
MASKING_NEARBY_BAND_SLOPE_DB = -5.0
MASKING_FAR_BAND_OFFSET_DB = -30.0
MASKING_FAR_BAND_SLOPE_DB = -2.0
MASKING_ABSOLUTE_THRESHOLD_DB = -80.0
# REMOVIDO: Equal loudness constants (não mais usadas - densidade física)
# EQUAL_LOUDNESS_LOW_WEIGHT_MIN = 0.5
# EQUAL_LOUDNESS_HIGH_WEIGHT_MAX = 1.0
# EQUAL_LOUDNESS_HIGH_WEIGHT_DECAY = 0.5
# EQUAL_LOUDNESS_HIGH_FREQ_RANGE = 15000.0
# FREQ_MID_LOW_HZ = 1000.0
# FREQ_MID_HIGH_HZ = FREQ_MAX_HZ / 4.0
SMOOTHING_WINDOW_PERCENTAGE = 0.05
SMOOTHING_MIN_WINDOW_LENGTH = 11
SMOOTHING_POLYORDER = 3
SMOOTHING_NOISE_FLOOR_PERCENTILE = 15.0
SMOOTHING_NOISE_FLOOR_MULTIPLIER = 1.5
EPSILON = 1e-12
EPSILON_POWER = 1e-12
EPSILON_AMPLITUDE = 1e-20
EPSILON_FREQUENCY = 1e-6
HARMONIC_COMPLETENESS_WEIGHT_BASE = 1.0
# ======================================================================
# PHASE 1: Spectral Smoothing Functions
# ======================================================================
def apply_spectral_smoothing(
spectrum_magnitude: np.ndarray,
method: str = "savitzky_golay",
window_length: Optional[int] = None,
polyorder: int = 3,
noise_floor_percentile: float = 15.0,
noise_floor_multiplier: float = 1.5
) -> np.ndarray:
"""
Apply spectral smoothing to reduce narrow high-frequency noise peaks and noise.
Spectral-smoothing helper.
This function applies smoothing to the magnitude spectrum before
temporal aggregation to reduce isolated narrow peaks that contradict
expected harmonic spectrum behavior.
Args:
spectrum_magnitude: 2D array of magnitude spectrum (freq x time)
method: Smoothing method ('savitzky_golay' or 'moving_average')
window_length: Smoothing window length (auto-calculated if None)
polyorder: Polynomial order for Savitzky-Golay (must be < window_length)
noise_floor_percentile: Percentile for noise floor estimation
noise_floor_multiplier: Multiplier for noise floor threshold
Returns:
Smoothed magnitude spectrum (same shape as input)
"""
if spectrum_magnitude.size == 0:
return spectrum_magnitude
spectrum = np.asarray(spectrum_magnitude, dtype=float)
# Handle 1D case (single time frame)
if spectrum.ndim == 1:
spectrum = spectrum[:, np.newaxis]
was_1d = True
else:
was_1d = False
n_freq, n_time = spectrum.shape
# Auto-calculate window length if not provided
if window_length is None:
# Use 5% of spectrum length, minimum 11, must be odd
window_length = max(11, int(n_freq * 0.05))
if window_length % 2 == 0:
window_length += 1
window_length = min(window_length, n_freq - 1)
# Ensure window_length is odd and valid
if window_length % 2 == 0:
window_length += 1
window_length = max(3, min(window_length, n_freq - 1))
# PHASE 3: Use constant instead of magic number
# Ensure polyorder < window_length
polyorder = min(SMOOTHING_POLYORDER, window_length - 1)
smoothed = np.zeros_like(spectrum)
try:
if method == "savitzky_golay":
# Use Savitzky-Golay filter (preferred for preserving peaks)
try:
from scipy.signal import savgol_filter
for t in range(n_time):
# FIX: Apply double-pass smoothing to reduce artifacts
# First pass: standard Savitzky-Golay
smoothed_pass1 = savgol_filter(
spectrum[:, t],
window_length=window_length,
polyorder=polyorder,
mode='nearest' # Handle boundaries
)
# Second pass: lighter smoothing to remove residual artifacts
# Use smaller window for second pass (half size, must be odd)
window_length_2 = max(5, (window_length // 2) | 1) # Ensure odd
polyorder_2 = min(2, window_length_2 - 1)
smoothed[:, t] = savgol_filter(
smoothed_pass1,
window_length=window_length_2,
polyorder=polyorder_2,
mode='nearest'
)
logger.debug(
f"Applied Savitzky-Golay smoothing: window={window_length}, "
f"polyorder={polyorder}"
)
except ImportError:
logger.warning("scipy not available, falling back to moving average")
method = "moving_average"
if method == "moving_average":
# Use moving average (fallback if scipy not available)
try:
from scipy.ndimage import uniform_filter1d
for t in range(n_time):
smoothed[:, t] = uniform_filter1d(
spectrum[:, t],
size=window_length,
mode='nearest'
)
logger.debug(f"Applied moving average smoothing: window={window_length}")
except ImportError:
# Pure NumPy implementation if scipy not available
logger.warning("scipy.ndimage not available, using pure NumPy moving average")
for t in range(n_time):
# Simple moving average using convolution
kernel = np.ones(window_length) / window_length
smoothed[:, t] = np.convolve(spectrum[:, t], kernel, mode='same')
# PHASE 3: Use constants instead of magic numbers (if not provided)
# PHASE 3: Use constants if not provided
# Noise floor removal
if noise_floor_percentile is None or noise_floor_percentile <= 0:
noise_floor_percentile = SMOOTHING_NOISE_FLOOR_PERCENTILE
if noise_floor_multiplier is None or noise_floor_multiplier <= 0:
noise_floor_multiplier = SMOOTHING_NOISE_FLOOR_MULTIPLIER
if noise_floor_percentile > 0:
# FIX: Improved noise floor removal with adaptive threshold
# This prevents removal of valid components at specific frequencies
# Divide spectrum into bands and estimate noise floor per band
n_bands = min(10, n_freq // 100) # Adaptive number of bands
if n_bands > 1:
band_size = n_freq // n_bands
noise_floors = []
for b in range(n_bands):
start_idx = b * band_size
end_idx = (b + 1) * band_size if b < n_bands - 1 else n_freq
band_data = smoothed[start_idx:end_idx, :]
if band_data.size > 0:
band_noise = np.percentile(band_data, noise_floor_percentile)
noise_floors.append(band_noise)
if len(noise_floors) > 0:
# Use median of band noise floors (more robust than global)
global_noise_floor = np.median(noise_floors)
else:
global_noise_floor = np.percentile(smoothed, noise_floor_percentile)
else:
# Fallback to global estimation
global_noise_floor = np.percentile(smoothed, noise_floor_percentile)
# FIX: Use adaptive threshold with smooth rolloff instead of hard cutoff
noise_threshold = global_noise_floor * noise_floor_multiplier
# FIX: Apply smooth rolloff instead of hard cutoff to prevent artifacts
# Use sigmoid-like function for smooth transition
# Mathematical verification (reference):
# Avoid division by zero: when noise_threshold ≈ 0, use direct clipping
excess = smoothed - noise_threshold
if noise_threshold > 1e-10: # Avoid division by zero
# Smooth rolloff: keep everything above threshold, gradual reduction below
rolloff_factor = np.where(
excess > 0,
1.0, # Above threshold: keep
np.maximum(0.0, 1.0 + excess / (noise_threshold * 0.5)) # Below: gradual reduction
)
smoothed = smoothed * rolloff_factor
else:
# If noise threshold is too small, just clip negative values
smoothed = np.maximum(smoothed, 0.0)
logger.debug(
f"Noise floor removal: percentile={noise_floor_percentile}%, "
f"threshold={noise_threshold:.6e}, bands={n_bands}"
)
# Restore original shape
if was_1d:
smoothed = smoothed[:, 0]
return smoothed
except Exception as e:
logger.warning(f"Spectral smoothing failed: {e}, returning original spectrum")
return spectrum_magnitude
def estimate_noise_floor(
psd: np.ndarray,
percentile: float = 15.0
) -> float:
"""
Estimate noise floor from PSD using percentile method.
Noise-floor estimation helper.
Args:
psd: Power spectral density array
percentile: Percentile to use for noise floor estimation
Returns:
Estimated noise floor value
"""
if psd.size == 0:
return 0.0
psd_flat = np.asarray(psd).flatten()
psd_positive = psd_flat[psd_flat > 0]
if len(psd_positive) == 0:
return 0.0
noise_floor = np.percentile(psd_positive, percentile)
return float(noise_floor)
# ----------------------------------------------------------------------
# Spectral-Density metrics (restaurado)
# ----------------------------------------------------------------------
class SpectralDensityMetrics:
"""
Conjunto de métricas espectrais clássicas.
Referências:
* Krimphoff et al., 1994 – sparsity / concentration
* Peeters et al., 2011 – timbre toolbox
* Zwicker & Fastl, 1999 – densidade perceptual por bandas Bark
"""
# ---------- 1) Sparsity (0 = denso ; 1 = esparso)
@staticmethod
def spectral_sparsity(amplitudes: np.ndarray,
frequencies: Optional[np.ndarray] = None) -> float:
"""
Mede quão 'esparso' é o espectro. Valores altos indicam poucos bins
acima de um limiar relativo; valores baixos indicam ocupação densa.
"""
if amplitudes.size == 0:
return 1.0
# Normalização por pico para invariância a ganho
amps = amplitudes.astype(float)
amax = float(np.max(amps)) if amps.size else 0.0
if amax > 0.0:
amps = amps / amax
# PHASE 3: Use constant instead of magic number
# Limiar relativo (~ -40 dB)
threshold = SPARSITY_THRESHOLD_RELATIVE
significant = int(np.sum(amps > threshold))
if frequencies is None or frequencies.size == 0:
return float(np.clip(1.0 - significant / max(amps.size, 1), 0.0, 1.0))
# Se houver frequências, corrige a expectativa pelo espaçamento efetivo
f = frequencies.astype(float)
w = amps
f_mean = float(np.average(f, weights=w)) if np.sum(w) > 0 else float(np.mean(f))
f_std = float(np.sqrt(np.average((f - f_mean) ** 2, weights=w))) if np.sum(w) > 0 else float(np.std(f))
bw_eff = 4.0 * f_std
bw_nom = float(f[-1] - f[0]) if f.size > 1 else 0.0
expected = (bw_eff / (bw_nom / f.size)) if (bw_nom > 0 and f.size > 0) else float(amps.size)
return float(np.clip(1.0 - significant / max(expected, 1.0), 0.0, 1.0))
# ---------- 2) Concentration (0 = difuso ; 1 = concentrado)
@staticmethod
def spectral_concentration(amplitudes: np.ndarray, n_peaks: int = SPECTRAL_CONCENTRATION_DEFAULT_PEAKS) -> float:
"""
Fração de energia nos n picos principais (com pequena correção por dimensão).
"""
if amplitudes.size == 0:
return 0.0
a = amplitudes.astype(float)
if not np.isfinite(a).any() or np.sum(a) <= 0:
return 0.0
# Ordenar por amplitude/energia
sorted_amps = np.sort(a)[::-1]
peak_e = float(np.sum(sorted_amps[:max(1, n_peaks)]))
total_e = float(np.sum(sorted_amps))
conc_raw = peak_e / total_e if total_e > 0 else 0.0
# Penalização suave por dimensionalidade (evita triviais com poucos bins)
if a.size > n_peaks:
conc_raw *= (1.0 - n_peaks / float(a.size))
return float(np.clip(conc_raw, 0.0, 1.0))
# ---------- 3) Physical Spectral Density (PSD-integrated bandwidth occupancy)
@staticmethod
def physical_spectral_density(amplitudes: np.ndarray,
frequencies: np.ndarray,
bin_width_hz: Optional[float] = None) -> float: # noqa: ARG001
"""
Computes effective partial density as ``N_eff / N``.
This is the Hill diversity index with q = 2 (inverse Herfindahl),
normalized by component count N (Hill, 1973; Jost, 2006).
It quantifies the effective number of active partials relative to
the observed component count.
This diverges conceptually from "classical" spectral-density framings
associated with Krimphoff et al. (1994) and Peeters et al. (2011),
which the previous naming could suggest.
See: docs/CANONICAL_PIPELINE_AND_EXPORT_SEMANTICS.md
("Naming caveat: effective_partial_density vs. classical density").
"""
if amplitudes is None or amplitudes.size == 0:
return 0.0
if bin_width_hz is not None:
warnings.warn(
"'bin_width_hz' is deprecated and ignored in physical_spectral_density; "
"it will be removed in a 4.x release.",
DeprecationWarning,
stacklevel=2,
)
amp = np.asarray(amplitudes, dtype=float)
amp = amp[np.isfinite(amp) & (amp > 0.0)]
if amp.size == 0:
return 0.0
power = np.square(amp)
total_power = float(np.sum(power))
if total_power <= 0.0:
return 0.0
# Hill q=2 = 1 / Σ p_i² ; equivalently (Σ p)^2 / Σ p^2 (numerically safer).
n_eff = float((total_power ** 2) / float(np.sum(power * power)))
n_components = float(amp.size)
score = n_eff / n_components if n_components > 0 else 0.0
return float(np.clip(score, 0.0, 1.0))
# ---------- 4) Perceptual Spectral Density (Bark scale with justified parameters)
@staticmethod
def perceptual_spectral_density(amplitudes: np.ndarray,
frequencies: np.ndarray) -> float:
"""
FIX 2 (re-attached): this method was previously orphaned inside the
module-level wrapper for `physical_spectral_density` (after its
`return`), so `hasattr(SpectralDensityMetrics, "perceptual_spectral_density")`
was False. Re-anchored as a real `@staticmethod` of the class.
Mathematical foundation (Zwicker & Terhardt, 1980):
B(f) = 13*arctan(0.00076*f) + 3.5*arctan((f/7500)**2)
Combines occupancy (fraction of active Bark bands) with the entropy
of the energy distribution across the active bands.
"""
if amplitudes.size == 0 or frequencies.size == 0:
return 0.0
f = np.maximum(frequencies.astype(float), 1.0)
amp = amplitudes.astype(float)
power = np.square(amp)
bark = 13.0 * np.arctan(0.00076 * f) + 3.5 * np.arctan((f / 7500.0) ** 2)
bmin, bmax = int(np.floor(bark.min())), int(np.ceil(bark.max()))
bmin = max(0, bmin)
bmax = min(24, bmax)
band_energies = np.zeros(bmax - bmin + 1)
for i, b in enumerate(range(bmin, bmax + 1)):
band_center = b + 0.5
distances = np.abs(bark - band_center)
weights = np.maximum(0.0, 1.0 - distances)
band_energies[i] = np.sum(power * weights)
active_bands = int(np.sum(band_energies > 0))
total_bands = len(band_energies)
occupancy = active_bands / total_bands if total_bands > 0 else 0.0
total_energy = float(np.sum(band_energies))
if total_energy > 0:
band_fractions = band_energies[band_energies > 0] / total_energy
entropy = -np.sum(band_fractions * np.log2(band_fractions + 1e-10))
max_entropy = np.log2(len(band_fractions)) if len(band_fractions) > 0 else 1.0
uniformity = entropy / max_entropy if max_entropy > 0 else 0.0
else:
uniformity = 0.0
# Unconstrained design choice: occupancy/uniformity blend has no direct literature fit yet.
# Weights (0.6, 0.4) should be treated as sensitivity-analysis candidates.
density = 0.6 * occupancy + 0.4 * uniformity
return float(np.clip(density, 0.0, 1.0))
def physical_spectral_density(
amplitudes: np.ndarray,
frequencies: np.ndarray,
bin_width_hz: Optional[float] = None
) -> float:
"""
Module-level wrapper for physical_spectral_density.
Keeps backward compatibility with imports like:
`from density import physical_spectral_density`.
"""
return SpectralDensityMetrics.physical_spectral_density(
amplitudes=amplitudes,
frequencies=frequencies,
bin_width_hz=bin_width_hz
)
def perceptual_spectral_density(
amplitudes: np.ndarray,
frequencies: np.ndarray,
) -> float:
"""Module-level wrapper for perceptual_spectral_density (FIX 2)."""
return SpectralDensityMetrics.perceptual_spectral_density(
amplitudes=amplitudes,
frequencies=frequencies,
)
def calculate_harmonic_density(
harmonic_amplitudes: np.ndarray,
threshold_db: float = -60.0,
fundamental_freq: float | None = None,
sr: float | None = None,
include_amp_factor: bool = True,
amp_weight: float = 0.20,
max_expected_harmonics: int | None = None
) -> float:
if harmonic_amplitudes.size == 0:
return 0.0
# 1) máximo teórico dependente de f0
if max_expected_harmonics is None and fundamental_freq and fundamental_freq > 0:
nyq = (sr/2.0) if sr else 20000.0
max_expected_harmonics = max(1, int(nyq // fundamental_freq))
max_expected_harmonics = max_expected_harmonics or 50 # fallback
# 2) contagem acima do threshold (em dB)
amps_db = 20*np.log10(np.maximum(harmonic_amplitudes, 1e-12))
significant = amps_db > threshold_db
density_count = significant.sum() / max_expected_harmonics
# 3) fator de amplitude (opcional e fraco)
if include_amp_factor:
avg_amp = np.mean(harmonic_amplitudes[significant]) if significant.any() else 0.0
amp_factor = np.tanh(avg_amp)
density = (1.0-amp_weight)*density_count + amp_weight*amp_factor
else:
density = density_count
return float(np.clip(density, 0.0, 1.0))
def calculate_inharmonic_density(
inharmonic_amplitudes: np.ndarray,
threshold_db: float = -60.0,
max_expected_partials: int = 50 # CORRECTED: Parameterized
) -> float:
"""
Same as harmonic density, but for inharmonic components.
"""
return calculate_harmonic_density(inharmonic_amplitudes, threshold_db=threshold_db, max_expected_harmonics=max_expected_partials)
def compute_spectral_entropy(power: np.ndarray) -> float:
"""
Calcula a entropia espectral normalizada (Shannon entropy).
Args:
power: vetor de potências espectrais (amplitude^2 ou magnitude em dB convertido)
Returns:
Entropia espectral normalizada (0 = máximo foco, 1 = máxima dispersão)
"""
if len(power) == 0:
logger.warning("Array de potências vazio para entropia")
return 0.0
# Garantir que temos potências (valores positivos)
power = np.abs(power)
# Remover zeros e valores muito pequenos
power = power[power > 1e-12]
if len(power) == 0:
logger.warning("Todas as potências são zero ou muito pequenas")
return 0.0
# Calcular soma total
total_power = np.sum(power)
if total_power <= 0:
logger.warning("Potência total <= 0")
return 0.0
# Normalizar para distribuição de probabilidade
p = power / total_power
# Calcular entropia de Shannon
entropy = -np.sum(p * np.log2(p))
# Normalizar pela entropia máxima (distribuição uniforme)
max_entropy = np.log2(len(power))
if max_entropy > 0:
normalized_entropy = entropy / max_entropy
else:
normalized_entropy = 0.0
# Garantir intervalo [0, 1]
normalized_entropy = np.clip(normalized_entropy, 0.0, 1.0)
logger.debug(f"Entropia espectral: {normalized_entropy:.4f} (entropia: {entropy:.4f}, max: {max_entropy:.4f})")
return normalized_entropy
def _critical_band_masking(
masker_freq: float,
masker_level_db: float,
probe_freq: float,
probe_level_db: float
) -> float:
"""
Calculate masking threshold using Parncutt (1989) model.
Critical-band analysis - masking model.
Args:
masker_freq: Frequency of masking tone (Hz)
masker_level_db: Level of masking tone (dB)
probe_freq: Frequency of probe tone (Hz)
probe_level_db: Level of probe tone (dB)
Returns:
Masking threshold in dB (probe is masked if probe_level < threshold)
"""
# Convert to Bark scale (using existing function)
masker_bark = _hz_to_bark(np.array([masker_freq]))[0]
probe_bark = _hz_to_bark(np.array([probe_freq]))[0]
# Bark distance
bark_distance = abs(probe_bark - masker_bark)
# Parncutt (1989) masking model
# Threshold increases with masker level and decreases with distance
# PHASE 3: Use constants instead of magic numbers
if bark_distance < CRITICAL_BAND_MASKING_STRONG_THRESHOLD:
# Within same critical band: strong masking
threshold_db = masker_level_db + MASKING_WITHIN_BAND_OFFSET_DB
elif bark_distance < CRITICAL_BAND_MASKING_MODERATE_THRESHOLD:
# Adjacent critical band: moderate masking
threshold_db = masker_level_db + MASKING_ADJACENT_BAND_OFFSET_DB + MASKING_ADJACENT_BAND_SLOPE_DB * bark_distance
elif bark_distance < CRITICAL_BAND_MASKING_WEAK_THRESHOLD:
# Nearby critical bands: weak masking
threshold_db = masker_level_db + MASKING_NEARBY_BAND_OFFSET_DB + MASKING_NEARBY_BAND_SLOPE_DB * bark_distance
else:
# Far critical bands: minimal masking
threshold_db = masker_level_db + MASKING_FAR_BAND_OFFSET_DB + MASKING_FAR_BAND_SLOPE_DB * bark_distance
# Ensure threshold is reasonable (not below absolute threshold)
threshold_db = max(threshold_db, MASKING_ABSOLUTE_THRESHOLD_DB)
return threshold_db
def estimate_noise_floor_by_critical_bands(
frequencies_hz: np.ndarray,
magnitudes_db: np.ndarray,
noise_floor_percentile: float = 5.0,
noise_floor_multiplier: float = 1.5,
noise_floor_margin_db: Optional[float] = None,
) -> np.ndarray:
"""
Estimate noise floor per frequency band using percentile method.
PHASE 5: Physical-Acoustic Model - Uses Hz-based frequency bands (not Bark)
This function estimates noise floor separately for each frequency band (Hz scale),
providing more accurate noise estimation that accounts for frequency-dependent
characteristics of noise and signal.
CHANGED: Now uses physical frequency bands (Hz) instead of perceptual critical bands (Bark)
to maintain physical-acoustic consistency.
Args:
frequencies_hz: Array of frequencies in Hz
magnitudes_db: Array of magnitudes in dB
noise_floor_percentile: Percentile to use for noise floor estimation (default 5.0)
noise_floor_multiplier: Linear-domain factor applied to the noise floor.
FIX 3 — interpreted as a *linear amplitude factor* and converted to
an additive dB margin via ``20 * log10(multiplier)`` so the threshold
actually moves up, not down. (Multiplying a negative dB value by 1.5
yields a *lower*, more permissive threshold, which was the bug.)
noise_floor_margin_db: Optional explicit additive margin in dB. When
provided it takes precedence over ``noise_floor_multiplier``.
Returns:
Array of noise floor thresholds in dB (one per frequency)
References:
- Moore, B. C. J. (2012). An Introduction to the Psychology of Hearing (6th ed.)
- Zwicker, E., & Fastl, H. (1999). Psychoacoustics: Facts and Models (2nd ed.)
"""
if frequencies_hz.size == 0 or magnitudes_db.size == 0:
return np.array([])
# PHYSICAL MODEL: Use frequency bands in Hz (not Bark)
# Define frequency bands based on physical frequency ranges
# These bands cover the audible spectrum (20-20000 Hz) with logarithmic spacing
frequency_bands_hz = [
(20.0, 200.0), # Sub-bass / Bass
(200.0, FREQ_MID_LOW_HZ), # Low-mid
(FREQ_MID_LOW_HZ, FREQ_MID_HIGH_HZ), # Mid-high
(FREQ_MID_HIGH_HZ, 20000.0) # High
]
# Allocate frequencies to bands
band_indices = np.zeros(len(frequencies_hz), dtype=int)
for i, (f_low, f_high) in enumerate(frequency_bands_hz):
mask = (frequencies_hz >= f_low) & (frequencies_hz < f_high)
band_indices[mask] = i
# Handle frequencies above highest band
band_indices[frequencies_hz >= frequency_bands_hz[-1][1]] = len(frequency_bands_hz) - 1
# Estimate noise floor per frequency band
num_bands = len(frequency_bands_hz)
noise_floors_per_band = np.full(num_bands, MASKING_ABSOLUTE_THRESHOLD_DB)
# FIX 3 — additive dB margin instead of dB * multiplier.
# If `noise_floor_margin_db` is given, use it directly. Otherwise convert
# `noise_floor_multiplier` (interpreted as a linear amplitude factor) to dB.
if noise_floor_margin_db is not None and np.isfinite(noise_floor_margin_db):
margin_db = float(noise_floor_margin_db)
else:
try:
mult = float(noise_floor_multiplier)
except (TypeError, ValueError):
mult = 1.5
if not np.isfinite(mult) or mult <= 0.0:
mult = 1.5
margin_db = 20.0 * float(np.log10(mult))
for band_idx in range(num_bands):
band_mask = (band_indices == band_idx)
if np.sum(band_mask) > 0:
band_magnitudes = magnitudes_db[band_mask]
band_noise_floor_db = float(np.percentile(band_magnitudes, noise_floor_percentile))
noise_floors_per_band[band_idx] = max(
band_noise_floor_db + margin_db,
MASKING_ABSOLUTE_THRESHOLD_DB,
)
# Map noise floor back to each frequency based on its frequency band
# SMOOTHING: Add smooth transitions at band boundaries to avoid discontinuities
# This prevents systematic peaks at boundaries (200 Hz, 1000 Hz, mid-high boundary)
noise_floors = noise_floors_per_band[band_indices].astype(float)
# Add smooth transitions at boundaries (±20% of band width, minimum 50 Hz)
# This prevents abrupt changes that cause systematic peaks
# Increased from 10% to 20% to better handle notes near boundaries (e.g., A#3 at 233 Hz near 200 Hz boundary)
transition_width_factor = 0.2 # 20% of band width for transition
min_transition_width = 50.0 # Minimum 50 Hz transition to ensure smoothness
for i in range(len(frequency_bands_hz) - 1):
boundary = frequency_bands_hz[i][1] # Upper boundary of band i
band_width = boundary - frequency_bands_hz[i][0]
transition_width = max(band_width * transition_width_factor, min_transition_width)
# Find frequencies near boundary
near_boundary_mask = (frequencies_hz >= boundary - transition_width) & (frequencies_hz <= boundary + transition_width)
if np.sum(near_boundary_mask) > 0:
# Interpolate between adjacent band noise floors
noise_floor_low = noise_floors_per_band[i]
noise_floor_high = noise_floors_per_band[i + 1]
# Linear interpolation based on distance from boundary
distances = frequencies_hz[near_boundary_mask] - boundary
normalized_distances = distances / transition_width # -1 to +1
weights = 0.5 * (1.0 - normalized_distances) # 1.0 at boundary-transition_width, 0.0 at boundary+transition_width
# Smooth interpolation
noise_floors[near_boundary_mask] = (
weights * noise_floor_low + (1.0 - weights) * noise_floor_high
)
return noise_floors
def apply_spectral_masking_filter(
frequencies_hz: np.ndarray,
magnitudes_db: np.ndarray,
amplitudes: np.ndarray,
mask_components: bool = True
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Apply spectral masking filter to remove masked components.
Optional advanced psychoacoustic module: not part of the physical effective-density /
fatness pipeline (masking estimates perceptual audibility, not spectral component richness).
PHASE 4: Enhanced Spectral Masking - Parncutt (1989) Model
This function applies psychoacoustic masking to identify and optionally remove
components that are masked by stronger components in nearby critical bands.
Args:
frequencies_hz: Array of frequencies in Hz
magnitudes_db: Array of magnitudes in dB
amplitudes: Array of amplitudes (linear)
mask_components: If True, remove masked components; if False, only mark them
Returns:
Tuple of (filtered_frequencies, filtered_magnitudes_db, filtered_amplitudes, mask)
where mask indicates which components are audible (not masked)
References:
- Parncutt, R. (1989). Harmony: A Psychoacoustical Approach.
"""
if frequencies_hz.size == 0:
return np.array([]), np.array([]), np.array([]), np.array([], dtype=bool)
n_components = len(frequencies_hz)
is_audible = np.ones(n_components, dtype=bool)
# Sort by magnitude (descending) to process strongest components first
sort_indices = np.argsort(magnitudes_db)[::-1]
# For each component, check if it's masked by stronger components
for i, idx in enumerate(sort_indices):
probe_freq = frequencies_hz[idx]
probe_level_db = magnitudes_db[idx]
# Check masking by all stronger components (already processed)
is_masked = False
for j in range(i): # Only check components processed before (stronger)
masker_idx = sort_indices[j]
if not is_audible[masker_idx]: # Skip if masker was already masked
continue
masker_freq = frequencies_hz[masker_idx]
masker_level_db = magnitudes_db[masker_idx]
# Calculate masking threshold
masking_threshold = _critical_band_masking(
masker_freq, masker_level_db,
probe_freq, probe_level_db
)
# If probe level is below masking threshold, it's masked
if probe_level_db < masking_threshold:
is_masked = True
break
# Mark as masked if below threshold
if is_masked:
is_audible[idx] = False
# Apply filter if requested
if mask_components:
filtered_freqs = frequencies_hz[is_audible]
filtered_mags_db = magnitudes_db[is_audible]
filtered_amps = amplitudes[is_audible]
else:
filtered_freqs = frequencies_hz
filtered_mags_db = magnitudes_db
filtered_amps = amplitudes
return filtered_freqs, filtered_mags_db, filtered_amps, is_audible
def validate_spectral_density_metric(
calculated_value: float,
frequencies_hz: np.ndarray,
amplitudes: np.ndarray,
expected_range: Optional[Tuple[float, float]] = None,
reference_value: Optional[float] = None,
tolerance: float = 0.2
) -> Dict[str, Union[bool, float, str]]:
"""
Validate Spectral Density Metric against ground truth or expected values.
PHASE 4: Ground Truth Validation
This function validates the calculated Spectral Density Metric against:
1. Expected range (if provided)
2. Reference value (if provided, e.g., from known test signals)
3. Physical constraints (energy conservation, positive values)
Args:
calculated_value: Calculated Spectral Density Metric value
frequencies_hz: Array of frequencies used in calculation
amplitudes: Array of amplitudes used in calculation
expected_range: Optional tuple (min, max) for expected range
reference_value: Optional reference value for comparison
tolerance: Tolerance for comparison with reference (default 20%)
Returns:
Dictionary with validation results:
- 'is_valid': bool - Overall validation status
- 'errors': list - List of error messages
- 'warnings': list - List of warning messages
- 'comparison_with_reference': dict - Comparison with reference if provided
- 'physical_checks': dict - Physical constraint checks
References:
- Parseval's theorem for energy conservation
- Psychoacoustic limits for spectral density
"""
errors = []
warnings = []
physical_checks = {}
comparison = {}
# 1. Physical constraint checks
# Value should be positive (sum of powers)
if calculated_value < 0:
errors.append(f"Spectral Density Metric is negative: {calculated_value}")
physical_checks['positive'] = False
else:
physical_checks['positive'] = True
# Value should be finite
if not np.isfinite(calculated_value):
errors.append(f"Spectral Density Metric is not finite: {calculated_value}")
physical_checks['finite'] = False
else:
physical_checks['finite'] = True
# Energy conservation check: metric should be related to total spectral energy
if frequencies_hz.size > 0 and amplitudes.size > 0:
total_energy = np.sum(amplitudes ** 2)
if total_energy > 0:
# Spectral Density Metric should be proportional to total energy
# (exact relationship depends on weight function, but should be correlated)
energy_ratio = calculated_value / total_energy
if energy_ratio > 10.0: # Unusually high ratio
warnings.append(
f"Spectral Density Metric / Total Energy ratio is high: {energy_ratio:.2f}"
)
physical_checks['energy_ratio'] = energy_ratio
else:
if calculated_value > 0:
errors.append(
"Spectral Density Metric > 0 but total energy = 0 (inconsistent)"
)
# 2. Expected range check
if expected_range is not None:
min_val, max_val = expected_range
if calculated_value < min_val or calculated_value > max_val:
errors.append(
f"Spectral Density Metric ({calculated_value:.2f}) outside expected range "
f"[{min_val:.2f}, {max_val:.2f}]"
)
physical_checks['in_range'] = False
else:
physical_checks['in_range'] = True
# 3. Reference value comparison
if reference_value is not None:
if reference_value > 0:
relative_error = abs(calculated_value - reference_value) / reference_value
comparison['relative_error'] = relative_error
comparison['absolute_error'] = abs(calculated_value - reference_value)
if relative_error > tolerance:
errors.append(
f"Spectral Density Metric ({calculated_value:.2f}) differs from reference "
f"({reference_value:.2f}) by {relative_error*100:.1f}% (tolerance: {tolerance*100:.1f}%)"
)
comparison['within_tolerance'] = False
else:
comparison['within_tolerance'] = True
else:
warnings.append("Reference value is zero or negative, skipping comparison")
# Overall validation status
is_valid = len(errors) == 0
return {
'is_valid': is_valid,
'errors': errors,
'warnings': warnings,
'comparison_with_reference': comparison,
'physical_checks': physical_checks,
'calculated_value': calculated_value
}
def calculate_perceptual_spectral_density(
harmonic_amplitudes: np.ndarray,
harmonic_frequencies: np.ndarray,
fundamental_freq: float,
threshold_db: float = -60.0,
frequency_limit: float = 20000.0
) -> float:
"""
Calcula a densidade espectral perceptual baseada em princípios psicoacústicos.
PHASE 2: Enhanced with 24 Critical Bands and Masking Model
Esta métrica considera:
1. Número de harmônicos audíveis presentes vs. possíveis
2. Distribuição de energia ao longo do espectro
3. Ponderação perceptual usando 24 bandas críticas (Moore, 2012)