Skip to content
6 changes: 6 additions & 0 deletions src/practicelens/api/contracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ class InputSuitabilitySummaryPayload(TypedDict):
duration_ratio: float
duration_diagnostic: str
duration_diagnostic_message: str | None
reference_activity_start_s: float | None
take_activity_start_s: float | None
start_offset_s: float | None
leading_noise_duration_s: float
start_diagnostic: str
start_diagnostic_message: str | None
alignment_coverage: float
voiced_frame_coverage: float
reference_voiced_frame_coverage: float
Expand Down
136 changes: 136 additions & 0 deletions src/practicelens/diagnostics/input_suitability.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,22 @@
_VOICED_WARNING_MIN = 0.35
_VOICED_LOW_MIN = 0.15
_ONSET_PRESENT_MIN = 2
# A take whose activity starts at least this many seconds after the reference's activity start
# is classified as "take_activity_starts_late".
_START_OFFSET_WARNING_S = 0.35
# Leading noise lasting at least this many seconds before the take's activity start is classified
# as "take_leading_noise_before_activity".
_LEADING_NOISE_WARNING_S = 0.25
# Fraction of a peak energy value that a frame's energy must reach to count as energy activity
# (or as leading noise when the frame precedes the detected activity start).
_ENERGY_ACTIVITY_RELATIVE_THRESHOLD = 0.20
_SCORE_DIGITS = 6
_DURATION_WARNING_MESSAGE = (
    "Take duration differs substantially from the reference. Possible causes include extra silence, "
    "a restart, a missing section, or unrelated material."
)
# Message returned for the "take_activity_starts_late" start diagnostic.
_START_DELAY_MESSAGE = (
    "The take start may be delayed relative to the reference. This may indicate a weak or missing first note, "
    "late playing, or leading silence before the musical activity."
)
# Message returned for the "take_leading_noise_before_activity" start diagnostic.
_LEADING_NOISE_MESSAGE = (
    "The take may contain leading noise before the first clear musical activity. Possible causes include handling noise, "
    "breath noise, pickup noise, or room noise before the performance starts."
)


def summarize_input_suitability(
Expand All @@ -33,6 +44,12 @@ def summarize_input_suitability(
duration_ratio = _duration_ratio(reference_duration_s, take_duration_s)
duration_diagnostic = _duration_diagnostic(reference_duration_s, take_duration_s, duration_ratio)
duration_diagnostic_message = _duration_diagnostic_message(duration_diagnostic)
reference_activity_start_s = _activity_start_s(reference)
take_activity_start_s = _activity_start_s(take)
start_offset_s = _start_offset_s(reference_activity_start_s, take_activity_start_s)
leading_noise_duration_s = _leading_noise_duration_s(take, take_activity_start_s)
start_diagnostic = _start_diagnostic(start_offset_s, leading_noise_duration_s)
start_diagnostic_message = _start_diagnostic_message(start_diagnostic)
alignment_coverage = _round_ratio(alignment.coverage_ratio)
reference_voiced_coverage = _round_ratio(_voiced_ratio(reference))
take_voiced_coverage = _round_ratio(_voiced_ratio(take))
Expand All @@ -56,6 +73,14 @@ def summarize_input_suitability(
if duration_ratio < _DURATION_RATIO_LOW_MIN or duration_ratio > _DURATION_RATIO_LOW_MAX:
low_confidence = True

if start_diagnostic == "start_region_unavailable":
reasons.append("Start-region activity evidence is unavailable.")
elif start_diagnostic == "start_region_ok":
reasons.append("Start-region activity appears aligned enough for review.")
else:
reasons.append(start_diagnostic_message or "Start-region activity may not align cleanly.")
risk_points += 1

if alignment_coverage >= _ALIGNMENT_WARNING_MIN:
reasons.append("Alignment coverage is broad.")
else:
Expand Down Expand Up @@ -89,6 +114,12 @@ def summarize_input_suitability(
duration_ratio=duration_ratio,
duration_diagnostic=duration_diagnostic,
duration_diagnostic_message=duration_diagnostic_message,
reference_activity_start_s=_seconds_or_none(reference_activity_start_s),
take_activity_start_s=_seconds_or_none(take_activity_start_s),
start_offset_s=_seconds_or_none(start_offset_s),
leading_noise_duration_s=Seconds(_round_ratio(leading_noise_duration_s)),
start_diagnostic=start_diagnostic,
start_diagnostic_message=start_diagnostic_message,
alignment_coverage=alignment_coverage,
voiced_frame_coverage=voiced_frame_coverage,
reference_voiced_frame_coverage=reference_voiced_coverage,
Expand Down Expand Up @@ -130,6 +161,111 @@ def _duration_diagnostic_message(duration_diagnostic: str) -> str | None:
return None


def _activity_start_s(bundle: FeatureBundle) -> float | None:
    """Return the time at which activity first appears in *bundle*.

    The earlier of the first voiced frame time and the first onset time is
    used when at least one of them is available. Only when neither exists does
    the result fall back to the first energy-based activity time, which may
    itself be ``None``.
    """
    first_voiced = _first_voiced_time_s(bundle)
    first_onset = _first_onset_time_s(bundle)

    if first_voiced is None and first_onset is None:
        # No pitch or onset evidence at all: use the energy curve instead.
        return _first_energy_activity_time_s(bundle)
    if first_voiced is None:
        return first_onset
    if first_onset is None:
        return first_voiced
    return min(first_voiced, first_onset)


def _first_voiced_time_s(bundle: FeatureBundle) -> float | None:
for time_s, voiced in zip(bundle.time_axis_s, bundle.voiced_mask, strict=False):
if voiced:
return float(time_s)
return None


def _first_onset_time_s(bundle: FeatureBundle) -> float | None:
if not bundle.onset_times_s:
return None
return float(bundle.onset_times_s[0])


def _first_energy_activity_time_s(bundle: FeatureBundle) -> float | None:
if not bundle.time_axis_s or not bundle.energy_curve:
return None
peak_energy = max(bundle.energy_curve)
if peak_energy <= 0.0:
return None
threshold = peak_energy * _ENERGY_ACTIVITY_RELATIVE_THRESHOLD
for time_s, energy in zip(bundle.time_axis_s, bundle.energy_curve, strict=False):
if energy >= threshold:
return float(time_s)
return None


def _start_offset_s(reference_activity_start_s: float | None, take_activity_start_s: float | None) -> float | None:
if reference_activity_start_s is None or take_activity_start_s is None:
return None
return _round_ratio(take_activity_start_s - reference_activity_start_s)


def _leading_noise_duration_s(bundle: FeatureBundle, take_activity_start_s: float | None) -> float:
if take_activity_start_s is None or take_activity_start_s <= 0.0:
return 0.0
if not bundle.time_axis_s or not bundle.energy_curve:
return 0.0

activity_index = _first_index_at_or_after(bundle.time_axis_s, take_activity_start_s)
if activity_index <= 0:
return 0.0

pre_activity_energy = bundle.energy_curve[:activity_index]
post_activity_energy = bundle.energy_curve[activity_index:]
if not pre_activity_energy or not post_activity_energy:
return 0.0

max_post_activity_energy = max(post_activity_energy)
if max_post_activity_energy <= 0.0:
return 0.0

noise_threshold = max_post_activity_energy * _ENERGY_ACTIVITY_RELATIVE_THRESHOLD
noisy_times = [
time_s
for time_s, energy in zip(
bundle.time_axis_s[:activity_index],
pre_activity_energy,
strict=False,
)
if energy >= noise_threshold
]
if not noisy_times:
return 0.0
return _round_ratio(max(0.0, take_activity_start_s - noisy_times[0]))


def _first_index_at_or_after(time_axis_s: tuple[float, ...], start_s: float) -> int:
    """Return the index of the first time value that is >= *start_s*.

    The axis is scanned from the front. When no value qualifies (including an
    empty axis), ``len(time_axis_s)`` is returned.
    """
    return next(
        (position for position, frame_time_s in enumerate(time_axis_s) if frame_time_s >= start_s),
        len(time_axis_s),
    )


def _start_diagnostic(start_offset_s: float | None, leading_noise_duration_s: float) -> str:
if start_offset_s is None:
return "start_region_unavailable"
if leading_noise_duration_s >= _LEADING_NOISE_WARNING_S:
return "take_leading_noise_before_activity"
if start_offset_s >= _START_OFFSET_WARNING_S:
return "take_activity_starts_late"
return "start_region_ok"


def _start_diagnostic_message(start_diagnostic: str) -> str | None:
if start_diagnostic == "take_activity_starts_late":
return _START_DELAY_MESSAGE
if start_diagnostic == "take_leading_noise_before_activity":
return _LEADING_NOISE_MESSAGE
return None


def _seconds_or_none(value: float | None) -> Seconds | None:
if value is None:
return None
return Seconds(_round_ratio(value))


def _voiced_ratio(bundle: FeatureBundle) -> float:
if not bundle.voiced_mask:
return 0.0
Expand Down
6 changes: 6 additions & 0 deletions src/practicelens/domain/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ class InputSuitabilitySummary:
duration_ratio: float = 0.0
duration_diagnostic: str = "duration_ratio_unavailable"
duration_diagnostic_message: str | None = None
reference_activity_start_s: Seconds | None = None
take_activity_start_s: Seconds | None = None
start_offset_s: Seconds | None = None
leading_noise_duration_s: Seconds = Seconds(0.0)
start_diagnostic: str = "start_region_unavailable"
start_diagnostic_message: str | None = None
alignment_coverage: float = 0.0
voiced_frame_coverage: float = 0.0
reference_voiced_frame_coverage: float = 0.0
Expand Down
6 changes: 6 additions & 0 deletions src/practicelens/reporting/input_suitability_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ def input_suitability_to_payload(summary: InputSuitabilitySummary) -> dict[str,
"duration_ratio": summary.duration_ratio,
"duration_diagnostic": summary.duration_diagnostic,
"duration_diagnostic_message": summary.duration_diagnostic_message,
"reference_activity_start_s": summary.reference_activity_start_s,
"take_activity_start_s": summary.take_activity_start_s,
"start_offset_s": summary.start_offset_s,
"leading_noise_duration_s": summary.leading_noise_duration_s,
"start_diagnostic": summary.start_diagnostic,
"start_diagnostic_message": summary.start_diagnostic_message,
"alignment_coverage": summary.alignment_coverage,
"voiced_frame_coverage": summary.voiced_frame_coverage,
"reference_voiced_frame_coverage": summary.reference_voiced_frame_coverage,
Expand Down
Loading
Loading