diff --git a/DELIVERIES.md b/DELIVERIES.md index b3f3efe..4d277c5 100644 --- a/DELIVERIES.md +++ b/DELIVERIES.md @@ -7,7 +7,7 @@ clip counts, prosody QA results, known limitations, and the SynthBanshee commit |---|------|------|---------|------|------:|------:|------------|-------------------|--------|-----| | 001 | 2026-04-15 | [debug-run-1](deliveries/001-debug-run-1/notes.md) | she_proves | A | 1 | 2m 36s | IT | v1 baseline (pre-V3) | superseded | [#1](https://github.com/DataHackIL/avdp-synth-corpus/pull/1) | | 002 | 2026-04-15 | [m2a-wettest](deliveries/002-m2a-wettest/notes.md) | she_proves | A | 8 | ~17m | SV, IT, NEG, NEU | M2a SSML prosody | superseded | [#2](https://github.com/DataHackIL/avdp-synth-corpus/pull/2) | -| 003 | 2026-05-12 | [multi-project-multi-voice](deliveries/003-multi-project-multi-voice/notes.md) | she_proves + elephant | A + B | 20 | ~42m | SV, IT, NEG, NEU | M2a + post-#78 / #102 / #103 / #105 / #106 | provisional | [#TBD](https://github.com/DataHackIL/avdp-synth-corpus/pulls) | +| 003 | 2026-05-12 | [multi-project-multi-voice](deliveries/003-multi-project-multi-voice/notes.md) | she_proves + elephant | A + B | 20 | ~42m | SV, IT, NEG, NEU | M2a + post-#78 / #102 / #103 / #105 / #106 / #110 / #111 / #112 | provisional | [#TBD](https://github.com/DataHackIL/avdp-synth-corpus/pulls) | ## Status definitions diff --git a/assets/speech/00e74ff34ced839475db570dfdecb74ae25f7ba6798c6e69e6410a4fbc1c0ef2.wav b/assets/speech/00e74ff34ced839475db570dfdecb74ae25f7ba6798c6e69e6410a4fbc1c0ef2.wav new file mode 100644 index 0000000..911df8b Binary files /dev/null and b/assets/speech/00e74ff34ced839475db570dfdecb74ae25f7ba6798c6e69e6410a4fbc1c0ef2.wav differ diff --git a/assets/speech/06d5e33263cd9726600ab842200d35367e74c42b47f035538a9cb6c0ce312f50.wav b/assets/speech/06d5e33263cd9726600ab842200d35367e74c42b47f035538a9cb6c0ce312f50.wav new file mode 100644 index 0000000..15b2fde Binary files /dev/null and 
b/assets/speech/06d5e33263cd9726600ab842200d35367e74c42b47f035538a9cb6c0ce312f50.wav differ diff --git a/assets/speech/398e8f9a789fe69c56de0d76f9294acf8067e631097ff33858445a988e1fc359.wav b/assets/speech/398e8f9a789fe69c56de0d76f9294acf8067e631097ff33858445a988e1fc359.wav new file mode 100644 index 0000000..5f1cda7 Binary files /dev/null and b/assets/speech/398e8f9a789fe69c56de0d76f9294acf8067e631097ff33858445a988e1fc359.wav differ diff --git a/assets/speech/39f858ffbaf7686d6088e099db1f9943b04f1620bc43a043cdbfe93be0862db9.wav b/assets/speech/39f858ffbaf7686d6088e099db1f9943b04f1620bc43a043cdbfe93be0862db9.wav new file mode 100644 index 0000000..50e85b3 Binary files /dev/null and b/assets/speech/39f858ffbaf7686d6088e099db1f9943b04f1620bc43a043cdbfe93be0862db9.wav differ diff --git a/assets/speech/b42c0a4eb3097aa278ea42c34b26bd93e011dfa2c88b10d239ead5df00b7ddee.wav b/assets/speech/b42c0a4eb3097aa278ea42c34b26bd93e011dfa2c88b10d239ead5df00b7ddee.wav new file mode 100644 index 0000000..a5d121b Binary files /dev/null and b/assets/speech/b42c0a4eb3097aa278ea42c34b26bd93e011dfa2c88b10d239ead5df00b7ddee.wav differ diff --git a/assets/speech/ff7d4199ba62ad009c4b6d6154d6951b64865eafe3084ea5204e17c608f3cb40.wav b/assets/speech/ff7d4199ba62ad009c4b6d6154d6951b64865eafe3084ea5204e17c608f3cb40.wav new file mode 100644 index 0000000..b3cc488 Binary files /dev/null and b/assets/speech/ff7d4199ba62ad009c4b6d6154d6951b64865eafe3084ea5204e17c608f3cb40.wav differ diff --git a/data/he/agg_m_30-45_001/sp_it_a_0001_00.json b/data/he/agg_m_30-45_001/sp_it_a_0001_00.json index 0a2a5e7..0afc2e5 100644 --- a/data/he/agg_m_30-45_001/sp_it_a_0001_00.json +++ b/data/he/agg_m_30-45_001/sp_it_a_0001_00.json @@ -222,7 +222,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_it_a_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "IT", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_001/sp_it_a_0002_00.json 
b/data/he/agg_m_30-45_001/sp_it_a_0002_00.json index f164a1d..5a889c5 100644 --- a/data/he/agg_m_30-45_001/sp_it_a_0002_00.json +++ b/data/he/agg_m_30-45_001/sp_it_a_0002_00.json @@ -194,7 +194,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_it_a_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "IT", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_001/sp_neg_a_0001_00.json b/data/he/agg_m_30-45_001/sp_neg_a_0001_00.json index af076d2..3206e95 100644 --- a/data/he/agg_m_30-45_001/sp_neg_a_0001_00.json +++ b/data/he/agg_m_30-45_001/sp_neg_a_0001_00.json @@ -131,7 +131,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_neg_a_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEG", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_neg_a_0002_00.json b/data/he/agg_m_30-45_001/sp_neg_a_0002_00.json index 2bb5b8c..12dbdff 100644 --- a/data/he/agg_m_30-45_001/sp_neg_a_0002_00.json +++ b/data/he/agg_m_30-45_001/sp_neg_a_0002_00.json @@ -138,7 +138,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_neg_a_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEG", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_neg_a_0003_00.json b/data/he/agg_m_30-45_001/sp_neg_a_0003_00.json index 0440ab8..ff83b2f 100644 --- a/data/he/agg_m_30-45_001/sp_neg_a_0003_00.json +++ b/data/he/agg_m_30-45_001/sp_neg_a_0003_00.json @@ -131,7 +131,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_neg_a_0003_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEG", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_neu_a_0001_00.json b/data/he/agg_m_30-45_001/sp_neu_a_0001_00.json index 7cd88f8..e66013b 100644 --- a/data/he/agg_m_30-45_001/sp_neu_a_0001_00.json +++ b/data/he/agg_m_30-45_001/sp_neu_a_0001_00.json @@ -129,7 +129,6 @@ ], "tier": "A", "transcript_path": 
"data/he/agg_m_30-45_001/sp_neu_a_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEU", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_neu_a_0002_00.json b/data/he/agg_m_30-45_001/sp_neu_a_0002_00.json index df9ff31..4d444ad 100644 --- a/data/he/agg_m_30-45_001/sp_neu_a_0002_00.json +++ b/data/he/agg_m_30-45_001/sp_neu_a_0002_00.json @@ -136,7 +136,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_neu_a_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEU", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_neu_a_0003_00.json b/data/he/agg_m_30-45_001/sp_neu_a_0003_00.json index a7e7b17..259f75c 100644 --- a/data/he/agg_m_30-45_001/sp_neu_a_0003_00.json +++ b/data/he/agg_m_30-45_001/sp_neu_a_0003_00.json @@ -129,7 +129,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_neu_a_0003_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEU", "weak_label": { "has_violence": false, diff --git a/data/he/agg_m_30-45_001/sp_sv_a_0001_00.json b/data/he/agg_m_30-45_001/sp_sv_a_0001_00.json index 2ab0e3b..81064c3 100644 --- a/data/he/agg_m_30-45_001/sp_sv_a_0001_00.json +++ b/data/he/agg_m_30-45_001/sp_sv_a_0001_00.json @@ -222,7 +222,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_sv_a_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "SV", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_001/sp_sv_a_0002_00.json b/data/he/agg_m_30-45_001/sp_sv_a_0002_00.json index 86bd482..9f9357c 100644 --- a/data/he/agg_m_30-45_001/sp_sv_a_0002_00.json +++ b/data/he/agg_m_30-45_001/sp_sv_a_0002_00.json @@ -187,7 +187,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_001/sp_sv_a_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "SV", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_002/sp_it_a_0003_00.json 
b/data/he/agg_m_30-45_002/sp_it_a_0003_00.json index f28a8f4..108f11a 100644 --- a/data/he/agg_m_30-45_002/sp_it_a_0003_00.json +++ b/data/he/agg_m_30-45_002/sp_it_a_0003_00.json @@ -208,7 +208,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_002/sp_it_a_0003_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "IT", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.json b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.json index 9fa87a0..32e17d2 100644 --- a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.json +++ b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.json @@ -11,7 +11,7 @@ "channels": 1, "clip_id": "sp_sv_a_0003_00", "dirty_file_path": "assets/speech/dirty/sp_sv_a_0003_00_dirty.wav", - "duration_seconds": 102.8018125, + "duration_seconds": 100.6418125, "elephant_meta": null, "generation_date": "2026-05-12", "generation_metadata": { @@ -201,7 +201,6 @@ ], "tier": "A", "transcript_path": "data/he/agg_m_30-45_002/sp_sv_a_0003_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "SV", "weak_label": { "has_violence": true, diff --git a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl index 6c23aac..6582a51 100644 --- a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl +++ b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl @@ -1,14 +1,14 @@ {"event_id": "sp_sv_a_0003_00_EVT_000", "clip_id": "sp_sv_a_0003_00", "onset": 0.944125, "offset": 8.9850625, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 2, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "anger", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} {"event_id": "sp_sv_a_0003_00_EVT_001", "clip_id": "sp_sv_a_0003_00", "onset": 9.341125, "offset": 15.3420625, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 2, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "calm", 
"confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_002", "clip_id": "sp_sv_a_0003_00", "onset": 15.4646875, "offset": 27.145625, "tier1_category": "VERB", "tier2_subtype": "VERB_THREAT", "intensity": 3, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "anger", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_003", "clip_id": "sp_sv_a_0003_00", "onset": 27.4384375, "offset": 35.239375, "tier1_category": "VERB", "tier2_subtype": "VERB_THREAT", "intensity": 3, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_004", "clip_id": "sp_sv_a_0003_00", "onset": 35.3638125, "offset": 47.45475, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_005", "clip_id": "sp_sv_a_0003_00", "onset": 47.560125, "offset": 55.3610625, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_006", "clip_id": "sp_sv_a_0003_00", "onset": 54.8495625, "offset": 68.7205, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "anger", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_007", 
"clip_id": "sp_sv_a_0003_00", "onset": 68.8215625, "offset": 74.4325, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "desperation", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_008", "clip_id": "sp_sv_a_0003_00", "onset": 74.6168125, "offset": 77.82775, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_009", "clip_id": "sp_sv_a_0003_00", "onset": 78.0688125, "offset": 83.97975, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": true, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_010", "clip_id": "sp_sv_a_0003_00", "onset": 83.771875, "offset": 88.0328125, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_011", "clip_id": "sp_sv_a_0003_00", "onset": 88.169375, "offset": 92.5003125, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "grief", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_012", "clip_id": "sp_sv_a_0003_00", "onset": 92.8525, "offset": 98.5334375, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 
2, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "calm", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} -{"event_id": "sp_sv_a_0003_00_EVT_013", "clip_id": "sp_sv_a_0003_00", "onset": 98.980875, "offset": 102.3018125, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 2, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "grief", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_002", "clip_id": "sp_sv_a_0003_00", "onset": 15.4646875, "offset": 27.185625, "tier1_category": "VERB", "tier2_subtype": "VERB_THREAT", "intensity": 3, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "anger", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_003", "clip_id": "sp_sv_a_0003_00", "onset": 27.4784375, "offset": 33.959375, "tier1_category": "VERB", "tier2_subtype": "VERB_THREAT", "intensity": 3, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_004", "clip_id": "sp_sv_a_0003_00", "onset": 34.0838125, "offset": 46.17475, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_005", "clip_id": "sp_sv_a_0003_00", "onset": 46.280125, "offset": 54.1610625, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", 
"iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_006", "clip_id": "sp_sv_a_0003_00", "onset": 53.7695625, "offset": 67.6405, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "anger", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_007", "clip_id": "sp_sv_a_0003_00", "onset": 67.7415625, "offset": 73.3525, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "desperation", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_008", "clip_id": "sp_sv_a_0003_00", "onset": 73.5368125, "offset": 77.18775, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_009", "clip_id": "sp_sv_a_0003_00", "onset": 77.4288125, "offset": 81.81975, "tier1_category": "PHYS", "tier2_subtype": "PHYS_HARD", "intensity": 5, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "fear", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": true, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_010", "clip_id": "sp_sv_a_0003_00", "onset": 81.611875, "offset": 85.8728125, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "contempt", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_011", "clip_id": "sp_sv_a_0003_00", "onset": 
86.009375, "offset": 90.3403125, "tier1_category": "DIST", "tier2_subtype": "DIST_SCREAM", "intensity": 4, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "grief", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_012", "clip_id": "sp_sv_a_0003_00", "onset": 90.6925, "offset": 96.3734375, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 2, "speaker_id": "AGG_M_30-45_002", "speaker_role": "AGG", "emotional_state": "calm", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} +{"event_id": "sp_sv_a_0003_00_EVT_013", "clip_id": "sp_sv_a_0003_00", "onset": 96.820875, "offset": 100.1418125, "tier1_category": "VERB", "tier2_subtype": "VERB_SHOUT", "intensity": 2, "speaker_id": "VIC_F_25-40_003", "speaker_role": "VIC", "emotional_state": "grief", "confidence": 1.0, "label_source": "auto", "iaa_reviewed": false, "truncated": false, "notes": null} diff --git a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.txt b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.txt index 25ef392..7d4c5df 100644 --- a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.txt +++ b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.txt @@ -5,39 +5,39 @@ [SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 9.34 | OFFSET: 15.34] אני גם עבדתי היום. לא הספקתי, יש שאריות במקרר, אפשר לחמם. [ACTION: VERB_SHOUT | INTENSITY: 2] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 15.46 | OFFSET: 27.15] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 15.46 | OFFSET: 27.19] שאריות. תמיד שאריות. אני לא מבין מה את עושה כל היום, באמת. הבית בבלגן, אין אוכל, כלום לא מסודר. [ACTION: VERB_THREAT | INTENSITY: 3] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 27.44 | OFFSET: 35.24] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 27.48 | OFFSET: 33.96] זה לא הוגן. אני מטפלת בילדים, מנקה, עובדת. אתה לא רואה כלום מזה? 
[ACTION: VERB_THREAT | INTENSITY: 3] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 35.36 | OFFSET: 47.45] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 34.08 | OFFSET: 46.17] אל תעלי לי את הטון. אני מזהיר אותך. כל פעם את מתחילה עם הסיפורים האלה, כאילו את הקורבן הגדול. [ACTION: DIST_SCREAM | INTENSITY: 4] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 47.56 | OFFSET: 55.36] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 46.28 | OFFSET: 54.16] אני לא מתחילה כלום, אני רק אומרת שאני עייפה. למה אתה צריך לצעוק על כל דבר? [ACTION: DIST_SCREAM | INTENSITY: 4] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 54.85 | OFFSET: 68.72] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 53.77 | OFFSET: 67.64] עייפה? עייפה?! אני עייף מלשמוע את התירוצים שלך. את חושבת שאני פרייאר? שאני אבוא הביתה ואסתדר לבד? [ACTION: DIST_SCREAM | INTENSITY: 4] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 68.82 | OFFSET: 74.43] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 67.74 | OFFSET: 73.35] תפסיק, אתה מפחיד אותי. בבקשה, בוא נדבר בשקט. [ACTION: PHYS_HARD | INTENSITY: 5] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 74.62 | OFFSET: 77.83] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 73.54 | OFFSET: 77.19] תשתקי! סתמי את הפה כבר! [ACTION: PHYS_HARD | INTENSITY: 5] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 78.07 | OFFSET: 83.98] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 77.43 | OFFSET: 81.82] אאאה! לא! עזוב אותי! [ACTION: PHYS_HARD | INTENSITY: 5] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 83.77 | OFFSET: 88.03] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 81.61 | OFFSET: 85.87] תלמדי לסגור את הפה. שמעת אותי? [ACTION: DIST_SCREAM | INTENSITY: 4] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 88.17 | OFFSET: 92.50] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 86.01 | OFFSET: 90.34] כואב לי... למה אתה עושה את זה... למה... 
[ACTION: DIST_SCREAM | INTENSITY: 4] -[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 92.85 | OFFSET: 98.53] +[SPEAKER: AGG_M_30-45_002 | ROLE: AGG | ONSET: 90.69 | OFFSET: 96.37] אל תעשי מזה עניין. את הבאת את זה על עצמך. אם היית מכינה ארוחה כמו בן אדם, שום דבר לא היה קורה. [ACTION: VERB_SHOUT | INTENSITY: 2] -[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 98.98 | OFFSET: 102.30] +[SPEAKER: VIC_F_25-40_003 | ROLE: VIC | ONSET: 96.82 | OFFSET: 100.14] אני לא יכולה ככה... אני לא יכולה... [ACTION: VERB_SHOUT | INTENSITY: 2] diff --git a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav index 11dfae9..68488b1 100644 Binary files a/data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav and b/data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav differ diff --git a/data/he/ben_m_40-55_003/el_it_b_0001_00.json b/data/he/ben_m_40-55_003/el_it_b_0001_00.json index d0ddd20..0a0013c 100644 --- a/data/he/ben_m_40-55_003/el_it_b_0001_00.json +++ b/data/he/ben_m_40-55_003/el_it_b_0001_00.json @@ -137,7 +137,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_it_b_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "IT", "weak_label": { "has_violence": true, diff --git a/data/he/ben_m_40-55_003/el_it_b_0002_00.json b/data/he/ben_m_40-55_003/el_it_b_0002_00.json index e6cc086..92c4cca 100644 --- a/data/he/ben_m_40-55_003/el_it_b_0002_00.json +++ b/data/he/ben_m_40-55_003/el_it_b_0002_00.json @@ -185,7 +185,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_it_b_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "IT", "weak_label": { "has_violence": true, diff --git a/data/he/ben_m_40-55_003/el_neg_b_0001_00.json b/data/he/ben_m_40-55_003/el_neg_b_0001_00.json index 8ca8fd4..b64a877 100644 --- a/data/he/ben_m_40-55_003/el_neg_b_0001_00.json +++ b/data/he/ben_m_40-55_003/el_neg_b_0001_00.json @@ -95,7 +95,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_neg_b_0001_00.txt", - 
"tts_engine": "azure_he_IL", "violence_typology": "NEG", "weak_label": { "has_violence": false, diff --git a/data/he/ben_m_40-55_003/el_neg_b_0002_00.json b/data/he/ben_m_40-55_003/el_neg_b_0002_00.json index 4530fc8..526969a 100644 --- a/data/he/ben_m_40-55_003/el_neg_b_0002_00.json +++ b/data/he/ben_m_40-55_003/el_neg_b_0002_00.json @@ -103,7 +103,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_neg_b_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEG", "weak_label": { "has_violence": false, diff --git a/data/he/ben_m_40-55_003/el_neu_b_0001_00.json b/data/he/ben_m_40-55_003/el_neu_b_0001_00.json index 8cb5c3f..a40248b 100644 --- a/data/he/ben_m_40-55_003/el_neu_b_0001_00.json +++ b/data/he/ben_m_40-55_003/el_neu_b_0001_00.json @@ -93,7 +93,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_neu_b_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEU", "weak_label": { "has_violence": false, diff --git a/data/he/ben_m_40-55_003/el_neu_b_0002_00.json b/data/he/ben_m_40-55_003/el_neu_b_0002_00.json index fd20c21..6707045 100644 --- a/data/he/ben_m_40-55_003/el_neu_b_0002_00.json +++ b/data/he/ben_m_40-55_003/el_neu_b_0002_00.json @@ -93,7 +93,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_neu_b_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "NEU", "weak_label": { "has_violence": false, diff --git a/data/he/ben_m_40-55_003/el_sv_b_0001_00.json b/data/he/ben_m_40-55_003/el_sv_b_0001_00.json index edcaaae..49b7092 100644 --- a/data/he/ben_m_40-55_003/el_sv_b_0001_00.json +++ b/data/he/ben_m_40-55_003/el_sv_b_0001_00.json @@ -150,7 +150,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_sv_b_0001_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "SV", "weak_label": { "has_violence": true, diff --git a/data/he/ben_m_40-55_003/el_sv_b_0002_00.json b/data/he/ben_m_40-55_003/el_sv_b_0002_00.json index 4bebeb7..e04cea2 100644 --- 
a/data/he/ben_m_40-55_003/el_sv_b_0002_00.json +++ b/data/he/ben_m_40-55_003/el_sv_b_0002_00.json @@ -171,7 +171,6 @@ ], "tier": "B", "transcript_path": "data/he/ben_m_40-55_003/el_sv_b_0002_00.txt", - "tts_engine": "azure_he_IL", "violence_typology": "SV", "weak_label": { "has_violence": true, diff --git a/data/he/manifest.csv b/data/he/manifest.csv index 7655c08..4003ca4 100644 --- a/data/he/manifest.csv +++ b/data/he/manifest.csv @@ -10,7 +10,7 @@ sp_neu_a_0003_00,she_proves,NEU,A,105.072875,AGG_M_30-45_001|VIC_F_25-40_002,he- sp_sv_a_0001_00,she_proves,SV,A,110.4551875,AGG_M_30-45_001|VIC_F_25-40_002,he-IL-AvriNeural|he-IL-HilaNeural,True,5,emotion_downgrade,train,data/he/agg_m_30-45_001/sp_sv_a_0001_00.wav,data/he/agg_m_30-45_001/sp_sv_a_0001_00.jsonl sp_sv_a_0002_00,she_proves,SV,A,92.0704375,AGG_M_30-45_001|VIC_F_25-40_002,he-IL-AvriNeural|he-IL-HilaNeural,True,5,emotion_downgrade,train,data/he/agg_m_30-45_001/sp_sv_a_0002_00.wav,data/he/agg_m_30-45_001/sp_sv_a_0002_00.jsonl sp_it_a_0003_00,she_proves,IT,A,113.8649375,AGG_M_30-45_002|VIC_F_25-40_003,he-IL-Chirp3-HD-Achird|he-IL-Chirp3-HD-Achernar,True,5,emotion_downgrade,train,data/he/agg_m_30-45_002/sp_it_a_0003_00.wav,data/he/agg_m_30-45_002/sp_it_a_0003_00.jsonl -sp_sv_a_0003_00,she_proves,SV,A,102.8018125,AGG_M_30-45_002|VIC_F_25-40_003,he-IL-Chirp3-HD-Achird|he-IL-Chirp3-HD-Achernar,True,5,emotion_downgrade,train,data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav,data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl +sp_sv_a_0003_00,she_proves,SV,A,100.6418125,AGG_M_30-45_002|VIC_F_25-40_003,he-IL-Chirp3-HD-Achird|he-IL-Chirp3-HD-Achernar,True,5,emotion_downgrade,train,data/he/agg_m_30-45_002/sp_sv_a_0003_00.wav,data/he/agg_m_30-45_002/sp_sv_a_0003_00.jsonl el_it_b_0001_00,elephant_in_the_room,IT,B,149.9894375,BEN_M_40-55_003|SW_F_30-45_001,he-IL-AvriNeural|he-IL-HilaNeural,True,5,emotion_downgrade,train,data/he/ben_m_40-55_003/el_it_b_0001_00.wav,data/he/ben_m_40-55_003/el_it_b_0001_00.jsonl 
el_it_b_0002_00,elephant_in_the_room,IT,B,151.612375,BEN_M_40-55_003|SW_F_30-45_001,he-IL-AvriNeural|he-IL-HilaNeural,True,5,emotion_downgrade,train,data/he/ben_m_40-55_003/el_it_b_0002_00.wav,data/he/ben_m_40-55_003/el_it_b_0002_00.jsonl el_neg_b_0001_00,elephant_in_the_room,NEG,B,113.786625,BEN_M_40-55_003|SW_F_30-45_001,he-IL-AvriNeural|he-IL-HilaNeural,False,3,emotion_downgrade,train,data/he/ben_m_40-55_003/el_neg_b_0001_00.wav,data/he/ben_m_40-55_003/el_neg_b_0001_00.jsonl diff --git a/deliveries/003-multi-project-multi-voice/metadata.yaml b/deliveries/003-multi-project-multi-voice/metadata.yaml index e8e7efd..e9850d3 100644 --- a/deliveries/003-multi-project-multi-voice/metadata.yaml +++ b/deliveries/003-multi-project-multi-voice/metadata.yaml @@ -9,17 +9,20 @@ pr: url: "" synthbanshee: - commit: d92d61e # tip of main at delivery time (PRs #102, #103, #105, #106 merged) + commit: 1ea48f3 # tip of main after PRs #110/#111/#112 (delivery-003 schema-shift regen) generator_version: "0.1.0" related_prs: - 102 # fix(cli): normalized_dbfs records the *measured* peak - 103 # docs(spec): corpus-handoff schema drift - 105 # feat(scenes): sp_sv_a_0003 + sp_it_a_0003 (Google-pair shadows) - 106 # fix(tts): #72 — phrase prosody volume must be %, not dB (unblocked this delivery) + - 110 # fix(tests): #107 — isolate SYNTHBANSHEE_* env vars in CliRunner tests + - 111 # fix(cli,manifest): #108 — write repo-relative paths in clip JSON and manifest + - 112 # fix(labels,qa): #109 — drop hardcoded tts_engine, derive single_backend from generation_metadata.tts_backend clips: total: 20 - total_duration_seconds: 2500.79 # ~41.7 min (qa-report) + total_duration_seconds: 2498.63 # ~41.6 min (qa-report; regen 2026-05-12 — 2.2s shorter than initial 2500.79 because sp_sv_a_0003 Google audio bits differ slightly between renders; bit-identical Azure clips unchanged) failed_validation: 0 projects: she_proves: 12 @@ -74,10 +77,14 @@ qa_findings_closed_vs_002: - warn_no_overlap # was on 4 
clips → 0 (overlap_ratio 100% on I4+) - warn_emotion_downgrade # was on 4 clips → 0 (emotion_downgrade_ratio 0%) +qa_findings_closed_post_regen_2026_05_12: + - single_backend # PR #112 — qa.py now derives backend from generation_metadata.tts_backend; reports clips_by_tts_backend = {azure: 18, google: 2} + - absolute_paths_in_clip_json # PR #111 — dirty_file_path / transcript_path now repo-relative + - leaked_pytest_tmp_path # PR #110 — env-var isolation prevents future test leaks into the corpus; the existing leaked tmp_path on sp_neu_a_0001_00 was overwritten by the regen + qa_findings_open: - "low_voice_diversity_male: 2 voice families across the corpus; threshold ≥3 — partial progress (1 → 2)." - "low_voice_diversity_female: 2 voice families across the corpus; threshold ≥3 — partial progress (1 → 2)." - - "single_backend: misleading — actual backends are azure + google; the run-summary counts `clip.tts_engine` which is currently hardcoded to azure_he_IL in cli.py. Tracked as a follow-up synthbanshee labeling bug." - "vic_f0_high (per-clip): sp_it_a_0003_00 and sp_sv_a_0003_00 — Google Chirp HD female F0 baseline runs higher than the Azure Hila reference the M10a thresholds use." - "quality_flagged_clips: 15 — mostly from #87 effective-prosody-cap activations (170 total); expected at I3+." @@ -91,6 +98,19 @@ fixes_vs_002: - "AGG RMS escalation now firing (was flat in delivery 002, fixed post-M3)." - "Inter-turn overlap now present on I4+ clips (zero overlap in delivery 002, fixed post-M8a)." +regen_2026_05_12: + reason: "Schema-shift regen after PRs #110 / #111 / #112 landed in SynthBanshee main." + cost: "$0 — all Azure clips hit the SHA-256 SSML cache; only sp_sv_a_0003 Google audio differed slightly (Google Chirp HD doesn't use the content-hash cache the same way Azure does)." + changes: + - "Removed `tts_engine: azure_he_IL` (#109) — was wrong for the 2 Google clips and dead weight on Azure clips." 
+ - "Rewrote `dirty_file_path` and `transcript_path` to repo-relative form (#108)." + - "Rewrote `manifest.csv` `wav_path` / `strong_labels_path` columns to repo-relative form (#108)." + - "Regenerated `qa-report.json` — `single_backend` warning gone; `clips_by_tts_engine` renamed to `clips_by_tts_backend` with `{azure: 18, google: 2}` values." + - "Replaced the leaked pytest tmp_path in `sp_neu_a_0001_00.dirty_file_path` (#107 fingerprint) with the canonical `assets/speech/dirty/sp_neu_a_0001_00_dirty.wav`." + audio_diff: + - "19 of 20 clips: WAV bytes byte-identical with original delivery-003 (Azure SSML cache hit)." + - "sp_sv_a_0003_00.wav: audio differs (Google Chirp HD re-render); duration is 2.16 s shorter (102.80 s → 100.64 s) and event onsets/offsets after EVT_002 shift accordingly; peak / RMS within ±0.1 dB of original. Validation still PASSED." known_limitations: - "Voice diversity partial — 2 voices per gender; threshold for `low_voice_diversity_*` to clear is ≥3." - "Speaker-disjoint splits not feasible at this scale (4 unique speakers across 20 clips); all clips will be `split: train` in the manifest." diff --git a/deliveries/003-multi-project-multi-voice/notes.md b/deliveries/003-multi-project-multi-voice/notes.md index 37ff6c2..3e5f0fe 100644 --- a/deliveries/003-multi-project-multi-voice/notes.md +++ b/deliveries/003-multi-project-multi-voice/notes.md @@ -8,7 +8,7 @@ training pipelines around — not for model training itself. ## Contents -**20 clips. 41.7 min total. 4 unique voice families across Azure + Google backends.** +**20 clips. 41.6 min total. 4 unique voice families across Azure + Google backends.** ### She-Proves Tier A — Azure (10 clips) @@ -53,13 +53,21 @@ training pipelines around — not for model training itself. ## Pipeline version -SynthBanshee `0.1.0` / commit [`d92d61e`](https://github.com/DataHackIL/SynthBanshee/commit/d92d61e) (tip of `main` at delivery time).
Carries four corrections vs delivery 002: +SynthBanshee `0.1.0` / commit [`1ea48f3`](https://github.com/DataHackIL/SynthBanshee/commit/1ea48f3) (tip of `main` after the 2026-05-12 schema-shift regen). Carries seven corrections vs delivery 002 — four were already in the initial 2026-05-12 delivery, three landed in the schema-shift regen later that day: + +**Initial delivery (commit `d92d61e`):** - **[PR #102](https://github.com/DataHackIL/SynthBanshee/pull/102)** — `preprocessing_applied.normalized_dbfs` now records the *measured* post-preprocess peak (was hardcoded `-1.0`). Pair with `generation_metadata.loudness_target_peak_dbfs` to diagnose loudness drift; the schema docstring at `labels/schema.py:175` pins the measured-vs-target split. - **[PR #103](https://github.com/DataHackIL/SynthBanshee/pull/103)** — `docs/spec.md` pinned the `has_violence` derivation rule (`any(e.tier1_category != "NONE")`), added the §2.5 identifier-casing table, rewrote §5.1 field notes. - **[PR #105](https://github.com/DataHackIL/SynthBanshee/pull/105)** — added `sp_sv_a_0003` + `sp_it_a_0003` Google-pair shadow scenes (this delivery's voice-diversity vehicle). - **[PR #106](https://github.com/DataHackIL/SynthBanshee/pull/106)** — root cause for [#72](https://github.com/DataHackIL/SynthBanshee/issues/72) found and fixed: `_HINT_DEFAULTS["stress"]` was emitting nested `` inside outer ``, which Azure rejects with `SSML parse error 0x80045003`. **Required to unblock this delivery** — without the fix, 6 of 8 elephant Tier B scenes (every one whose LLM script carries a `stress` phrase hint at I3+) failed reliably. +**Schema-shift regen (commit `1ea48f3`, same-day):** + +- **[PR #110](https://github.com/DataHackIL/SynthBanshee/pull/110)** — Closes [#107](https://github.com/DataHackIL/SynthBanshee/issues/107): pytest tests no longer leak generated clips into the corpus when `SYNTHBANSHEE_DATA_DIR` is set in the parent shell. 
The regen overwrote the one leaked tmp_path that had been baked into `sp_neu_a_0001_00.dirty_file_path` during the initial delivery. +- **[PR #111](https://github.com/DataHackIL/SynthBanshee/pull/111)** — Closes [#108](https://github.com/DataHackIL/SynthBanshee/issues/108): clip JSON `dirty_file_path` / `transcript_path` and manifest CSV `wav_path` / `strong_labels_path` are now written as repo-relative POSIX strings, anchored at `--data-root` (envvar `SYNTHBANSHEE_DATA_ROOT`). A misconfigured `data_root` now logs a loud warning rather than silently falling back to absolute paths. +- **[PR #112](https://github.com/DataHackIL/SynthBanshee/pull/112)** — Closes [#109](https://github.com/DataHackIL/SynthBanshee/issues/109): `ClipMetadata.tts_engine` (always hardcoded to `"azure_he_IL"`, even for Google clips) removed entirely. `qa-report` now derives backend diversity from `generation_metadata.tts_backend` per speaker — the `single_backend` warning on this delivery is correctly absent post-regen. `RunSummary.clips_by_tts_engine` renamed to `clips_by_tts_backend` with `{azure, google, unknown}` value space. + ## Speaker / voice / backend matrix | Project | Speaker dir | Speakers (canonical UPPERCASE id) | Voice family — M | Voice family — F | Backend | @@ -87,10 +95,15 @@ This is the first delivery with multi-project and multi-backend coverage in one **Voice diversity — partial progress:** 1 → 2 unique voice families per gender. The `low_voice_diversity_*` thresholds expect ≥3, so the run-level warnings still fire; consumer teams should read this as "ladder climbed, not yet at the top." 
+**Closed after the 2026-05-12 regen** (PRs [#110](https://github.com/DataHackIL/SynthBanshee/pull/110) / [#111](https://github.com/DataHackIL/SynthBanshee/pull/111) / [#112](https://github.com/DataHackIL/SynthBanshee/pull/112)): + +- `single_backend` (run-level) — **resolved**: `qa.py` now derives backend diversity from `generation_metadata.tts_backend.values()` rather than the removed `clip.tts_engine` field. Current `qa-report.json` shows `"backend_count": 2` and `"clips_by_tts_backend": {"azure": 18, "google": 2}`. The field was also renamed `clips_by_tts_engine` → `clips_by_tts_backend` to reflect the new value-space (`"azure"` / `"google"` instead of the old `"azure_he_IL"` literal). +- Absolute paths in clip JSON — **resolved**: `dirty_file_path` and `transcript_path` are now repo-relative (POSIX) by contract. +- Leaked pytest tmp_path on `sp_neu_a_0001_00.dirty_file_path` — **resolved**: the regen overwrote it with the canonical `assets/speech/dirty/sp_neu_a_0001_00_dirty.wav`. The autouse env-var strip fixture (PR #110) prevents future leaks of this shape. + **Still open** (not this delivery's scope): - `low_voice_diversity_male` / `low_voice_diversity_female` — at 2 voices/gender; threshold is ≥3. -- `single_backend` (run-level) — **misleading**: the corpus actually uses Azure + Google. The qa-report counts `clip.tts_engine` which is currently hardcoded to `"azure_he_IL"` in `cli.py:_run_generate_pipeline`; this is a synthbanshee labeling bug, not a real diversity finding. (Tracked in a follow-up issue — see `qa-report.json` for the raw counts and `speakers[].voice_family` per clip for the actual backend distribution.) - `vic_f0_high` (per-clip): 2 clips — `sp_it_a_0003_00` and `sp_sv_a_0003_00` — flagged. Both are the Google Chirp HD female voice (`he-IL-Chirp3-HD-Achernar`), whose F0 baseline runs higher than the Azure Hila reference the M10a thresholds were calibrated against. 
- `quality_flagged_clips: 15` (mostly from `prosody_cap_activations`) — the #87 effective-prosody cap fires often at I3+; expected behaviour, recorded in `generation_metadata.effective_prosody_caps` per turn. - General Hebrew TTS naturalness backlog ([#92](https://github.com/DataHackIL/SynthBanshee/issues/92)). diff --git a/deliveries/003-multi-project-multi-voice/qa-report.json b/deliveries/003-multi-project-multi-voice/qa-report.json index fb39c97..af0dd69 100644 --- a/deliveries/003-multi-project-multi-voice/qa-report.json +++ b/deliveries/003-multi-project-multi-voice/qa-report.json @@ -79,9 +79,10 @@ ] }, "run_summary": { - "backend_count": 1, - "clips_by_tts_engine": { - "azure_he_IL": 20 + "backend_count": 2, + "clips_by_tts_backend": { + "azure": 18, + "google": 2 }, "clips_with_i4_plus": 10, "emotion_downgrade_ratio": 0.0, @@ -111,38 +112,38 @@ }, { "f0_median_hz": 108.1102658399776, - "f0_std_hz_mean": 22.851487932674182, + "f0_std_hz_mean": 22.87457837400195, "intensity": 2, - "lufs_db_mean": -27.690152509897995, + "lufs_db_mean": -27.858718608795442, "n_turns": 31, - "rms_db_mean": -28.62001685439644, + "rms_db_mean": -28.788583135307867, "role": "AGG" }, { "f0_median_hz": 113.55498244923697, - "f0_std_hz_mean": 25.441891968713627, + "f0_std_hz_mean": 25.11460795909526, "intensity": 3, - "lufs_db_mean": -25.801129710828317, + "lufs_db_mean": -25.931500633349966, "n_turns": 22, - "rms_db_mean": -26.75737709404332, + "rms_db_mean": -26.877416667444773, "role": "AGG" }, { "f0_median_hz": 121.35169484693589, - "f0_std_hz_mean": 35.61756253167847, + "f0_std_hz_mean": 34.322822303383205, "intensity": 4, - "lufs_db_mean": -26.443119582974802, + "lufs_db_mean": -26.898978317965003, "n_turns": 17, - "rms_db_mean": -27.750911063452683, + "rms_db_mean": -28.213887073074247, "role": "AGG" }, { - "f0_median_hz": 117.21605517913326, - "f0_std_hz_mean": 28.456606185647296, + "f0_median_hz": 116.5428845670826, + "f0_std_hz_mean": 30.25917677178986, "intensity": 5, - 
"lufs_db_mean": -23.10049106389533, + "lufs_db_mean": -23.238750890246557, "n_turns": 14, - "rms_db_mean": -24.40714167053957, + "rms_db_mean": -24.601174662522972, "role": "AGG" }, { @@ -156,45 +157,44 @@ }, { "f0_median_hz": 208.25380769954342, - "f0_std_hz_mean": 43.64342839303118, + "f0_std_hz_mean": 43.13462092052036, "intensity": 2, - "lufs_db_mean": -28.325761478113975, + "lufs_db_mean": -28.484112755519416, "n_turns": 33, - "rms_db_mean": -29.22597481860275, + "rms_db_mean": -29.384326109032514, "role": "VIC" }, { "f0_median_hz": 223.20090456450697, - "f0_std_hz_mean": 49.68458303287865, + "f0_std_hz_mean": 49.677754860679514, "intensity": 3, - "lufs_db_mean": -32.210243183604646, + "lufs_db_mean": -32.316328495323894, "n_turns": 26, - "rms_db_mean": -33.30509871970152, + "rms_db_mean": -33.40566421774339, "role": "VIC" }, { "f0_median_hz": 229.74119028909763, - "f0_std_hz_mean": 64.27051044991096, + "f0_std_hz_mean": 62.82210186299769, "intensity": 4, - "lufs_db_mean": -34.47744642450853, + "lufs_db_mean": -34.87101520257984, "n_turns": 14, - "rms_db_mean": -35.56674703043024, + "rms_db_mean": -35.94259629163534, "role": "VIC" }, { - "f0_median_hz": 218.73289323986043, - "f0_std_hz_mean": 79.75888422107323, + "f0_median_hz": 238.52991146631203, + "f0_std_hz_mean": 74.43498941733397, "intensity": 5, - "lufs_db_mean": -35.63919225446456, + "lufs_db_mean": -36.88653341783261, "n_turns": 5, - "rms_db_mean": -36.89408535853518, + "rms_db_mean": -37.94338251236866, "role": "VIC" } ], "run_warnings": [ "low_voice_diversity_male", - "low_voice_diversity_female", - "single_backend" + "low_voice_diversity_female" ], "voices_by_gender": { "female": 2, @@ -227,7 +227,7 @@ "quality_flagged_clips": 15, "speaker_count": 6, "total_clips": 20, - "total_duration_seconds": 2500.7868750000002 + "total_duration_seconds": 2498.6268749999995 }, "structural_warnings": {} }