Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions backend/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ def cmd_process(args):
from services.clip_generator import generate_clip
from services.transcript_parser import parse_speaker_transcript
from services.audio_analyzer import get_energy_profile
from services.audio_events import get_event_profile, is_available as audio_events_available
from services.encoder import get_encoder_info
from presets import get_preset, DEFAULT_PRESET, MIN_CLIP_DURATION, MAX_CLIP_DURATION, TARGET_CLIP_DURATION_MIN, TARGET_CLIP_DURATION_MAX

Expand Down Expand Up @@ -657,6 +658,7 @@ def _transcribe_progress(pct, msg):

# ── Step 2: Analyze audio energy ──
energy_scores = None
reaction_scores = None
if config.get("energy_boost", True):
print(" [2/4] Analyzing audio energy...")
try:
Expand All @@ -665,6 +667,15 @@ def _transcribe_progress(pct, msg):
print(f" {len(profile['peak_times'])} peak moments found")
except Exception as e:
print(f" Skipped (error: {e})")
if audio_events_available():
try:
reactions = get_event_profile(video_path, segments)
reaction_scores = reactions["segment_scores"]
n = len(reactions["reaction_times"])
if n:
print(f" {n} laughter/reaction moments found")
except Exception as e:
print(f" Reactions skipped (error: {e})")
else:
print(" [2/4] Audio analysis skipped (--no-energy)")

Expand Down Expand Up @@ -712,6 +723,7 @@ def _transcribe_progress(pct, msg):
clips = _suggest_clips(
segments=segments,
energy_scores=energy_scores,
reaction_scores=reaction_scores,
top_n=top_n,
min_dur=config.get("min_clip_duration", MIN_CLIP_DURATION),
max_dur=config.get("max_clip_duration", MAX_CLIP_DURATION),
Expand Down Expand Up @@ -1607,6 +1619,7 @@ def _rerender_clip(r):
def _suggest_clips(
segments: list,
energy_scores: list | None = None,
reaction_scores: list | None = None,
top_n: int = 5,
min_dur: float = MIN_CLIP_DURATION,
max_dur: float = MAX_CLIP_DURATION,
Expand Down Expand Up @@ -1804,6 +1817,15 @@ def _find_sentence_boundary_end(segs, idx, max_lookahead=3):
if max_e > 7:
reasons.append("high_energy")

# ── 6b. Laughter / reactions (0-6 pts) ──
if reaction_scores:
seg_reactions = reaction_scores[snap_start : snap_end + 1]
if seg_reactions:
max_r = max(seg_reactions)
score += min(max_r, 6)
if max_r > 3:
reasons.append("laughter")

# ── 7. Density check — penalize sparse/rambling segments ──
words_per_sec = len(text.split()) / max(dur, 1)
if words_per_sec < 1.5:
Expand Down
29 changes: 23 additions & 6 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,19 @@ def handle_transcribe(task_id: str, params: dict):
except Exception:
pass # energy is a nice-to-have

events_data = None
try:
from services.audio_events import extract_audio_events
events_data = extract_audio_events(file_path)
except Exception:
pass # reactions are a nice-to-have

packed_path, packed_md = write_packed(
result,
cache_hash,
source_label=os.path.basename(file_path),
energy_data=energy_data,
events_data=events_data,
)
result["packed_path"] = packed_path
result["packed_size_bytes"] = len(packed_md.encode("utf-8"))
Expand Down Expand Up @@ -253,18 +261,27 @@ def handle_pack_transcript(task_id: str, params: dict):
return

energy_data = params.get("energy_data")
if energy_data is None and params.get("file_path"):
try:
from services.audio_analyzer import extract_audio_energy
energy_data = extract_audio_energy(params["file_path"])
except Exception:
pass
events_data = params.get("events_data")
if params.get("file_path"):
if energy_data is None:
try:
from services.audio_analyzer import extract_audio_energy
energy_data = extract_audio_energy(params["file_path"])
except Exception:
pass
if events_data is None:
try:
from services.audio_events import extract_audio_events
events_data = extract_audio_events(params["file_path"])
except Exception:
pass

path, md = write_packed(
transcript,
cache_hash,
source_label=params.get("source_label"),
energy_data=energy_data,
events_data=events_data,
)
emit_result(task_id, "success", data={
"packed_path": path,
Expand Down
Binary file added backend/models/yamnet.onnx
Binary file not shown.
Loading
Loading