OpenSportsLab
diff --git a/‎annotation_tool/controllers/README.md‎
Lines changed: 1 addition & 1 deletion b/‎annotation_tool/controllers/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎annotation_tool/controllers/dataset_explorer_controller.py‎
Lines changed: 127 additions & 3 deletions b/‎annotation_tool/controllers/dataset_explorer_controller.py‎
Lines changed: 127 additions & 3 deletions
@@ -17,7 +17,7 @@ Owns runtime business logic: dataset lifecycle, mutation history, playback contr
 - `media_controller.py`: media playback and mute routing.
 - `welcome_controller.py`: welcome-page routing.
 - `hf_transfer_controller.py`: threaded Hugging Face download/upload orchestration for GUI menu actions.
-- `classification/`, `localization/`, `description/`, `dense_description/`: mode controllers.
+- `classification/`, `localization/`, `description/`, `dense_description/`, `question_answer/`: mode controllers.
 
 ## Key Functions and Responsibilities
 ### `DatasetExplorerController`
 
@@ -227,6 +227,7 @@ class DatasetExplorerController(QObject):
     dataSelected = pyqtSignal(str)
     sampleSelectionChanged = pyqtSignal(object)
     schemaContextChanged = pyqtSignal(dict)
+    questionBankChanged = pyqtSignal(list)
     classificationActionListChanged = pyqtSignal(list)
     mediaRouteRequested = pyqtSignal(str, bool)
     mediaStopRequested = pyqtSignal()
@@ -239,6 +240,7 @@ class DatasetExplorerController(QObject):
     resetEditorsRequested = pyqtSignal()
     editorTabRequested = pyqtSignal(int)
     descSaveRequested = pyqtSignal()
+    qaSaveRequested = pyqtSignal()
     clearMarkersRequested = pyqtSignal()
     annotationPanelsEnabledRequested = pyqtSignal(bool)
     headerDraftMutationRequested = pyqtSignal(dict)
@@ -260,7 +262,7 @@ class DatasetExplorerController(QObject):
         "description",
         "metadata",
     )
-    HEADER_EXCLUDED_KEYS = {"data", "labels"}
+    HEADER_EXCLUDED_KEYS = {"data", "labels", "questions"}
 
     def __init__(
         self,
@@ -342,6 +344,18 @@ def modalities(self):
     def modalities(self, value):
         self.dataset_json["modalities"] = list(value) if isinstance(value, list) else ["video"]
 
+    @property
+    def question_definitions(self) -> list:
+        """Return the dataset's question list, creating it when absent.
+
+        If ``dataset_json["questions"]`` is missing or is not a list, a new
+        empty list is stored under that key and returned, so the caller
+        always receives the list object held by ``dataset_json``.
+        """
+        bank = self.dataset_json.get("questions")
+        if isinstance(bank, list):
+            return bank
+        bank = []
+        self.dataset_json["questions"] = bank
+        return bank
+
+    @question_definitions.setter
+    def question_definitions(self, value):
+        # Store a shallow copy of a list; anything else resets to empty.
+        if isinstance(value, list):
+            self.dataset_json["questions"] = list(value)
+        else:
+            self.dataset_json["questions"] = []
+
     @property
     def project_header_known(self) -> dict:
         return {
@@ -552,6 +566,9 @@ def _emit_selected_sample(self, sample_id: str):
     def _emit_schema_context(self):
         self.schemaContextChanged.emit(copy.deepcopy(self.label_definitions))
 
+    def _emit_question_bank_context(self):
+        """Emit ``questionBankChanged`` with a deep copy of the question list."""
+        snapshot = copy.deepcopy(self.question_definitions)
+        self.questionBankChanged.emit(snapshot)
+
     def _emit_classification_action_list(self):
         self.classificationActionListChanged.emit(copy.deepcopy(self.action_item_data))
 
@@ -721,6 +738,7 @@ def clear_annotations_for_path(self, path: str):
             "events",
             "captions",
             "dense_captions",
+            "answers",
         ):
             sample.pop(field, None)
 
@@ -881,6 +899,8 @@ def _prompt_unsaved_close_action(self) -> str:
     def save_project(self):
         if self._active_mode_idx() == 2:
             self.descSaveRequested.emit()
+        if self._active_mode_idx() == 4:
+            self.qaSaveRequested.emit()
 
         if not self.current_json_path:
             return self.export_project()
@@ -889,6 +909,8 @@ def save_project(self):
     def export_project(self):
         if self._active_mode_idx() == 2:
             self.descSaveRequested.emit()
+        if self._active_mode_idx() == 4:
+            self.qaSaveRequested.emit()
 
         path, _ = QFileDialog.getSaveFileName(
             self.panel,
@@ -1001,9 +1023,66 @@ def _default_dataset_json(self):
             "modalities": ["video"],
             "metadata": {},
             "labels": {},
+            "questions": [],
             "data": [],
         }
 
+    @staticmethod
+    def _normalize_question_id(question_id: str) -> str:
+        """Return ``question_id`` as a stripped string; falsy input gives ``""``."""
+        if not question_id:
+            return ""
+        return str(question_id).strip()
+
+    def _normalize_questions_payload(self, questions) -> list:
+        """Return a cleaned list of ``{"id", "question"}`` dicts.
+
+        Entries are dropped when they are not dicts, when the stripped id or
+        question text is empty, or when the id repeats an earlier entry
+        (first occurrence wins). Only ``id`` and ``question`` are kept.
+
+        Args:
+            questions: Raw ``questions`` value. Falsy or non-iterable values
+                (for example a number) yield ``[]``.
+
+        Returns:
+            A new list of normalized question dicts in input order.
+        """
+        try:
+            candidates = list(questions or [])
+        except TypeError:
+            # Malformed payload such as `"questions": 5`: treat it as empty
+            # instead of aborting normalization with an exception.
+            return []
+
+        normalized = []
+        seen_ids = set()
+        for raw_question in candidates:
+            if not isinstance(raw_question, dict):
+                continue
+
+            question_id = self._normalize_question_id(raw_question.get("id"))
+            question_text = str(raw_question.get("question") or "").strip()
+            if not question_id or not question_text:
+                continue
+            if question_id in seen_ids:
+                continue
+
+            seen_ids.add(question_id)
+            normalized.append({"id": question_id, "question": question_text})
+        return normalized
+
+    @staticmethod
+    def _normalize_sample_answers_payload(answers, valid_question_ids: set) -> list:
+        """Return a cleaned list of ``{"question_id", "answer"}`` dicts.
+
+        An entry is kept only when it is a dict, its stripped ``question_id``
+        is non-empty, is in ``valid_question_ids``, has not already been
+        kept, and its stripped ``answer`` text is non-empty.
+
+        Args:
+            answers: Raw ``answers`` value of a sample. Falsy or non-iterable
+                values (for example a number) yield ``[]``.
+            valid_question_ids: Question ids an answer may refer to.
+
+        Returns:
+            A new list of normalized answer dicts in input order.
+        """
+        try:
+            candidates = list(answers or [])
+        except TypeError:
+            # Malformed payload such as `"answers": 5`: treat it as empty
+            # instead of aborting normalization with an exception.
+            return []
+
+        normalized = []
+        seen_question_ids = set()
+        for raw_answer in candidates:
+            if not isinstance(raw_answer, dict):
+                continue
+            question_id = str(raw_answer.get("question_id") or "").strip()
+            if (
+                not question_id
+                or question_id not in valid_question_ids
+                or question_id in seen_question_ids
+            ):
+                continue
+            answer_text = str(raw_answer.get("answer") or "").strip()
+            if not answer_text:
+                continue
+            normalized.append({"question_id": question_id, "answer": answer_text})
+            seen_question_ids.add(question_id)
+        return normalized
+
+    def next_question_id(self) -> str:
+        """Return the next unused ``q<N>`` id for the question bank.
+
+        Scans existing question ids of the form ``q`` followed by decimal
+        digits and returns ``q`` plus one more than the largest number found
+        (``q1`` when none match). Ids of any other shape are ignored.
+        """
+        max_suffix = 0
+        for question in self.question_definitions:
+            if not isinstance(question, dict):
+                continue
+            question_id = self._normalize_question_id(question.get("id"))
+            if not question_id.startswith("q"):
+                continue
+            suffix = question_id[1:]
+            # isdecimal(), not isdigit(): isdigit() also accepts characters
+            # such as superscript digits, which int() rejects with ValueError.
+            if suffix.isdecimal():
+                max_suffix = max(max_suffix, int(suffix))
+        return f"q{max_suffix + 1}"
+
     def _normalize_dataset_json(self, data):
         if not isinstance(data, dict):
             return None, "Root JSON must be an object."
@@ -1016,6 +1095,8 @@ def _normalize_dataset_json(self, data):
 
         if not isinstance(normalized.get("labels"), dict):
             normalized["labels"] = {}
+        normalized["questions"] = self._normalize_questions_payload(normalized.get("questions"))
+        valid_question_ids = {question["id"] for question in normalized["questions"]}
         if not isinstance(normalized.get("metadata"), dict):
             normalized["metadata"] = {}
         if not isinstance(normalized.get("modalities"), list):
@@ -1061,6 +1142,15 @@ def _normalize_dataset_json(self, data):
                     if isinstance(event, dict):
                         event["position_ms"] = _safe_int(event.get("position_ms", 0))
 
+            normalized_answers = self._normalize_sample_answers_payload(
+                sample.get("answers"),
+                valid_question_ids,
+            )
+            if normalized_answers:
+                sample["answers"] = normalized_answers
+            else:
+                sample.pop("answers", None)
+
             cleaned_data.append(sample)
 
         normalized["data"] = cleaned_data
@@ -1430,10 +1520,12 @@ def _sample_supports_mode(self, sample: dict, mode_idx: int) -> bool:
             return any(isinstance(cap, dict) and str(cap.get("text", "")).strip() for cap in captions)
         if mode_idx == 3:
             return bool(sample.get("dense_captions"))
+        if mode_idx == 4:
+            return self._has_non_empty_answers(sample)
         return False
 
     def _available_mode_indices_for_sample(self, sample: dict):
-        return [mode_idx for mode_idx in (0, 1, 2, 3) if self._sample_supports_mode(sample, mode_idx)]
+        return [mode_idx for mode_idx in (0, 1, 2, 3, 4) if self._sample_supports_mode(sample, mode_idx)]
 
     def _reconcile_annotation_tab_for_sample(self, sample: dict) -> bool:
         available_modes = self._available_mode_indices_for_sample(sample)
@@ -1558,10 +1650,28 @@ def _label_state_for_sample(self, sample):
             for cap in captions
         )
 
-        hand = bool(_ManualAnnotationRecord(sample)) or bool(sample.get("events")) or bool(sample.get("dense_captions")) or has_caption_text
+        hand = (
+            bool(_ManualAnnotationRecord(sample))
+            or bool(sample.get("events"))
+            or bool(sample.get("dense_captions"))
+            or has_caption_text
+            or self._has_non_empty_answers(sample)
+        )
         smart = self._has_smart_labels(sample) or self._has_smart_events(sample)
         return bool(hand), bool(smart)
 
+    @staticmethod
+    def _has_non_empty_answers(sample: dict) -> bool:
+        """Return True when ``sample["answers"]`` holds a non-blank answer.
+
+        ``answers`` must be a list; only dict entries are inspected, and an
+        entry counts when its ``answer`` value is non-empty after stripping.
+        """
+        candidates = sample.get("answers")
+        if not isinstance(candidates, list):
+            return False
+        return any(
+            isinstance(item, dict) and bool(str(item.get("answer") or "").strip())
+            for item in candidates
+        )
+
     @staticmethod
     def _has_smart_labels(sample: dict) -> bool:
         labels = sample.get("labels")
@@ -1784,6 +1894,7 @@ def _build_new_sample(self, source_group):
             "events": [],
             "captions": [],
             "dense_captions": [],
+            "answers": [],
         }
 
     def handle_add_sample(self):
@@ -1859,6 +1970,8 @@ def _dataset_json_for_write(self, save_path: str):
 
         base_dir = os.path.dirname(os.path.abspath(save_path))
         written = copy.deepcopy(normalized)
+        written["questions"] = self._normalize_questions_payload(written.get("questions"))
+        valid_question_ids = {question["id"] for question in written["questions"]}
         for sample in written.get("data", []):
             new_inputs = []
 
@@ -1884,6 +1997,14 @@ def _dataset_json_for_write(self, save_path: str):
                 sample.pop("captions", None)
             if not sample.get("dense_captions"):
                 sample.pop("dense_captions", None)
+            normalized_answers = self._normalize_sample_answers_payload(
+                sample.get("answers"),
+                valid_question_ids,
+            )
+            if normalized_answers:
+                sample["answers"] = normalized_answers
+            else:
+                sample.pop("answers", None)
             if not sample.get("metadata"):
                 sample.pop("metadata", None)
             # Never persist retired smart-* keys.
@@ -1896,6 +2017,7 @@ def _dataset_json_for_write(self, save_path: str):
                 definition.pop("label_colors", None)
         written.setdefault("metadata", {})
         written.setdefault("modalities", ["video"])
+        written.setdefault("questions", [])
         if not written.get("description"):
             written["description"] = ""
         return written
@@ -1917,6 +2039,7 @@ def _write_dataset_json(self, save_path: str):
         self._add_recent_project(self.current_json_path)
         self._rebuild_runtime_index()
         self._refresh_header_panel()
+        self._refresh_schema_panels()
         self.saveStateRefreshRequested.emit()
         self.statusMessageRequested.emit("Saved", f"Saved to {os.path.basename(save_path)}", 1500)
         return True
@@ -1927,3 +2050,4 @@ def _write_dataset_json(self, save_path: str):
     def _refresh_schema_panels(self):
         self.schemaRefreshRequested.emit()
         self._emit_schema_context()
+        self._emit_question_bank_context()