From 97d9e4b1e3a0e87f0c84c9ba11b856441f5e7d54 Mon Sep 17 00:00:00 2001
From: Amit Moryossef <amit@nagish.com>
Date: Tue, 28 Apr 2026 09:18:43 +0000
Subject: [PATCH 1/2] Add Konrad et al. 2024 on Public DGS Corpus release 4

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/index.md       |  1 +
 src/references.bib | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/index.md b/src/index.md
index cc117882..0685e71c 100644
--- a/src/index.md
+++ b/src/index.md
@@ -1142,6 +1142,7 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling
 contain parallel sequences of signs and spoken language.
 Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively].
 Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending].
+@konrad-etal-2024-corpus describe the fourth release of the Public DGS Corpus, expanding it to 52.4 hours, adding a new iLex-based portal alongside MY DGS, MY DGS – annotated and MY DGS – ANNIS, and providing additional pose representations from MediaPipe and Apple Vision Framework, including 3D keypoint estimates.
 These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
 
 
diff --git a/src/references.bib b/src/references.bib
index f712af18..61364197 100644
--- a/src/references.bib
+++ b/src/references.bib
@@ -4171,6 +4171,7 @@ @inproceedings{petrovich2022TEMOSGeneratingDiverse
  year = {2022}
 }
 
+<<<<<<< HEAD
 @inproceedings{bono-etal-2024-data,
  address = {Torino, Italia},
  author = {Bono, Mayumi  and
@@ -4515,3 +4516,27 @@ @inproceedings{susman-kimmelman-2024-eye
     url = "https://aclanthology.org/2024.signlang-1.40/",
     pages = "361--369"
 }
+
+@inproceedings{konrad-etal-2024-corpus,
+    title = {Corpus {\`a} la carte {--} Improving Access to the {P}ublic {DGS} {C}orpus},
+    author = {Konrad, Reiner and
+      Hanke, Thomas and
+      Isard, Amy and
+      Schulder, Marc and
+      K{\"o}nig, Lutz and
+      Bleicken, Julian and
+      B{\"o}se, Oliver},
+    editor = {Efthimiou, Eleni and
+      Fotinea, Stavroula-Evita and
+      Hanke, Thomas and
+      Hochgesang, Julie A. and
+      Mesch, Johanna and
+      Schulder, Marc},
+    booktitle = {Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources},
+    month = may,
+    year = {2024},
+    address = {Torino, Italia},
+    publisher = {ELRA and ICCL},
+    url = {https://aclanthology.org/2024.signlang-1.20/},
+    pages = {184--193}
+}

From ad8b34004d56615f354e6ba5dc9f438afbe90c26 Mon Sep 17 00:00:00 2001
From: AmitMY <amit@nagish.com>
Date: Tue, 28 Apr 2026 09:55:01 +0000
Subject: [PATCH 2/2] Move konrad-etal entry to Dataset Papers (apply review
 pattern)

---
 src/index.md       | 6 ++----
 src/references.bib | 1 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/index.md b/src/index.md
index 0685e71c..68020361 100644
--- a/src/index.md
+++ b/src/index.md
@@ -1104,9 +1104,6 @@ Some special features are cross-level links, non-temporal objects, timepoint tra
 3D viewing of motion capture data and a project tool for managing whole corpora of annotation files.
 Anvil installation is [available](http://www.anvil-software.de/download/index.html) for Windows, macOS, and Linux.
 
-##### MY DGS -- ANNIS Query Wizard
-@isard-2024-building presented a web-based Query Wizard that guided users through the construction of valid ANNIS Query Language expressions over the Public DGS Corpus [@dataset:hanke-etal-2020-extending] by composing context-sensitive blocks for annotation tiers, metadata, and inter-tier connections, which could then be opened directly in the MY DGS -- ANNIS portal.
-
 ##### Other {-}
 @battisti-etal-2024-advancing presented a transcription and annotation scheme for continuous L1 and L2 data in Swiss German Sign Language (DSGS), introducing conventions for non-manual components and L2 learner errors, and outlined an initial inter-annotator agreement validation approach.
 
@@ -1124,6 +1121,8 @@ Research papers which do not necessarily contribute new theory or architectures
 
 @hall-etal-2024-phonological digitized and phonologically transcribed the Canadian Dictionary of ASL using the Sign Language Phonetic Annotator-Analyzer software, producing a searchable resource that captured handshape, movement, location, and relation parameters for roughly 2000 signs to enable phonologically based queries that paper-based dictionaries cannot support.
 
+The Public DGS Corpus also saw multiple SignLang 2024 contributions: @konrad-etal-2024-corpus described its fourth release, expanding it to 52.4 hours, adding a new iLex-based portal alongside MY DGS, MY DGS – annotated, and MY DGS – ANNIS, and providing additional MediaPipe and Apple Vision Framework pose representations including 3D keypoint estimates; @isard-2024-building introduced a web-based Query Wizard that guided users through the construction of valid ANNIS Query Language expressions over the corpus by composing context-sensitive blocks for annotation tiers, metadata, and inter-tier connections.
+
 <!-- TODO: LSA-T aka dataset:dal2022lsa, they use AlphaPose "with the Halpe full-body keypoints format", a visualizer tool, and a baseline SLT model. Especially might be good to mention FiftyOne https://docs.voxel51.com/, "which
 provides useful features such as allowing to filter samples by label, video, playlist,
 or by the confidence score of the signer inference." -->
@@ -1142,7 +1141,6 @@ are collections of annotated single signs. They are synthesized [@dataset:ebling
 contain parallel sequences of signs and spoken language.
 Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively].
 Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending].
-@konrad-etal-2024-corpus describe the fourth release of the Public DGS Corpus, expanding it to 52.4 hours, adding a new iLex-based portal alongside MY DGS, MY DGS – annotated and MY DGS – ANNIS, and providing additional pose representations from MediaPipe and Apple Vision Framework, including 3D keypoint estimates.
 These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event.
 
 
diff --git a/src/references.bib b/src/references.bib
index 61364197..55752ea7 100644
--- a/src/references.bib
+++ b/src/references.bib
@@ -4171,7 +4171,6 @@ @inproceedings{petrovich2022TEMOSGeneratingDiverse
  year = {2022}
 }
 
-<<<<<<< HEAD
 @inproceedings{bono-etal-2024-data,
  address = {Torino, Italia},
  author = {Bono, Mayumi  and