From 1a4373ec9b36286c661aa8d15b0ee66d06331ef6 Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Thu, 17 Apr 2025 16:13:29 +0200
Subject: [PATCH 1/7] Added commands to setup Oxigraph

Added all the commands necessary to set up a SPARQL endpoint for Oxigraph, both natively and in a container
---
 pyproject.toml                            |   2 +
 src/qoxigraph/__init__.py                 |   0
 src/qoxigraph/commands/__init__.py        |   0
 src/qoxigraph/commands/example_queries.py |  12 ++
 src/qoxigraph/commands/extract_queries.py |   1 +
 src/qoxigraph/commands/get_data.py        |   1 +
 src/qoxigraph/commands/index.py           | 109 ++++++++++++++
 src/qoxigraph/commands/log.py             |  50 +++++++
 src/qoxigraph/commands/query.py           |  54 +++++++
 src/qoxigraph/commands/setup_config.py    | 109 ++++++++++++++
 src/qoxigraph/commands/start.py           | 175 ++++++++++++++++++++++
 src/qoxigraph/commands/status.py          |  19 +++
 src/qoxigraph/commands/stop.py            |  71 +++++++++
 13 files changed, 603 insertions(+)
 create mode 100644 src/qoxigraph/__init__.py
 create mode 100644 src/qoxigraph/commands/__init__.py
 create mode 100644 src/qoxigraph/commands/example_queries.py
 create mode 120000 src/qoxigraph/commands/extract_queries.py
 create mode 120000 src/qoxigraph/commands/get_data.py
 create mode 100644 src/qoxigraph/commands/index.py
 create mode 100644 src/qoxigraph/commands/log.py
 create mode 100644 src/qoxigraph/commands/query.py
 create mode 100644 src/qoxigraph/commands/setup_config.py
 create mode 100644 src/qoxigraph/commands/start.py
 create mode 100644 src/qoxigraph/commands/status.py
 create mode 100644 src/qoxigraph/commands/stop.py

diff --git a/pyproject.toml b/pyproject.toml
index b053fe62..12c82f47 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,8 @@ Github = "https://github.com/ad-freiburg/qlever"
 
 [project.scripts]
 "qlever" = "qlever.qlever_main:main"
+"qoxigraph" = "qlever.qlever_main:main"
+"qlever-old" = "qlever.qlever_old:main"
 
 [tool.setuptools]
 license-files = ["LICENSE"]
diff --git a/src/qoxigraph/__init__.py b/src/qoxigraph/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/qoxigraph/commands/__init__.py b/src/qoxigraph/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py
new file mode 100644
index 00000000..d62982a8
--- /dev/null
+++ b/src/qoxigraph/commands/example_queries.py
@@ -0,0 +1,12 @@
+from __future__ import annotations
+
+from qlever.commands.example_queries import (
+    ExampleQueriesCommand as QleverExampleQueriesCommand,
+)
+
+
+class ExampleQueriesCommand(QleverExampleQueriesCommand):
+    def execute(self, args) -> bool:
+        if not args.sparql_endpoint:
+            args.sparql_endpoint = f"localhost:{args.port}/query"
+        return super().execute(args)
diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py
new file mode 120000
index 00000000..5667cc52
--- /dev/null
+++ b/src/qoxigraph/commands/extract_queries.py
@@ -0,0 +1 @@
+../../qlever/commands/extract_queries.py
\ No newline at end of file
diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py
new file mode 120000
index 00000000..4900dbb8
--- /dev/null
+++ b/src/qoxigraph/commands/get_data.py
@@ -0,0 +1 @@
+../../qlever/commands/get_data.py
\ No newline at end of file
diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py
new file mode 100644
index 00000000..128b9a82
--- /dev/null
+++ b/src/qoxigraph/commands/index.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import glob
+import shlex
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import binary_exists, run_command
+
+
+class IndexCommand(QleverCommand):
+    def __init__(self):
+        self.script_name = "qoxigraph"
+
+    def description(self) -> str:
+        return "Build the index for a given RDF dataset"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {
+            "data": ["name", "format"],
+            "index": ["input_files"],
+            "runtime": ["system", "image", "index_container"],
+        }
+
+    def additional_arguments(self, subparser):
+        subparser.add_argument(
+            "--index-binary",
+            type=str,
+            default="oxigraph",
+            help=(
+                "The binary for building the index (default: oxigraph) "
+                "(this requires that you have oxigraph-cli installed "
+                "on your machine)"
+            ),
+        )
+
+    @staticmethod
+    def wrap_cmd_in_container(args, cmd: str) -> str:
+        return Containerize().containerize_command(
+            cmd=cmd,
+            container_system=args.system,
+            run_subcommand="run --rm",
+            image_name=args.image,
+            container_name=args.index_container,
+            volumes=[("$(pwd)", "/index")],
+            working_directory="/index",
+            use_bash=False,
+        )
+
+    def execute(self, args) -> bool:
+        index_cmd = f"load --location . --file {args.input_files}"
+        index_cmd += f" |& tee {args.name}.index-log.txt"
+
+        index_cmd = (
+            f"{args.index_binary} {index_cmd}"
+            if args.system == "native"
+            else self.wrap_cmd_in_container(args, index_cmd)
+        )
+
+        # Show the command line.
+        self.show(index_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        # Check if all of the input files exist.
+        for pattern in shlex.split(args.input_files):
+            if len(glob.glob(pattern)) == 0:
+                log.error(f'No file matching "{pattern}" found')
+                log.info("")
+                log.info(
+                    f"Did you call `{self.script_name} get-data`? If you did, "
+                    "check GET_DATA_CMD and INPUT_FILES in the Qleverfile"
+                )
+                return False
+
+        # When running natively, check if the binary exists and works.
+        if args.system == "native":
+            if not binary_exists(args.index_binary, "index-binary"):
+                return False
+        else:
+            if Containerize().is_running(args.system, args.index_container):
+                log.info(
+                    f"{args.system} container {args.index_container} is still up, "
+                    "which means that data loading is in progress. Please wait..."
+                )
+                return False
+
+        if len([p.name for p in Path.cwd().glob("*.sst")]) != 0:
+            log.error(
+                "Index files (*.sst) found in current directory "
+                "which shows presence of a previous index"
+            )
+            log.info("")
+            log.info("Aborting the index operation...")
+            return False
+
+        # Run the index command.
+        try:
+            run_command(index_cmd, show_output=True, show_stderr=True)
+        except Exception as e:
+            log.error(f"Building the index failed: {e}")
+            return False
+
+        return True
diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py
new file mode 100644
index 00000000..a90d2228
--- /dev/null
+++ b/src/qoxigraph/commands/log.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from qlever.commands.log import LogCommand as QleverLogCommand
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import run_command
+
+
+class LogCommand(QleverLogCommand):
+    def __init__(self):
+        self.script_name = "qoxigraph"
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {
+            "data": ["name"],
+            "runtime": [
+                "system",
+                "image",
+                "server_container",
+            ],
+        }
+
+    def execute(self, args) -> bool:
+        if args.system == "native":
+            return super().execute(args)
+
+        log_cmd = f"{args.system} logs "
+
+        if not args.from_beginning:
+            log_cmd += f"-n {args.tail_num_lines} "
+        if not args.no_follow:
+            log_cmd += "-f "
+
+        log_cmd += args.server_container
+
+        # Show the command line.
+        self.show(log_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        if not Containerize().is_running(args.system, args.server_container):
+            log.error(f"No server container {args.server_container} found!\n")
+            log.info(f"Are you sure you called `{self.script_name} start`?")
+            return False
+
+        try:
+            run_command(log_cmd, show_output=True, show_stderr=True)
+        except Exception as e:
+            log.error(f"Cannot display container logs - {e}")
+        return True
diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py
new file mode 100644
index 00000000..6518905f
--- /dev/null
+++ b/src/qoxigraph/commands/query.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from qlever.commands.query import QueryCommand as QleverQueryCommand
+
+
+class QueryCommand(QleverQueryCommand):
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "query",
+            type=str,
+            nargs="?",
+            default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
+            help="SPARQL query to send",
+        )
+        subparser.add_argument(
+            "--predefined-query",
+            type=str,
+            choices=self.predefined_queries.keys(),
+            help="Use a predefined query",
+        )
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "application/sparql-results+xml",
+            ],
+            default="text/tab-separated-values",
+            help="Accept header for the SPARQL query",
+        )
+        subparser.add_argument(
+            "--get",
+            action="store_true",
+            default=False,
+            help="Use GET request instead of POST",
+        )
+        subparser.add_argument(
+            "--no-time",
+            action="store_true",
+            default=False,
+            help="Do not print the (end-to-end) time taken",
+        )
+
+    def execute(self, args) -> bool:
+        if not args.sparql_endpoint:
+            args.sparql_endpoint = f"localhost:{args.port}/query"
+        args.pin_to_cache = None
+        args.access_token = None
+        super().execute(args)
diff --git a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py
new file mode 100644
index 00000000..b6d9225b
--- /dev/null
+++ b/src/qoxigraph/commands/setup_config.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+from configparser import RawConfigParser
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.qleverfile import Qleverfile
+
+
+class SetupConfigCommand(QleverCommand):
+    IMAGE = "ghcr.io/oxigraph/oxigraph"
+
+    FILTER_CRITERIA = {
+        "data": [],
+        "index": ["INPUT_FILES"],
+        "server": ["PORT"],
+        "runtime": ["SYSTEM", "IMAGE"],
+        "ui": ["UI_CONFIG"],
+    }
+
+    def __init__(self):
+        self.qleverfiles_path = (
+            Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles"
+        )
+        self.qleverfile_names = [
+            p.name.split(".")[1]
+            for p in self.qleverfiles_path.glob("Qleverfile.*")
+        ]
+
+    def description(self) -> str:
+        return "Get a pre-configured Qleverfile"
+
+    def should_have_qleverfile(self) -> bool:
+        return False
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "config_name",
+            type=str,
+            choices=self.qleverfile_names,
+            help="The name of the pre-configured Qleverfile to create",
+        )
+
+    def validate_qleverfile_setup(
+        self, args, qleverfile_path: Path
+    ) -> bool | None:
+        # Construct the command line and show it.
+        setup_config_show = (
+            f"Creating Qleverfile for {args.config_name} using "
+            f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}"
+        )
+        self.show(setup_config_show, only_show=args.show)
+        if args.show:
+            return True
+
+        # If there is already a Qleverfile in the current directory, exit.
+        if qleverfile_path.exists():
+            log.error("`Qleverfile` already exists in current directory")
+            log.info("")
+            log.info(
+                "If you want to create a new Qleverfile using "
+                "`qlever setup-config`, delete the existing Qleverfile "
+                "first"
+            )
+            return False
+        return None
+
+    def get_filtered_qleverfile_parser(
+        self, config_name: str
+    ) -> RawConfigParser:
+        qleverfile_config_path = (
+            self.qleverfiles_path / f"Qleverfile.{config_name}"
+        )
+        qleverfile_parser = Qleverfile.filter(
+            qleverfile_config_path, self.FILTER_CRITERIA
+        )
+        if qleverfile_parser.has_section("runtime"):
+            qleverfile_parser.set("runtime", "IMAGE", self.IMAGE)
+        return qleverfile_parser
+
+    def execute(self, args) -> bool:
+        qleverfile_path = Path("Qleverfile")
+        exit_status = self.validate_qleverfile_setup(args, qleverfile_path)
+        if exit_status is not None:
+            return exit_status
+
+        qleverfile_parser = self.get_filtered_qleverfile_parser(
+            args.config_name
+        )
+        # Copy the Qleverfile to the current directory.
+        try:
+            with qleverfile_path.open("w") as f:
+                qleverfile_parser.write(f)
+        except Exception as e:
+            log.error(
+                f'Could not copy "{qleverfile_path}" to current directory: {e}'
+            )
+            return False
+
+        # If we get here, everything went well.
+        log.info(
+            f'Created Qleverfile for config "{args.config_name}"'
+            f" in current directory"
+        )
+        return True
diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py
new file mode 100644
index 00000000..8a038344
--- /dev/null
+++ b/src/qoxigraph/commands/start.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+import subprocess
+import time
+from pathlib import Path
+
+from qlever.command import QleverCommand
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import binary_exists, is_server_alive, run_command
+
+
+class StartCommand(QleverCommand):
+    def __init__(self):
+        self.script_name = "qoxigraph"
+
+    def description(self) -> str:
+        return (
+            "Start the server for Oxigraph (requires that you have built an "
+            "index before)"
+        )
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {
+            "data": ["name"],
+            "server": ["host_name", "port"],
+            "runtime": ["system", "image", "server_container"],
+        }
+
+    def additional_arguments(self, subparser):
+        subparser.add_argument(
+            "--run-in-foreground",
+            action="store_true",
+            default=False,
+            help=(
+                "Run the start command in the foreground "
+                "(default: run in the background)"
+            ),
+        )
+        subparser.add_argument(
+            "--server-binary",
+            type=str,
+            default="oxigraph",
+            help=(
+                "The binary for starting the server (default: oxigraph) "
+                "(this requires that you have oxigraph-cli installed "
+                "on your machine)"
+            ),
+        )
+
+    @staticmethod
+    def wrap_cmd_in_container(args, cmd: str) -> str:
+        run_subcommand = "run --restart=unless-stopped"
+        if not args.run_in_foreground:
+            run_subcommand += " -d"
+        return Containerize().containerize_command(
+            cmd=cmd,
+            container_system=args.system,
+            run_subcommand=run_subcommand,
+            image_name=args.image,
+            container_name=args.server_container,
+            volumes=[("$(pwd)", "/index")],
+            ports=[(args.port, args.port)],
+            working_directory="/index",
+            use_bash=False,
+        )
+
+    def execute(self, args) -> bool:
+        bind = (
+            f"{args.host_name}:{args.port}"
+            if args.system == "native"
+            else f"0.0.0.0:{args.port}"
+        )
+        start_cmd = f"serve-read-only --location . --bind={bind}"
+
+        if args.system == "native":
+            start_cmd = f"{args.server_binary} {start_cmd}"
+            if not args.run_in_foreground:
+                start_cmd = (
+                    f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &"
+                )
+        else:
+            start_cmd = self.wrap_cmd_in_container(args, start_cmd)
+
+        # Show the command line.
+        self.show(start_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        endpoint_url = f"http://{args.host_name}:{args.port}/query"
+
+        # When running natively, check if the binary exists and works.
+        if args.system == "native":
+            if not binary_exists(args.server_binary, "server-binary"):
+                return False
+        else:
+            if Containerize().is_running(args.system, args.server_container):
+                log.error(
+                    f"Server container {args.server_container} already exists!\n"
+                )
+                log.info(
+                    f"To kill the existing server, use `{self.script_name} stop`"
+                )
+                return False
+
+        # Check if index files (*.sst) present in cwd
+        if len([p.name for p in Path.cwd().glob("*.sst")]) == 0:
+            log.error(f"No Oxigraph index files for {args.name} found!\n")
+            log.info(
+                f"Did you call `{self.script_name} index`? If you did, check "
+                "if .sst index files are present in current working directory."
+            )
+            return False
+
+        if is_server_alive(url=endpoint_url):
+            log.error(f"Oxigraph server already running on {endpoint_url}\n")
+            log.info(
+                f"To kill the existing server, use `{self.script_name} stop`"
+            )
+            return False
+
+        try:
+            process = run_command(
+                start_cmd,
+                use_popen=args.run_in_foreground,
+            )
+        except Exception as e:
+            log.error(f"Starting the Oxigraph server failed ({e})")
+            return False
+
+        # Tail the server log until the server is ready (note that the `exec`
+        # is important to make sure that the tail process is killed and not
+        # just the bash process).
+        if args.run_in_foreground:
+            log.info(
+                "Follow the server logs as long as the server is"
+                " running (Ctrl-C stops the server)"
+            )
+        else:
+            log.info(
+                "Follow the server logs until the server is ready"
+                " (Ctrl-C stops following the log, but NOT the server)"
+            )
+        log.info("")
+        if args.system == "native":
+            log_cmd = f"exec tail -f {args.name}.server-log.txt"
+        else:
+            time.sleep(2)
+            log_cmd = f"exec {args.system} logs -f {args.server_container}"
+        log_proc = subprocess.Popen(log_cmd, shell=True)
+        while not is_server_alive(endpoint_url):
+            time.sleep(1)
+
+        log.info(
+            f"Oxigraph server webapp for {args.name} will be available at "
+            f"http://{args.host_name}:{args.port} and the sparql endpoint for "
+            f"queries is {endpoint_url} when the server is ready"
+        )
+
+        # Kill the log process
+        if not args.run_in_foreground:
+            log_proc.terminate()
+
+        # With `--run-in-foreground`, wait until the server is stopped.
+        if args.run_in_foreground:
+            try:
+                process.wait()
+            except KeyboardInterrupt:
+                process.terminate()
+            log_proc.terminate()
+
+        return True
diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py
new file mode 100644
index 00000000..eb2de86c
--- /dev/null
+++ b/src/qoxigraph/commands/status.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+from qlever.commands.status import StatusCommand as QleverStatusCommand
+
+
+class StatusCommand(QleverStatusCommand):
+    DEFAULT_REGEX = "oxigraph\\s+serve-read-only"
+
+    def description(self) -> str:
+        return "Show Oxigraph processes running on this machine"
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--cmdline-regex",
+            default=self.DEFAULT_REGEX,
+            help=(
+                "Show only processes where the command line matches this regex"
+            ),
+        )
diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py
new file mode 100644
index 00000000..ed9c1036
--- /dev/null
+++ b/src/qoxigraph/commands/stop.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from qlever.command import QleverCommand
+from qlever.commands import stop as qlever_stop
+from qlever.log import log
+from qoxigraph.commands.status import StatusCommand
+
+
+class StopCommand(QleverCommand):
+    # Override this with StatusCommand from child class for execute
+    # method to work as intended
+    STATUS_COMMAND = StatusCommand()
+    DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%"
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return "Stop Oxigraph server for a given dataset or port"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {
+            "data": ["name"],
+            "server": ["port"],
+            "runtime": ["system", "server_container"],
+        }
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--cmdline-regex",
+            default=self.DEFAULT_REGEX,
+            help="Show only processes where the command "
+            "line matches this regex",
+        )
+
+    def execute(self, args) -> bool:
+        cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port))
+        description = (
+            f'Checking for processes matching "{cmdline_regex}"'
+            if args.system == "native"
+            else f"Checking for container with name {args.server_container}"
+        )
+
+        self.show(description, only_show=args.show)
+        if args.show:
+            return True
+
+        if args.system == "native":
+            stop_process_results = (
+                qlever_stop.StopCommand().stop_process_with_regex(
+                    cmdline_regex
+                )
+            )
+            if stop_process_results is None:
+                return False
+            if len(stop_process_results) > 0:
+                return all(stop_process_results)
+
+            # If no matching process found, show a message and the output of the
+            # status command.
+            log.error("No matching process found")
+            args.cmdline_regex = self.STATUS_COMMAND.DEFAULT_REGEX
+            log.info("")
+            StatusCommand().execute(args)
+            return True
+
+        # First check if container is running and if yes, stop and remove it
+        return qlever_stop.stop_container(args.server_container)

From 956a59df7e60f71f57315173c36ca81acec13ba8 Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Mon, 19 May 2025 23:51:47 +0200
Subject: [PATCH 2/7] Fix host_name bug in example_queries and use util
 stop_process_with_regex in stop

---
 src/qoxigraph/commands/example_queries.py | 2 +-
 src/qoxigraph/commands/stop.py            | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py
index d62982a8..4ef76c24 100644
--- a/src/qoxigraph/commands/example_queries.py
+++ b/src/qoxigraph/commands/example_queries.py
@@ -8,5 +8,5 @@
 class ExampleQueriesCommand(QleverExampleQueriesCommand):
     def execute(self, args) -> bool:
         if not args.sparql_endpoint:
-            args.sparql_endpoint = f"localhost:{args.port}/query"
+            args.sparql_endpoint = f"{args.host_name}:{args.port}/query"
         return super().execute(args)
diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py
index ed9c1036..dedd1ff2 100644
--- a/src/qoxigraph/commands/stop.py
+++ b/src/qoxigraph/commands/stop.py
@@ -3,6 +3,7 @@
 from qlever.command import QleverCommand
 from qlever.commands import stop as qlever_stop
 from qlever.log import log
+from qlever.util import stop_process_with_regex
 from qoxigraph.commands.status import StatusCommand
 
 
@@ -49,11 +50,7 @@ def execute(self, args) -> bool:
             return True
 
         if args.system == "native":
-            stop_process_results = (
-                qlever_stop.StopCommand().stop_process_with_regex(
-                    cmdline_regex
-                )
-            )
+            stop_process_results = stop_process_with_regex(cmdline_regex)
             if stop_process_results is None:
                 return False
             if len(stop_process_results) > 0:

From b1310ad42b07ab8ba9df177da06c4c2fb1e585c6 Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Fri, 27 Mar 2026 15:59:39 +0100
Subject: [PATCH 3/7] Update branch code and have only qoxigraph latest code

---
 README.md                                     |   80 +-
 pyproject.toml                                |   14 +-
 src/qlever/Qleverfiles/Qleverfile.dblp        |    5 +-
 src/qlever/Qleverfiles/Qleverfile.default     |    2 +-
 src/qlever/Qleverfiles/Qleverfile.fbeasy      |    6 +-
 src/qlever/Qleverfiles/Qleverfile.freebase    |    2 +-
 src/qlever/Qleverfiles/Qleverfile.imdb        |    7 +-
 src/qlever/Qleverfiles/Qleverfile.ohm-planet  |    5 +-
 src/qlever/Qleverfiles/Qleverfile.osm-country |   49 +-
 src/qlever/Qleverfiles/Qleverfile.osm-planet  |   15 +-
 .../Qleverfile.osm-planet-from-pbf            |   42 +
 src/qlever/Qleverfiles/Qleverfile.pubchem     |    4 +-
 src/qlever/Qleverfiles/Qleverfile.uniprot     |    7 +-
 src/qlever/Qleverfiles/Qleverfile.wikidata    |    2 +-
 .../Qleverfiles/Qleverfile.wikidata-munged    |   47 +
 .../Qleverfiles/Qleverfile.wikipathways       |    2 +-
 src/qlever/command.py                         |    2 +-
 src/qlever/commands/add_text_index.py         |   17 +-
 src/qlever/commands/benchmark_queries.py      | 1241 ++++++++++++++++
 src/qlever/commands/cache_stats.py            |   86 +-
 src/qlever/commands/clear_cache.py            |   32 +-
 src/qlever/commands/extract_queries.py        |   19 +-
 src/qlever/commands/get_data.py               |    2 +-
 src/qlever/commands/index.py                  |   37 +-
 src/qlever/commands/index_stats.py            |  501 ++++---
 src/qlever/commands/log.py                    |    2 +-
 src/qlever/commands/materialized_view.py      |  110 ++
 src/qlever/commands/query.py                  |    5 +-
 src/qlever/commands/rebuild_index.py          |  337 +++++
 src/qlever/commands/reset_updates.py          |   59 +
 src/qlever/commands/settings.py               |  113 +-
 src/qlever/commands/setup_config.py           |    2 +-
 src/qlever/commands/start.py                  |   46 +-
 src/qlever/commands/status.py                 |    4 +-
 src/qlever/commands/stop.py                   |    6 +-
 src/qlever/commands/system_info.py            |    2 +-
 src/qlever/commands/ui.py                     |   20 +-
 src/qlever/commands/update.py                 |   90 ++
 src/qlever/commands/update_wikidata.py        | 1306 +++++++++++++++++
 src/qlever/commands/warmup.py                 |    2 +-
 src/qlever/config.py                          |   10 +
 src/qlever/containerize.py                    |   14 +-
 src/qlever/qlever_main.py                     |    3 +-
 src/qlever/qleverfile.py                      |  103 +-
 src/qlever/util.py                            |   66 +-
 src/qoxigraph/commands/benchmark_queries.py   |   17 +
 src/qoxigraph/commands/index.py               |  168 ++-
 src/qoxigraph/commands/index_stats.py         |   72 +
 src/qoxigraph/commands/log.py                 |   18 +-
 src/qoxigraph/commands/query.py               |   12 +-
 src/qoxigraph/commands/setup_config.py        |  120 +-
 src/qoxigraph/commands/start.py               |  183 ++-
 src/qoxigraph/commands/status.py              |    4 +-
 src/qoxigraph/commands/stop.py                |   31 +-
 src/qoxigraph/qleverfile.py                   |   77 +
 .../test_benchmark_queries_methods.py         |  454 ++++++
 .../commands/test_cache_stats_execute.py      |   34 +-
 .../test_cache_stats_other_methods.py         |    4 +-
 test/qlever/commands/test_index_execute.py    |   29 +-
 .../commands/test_index_other_methods.py      |    4 +
 .../commands/test_index_stats_methods.py      |  266 ++++
 test/qlever/commands/test_start_execute.py    |   20 +-
 .../commands/test_start_other_methods.py      |    4 +-
 test/qlever/commands/test_status_execute.py   |    2 +-
 .../commands/test_status_other_methods.py     |    2 +-
 test/qlever/commands/test_stop_execute.py     |   14 +-
 .../commands/test_stop_other_methods.py       |    2 +-
 test/qlever/conftest.py                       |   16 +
 68 files changed, 5314 insertions(+), 765 deletions(-)
 create mode 100644 src/qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf
 create mode 100644 src/qlever/Qleverfiles/Qleverfile.wikidata-munged
 create mode 100644 src/qlever/commands/benchmark_queries.py
 create mode 100644 src/qlever/commands/materialized_view.py
 create mode 100644 src/qlever/commands/rebuild_index.py
 create mode 100644 src/qlever/commands/reset_updates.py
 create mode 100644 src/qlever/commands/update.py
 create mode 100644 src/qlever/commands/update_wikidata.py
 create mode 100644 src/qoxigraph/commands/benchmark_queries.py
 create mode 100644 src/qoxigraph/commands/index_stats.py
 create mode 100644 src/qoxigraph/qleverfile.py
 create mode 100644 test/qlever/commands/test_benchmark_queries_methods.py
 create mode 100644 test/qlever/commands/test_index_stats_methods.py
 create mode 100644 test/qlever/conftest.py

diff --git a/README.md b/README.md
index bb0765b5..0ef5bac9 100644
--- a/README.md
+++ b/README.md
@@ -1,72 +1,34 @@
 # QLever
 
-QLever is a very fast SPARQL engine, much faster than most existing engines. It
-can handle graphs with more than hundred billion triples on a single machine
-with moderate resources. See https://qlever.cs.uni-freiburg.de for more
-information and many public SPARQL endpoints that use QLever
+This repository provides a self-documenting and easy-to-use command-line tool
+for QLever (pronounced "Clever"), a graph database implementing the
+[RDF](https://www.w3.org/TR/rdf11-concepts/) and
+[SPARQL](https://www.w3.org/TR/sparql11-overview/) standards. 
+For a detailed description of what QLever is and what it can do, see 
+[here](https://github.com/ad-freiburg/qlever).
 
-This project provides a Python script that can control everything that QLever
-does, in particular, creating SPARQL endpoints for arbitrary RDF datasets. It
-is supposed to be very easy to use and self-explanatory as you use it. In
-particular, the tool provides context-sensitive autocompletion of all its
-commands and options. If you use a container system (like Docker or Podman),
-you don't even have to download any QLever code, but the script will download
-the required image for you.
+# Documentation
 
-NOTE: There has been a major update on 24.03.2024, which changed some of the
-Qleverfile variables and command-line options (all for the better, of course).
-If you encounter any problems, please contact us by opening an issue on
-https://github.com/ad-freiburg/qlever-control/issues.
+View the latest documentation at <https://docs.qlever.dev/quickstart>.
 
 # Installation
 
-Simply do `pip install qlever` and make sure that the directory where pip
-installs the package is in your `PATH`. Typically, `pip` will warn you when
-that is not the case and tell you what to do.
+There are native packages available for
+- [Debian and Ubuntu](https://docs.qlever.dev/quickstart/#debian-and-ubuntu)
+- [macOS](https://docs.qlever.dev/quickstart/#macos-apple-silicon)
 
-# Usage
-
-Create an empty directory, with a name corresponding to the dataset you want to
-work with. For the following example, take `olympics`. Go to that directory
-and do the following. After the first call, `qlever` will tell you how to
-activate autocompletion for all its commands and options (it's very easy, but
-`pip` cannot do that automatically).
-
-```
-qlever setup-config olympics   # Get Qleverfile (config file) for this dataset
-qlever get-data                # Download the dataset
-qlever index                   # Build index data structures for this dataset
-qlever start                   # Start a QLever server using that index
-qlever example-queries         # Launch some example queries
-qlever ui                      # Launch the QLever UI
-```
-
-This will create a SPARQL endpoint for the [120 Years of
-Olympics](https://github.com/wallscope/olympics-rdf) dataset. It is a great
-dataset for getting started because it is small, but not trivial (around 2
-million triples), and the downloading and indexing should only take a few
-seconds.
-
-Each command will also show you the command line it uses. That way you can
-learn, on the side, how QLever works internally. If you just want to know the
-command line for a particular command, without executing it, you can append
-`--show` like this:
-
-```
-qlever index --show
-```
-
-There are many more commands and options, see `qlever --help` for general help,
-`qlever <command> --help` for help on a specific command, or just the
-autocompletion.
+On other platforms simply install the `qlever` command-line
+[python package using `pipx`/`uv`](https://docs.qlever.dev/quickstart/#others).
+Note: QLever will be executed in a container, which comes with a performance penalty.
 
 # Use with your own dataset
 
-To use QLever with your own dataset, you should also write a `Qleverfile`, like
-in the example above. The easiest way to write a `Qleverfile` is to get one of
-the existing ones (using `qlever setup-config ...` as explained above) and then
-change it according to your needs (the variable names should be self-explanatory).
-Pick one for a dataset that is similar to yours and when in doubt, pick `olympics`.
+To use QLever with your own dataset, you need a `Qleverfile` (the config file
+for a dataset). The easiest way to write a `Qleverfile` is to get one of the
+existing ones (using `qlever setup-config ...`) and then
+change it according to your needs. Pick one for a dataset that is similar to
+yours and when in doubt, pick `olympics`. A
+[reference of all options](https://docs.qlever.dev/qleverfile/) is available.
 
 # For developers
 
@@ -84,7 +46,7 @@ pip install -e .
 Then you can use `qlever` just as if you had installed it via `pip install
 qlever`. Note that you don't have to rerun `pip install -e .` when you modify
 any of the `*.py` files and not even when you add new commands in
-`src/qlever/commands`. The exceutable created by `pip` simply links and refers
+`src/qlever/commands`. The executable created by `pip` simply links and refers
 to the files in your working copy.
 
 If you have bug fixes or new useful features or commands, please open a pull
diff --git a/pyproject.toml b/pyproject.toml
index 12c82f47..531f98aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,12 +5,13 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "qlever"
 description = "Command-line tool for using the QLever graph database"
-version = "0.5.23"
+version = "0.5.45"
 authors = [
     { name = "Hannah Bast", email = "bast@cs.uni-freiburg.de" }
 ]
 readme = "README.md"
-license = { text = "Apache-2.0" }
+license = "Apache-2.0"
+license-files = ["LICENSE"]
 requires-python = ">=3.8"
 
 keywords = ["Graph database", "Triplestore", "Knowledge graphs", "SPARQL", "RDF"]
@@ -20,18 +21,19 @@ classifiers = [
  "Topic :: Database :: Front-Ends"
 ]
 
-dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml" ]
+dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib", "requests-sse", "tqdm" ]
 
 [project.urls]
-Github = "https://github.com/ad-freiburg/qlever"
+homepage = "https://github.com/ad-freiburg/qlever"
+documentation = "https://docs.qlever.dev"
+repository = "https://github.com/ad-freiburg/qlever.git"
+bugtracker = "https://github.com/ad-freiburg/qlever/issues"
 
 [project.scripts]
 "qlever" = "qlever.qlever_main:main"
 "qoxigraph" = "qlever.qlever_main:main"
-"qlever-old" = "qlever.qlever_old:main"
 
 [tool.setuptools]
-license-files = ["LICENSE"]
 package-data = { "qlever" = ["Qleverfiles/*"] }
 
 [tool.pytest.ini_options]
diff --git a/src/qlever/Qleverfiles/Qleverfile.dblp b/src/qlever/Qleverfiles/Qleverfile.dblp
index 639fe31d..e82b3acb 100644
--- a/src/qlever/Qleverfiles/Qleverfile.dblp
+++ b/src/qlever/Qleverfiles/Qleverfile.dblp
@@ -12,13 +12,14 @@ DATA_TARFILE = dblp_KG_with_associated_data.tar
 GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
 GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
 VERSION      = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
-DESCRIPTION  = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION})
+DESCRIPTION  = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL}, version ${VERSION}
 FORMAT       = ttl
 
 [index]
 INPUT_FILES      = *.gz
 MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
-SETTINGS_JSON    = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }
+SETTINGS_JSON    = { "num-triples-per-batch": 5000000 }
+STXXL_MEMORY     = 5G
 
 [server]
 PORT               = 7015
diff --git a/src/qlever/Qleverfiles/Qleverfile.default b/src/qlever/Qleverfiles/Qleverfile.default
index eb452b2e..dc4075f3 100644
--- a/src/qlever/Qleverfiles/Qleverfile.default
+++ b/src/qlever/Qleverfiles/Qleverfile.default
@@ -36,7 +36,7 @@ ACCESS_TOKEN =
 
 # Use SYSTEM = docker to run QLever inside a docker container; the Docker image
 # will be downloaded automatically. Use SYSTEM = native to use self-compiled
-# binaries `IndexBuilderMain` and `ServerMain` (which should be in you PATH).
+# binaries `qlever-index` and `qlever-server` (which should be in your PATH).
 [runtime]
 SYSTEM = docker
 IMAGE  = docker.io/adfreiburg/qlever:latest
diff --git a/src/qlever/Qleverfiles/Qleverfile.fbeasy b/src/qlever/Qleverfiles/Qleverfile.fbeasy
index 36320816..26ac0b3a 100644
--- a/src/qlever/Qleverfiles/Qleverfile.fbeasy
+++ b/src/qlever/Qleverfiles/Qleverfile.fbeasy
@@ -8,13 +8,13 @@
 NAME              = fbeasy
 DATA_URL          = https://freebase-easy.cs.uni-freiburg.de
 GET_DATA_CMD      = wget -nc ${DATA_URL}/dump/fbeasy.nt
-DESCRIPTION       = RDF data from ${DATA_URL}, latest version from 18.07.2019
-TEXT_DESCRIPTION  = Sentences from Wikipedia that mention at least one Freebase entity
+DESCRIPTION       = Freebase Easy, RDF NT from ${DATA_URL}, latest version from 18.07.2019
+TEXT_DESCRIPTION  = Sentences from the English Wikipedia that mention at least one Freebase entity
 
 [index]
 INPUT_FILES     = fbeasy.nt
 CAT_INPUT_FILES = cat ${INPUT_FILES}
-SETTINGS_JSON   = { "ascii-prefixes-only": true, "num-triples-per-batch": 10000000 }
+SETTINGS_JSON   = { "num-triples-per-batch": 10000000 }
 
 [server]
 PORT               = 7003
diff --git a/src/qlever/Qleverfiles/Qleverfile.freebase b/src/qlever/Qleverfiles/Qleverfile.freebase
index 381ee716..4f19075b 100644
--- a/src/qlever/Qleverfiles/Qleverfile.freebase
+++ b/src/qlever/Qleverfiles/Qleverfile.freebase
@@ -8,7 +8,7 @@
 NAME         = freebase
 DATA_URL     = http://commondatastorage.googleapis.com/freebase-public/rdf/freebase-rdf-latest.gz
 GET_DATA_CMD = wget -nc ${DATA_URL}
-DESCRIPTION  = RDF data from ${DATA_URL}, latest (and final) version from 09.08.2015
+DESCRIPTION  = Freebase, RDF NT from ${DATA_URL}, latest (and final) version from 09.08.2015
 
 [index]
 INPUT_FILES     = freebase-rdf-latest.gz
diff --git a/src/qlever/Qleverfiles/Qleverfile.imdb b/src/qlever/Qleverfiles/Qleverfile.imdb
index f4965cbc..dd5e7b79 100644
--- a/src/qlever/Qleverfiles/Qleverfile.imdb
+++ b/src/qlever/Qleverfiles/Qleverfile.imdb
@@ -13,13 +13,14 @@ GET_PREFIXES     = echo "@prefix imdb: <https://www.imdb.com/> ."
 GET_IMDB_BASICS  = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
 GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
 GET_DATA_CMD     = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
-DESCRIPTION      = RDF data derived from ${IMDB_DATA_URL}
-TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
+VERSION          = $$(date -r imdb.ttl +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+DESCRIPTION      = IMDb, CSV from ${IMDB_DATA_URL}, converted to RDF TTL using awk, version ${VERSION}
+TEXT_DESCRIPTION = All literals, search with [ ql:contains-word "..."; ql:contains-entity ?literal ]
 
 [index]
 INPUT_FILES     = ${data:NAME}.ttl
 CAT_INPUT_FILES = cat ${INPUT_FILES}
-SETTINGS_JSON   = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+SETTINGS_JSON   = { "num-triples-per-batch": 1000000 }
 TEXT_INDEX      = from_literals
 
 [server]
diff --git a/src/qlever/Qleverfiles/Qleverfile.ohm-planet b/src/qlever/Qleverfiles/Qleverfile.ohm-planet
index f20d3b94..5585f48b 100644
--- a/src/qlever/Qleverfiles/Qleverfile.ohm-planet
+++ b/src/qlever/Qleverfiles/Qleverfile.ohm-planet
@@ -11,10 +11,10 @@ NAME           = ohm-planet
 GET_DATA_URL   = https://planet.openhistoricalmap.org/planet
 CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
 GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
-GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --add-way-node-order --no-untagged-nodes-geometric-relations 2>&1 | tee ${NAME}.osm2rdf-log.txt
+GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --iri-prefix-for-untagged-nodes http://www.openhistoricalmap.org/node/ 2>&1 | tee ${NAME}.osm2rdf-log.txt
 GET_DATA_CMD   = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
 VERSION        = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION    = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
+DESCRIPTION    = OHM from ${GET_DATA_URL} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects), version ${VERSION} 
 
 [index]
 INPUT_FILES        = ${data:NAME}.ttl.gz
@@ -22,6 +22,7 @@ MULTI_INPUT_JSON   = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
 STXXL_MEMORY       = 5G
 PARSER_BUFFER_SIZE = 50M
 SETTINGS_JSON      = { "num-triples-per-batch": 5000000 }
+ENCODE_AS_ID       = https://www.openhistoricalmap.org/node/ http://www.openhistoricalmap.org/node/ https://www.openhistoricalmap.org/way/ https://www.openhistoricalmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmrel_ https://www.openstreetmap.org/changeset/
 
 [server]
 PORT                        = 7037
diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-country b/src/qlever/Qleverfiles/Qleverfile.osm-country
index 1b2c334f..45e7c86d 100644
--- a/src/qlever/Qleverfiles/Qleverfile.osm-country
+++ b/src/qlever/Qleverfiles/Qleverfile.osm-country
@@ -1,48 +1,39 @@
-# Qleverfile for OSM of some country, use with https://github.com/ad-freiburg/qlever-control
+# Qleverfile for OSM of some country, use with `qlever` CLI
 #
-# qlever get-data  # downloads .pbf file from Geofabrik und builds .ttl.bz2 using osm2rdf
-# qlever index     # for example Germany takes ~30 minutes and ~10 GB RAM (on an AMD Ryzen 9 5900X)
-# qlever start     # starts the server
-#
-# Make sure that osm2rdf is in your path. Set CONTINENT and COUNTRY such that
-# the link under GET_DATA_CMD exists (the names are usually the canonical
-# names). The time for osm2rdf is around the same as that for "qlever index".
+# Make sure that `osm2rdf` is in your path. Set CONTINENT and COUNTRY
+# such that the link under GET_DATA_CMD exists (the names are usually
+# the canonical names).
 
-# Dataset settings
 [data]
-CONTINENT         = europe
-COUNTRY           = switzerland
-NAME              = osm-${COUNTRY}
-PBF               = ${NAME}.pbf
-WITH_TEXT         = false
-VERSION           = $$(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6)
-GET_DATA_CMD      = wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${NAME}.*.bz2; ( time osm2rdf ${PBF} -o ${NAME}.ttl --cache . ) 2>&1 | tee ${NAME}.osm2rdf-log.txt; rm -f spatial-*
-DESCRIPTION       = OSM ${COUNTRY}, dump from ${VERSION} with ogc:sfContains
+CONTINENT    = europe
+COUNTRY      = switzerland
+NAME         = osm-${COUNTRY}
+GET_DATA_URL = https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf
+GET_DATA_CMD = wget -nc -O ${NAME}.pbf ${GET_DATA_URL}; (time osm2rdf ${NAME}.pbf -o ${NAME}.ttl --output-compression gz --cache . --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/) 2>&1 | tee ${NAME}.osm2rdf-log.txt
+VERSION      = $$(ls -l --time-style=+%d.%m.%Y ${NAME}.pbf 2> /dev/null | cut -d' ' -f6)
+DESCRIPTION  = OSM ${COUNTRY}, PBF from ${GET_DATA_URL}, converted to RDF with osm2rdf, version ${VERSION}
 
-# Indexer settings
 [index]
-INPUT_FILES       = ${data:NAME}.ttl.bz2
-CAT_INPUT_FILES   = bzcat ${data:NAME}.ttl.bz2
-STXXL_MEMORY      = 10G
-SETTINGS_JSON     = { "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+INPUT_FILES      = ${data:NAME}.ttl.gz
+CAT_INPUT_FILES  = zcat ${data:NAME}.ttl.gz
+PARALLEL_PARSING = true
+VOCABULARY_TYPE  = on-disk-compressed-geo-split
+STXXL_MEMORY     = 10G
+SETTINGS_JSON    = { "num-triples-per-batch": 10000000 }
+ENCODE_AS_ID     = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
 
-# Server settings
 [server]
-HOSTNAME                    = localhost
 PORT                        = 7025
-ACCESS_TOKEN                = ${data:NAME}_%RANDOM%
+ACCESS_TOKEN                = ${data:NAME}
 MEMORY_FOR_QUERIES          = 20G
 CACHE_MAX_SIZE              = 10G
 CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
-CACHE_MAX_NUM_ENTRIES       = 100
 TIMEOUT                     = 100s
 
-# Runtime to use
 [runtime]
 SYSTEM = docker
 IMAGE = docker.io/adfreiburg/qlever:latest
 
-# Qlever UI
 [ui]
 UI_PORT   = 7000
-UI_CONFIG = osm
+UI_CONFIG = osm-planet
diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-planet b/src/qlever/Qleverfiles/Qleverfile.osm-planet
index 8ccfb8c5..051d39c1 100644
--- a/src/qlever/Qleverfiles/Qleverfile.osm-planet
+++ b/src/qlever/Qleverfiles/Qleverfile.osm-planet
@@ -8,19 +8,20 @@
 
 [data]
 NAME         = osm-planet
-DATA_URL     = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
-GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${DATA_URL} | tee ${NAME}.download-log.txt
+GET_DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
+GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${GET_DATA_URL} | tee ${NAME}.download-log.txt
 VERSION      = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
-DESCRIPTION  = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
+DESCRIPTION  = OpenStreetMap, RDF TTL from ${GET_DATA_URL} including DE-9IM triples, version ${VERSION}
 
 [index]
 INPUT_FILES        = ${data:NAME}.ttl.bz2
-CAT_INPUT_FILES    = lbzcat -n 2 ${INPUT_FILES}
-PARALLEL_PARSING   = true
+MULTI_INPUT_JSON   = { "cmd": "lbzcat -n 2 ${INPUT_FILES}", "parallel": "true" }
+VOCABULARY_TYPE    = on-disk-compressed-geo-split
 PARSER_BUFFER_SIZE = 100M
-STXXL_MEMORY       = 40G
+STXXL_MEMORY       = 60G
 SETTINGS_JSON      = { "num-triples-per-batch": 10000000 }
-ULIMIT             = 10000
+ULIMIT             = 50000
+ENCODE_AS_ID       = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
 
 [server]
 PORT                        = 7007
diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf b/src/qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf
new file mode 100644
index 00000000..f26daf8e
--- /dev/null
+++ b/src/qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf
@@ -0,0 +1,42 @@
+# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data  # download ~100 GB (pbf), convert with osm2rdf, ~200B triples
+# qlever index     # ~40 hours, ~60 GB RAM, ~2.5 TB index size on disk
+# qlever start     # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 9950X with 128 GB RAM and 4 x 8 TB NVMe (02.10.2025)
+
+[data]
+NAME         = osm-planet
+PLANET_PBF   = planet-250929.osm.pbf
+GET_DATA_URL = https://planet.openstreetmap.org/pbf/${PLANET_PBF}
+GET_PBF_CMD  = unbuffer wget -O ${PLANET_PBF} ${GET_DATA_URL}
+OSM2RDF_CMD  = unbuffer osm2rdf ${PLANET_PBF} -o ${NAME}.ttl --num-threads 20 --output-compression gz --cache . --store-locations disk-dense --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/ --split-tag-key-by-semicolon ref --split-tag-key-by-semicolon service
+GET_DATA_CMD = ${GET_PBF_CMD} && ${OSM2RDF_CMD} 2>&1 | tee ${NAME}.osm2rdf-log.txt
+VERSION      = $$(date -r ${PLANET_PBF} +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION  = OpenStreetMap, PBF from ${GET_DATA_URL}, converted to RDF TTL and enhanced by DE-9IM triples using osm2rdf
+
+[index]
+INPUT_FILES        = ${data:NAME}.ttl.gz
+MULTI_INPUT_JSON   = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+VOCABULARY_TYPE    = on-disk-compressed-geo-split
+PARSER_BUFFER_SIZE = 100M
+STXXL_MEMORY       = 60G
+SETTINGS_JSON      = { "num-triples-per-batch": 10000000 }
+ULIMIT             = 50000
+ENCODE_AS_ID       = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+[server]
+PORT                        = 7007
+ACCESS_TOKEN                = ${data:NAME}
+MEMORY_FOR_QUERIES          = 40G
+CACHE_MAX_SIZE              = 20G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 10G
+TIMEOUT                     = 600s
+
+[runtime]
+SYSTEM = docker
+IMAGE  = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = osm-planet
diff --git a/src/qlever/Qleverfiles/Qleverfile.pubchem b/src/qlever/Qleverfiles/Qleverfile.pubchem
index 25aef1ca..fa7894dd 100644
--- a/src/qlever/Qleverfiles/Qleverfile.pubchem
+++ b/src/qlever/Qleverfiles/Qleverfile.pubchem
@@ -24,11 +24,11 @@ ONTOLOGIES_DIR      = RDF.ontologies
 PUBCHEM_DIR         = RDF.pubchem
 ONTOLOGIES_CSV      = ontologies.csv
 CHECK_REQUIREMENTS  = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
-GET_DATA_CMD_1      = mkdir -p ${ONTOLOGIES_DIR} && cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi'
+GET_DATA_CMD_1      = mkdir -p ${ONTOLOGIES_DIR} && (cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi')
 GET_DATA_CMD_2      = mkdir -p ${PUBCHEM_DIR} && wget -r -nv -nH --cut-dirs=2 --no-parent -P ${PUBCHEM_DIR} ${GET_DATA_URL}
 GET_DATA_CMD        = ${CHECK_REQUIREMENTS} && ${GET_DATA_CMD_1} 2>&1 | tee pubchem.get-data-log.txt; ${GET_DATA_CMD_2} 2>&1 | tee -a pubchem.get-data-log.txt
 VERSION             = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION         = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
+DESCRIPTION         = PubChem, RDF TTL from ${GET_DATA_URL} + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo), version ${data:VERSION}
 MAKE_ONTOLOGIES_CSV = $$(mkdir -p ${ONTOLOGIES_DIR} && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\nBioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\nCHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\nChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\nCiTO,cito.nt,http://purl.org/spar/cito.nt\nDCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\nFaBiO,fabio.nt,http://purl.org/spar/fabio.nt\nGO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\nIAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\nNCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\nNDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nOBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\nOWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\nPDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\nPR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\nRDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\nRDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\nRO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\nSIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\nSKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\nSO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\nUO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > ${ONTOLOGIES_DIR}/${ONTOLOGIES_CSV})
 
 [index]
diff --git a/src/qlever/Qleverfiles/Qleverfile.uniprot b/src/qlever/Qleverfiles/Qleverfile.uniprot
index 295bf4ab..c3626d45 100644
--- a/src/qlever/Qleverfiles/Qleverfile.uniprot
+++ b/src/qlever/Qleverfiles/Qleverfile.uniprot
@@ -12,7 +12,7 @@
 
 [data]
 NAME             = uniprot
-DATE             = 2024-11-27
+DATE             = 2025-06-18
 RDFXML_DIR       = rdf.${DATE}
 TTL_DIR          = ttl.${DATE}
 UNIPROT_URL      = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
@@ -22,7 +22,7 @@ GET_EXAMPLES_CMD = mkdir -p ${TTL_DIR} && git clone ${EXAMPLES_URL} && (cd sparq
 GET_RDFXML_CMD   = mkdir -p ${RDFXML_DIR} && (echo "${RHEA_URL}/chebi.owl.gz"; echo "${RHEA_URL}/rhea.rdf.gz"; curl -s ${UNIPROT_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" -) | while read URL; do wget --no-verbose -P ${RDFXML_DIR} $$URL 2>&1 | tee -a uniprot.download-log; done
 RDFXML2TTL_CMD   = mkdir -p ${TTL_DIR} && for RDFXML in ${RDFXML_DIR}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=ttl -q 2> ${TTL_DIR}/$$(basename $$RDFXML).stderr | gzip -c > ${TTL_DIR}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/ttl.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
 GET_DATA_CMD     = date > ${NAME}.get-data.begin-date && ${GET_EXAMPLES_CMD} && ${GET_RDFXML_CMD} && ${RDFXML2TTL_CMD} && date > ${NAME}.get-data.end-date
-DESCRIPTION      = Complete UniProt data from ${UNIPROT_URL}, with additional data from ${RHEA_URL} and ${EXAMPLES_URL}
+DESCRIPTION      = UniProt, RDF XML from ${UNIPROT_URL} + additional data from ${RHEA_URL} and ${EXAMPLES_URL}, version ${DATE}
 
 [index]
 INPUT_FILES      = ${data:TTL_DIR}/*.ttl.gz
@@ -55,7 +55,8 @@ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/unip
                     { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
                     { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }]
 SETTINGS_JSON    = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
-STXXL_MEMORY     = 60G
+STXXL_MEMORY     = 80G
+ULIMIT           = 50000
 
 [server]
 PORT                        = 7018
diff --git a/src/qlever/Qleverfiles/Qleverfile.wikidata b/src/qlever/Qleverfiles/Qleverfile.wikidata
index e6ec6f6c..5e9c4cd8 100644
--- a/src/qlever/Qleverfiles/Qleverfile.wikidata
+++ b/src/qlever/Qleverfiles/Qleverfile.wikidata
@@ -16,7 +16,7 @@ GET_DATA_URL      = https://dumps.wikimedia.org/wikidatawiki/entities
 GET_DATA_CMD      = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
 DATE_WIKIDATA     = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
 DATE_WIKIPEDIA    = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION       = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
+DESCRIPTION       = Complete Wikidata, from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2), version ${DATE_WIKIDATA}
 
 [index]
 INPUT_FILES      = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
diff --git a/src/qlever/Qleverfiles/Qleverfile.wikidata-munged b/src/qlever/Qleverfiles/Qleverfile.wikidata-munged
new file mode 100644
index 00000000..834cd364
--- /dev/null
+++ b/src/qlever/Qleverfiles/Qleverfile.wikidata-munged
@@ -0,0 +1,47 @@
+# Qleverfile for Wikidata MUNGED, use with `qlever` CLI (`pipx install qlever`)
+#
+# qlever get-data  # ~24 hours, ~110 GB (compressed), ~18 billion triples
+# qlever index     # ~4 hours, ~20 GB RAM, ~500 GB index size on disk
+# qlever start     # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 9950X with 128 GB RAM, and NVMe SSD (17.01.2026)
+
+[DEFAULT]
+NAME = wikidata
+
+[data]
+GET_DATA_URL_BASE     = https://dumps.wikimedia.org/wikidatawiki/entities
+GET_DATA_URL_ALL      = ${GET_DATA_URL_BASE}/20260112/wikidata-20260112-all-BETA.ttl.bz2
+GET_DATA_URL_LEXEMES  = ${GET_DATA_URL_BASE}/20260116/wikidata-20260116-lexemes-BETA.ttl.bz2
+GET_DATA_WDQS_VER     = 0.3.156
+GET_DATA_WGET_CMD     = unbuffer wget -q --show-progress
+GET_DATA_CMD_1        = ${GET_DATA_WGET_CMD} -O service-${GET_DATA_WDQS_VER}-dist.tar.gz https://archiva.wikimedia.org/repository/releases/org/wikidata/query/rdf/service/${GET_DATA_WDQS_VER}/service-${GET_DATA_WDQS_VER}-dist.tar.gz | tee wikidata.download-log.txt && tar -xzf service-${GET_DATA_WDQS_VER}-dist.tar.gz && rm service-${GET_DATA_WDQS_VER}-dist.tar.gz
+GET_DATA_CMD_2        = ${GET_DATA_WGET_CMD} -O dcatap.rdf https://dumps.wikimedia.org/wikidatawiki/entities/dcatap.rdf | tee -a wikidata.download-log.txt && cat dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt && rm dcatap.rdf && ${GET_DATA_WGET_CMD} -O latest-lexemes.ttl.bz2 ${GET_DATA_URL_LEXEMES} 2>&1 | tee -a wikidata.download-log.txt && ${GET_DATA_WGET_CMD} -O latest-all.ttl.bz2 ${GET_DATA_URL_ALL} 2>&1 | tee -a wikidata.download-log.txt
+GET_DATA_CMD_3        = service-${GET_DATA_WDQS_VER}/munge.sh -f latest-all.ttl.bz2 -d . -c 150000000 && mv wikidump-000000001.ttl.gz latest-all.MUNGED.ttl.gz && touch -r latest-all.ttl.bz2 latest-all.MUNGED.ttl.gz
+GET_DATA_CMD          = ${GET_DATA_CMD_1} && ${GET_DATA_CMD_2} && ${GET_DATA_CMD_3}
+DATE_WIKIDATA_ALL     = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
+DATE_WIKIDATA_LEXEMES = $$(date -r latest-lexemes.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION           = Complete Wikidata, TTL from ${GET_DATA_URL_BASE} (latest-all.ttl.bz2 from ${DATE_WIKIDATA_ALL} munged, latest-lexemes.ttl.bz2 from ${DATE_WIKIDATA_LEXEMES}), updated LIVE
+
+[index]
+INPUT_FILES      = latest-all.MUNGED.ttl.gz latest-lexemes.ttl.bz2 dcatap.nt
+MULTI_INPUT_JSON = { "cmd": "zcat latest-all.MUNGED.ttl.gz", "format": "ttl", "parallel": "true" }
+                   { "cmd": "lbzcat -n 1 latest-lexemes.ttl.bz2", "format": "ttl", "parallel": "false" }
+                   { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }
+SETTINGS_JSON    = { "num-triples-per-batch": 5000000, "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true } }
+STXXL_MEMORY     = 10G
+
+[server]
+PORT                        = 7001
+ACCESS_TOKEN                = ${data:NAME}
+MEMORY_FOR_QUERIES          = 20G
+CACHE_MAX_SIZE              = 15G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+TIMEOUT                     = 600s
+
+[runtime]
+SYSTEM = docker
+IMAGE  = adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = wikidata
diff --git a/src/qlever/Qleverfiles/Qleverfile.wikipathways b/src/qlever/Qleverfiles/Qleverfile.wikipathways
index 7e9d0432..2cfb6c6b 100644
--- a/src/qlever/Qleverfiles/Qleverfile.wikipathways
+++ b/src/qlever/Qleverfiles/Qleverfile.wikipathways
@@ -8,7 +8,7 @@
 
 [data]
 NAME             = wikipathways
-RELEASE          = 20240810
+RELEASE          = current
 GET_DATA_URL     = https://data.wikipathways.org/${RELEASE}/rdf
 GET_DATA_CMD     = wget -O wikipathways-rdf-void.ttl ${GET_DATA_URL}/wikipathways-rdf-void.ttl && \
                     wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-wp.zip && \
diff --git a/src/qlever/command.py b/src/qlever/command.py
index e48b3d17..a4d95923 100644
--- a/src/qlever/command.py
+++ b/src/qlever/command.py
@@ -42,7 +42,7 @@ def should_have_qleverfile(self) -> bool:
         pass
 
     @abstractmethod
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         """
         Retun the arguments relevant for this command. This must be a subset of
         the names of `all_arguments` defined in `QleverConfig`. Only these
diff --git a/src/qlever/commands/add_text_index.py b/src/qlever/commands/add_text_index.py
index 943c701f..df250287 100644
--- a/src/qlever/commands/add_text_index.py
+++ b/src/qlever/commands/add_text_index.py
@@ -5,7 +5,7 @@
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
-from qlever.util import get_existing_index_files, run_command
+from qlever.util import binary_exists, get_existing_index_files, run_command
 
 
 class AddTextIndexCommand(QleverCommand):
@@ -22,7 +22,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name"],
             "index": [
@@ -80,17 +80,8 @@ def execute(self, args) -> bool:
         if args.show:
             return True
 
-        # When running natively, check if the binary exists and works.
-        if args.system == "native":
-            try:
-                run_command(f"{args.index_binary} --help")
-            except Exception as e:
-                log.error(
-                    f'Running "{args.index_binary}" failed ({e}), '
-                    f"set `--index-binary` to a different binary or "
-                    f"use `--container_system`"
-                )
-                return False
+        if not binary_exists(args.index_binary, "index-binary", args):
+            return False
 
         # Check if text index files already exist.
         existing_text_index_files = get_existing_index_files(
diff --git a/src/qlever/commands/benchmark_queries.py b/src/qlever/commands/benchmark_queries.py
new file mode 100644
index 00000000..05a3647d
--- /dev/null
+++ b/src/qlever/commands/benchmark_queries.py
@@ -0,0 +1,1241 @@
+from __future__ import annotations
+
+import csv
+import json
+import re
+import shlex
+import subprocess
+import time
+import traceback
+from io import StringIO
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import rdflib
+import yaml
+from termcolor import colored
+
+from qlever import command_objects, engine_name, script_name
+from qlever.command import QleverCommand
+from qlever.commands.clear_cache import ClearCacheCommand
+from qlever.commands.ui import dict_to_yaml
+from qlever.containerize import Containerize
+from qlever.log import log, mute_log
+from qlever.util import run_command, run_curl_command
+
+
+def pretty_printed_query(
+    query: str, show_prefixes: bool, system: str = "docker"
+) -> str:
+    """
+    Pretty-print a SPARQL query using the sparql-formatter Docker image.
+    Optionally strips PREFIX declarations from the output.
+    Argument `system` can either be docker or podman.
+    """
+    if system not in Containerize.supported_systems():
+        system = "docker"
+    remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+    pretty_print_query_cmd = (
+        f"echo {shlex.quote(query)}"
+        f" | {system} run -i --rm docker.io/sparqling/sparql-formatter"
+        f"{remove_prefixes_cmd} | grep -v '^$'"
+    )
+    try:
+        query_pretty_printed = run_command(
+            pretty_print_query_cmd, return_output=True
+        )
+        return query_pretty_printed.rstrip()
+    except Exception as e:
+        log.debug(
+            f"Failed to pretty-print query, returning original query: {e}"
+        )
+        return query.rstrip()
+
+
+def sparql_query_type(query: str) -> str:
+    """
+    Return the first SPARQL query form keyword in `query` (SELECT, ASK,
+    CONSTRUCT or DESCRIBE, upper-cased), or "UNKNOWN" if there is none.
+    """
+    # The keyword may be followed by whitespace or directly by `{`, `*`,
+    # `(`, `<` or a variable (e.g. `ASK{`, `SELECT*`). It must not be the
+    # tail of a word, variable or prefixed name (e.g. `?task ` is not ASK
+    # and `ex:select ` is not SELECT).
+    regex = r"(?<![\w?$:])(SELECT|ASK|CONSTRUCT|DESCRIBE)(?=[\s{*?$<(])"
+    match = re.search(regex, query, re.IGNORECASE)
+    return match.group(1).upper() if match else "UNKNOWN"
+
+
+def filter_queries(
+    queries: list[tuple[str, str, str]], query_ids: str, query_regex: str
+) -> list[tuple[str, str, str]]:
+    """
+    Given a list of queries (tuple of query name, desc and full sparql query),
+    filter them and keep the ones which are a part of query_ids
+    and match with query_regex (if provided).
+    """
+    # Parse query_ids into a list of indices
+    total_queries = len(queries)
+    query_indices = []
+    for part in query_ids.split(","):
+        part = part.strip()
+        if not part:
+            continue
+        try:
+            if "-" in part:
+                start, end = part.split("-", 1)
+                if end == "$":
+                    end = total_queries
+                query_indices.extend(range(int(start) - 1, int(end)))
+            else:
+                idx = (int(part) if part != "$" else total_queries) - 1
+                query_indices.append(idx)
+        except ValueError as exc:
+            log.error(f"Invalid query ID '{part}': {exc}")
+            return []
+
+    # Check for duplicate indices
+    seen = set()
+    for idx in query_indices:
+        if idx in seen:
+            log.error(f"Duplicate query ID {idx + 1} in '{query_ids}'")
+            return []
+        seen.add(idx)
+
+    # Filter by regex and collect results
+    try:
+        filtered_queries = []
+        pattern = (
+            re.compile(query_regex, re.IGNORECASE) if query_regex else None
+        )
+        for query_idx in query_indices:
+            if query_idx < 0 or query_idx >= total_queries:
+                continue
+
+            name, description, query = queries[query_idx]
+
+            # Only include queries that match the query_regex if present
+            if pattern and not (
+                pattern.search(name)
+                or pattern.search(description)
+                or pattern.search(query)
+            ):
+                continue
+
+            filtered_queries.append((name, description, query))
+        return filtered_queries
+    except Exception as exc:
+        log.error(f"Error filtering queries: {exc}")
+        return []
+
+
+def parse_queries_tsv(queries_cmd: str) -> list[tuple[str, str, str]]:
+    """
+    Execute the given bash command to fetch tsv queries and return a
+    list of queries i.e. tuple(query_name, "", full_sparql_query)
+    Note: query_description is returned as empty to match the return
+    structure of parse_queries_yml.
+    """
+    try:
+        tsv_queries_str = run_command(queries_cmd, return_output=True)
+        if len(tsv_queries_str) == 0:
+            log.error("No queries found in the TSV queries file")
+            return []
+        return [
+            (query_name, "", sparql_query)
+            for line in tsv_queries_str.strip().splitlines()
+            for query_name, sparql_query in [line.split("\t", 1)]
+        ]
+    except Exception as exc:
+        log.error(f"Failed to read the TSV queries file: {exc}")
+        return []
+
+
+def parse_queries_yml(
+    queries_file: str,
+) -> tuple[str | None, str | None, list[tuple[str, str, str]]]:
+    """
+    Parse a YML file, validate its structure and return a tuple of
+    (benchmark_name, benchmark_description, queries) where queries is a
+    list of tuple(query_name, query_description, full_sparql_query).
+    """
+    with open(queries_file, "r", encoding="utf-8") as q_file:
+        try:
+            data = yaml.safe_load(q_file)
+        except yaml.YAMLError as exc:
+            log.error(f"Error parsing {queries_file} file: {exc}")
+            return None, None, []
+
+    # Validate the structure
+    if not isinstance(data, dict) or "queries" not in data:
+        log.error("Error: YAML file must contain a top-level 'queries' key")
+        return None, None, []
+
+    if not isinstance(data["queries"], list):
+        log.error("Error: 'queries' key in YML file must hold a list.")
+        return None, None, []
+
+    queries = []
+    for query in data["queries"]:
+        if (
+            not isinstance(query, dict)
+            or "query" not in query
+            or "name" not in query
+        ):
+            log.error(
+                "Error: Each item in 'queries' must contain "
+                "'name' and 'query' keys."
+            )
+            return None, None, []
+        queries.append(
+            (query["name"], query.get("description", ""), query["query"])
+        )
+    return data.get("name"), data.get("description"), queries
+
+
+def get_result_size(
+    count_only: bool,
+    query_type: str,
+    accept_header: str,
+    result_file: str,
+) -> tuple[int, dict[str, str] | None]:
+    """
+    Return the result size computed from `result_file` for the given accept
+    header, and an error dict (`short`, `long`) if that failed, else None.
+    """
+
+    def get_json_error_msg(e: Exception) -> dict[str, str]:
+        return {
+            "short": "Malformed JSON",
+            "long": "curl returned with code 200, "
+            "but the JSON is malformed: " + re.sub(r"\s+", " ", str(e)),
+        }
+
+    result_size = 0
+    error_msg = None
+    # CASE 0: The result is empty despite a 200 HTTP code (not a
+    # problem for CONSTRUCT and DESCRIBE queries).
+    is_graph_query = query_type in ("CONSTRUCT", "DESCRIBE")
+    if Path(result_file).stat().st_size == 0 and not is_graph_query:
+        error_msg = {
+            "short": "Empty result",
+            "long": "curl returned with code 200, but the result is empty",
+        }
+
+    # CASE 1: Just counting the size of the result (TSV or JSON).
+    elif count_only:
+        if accept_header in ("text/tab-separated-values", "text/csv"):
+            result_size = run_command(
+                f"sed 1d {result_file}", return_output=True
+            )
+        elif accept_header == "application/qlever-results+json":
+            try:
+                # sed cmd to get the number between 2nd and 3rd double_quotes
+                result_size = run_command(
+                    f"jq '.res[0]' {result_file}"
+                    " | sed 's/[^0-9]*\\([0-9]*\\).*/\\1/'",
+                    return_output=True,
+                )
+            except Exception as e:
+                error_msg = get_json_error_msg(e)
+        else:
+            try:
+                result_size = run_command(
+                    f'jq -r ".results.bindings[0]'
+                    f" | to_entries[0].value.value"
+                    f' | tonumber" {result_file}',
+                    return_output=True,
+                )
+            except Exception as e:
+                error_msg = get_json_error_msg(e)
+
+    # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
+    else:
+        if accept_header in ("text/tab-separated-values", "text/csv"):
+            result_size = run_command(
+                f"sed 1d {result_file} | wc -l", return_output=True
+            )
+        elif accept_header == "text/turtle":
+            result_size = run_command(
+                f"sed '1d;/^@prefix/d;/^\\s*$/d' {result_file} | wc -l",
+                return_output=True,
+            )
+        elif accept_header == "application/qlever-results+json":
+            try:
+                result_size = run_command(
+                    f'jq -r ".resultsize" {result_file}',
+                    return_output=True,
+                )
+            except Exception as e:
+                error_msg = get_json_error_msg(e)
+        else:
+            try:
+                result_size = int(
+                    run_command(
+                        f'jq -r ".results.bindings | length" {result_file}',
+                        return_output=True,
+                    ).rstrip()
+                )
+            except Exception as e:
+                error_msg = get_json_error_msg(e)
+    try:
+        return int(result_size), error_msg
+    except ValueError as e:  # e.g. `jq` printed `null` for a missing key
+        return 0, error_msg or {"short": "Invalid result size", "long": str(e)}
+
+
+def get_single_int_result(result_file: str) -> int | None:
+    """
+    When downloading the full result of a query with accept header as
+    application/sparql-results+json and result_size == 1, get the single
+    integer result value (if any).
+    """
+    single_int_result = None
+    try:
+        single_int_result = int(
+            run_command(
+                f'jq -e -r ".results.bindings[0][] | .value" {result_file}',
+                return_output=True,
+            ).rstrip()
+        )
+    except Exception:
+        pass
+    return single_int_result
+
+
+def restart_server(start_only: bool = False) -> bool:
+    """
+    Restart the SPARQL server after the server hangs i.e. doesn't return
+    results after timeout + 30s
+    Extremely useful for benchmarking oxigraph (doesn't have timeout implemented)
+    and blazegraph (sometimes doesn't terminate query execution at timeout)
+    Only useful when Qleverfile in CWD and configured properly i.e. no command
+    line args needed to call stop and start commands
+    """
+    stop_cmd = f"{script_name} stop"
+    start_cmd = f"{script_name} start"
+    if not start_only:
+        try:
+            run_command(stop_cmd)
+            time.sleep(2)
+        except Exception as e:
+            log.warning(f"{script_name} process could not be stopped!: {e}")
+    try:
+        run_command(start_cmd)
+        time.sleep(5)
+        log.info(f"Successfully restarted {engine_name} server after hang!")
+        return True
+    except Exception as e:
+        log.warning(
+            f"{script_name} server could not be restarted. This might affect "
+            f"the benchmark process!: {e}"
+        )
+        return False
+
+
+def resolve_benchmark_metadata(
+    cli_name: str | None,
+    cli_description: str | None,
+    yml_name: str | None,
+    yml_description: str | None,
+    dataset: str | None,
+) -> tuple[str | None, str | None]:
+    """
+    Resolve benchmark name and description using priority:
+    1. CLI args (highest priority)
+    2. YML file fields
+    3. Default values derived from dataset name
+    """
+    dataset_name = dataset.capitalize() if dataset else None
+    default_description = (
+        f"{dataset_name} benchmark ran using {script_name} benchmark-queries"
+        if dataset_name
+        else None
+    )
+    benchmark_name = cli_name or yml_name or dataset_name
+    benchmark_description = (
+        cli_description or yml_description or default_description
+    )
+    return benchmark_name, benchmark_description
+
+
+def compute_index_stats() -> tuple[float | None, float | None]:
+    """
+    Compute the index size (Bytes) and time (seconds) if available
+    """
+    index_stats = command_objects["index-stats"]
+    index_time = index_size = None
+    index_log_file = next(Path.cwd().glob("*.index-log.txt"), None)
+
+    if index_log_file:
+        index_args = SimpleNamespace(
+            time_unit="s",
+            size_unit="B",
+            ignore_text_index=False,
+            name=index_log_file.name.split(".")[0],
+        )
+        durations = index_stats.execute_time(index_args, index_log_file.name)
+        if len(durations) > 0 and "TOTAL time" in durations:
+            index_time = durations["TOTAL time"][0]
+        sizes = index_stats.execute_space(index_args)
+        if len(sizes) > 0 and "TOTAL size" in sizes:
+            index_size = sizes["TOTAL size"][0]
+
+    return index_time, index_size
+
+
+def get_query_results(
+    result_file: str, result_size: int, accept_header: str
+) -> tuple[list[str], list[list[str]]]:
+    """
+    Return (headers, rows) with at most `result_size` rows of `result_file`.
+    """
+    if accept_header in ("text/tab-separated-values", "text/csv"):
+        separator = "," if accept_header == "text/csv" else "\t"
+        get_result_cmd = f"sed -n '1,{result_size + 1}p' {result_file}"
+        results_str = run_command(get_result_cmd, return_output=True)
+        reader = csv.reader(StringIO(results_str), delimiter=separator)
+        return next(reader), list(reader)  # first row holds the headers
+
+    elif accept_header == "application/qlever-results+json":
+        get_result_cmd = (
+            f"jq '{{headers: .selected, results: .res[0:{result_size}]}}' "
+            f"{result_file}"
+        )
+        results_str = run_command(get_result_cmd, return_output=True)
+        results_json = json.loads(results_str)
+        return results_json["headers"], results_json["results"]
+
+    elif accept_header == "application/sparql-results+json":
+        get_result_cmd = (
+            f"jq '{{headers: .head.vars, "
+            f"bindings: .results.bindings[0:{result_size}]}}' "
+            f"{result_file}"
+        )
+        results_str = run_command(get_result_cmd, return_output=True)
+        results_json = json.loads(results_str)
+        headers = results_json["headers"]
+        results = []
+        for binding in results_json.get("bindings", []):
+            if not binding or not isinstance(binding, dict):
+                results.append([])
+                continue
+            result = []
+            for var in headers or binding:  # keep rows aligned with headers
+                obj = binding.get(var)
+                if obj is None:  # unbound variable, keep its column empty
+                    result.append("")
+                    continue
+                value = '"' + obj["value"] + '"'
+                if obj["type"] == "uri":
+                    value = "<" + value.strip('"') + ">"
+                elif "datatype" in obj:
+                    value += "^^<" + obj["datatype"] + ">"
+                elif "xml:lang" in obj:
+                    value += "@" + obj["xml:lang"]
+                result.append(value)
+            results.append(result)
+        return headers, results
+
+    else:  # text/turtle
+        graph = rdflib.Graph()
+        graph.parse(result_file, format="turtle")
+        headers = ["?subject", "?predicate", "?object"]
+        results = [
+            [str(s), str(p), str(o)]
+            for _, (s, p, o) in zip(range(result_size), graph)
+        ]
+        return headers, results
+
+
+def get_result_yml_query_record(
+    name: str,
+    description: str,
+    query: str,
+    client_time: float,
+    result: str | dict[str, str],
+    result_size: int | None,
+    max_result_size: int,
+    accept_header: str,
+    server_restarted: bool,
+) -> dict[str, Any]:
+    """
+    Build the record of one query for the result yaml file, where `result` is
+    the result file on success or the error message dict (`short`, `long`).
+    """
+    record = {
+        "name": name,
+        "description": description,
+        "query": query,
+        "runtime_info": {},
+        "server_restarted": server_restarted,
+    }
+    headers, results = [], []
+    if result_size is None and isinstance(result, dict):
+        results = f"{result['short']}: {result['long']}"
+    if result_size and isinstance(result, str):
+        record["result_size"] = result_size
+        result_size = min(result_size, max_result_size)
+        headers, results = get_query_results(
+            result, result_size, accept_header
+        )
+        if accept_header == "application/qlever-results+json":
+            runtime_info_cmd = (
+                f"jq 'if .runtimeInformation then"
+                f" .runtimeInformation else"
+                f' "null" end\' {result}'
+            )
+            runtime_info_str = run_command(
+                runtime_info_cmd, return_output=True
+            )
+            # jq prints the JSON string "null" (quoted) if the key is missing.
+            runtime_info = json.loads(runtime_info_str)
+            if isinstance(runtime_info, dict):
+                record["runtime_info"] = runtime_info
+    record["runtime_info"]["client_time"] = client_time
+    record["headers"] = headers
+    record["results"] = results
+    return record
+
+
+def write_query_records_to_result_file(
+    query_data: dict[str, list[dict[str, Any]]], out_file: Path
+) -> None:
+    """
+    Write the yaml records of all queries to `out_file`, encoded as UTF-8.
+    """
+    config_yaml = dict_to_yaml(query_data)
+    with open(out_file, "w", encoding="utf-8") as eval_yaml_file:
+        eval_yaml_file.write(config_yaml)
+        log.info("")
+        log.info(
+            f"Generated result yaml file: {out_file.stem}{out_file.suffix} "
+            f"in the directory {out_file.parent.resolve()}"
+        )
+
+
+class BenchmarkQueriesCommand(QleverCommand):
+    """
+    Class for running a given sequence of benchmark or example queries and
+    showing their processing times and result sizes.
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return (
+            "Run the given benchmark or example queries and show their "
+            "processing times and result sizes. Optionally, store the "
+            "benchmark results in a YML file."
+        )
+
+    def should_have_qleverfile(self) -> bool:
+        return False
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {
+            "server": ["host_name", "port", "timeout"],
+            "runtime": ["system"],
+            "ui": ["ui_config"],
+        }
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--sparql-endpoint-preset",
+            choices=[
+                "https://qlever.dev/api/wikidata",
+                "https://qlever.dev/api/uniprot",
+                "https://qlever.dev/api/pubchem",
+                "https://qlever.dev/api/osm-planet",
+                "https://wikidata.demo.openlinksw.com/sparql",
+                "https://sparql.uniprot.org/sparql",
+            ],
+            help="SPARQL endpoint from fixed list (to save typing)",
+        )
+        subparser.add_argument(
+            "--queries-tsv",
+            type=str,
+            default=None,
+            help=(
+                "Path to a TSV file containing the benchmark queries "
+                "(short_query_name, full_sparql_query)"
+            ),
+        )
+        subparser.add_argument(
+            "--queries-yml",
+            type=str,
+            default=None,
+            help=(
+                "Path to a YML file containing the benchmark queries. "
+                "The YML file must follow this structure -> "
+                "name: <benchmark name (str)>, "
+                "description: <benchmark description (str)>, "
+                "queries: <list[query]> where each query contains: "
+                "name: <short query name (mandatory)>, "
+                "description <query description (optional)>, "
+                "query: <full sparql query (mandatory)>"
+            ),
+        )
+        subparser.add_argument(
+            "--query-ids",
+            type=str,
+            default="1-$",
+            help="Query IDs as comma-separated list of "
+            "ranges (e.g., 1-5,7,12-$)",
+        )
+        subparser.add_argument(
+            "--query-regex",
+            type=str,
+            help="Only consider example queries matching "
+            "this regex (using grep -Pi)",
+        )
+        subparser.add_argument(
+            "--example-queries",
+            action="store_true",
+            default=False,
+            help=(
+                "Run the example queries for the given --ui-config "
+                "instead of the benchmark queries from a TSV or YML file"
+            ),
+        )
+        subparser.add_argument(
+            "--download-or-count",
+            choices=["download", "count"],
+            default="download",
+            help="Whether to download the full result "
+            "or just compute the size of the result",
+        )
+        subparser.add_argument(
+            "--limit", type=int, help="Limit on the number of results"
+        )
+        subparser.add_argument(
+            "--remove-offset-and-limit",
+            action="store_true",
+            default=False,
+            help="Remove OFFSET and LIMIT from the query",
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "application/qlever-results+json",
+                "application/octet-stream",
+                "text/turtle",
+                "AUTO",
+            ],
+            default="application/sparql-results+json",
+            help="Accept header for the SPARQL query; AUTO means "
+            "`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
+            "`application/sparql-results+json` for all others",
+        )
+        subparser.add_argument(
+            "--clear-cache",
+            choices=["yes", "no"],
+            default="no",
+            help="Clear the cache before each query (only works for QLever)",
+        )
+        subparser.add_argument(
+            "--width-query-name",
+            type=int,
+            default=70,
+            help="Width for printing the query name",
+        )
+        subparser.add_argument(
+            "--width-error-message",
+            type=int,
+            default=50,
+            help="Width for printing the error message (0 = no limit)",
+        )
+        subparser.add_argument(
+            "--width-result-size",
+            type=int,
+            default=14,
+            help="Width for printing the result size",
+        )
+        subparser.add_argument(
+            "--add-query-type-to-description",
+            action="store_true",
+            default=False,
+            help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
+            "UNKNOWN) to the query description",
+        )
+        subparser.add_argument(
+            "--show-query",
+            choices=["always", "never", "on-error"],
+            default="never",
+            help="Show the queries that will be executed (always, never, on error)",
+        )
+        subparser.add_argument(
+            "--show-prefixes",
+            action="store_true",
+            default=False,
+            help="When showing the query, also show the prefixes",
+        )
+        subparser.add_argument(
+            "--results-dir",
+            type=str,
+            default=".",
+            help=(
+                "The directory where the YML result file would be saved "
+                "for the evaluation web app (Default = current working directory)"
+            ),
+        )
+        subparser.add_argument(
+            "--result-file",
+            type=str,
+            default=None,
+            help=(
+                "Base name used for the result YML file, should be of the "
+                "form `<dataset>.<engine>`, e.g., `wikidata.qlever`"
+            ),
+        )
+        subparser.add_argument(
+            "--max-results-output-file",
+            type=int,
+            default=5,
+            help=(
+                "Maximum number of results per query in the output result "
+                "YML file (Default = 5)"
+            ),
+        )
+        subparser.add_argument(
+            "--benchmark-name",
+            type=str,
+            default=None,
+            help=(
+                "Benchmark name to be saved in result YML file (This will "
+                "override the 'name' field in --queries-yml file). This benchmark "
+                "name would be displayed as header title when comparing RDF Graph "
+                "Databases on the evaluation web app. Only relevant "
+                "when --result-file argument is passed."
+            ),
+        )
+        subparser.add_argument(
+            "--benchmark-description",
+            type=str,
+            default=None,
+            help=(
+                "Benchmark description to be saved in result YML file (This "
+                "will override the 'description' field in --queries-yml file). "
+                "This benchmark description would be displayed as additional "
+                "help text on the evaluation web app for the given benchmark. "
+                "Only relevant when --result-file argument is passed."
+            ),
+        )
+        subparser.add_argument(
+            "--restart-on-hang",
+            action="store_true",
+            help=(
+                "Enable automatic server recovery during benchmarking. "
+                "If a query continues running for more than 30 seconds past the "
+                "configured timeout, the benchmark runner will assume the SPARQL "
+                "server is stuck. It will then stop and restart the server for "
+                "the current engine, and resume execution with the next query. "
+                "NOTE: This only works if all the server parameters for start and "
+                "stop are configured in the Qleverfile and no arguments are needed "
+                f"for the {script_name} start and {script_name} stop commands."
+            ),
+        )
+
+    def execute(self, args) -> bool:
+        # We can't have both `--remove-offset-and-limit` and `--limit`.
+        if args.remove_offset_and_limit and args.limit:
+            log.error("Cannot have both --remove-offset-and-limit and --limit")
+            return False
+
+        # Extract dataset and sparql_engine name from result file
+        dataset = engine = None
+        if args.result_file is not None:
+            result_file_parts = args.result_file.split(".")
+            if len(result_file_parts) != 2:
+                log.error(
+                    "The argument of --result-file should be of the form "
+                    "`<dataset>.<engine>`, e.g., `wikidata.qlever`"
+                )
+                return False
+            dataset, engine = result_file_parts
+
+            # Make sure results_dir is a directory path and if it doesn't
+            # exist, create the directory
+            results_dir_path = Path(args.results_dir)
+            if results_dir_path.exists():
+                if not results_dir_path.is_dir():
+                    log.error(
+                        f"{results_dir_path} exists but is not a directory"
+                    )
+                    return False
+            else:
+                log.info(
+                    f"Creating results directory: {results_dir_path.absolute()}"
+                )
+                results_dir_path.mkdir(parents=True, exist_ok=True)
+
+        # If `args.accept` is `application/sparql-results+json` or
+        # `application/qlever-results+json` or `AUTO`, we need `jq`.
+        if args.accept in (
+            "application/sparql-results+json",
+            "application/qlever-results+json",
+            "AUTO",
+        ):
+            try:
+                subprocess.run(
+                    "jq --version",
+                    shell=True,
+                    check=True,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
+            except Exception as e:
+                log.error(f"Please install `jq` for {args.accept} ({e})")
+                return False
+
+        # Ensure unique source for benchmark queries
+        if not any((args.queries_tsv, args.queries_yml, args.example_queries)):
+            log.error(
+                "No benchmark or example queries to read! Either pass benchmark "
+                "queries using --queries-tsv or --queries-yml, or pass the "
+                "argument --example-queries to run example queries for the "
+                f"given ui_config {args.ui_config}"
+            )
+            return False
+
+        if all((args.queries_tsv, args.queries_yml)):
+            log.error("Cannot have both --queries-tsv and --queries-yml")
+            return False
+
+        if any((args.queries_tsv, args.queries_yml)) and args.example_queries:
+            queries_file_arg = "tsv" if args.queries_tsv else "yml"
+            log.error(
+                f"Cannot have both --queries-{queries_file_arg} and "
+                "--example-queries"
+            )
+            return False
+
+        # Handle shortcuts for SPARQL endpoint.
+        if args.sparql_endpoint_preset:
+            args.sparql_endpoint = args.sparql_endpoint_preset
+
+        # Limit only works with full result.
+        if args.limit and args.download_or_count == "count":
+            log.error("Limit only works with full result")
+            return False
+
+        # Clear cache only works for QLever.
+        is_qlever = (
+            not args.sparql_endpoint
+            or args.sparql_endpoint.startswith("https://qlever")
+        )
+        if engine is not None:
+            is_qlever = is_qlever or "qlever" in engine.lower()
+        if args.clear_cache == "yes":
+            if is_qlever:
+                log.warning(
+                    "Clearing the cache before each query"
+                    " (only works for QLever)"
+                )
+            else:
+                log.warning(
+                    "Clearing the cache only works for QLever"
+                    ", option `--clear-cache` is ignored"
+                )
+                args.clear_cache = "no"
+
+        # Show what the command will do.
+        example_queries_cmd = (
+            f"curl -sv https://qlever.dev/api/examples/{args.ui_config}"
+        )
+        sparql_endpoint = (
+            args.sparql_endpoint or f"{args.host_name}:{args.port}"
+        )
+
+        self.show(
+            f"Obtain queries via: {args.queries_yml or args.queries_tsv or example_queries_cmd}\n"
+            f"SPARQL endpoint: {sparql_endpoint}\n"
+            f"Accept header: {args.accept}\n"
+            f"Download result for each query or just count:"
+            f" {args.download_or_count.upper()}"
+            + (f" with LIMIT {args.limit}" if args.limit else ""),
+            only_show=args.show,
+        )
+        if args.show:
+            return True
+
+        # Parse queries and extract benchmark name/description from YML.
+        yml_name = yml_description = None
+        if args.queries_yml:
+            yml_name, yml_description, queries = parse_queries_yml(
+                args.queries_yml
+            )
+        elif args.queries_tsv:
+            queries = parse_queries_tsv(f"cat {args.queries_tsv}")
+        else:
+            queries = parse_queries_tsv(example_queries_cmd)
+
+        filtered_queries = filter_queries(
+            queries, args.query_ids, args.query_regex
+        )
+
+        if len(filtered_queries) == 0 or not filtered_queries[0]:
+            log.error("No queries to process!")
+            return False
+
+        # We want the width of the query name to be an odd number (in case we
+        # have to truncate it, in which case we want to have a " ... " in the
+        # middle).
+        width_query_name_half = args.width_query_name // 2
+        width_query_name = 2 * width_query_name_half + 1
+
+        try:
+            timeout = int(args.timeout[:-1])
+        except ValueError:
+            timeout = None
+
+        benchmark_name, benchmark_description = resolve_benchmark_metadata(
+            args.benchmark_name,
+            args.benchmark_description,
+            yml_name,
+            yml_description,
+            dataset,
+        )
+
+        # Launch the queries one after the other and for each print: the
+        # description, the result size (number of rows), and the query
+        # processing time (seconds).
+        query_times = []
+        result_sizes = []
+        result_yml_query_records = {
+            "name": benchmark_name,
+            "description": benchmark_description,
+            "queries": [],
+        }
+        if args.result_file:
+            if timeout:
+                result_yml_query_records["timeout"] = timeout
+
+            index_time, index_size = compute_index_stats()
+            result_yml_query_records["index_time"] = index_time
+            result_yml_query_records["index_size"] = index_size
+
+        num_failed = 0
+        for name, description, query in filtered_queries:
+            if len(query) == 0:
+                log.error(
+                    "Could not parse name, description and query, line is:"
+                )
+                log.info("")
+                log.info(f"{name}\t{description}\t{query}")
+                return False
+            query_type = sparql_query_type(query)
+            if args.add_query_type_to_description or args.accept == "AUTO":
+                # If no query description, use name and append query type to it
+                description = f"{description or name} [{query_type}]"
+
+            # Clear the cache.
+            if args.clear_cache == "yes":
+                args.server_url = sparql_endpoint
+                args.complete = False
+                clear_cache_successful = False
+                with mute_log():
+                    clear_cache_successful = ClearCacheCommand().execute(args)
+                if not clear_cache_successful:
+                    log.warn("Failed to clear the cache")
+
+            # Remove OFFSET and LIMIT (after the last closing bracket).
+            if args.remove_offset_and_limit or args.limit:
+                closing_bracket_idx = query.rfind("}")
+                regexes = [
+                    re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
+                    re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
+                ]
+                for regex in regexes:
+                    match = re.search(regex, query[closing_bracket_idx:])
+                    if match:
+                        query = (
+                            query[: closing_bracket_idx + match.start()]
+                            + query[closing_bracket_idx + match.end() :]
+                        )
+
+            # Limit query.
+            if args.limit:
+                query += f" LIMIT {args.limit}"
+
+            # Count query.
+            if args.download_or_count == "count":
+                # First find out if there is a FROM clause.
+                regex_from_clause = re.compile(
+                    r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
+                )
+                match_from_clause = re.search(regex_from_clause, query)
+                from_clause = " "
+                if match_from_clause:
+                    from_clause = match_from_clause.group(0)
+                    query = (
+                        query[: match_from_clause.start()]
+                        + " "
+                        + query[match_from_clause.end() :]
+                    )
+                # Now we can add the outer SELECT COUNT(*).
+                query = (
+                    re.sub(
+                        r"SELECT ",
+                        "SELECT (COUNT(*) AS ?qlever_count_)"
+                        + from_clause
+                        + "WHERE { SELECT ",
+                        query,
+                        count=1,
+                        flags=re.IGNORECASE,
+                    )
+                    + " }"
+                )
+
+            # A bit of pretty-printing.
+            query = re.sub(r"\s+", " ", query)
+            query = re.sub(r"\s*\.\s*\}", " }", query)
+            if args.show_query == "always":
+                log.info("")
+                log.info(
+                    colored(
+                        pretty_printed_query(
+                            query, args.show_prefixes, args.system
+                        ),
+                        "cyan",
+                    )
+                )
+
+            # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT and
+            # DESCRIBE, and `application/sparql-results+json` for all others.
+            accept_header = args.accept
+            if accept_header == "AUTO":
+                if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
+                    accept_header = "text/turtle"
+                else:
+                    accept_header = "application/sparql-results+json"
+
+            # Launch query.
+            curl_cmd = (
+                f"curl -Ls {sparql_endpoint}"
+                f' -w "HTTP code: %{{http_code}}\\n"'
+                f' -H "Accept: {accept_header}"'
+                f" --data-urlencode query={shlex.quote(query)}"
+            )
+            log.debug(curl_cmd)
+            result_file = (
+                f"qlever.example_queries.result.{abs(hash(curl_cmd))}.tmp"
+            )
+            result_size = 0
+            single_int_result = None
+            start_time = time.time()
+            server_restarted = False
+            try:
+                max_time = None
+                if args.restart_on_hang and timeout:
+                    max_time = timeout + 30
+                http_code = run_curl_command(
+                    sparql_endpoint,
+                    headers={"Accept": accept_header},
+                    params={"query": query},
+                    result_file=result_file,
+                    max_time=max_time,
+                ).strip()
+                time_seconds = time.time() - start_time
+                if http_code == "200":
+                    error_msg = None
+                else:
+                    error_msg = {
+                        "short": f"HTTP code: {http_code}",
+                        "long": re.sub(
+                            r"\s+", " ", Path(result_file).read_text()
+                        ),
+                    }
+            except Exception as e:
+                time_seconds = time.time() - start_time
+
+                # If curl timed out after hitting max_time (= timeout + 30s)
+                if "exit code 28" in str(e) and args.restart_on_hang:
+                    server_restarted = restart_server()
+                # If server is not responding and has crashed
+                elif (
+                    "exit code 52" in str(e) or "exit code 7" in str(e)
+                ) and args.restart_on_hang:
+                    server_restarted = restart_server(start_only=True)
+
+                if args.log_level == "DEBUG":
+                    traceback.print_exc()
+                error_msg = {
+                    "short": "Exception",
+                    "long": re.sub(r"\s+", " ", str(e)),
+                }
+
+            # Get result size (via the command line, in order to avoid loading
+            # a potentially large JSON file into Python, which is slow).
+            if error_msg is None:
+                result_size, error_msg = get_result_size(
+                    args.download_or_count == "count",
+                    query_type,
+                    accept_header,
+                    result_file,
+                )
+                if (
+                    result_size == 1
+                    and accept_header == "application/sparql-results+json"
+                    and args.download_or_count == "download"
+                ):
+                    single_int_result = get_single_int_result(result_file)
+
+            # Get the result yaml record if output file needs to be generated
+            if args.result_file is not None:
+                result_length = None if error_msg is not None else 1
+                result_length = (
+                    result_size
+                    if args.download_or_count == "download"
+                    and result_length is not None
+                    else result_length
+                )
+                query_results = (
+                    error_msg if error_msg is not None else result_file
+                )
+                query_record = get_result_yml_query_record(
+                    name=name,
+                    description=description,
+                    query=pretty_printed_query(
+                        query, args.show_prefixes, args.system
+                    ),
+                    client_time=time_seconds,
+                    result=query_results,
+                    result_size=result_length,
+                    max_result_size=args.max_results_output_file,
+                    accept_header=accept_header,
+                    server_restarted=server_restarted,
+                )
+                result_yml_query_records["queries"].append(query_record)
+
+            # Print name, time, result in tabular form.
+            if len(name) > width_query_name:
+                name = (
+                    name[: width_query_name_half - 2]
+                    + " ... "
+                    + name[-width_query_name_half + 2 :]
+                )
+            if error_msg is None:
+                result_size = int(result_size)
+                single_int_result = (
+                    f"   [single int result: {single_int_result:,}]"
+                    if single_int_result is not None
+                    else ""
+                )
+                log.info(
+                    f"{name:<{width_query_name}}  "
+                    f"{time_seconds:6.2f} s  "
+                    f"{result_size:>{args.width_result_size},}"
+                    f"{single_int_result}"
+                )
+                query_times.append(time_seconds)
+                result_sizes.append(result_size)
+            else:
+                num_failed += 1
+                if (
+                    args.width_error_message > 0
+                    and len(error_msg["long"]) > args.width_error_message
+                    and args.log_level != "DEBUG"
+                    and args.show_query != "on-error"
+                ):
+                    error_msg["long"] = (
+                        error_msg["long"][: args.width_error_message - 3]
+                        + "..."
+                    )
+                seperator_short_long = (
+                    "\n" if args.show_query == "on-error" else "  "
+                )
+                log.info(
+                    f"{name:<{width_query_name}}    "
+                    f"{colored('FAILED   ', 'red')}"
+                    f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
+                    f"{seperator_short_long}"
+                    f"{colored(error_msg['long'], 'red')}"
+                )
+                if args.show_query == "on-error":
+                    log.info(
+                        colored(
+                            pretty_printed_query(
+                                query, args.show_prefixes, args.system
+                            ),
+                            "cyan",
+                        )
+                    )
+                    log.info("")
+
+            # Remove the result file (unless in debug mode).
+            if args.log_level != "DEBUG":
+                Path(result_file).unlink(missing_ok=True)
+
+        # Check that each query has a time and a result size, or it failed.
+        assert len(result_sizes) == len(query_times)
+        assert len(query_times) + num_failed == len(filtered_queries)
+
+        if args.result_file:
+            if len(result_yml_query_records["queries"]) != 0:
+                outfile_name = f"{dataset}.{engine}.results.yaml"
+                outfile = Path(args.results_dir) / outfile_name
+                write_query_records_to_result_file(
+                    query_data=result_yml_query_records,
+                    out_file=outfile,
+                )
+            else:
+                log.error(
+                    f"Nothing to write to output result YML file: {args.result_file}"
+                )
+
+        # Show statistics.
+        if len(query_times) > 0:
+            n = len(query_times)
+            total_query_time = sum(query_times)
+            average_query_time = total_query_time / n
+            median_query_time = sorted(query_times)[n // 2]
+            total_result_size = sum(result_sizes)
+            average_result_size = round(total_result_size / n)
+            median_result_size = sorted(result_sizes)[n // 2]
+            query_or_queries = "query" if n == 1 else "queries"
+            description = f"TOTAL   for {n} {query_or_queries}"
+            log.info("")
+            log.info(
+                f"{description:<{width_query_name}}  "
+                f"{total_query_time:6.2f} s  "
+                f"{total_result_size:>14,}"
+            )
+            description = f"AVERAGE for {n} {query_or_queries}"
+            log.info(
+                f"{description:<{width_query_name}}  "
+                f"{average_query_time:6.2f} s  "
+                f"{average_result_size:>14,}"
+            )
+            description = f"MEDIAN  for {n} {query_or_queries}"
+            log.info(
+                f"{description:<{width_query_name}}  "
+                f"{median_query_time:6.2f} s  "
+                f"{median_result_size:>14,}"
+            )
+
+        # Show number of failed queries.
+        if num_failed > 0:
+            log.info("")
+            description = "Number of FAILED queries"
+            num_failed_string = f"{num_failed:>6}"
+            if num_failed == len(filtered_queries):
+                num_failed_string += "  [all]"
+            log.info(
+                colored(
+                    f"{description:<{width_query_name}}  {num_failed:>24}",
+                    "red",
+                )
+            )
+
+        # Return success (has nothing to do with how many queries failed).
+        return True
diff --git a/src/qlever/commands/cache_stats.py b/src/qlever/commands/cache_stats.py
index 846bf756..481032c0 100644
--- a/src/qlever/commands/cache_stats.py
+++ b/src/qlever/commands/cache_stats.py
@@ -17,45 +17,58 @@ def __init__(self):
         pass
 
     def description(self) -> str:
-        return ("Show how much of the cache is currently being used")
+        return "Show how much of the cache is currently being used"
 
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"server": ["host_name", "port"]}
 
     def additional_arguments(self, subparser) -> None:
-        subparser.add_argument("--server-url",
-                               help="URL of the QLever server, default is "
-                               "{host_name}:{port}")
-        subparser.add_argument("--detailed",
-                               action="store_true",
-                               default=False,
-                               help="Show detailed statistics and settings")
+        subparser.add_argument(
+            "--sparql-endpoint",
+            help="URL of the SPARQL endpoint, default is {host_name}:{port}",
+        )
+        subparser.add_argument(
+            "--detailed",
+            action="store_true",
+            default=False,
+            help="Show detailed statistics and settings",
+        )
 
     def execute(self, args) -> bool:
         # Construct the two curl commands.
-        server_url = (args.server_url if args.server_url
-                      else f"{args.host_name}:{args.port}")
-        cache_stats_cmd = (f"curl -s {server_url} "
-                           f"--data-urlencode \"cmd=cache-stats\"")
-        cache_settings_cmd = (f"curl -s {server_url} "
-                              f"--data-urlencode \"cmd=get-settings\"")
+        sparql_endpoint = (
+            args.sparql_endpoint
+            if args.sparql_endpoint
+            else f"{args.host_name}:{args.port}"
+        )
+        cache_stats_cmd = (
+            f'curl -s {sparql_endpoint} --data-urlencode "cmd=cache-stats"'
+        )
+        cache_settings_cmd = (
+            f'curl -s {sparql_endpoint} --data-urlencode "cmd=get-settings"'
+        )
 
         # Show them.
-        self.show("\n".join([cache_stats_cmd, cache_settings_cmd]),
-                  only_show=args.show)
+        self.show(
+            "\n".join([cache_stats_cmd, cache_settings_cmd]),
+            only_show=args.show,
+        )
         if args.show:
             return True
 
         # Execute them.
         try:
             cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
-            cache_settings = subprocess.check_output(cache_settings_cmd,
-                                                     shell=True)
+            cache_settings = subprocess.check_output(
+                cache_settings_cmd, shell=True
+            )
             cache_stats_dict = json.loads(cache_stats)
             cache_settings_dict = json.loads(cache_settings)
+            if isinstance(cache_settings_dict, list):
+                cache_settings_dict = cache_settings_dict[0]
         except Exception as e:
             log.error(f"Failed to get cache stats and settings: {e}")
             return False
@@ -64,27 +77,35 @@ def execute(self, args) -> bool:
         if not args.detailed:
             cache_size = cache_settings_dict["cache-max-size"]
             if not cache_size.endswith(" GB"):
-                log.error(f"Cache size {cache_size} is not in GB, "
-                          f"QLever should return bytes instead")
+                log.error(
+                    f"Cache size {cache_size} is not in GB, "
+                    f"QLever should return bytes instead"
+                )
                 return False
             else:
                 cache_size = float(cache_size[:-3])
-            pinned_size = cache_stats_dict["pinned-size"] / 1e9
-            non_pinned_size = cache_stats_dict["non-pinned-size"] / 1e9
+            pinned_size = cache_stats_dict["cache-size-pinned"] / 1e9
+            non_pinned_size = cache_stats_dict["cache-size-unpinned"] / 1e9
             cached_size = pinned_size + non_pinned_size
             free_size = cache_size - cached_size
             if cached_size == 0:
                 log.info(f"Cache is empty, all {cache_size:.1f} GB available")
             else:
-                log.info(f"Pinned queries     : "
-                         f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB"
-                         f"  [{pinned_size / cache_size:5.1%}]")
-                log.info(f"Non-pinned queries : "
-                         f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB"
-                         f"  [{non_pinned_size / cache_size:5.1%}]")
-                log.info(f"FREE               : "
-                         f"{free_size:5.1f} GB of {cache_size:5.1f} GB"
-                         f"  [{1 - cached_size / cache_size:5.1%}]")
+                log.info(
+                    f"Pinned queries     : "
+                    f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB"
+                    f"  [{pinned_size / cache_size:5.1%}]"
+                )
+                log.info(
+                    f"Non-pinned queries : "
+                    f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB"
+                    f"  [{non_pinned_size / cache_size:5.1%}]"
+                )
+                log.info(
+                    f"FREE               : "
+                    f"{free_size:5.1f} GB of {cache_size:5.1f} GB"
+                    f"  [{1 - cached_size / cache_size:5.1%}]"
+                )
             return True
 
         # Complete version.
@@ -96,6 +117,7 @@ def show_dict_as_table(key_value_pairs):
                 if re.match(r"^\d+\.\d+$", value):
                     value = "{:.2f}".format(float(value))
                 log.info(f"{key.ljust(max_key_len)} : {value}")
+
         show_dict_as_table(cache_stats_dict.items())
         log.info("")
         show_dict_as_table(cache_settings_dict.items())
diff --git a/src/qlever/commands/clear_cache.py b/src/qlever/commands/clear_cache.py
index a92713f4..30251cf9 100644
--- a/src/qlever/commands/clear_cache.py
+++ b/src/qlever/commands/clear_cache.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import re
-import subprocess
 
 from qlever.command import QleverCommand
 from qlever.commands.cache_stats import CacheStatsCommand
 from qlever.log import log
+from qlever.util import run_command
 
 
 class ClearCacheCommand(QleverCommand):
@@ -22,12 +22,12 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"server": ["host_name", "port", "access_token"]}
 
     def additional_arguments(self, subparser) -> None:
         subparser.add_argument(
-            "--server-url",
+            "--sparql-endpoint",
             help="URL of the QLever server, default is {host_name}:{port}",
         )
         subparser.add_argument(
@@ -38,17 +38,19 @@ def additional_arguments(self, subparser) -> None:
         )
 
     def execute(self, args) -> bool:
+        # Determine SPARQL endpoint.
+        sparql_endpoint = (
+            args.sparql_endpoint
+            if args.sparql_endpoint
+            else (f"{args.host_name}:{args.port}")
+        )
+
         # Construct command line and show it.
-        clear_cache_cmd = "curl -s"
-        if args.server_url:
-            clear_cache_cmd += f" {args.server_url}"
-        else:
-            clear_cache_cmd += f" {args.host_name}:{args.port}"
-        cmd_val = "clear-cache-complete" if args.complete else "clear-cache"
-        clear_cache_cmd += f' --data-urlencode "cmd={cmd_val}"'
+        clear_cache_cmd = f"curl -s {sparql_endpoint} -d cmd=clear-cache"
         if args.complete:
             clear_cache_cmd += (
-                f" --data-urlencode access-token=" f'"{args.access_token}"'
+                f"-complete"
+                f' --data-urlencode access-token="{args.access_token}"'
             )
         self.show(clear_cache_cmd, only_show=args.show)
         if args.show:
@@ -57,13 +59,7 @@ def execute(self, args) -> bool:
         # Execute the command.
         try:
             clear_cache_cmd += ' -w " %{http_code}"'
-            result = subprocess.run(
-                clear_cache_cmd,
-                shell=True,
-                capture_output=True,
-                text=True,
-                check=True,
-            ).stdout
+            result = run_command(clear_cache_cmd, return_output=True)
             match = re.match(r"^(.*) (\d+)$", result, re.DOTALL)
             if not match:
                 raise Exception(f"Unexpected output:\n{result}")
diff --git a/src/qlever/commands/extract_queries.py b/src/qlever/commands/extract_queries.py
index a9802029..edea5648 100644
--- a/src/qlever/commands/extract_queries.py
+++ b/src/qlever/commands/extract_queries.py
@@ -20,7 +20,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"data": ["name"]}
 
     def additional_arguments(self, subparser) -> None:
@@ -43,6 +43,12 @@ def additional_arguments(self, subparser) -> None:
             default="log-queries.txt",
             help="Output file for the extracted queries (default: `log-queries.txt`)",
         )
+        subparser.add_argument(
+            "--use-alive-check-tag-as-description-base",
+            action="store_true",
+            help="Use the tag from 'Alive check' messages"
+            " as the base for query descriptions (default: False)",
+        )
 
     def execute(self, args) -> bool:
         # Show what the command does.
@@ -74,11 +80,12 @@ def execute(self, args) -> bool:
         for line in log_file:
             # An "Alive check" message contains a tag, which we use as the base
             # name of the query description.
-            alive_check_regex = r"Alive check with message \"(.*)\""
-            match = re.search(alive_check_regex, line)
-            if match:
-                description_base = match.group(1)
-                continue
+            if args.use_alive_check_tag_as_description_base:
+                alive_check_regex = r"Alive check with message \"(.*)\""
+                match = re.search(alive_check_regex, line)
+                if match:
+                    description_base = match.group(1)
+                    continue
 
             # A new query in the log.
             if "Processing the following SPARQL query" in line:
diff --git a/src/qlever/commands/get_data.py b/src/qlever/commands/get_data.py
index b27eca5f..d77071f0 100644
--- a/src/qlever/commands/get_data.py
+++ b/src/qlever/commands/get_data.py
@@ -21,7 +21,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"data": ["name", "get_data_cmd"], "index": ["input_files"]}
 
     def additional_arguments(self, subparser) -> None:
diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py
index 41c25fff..b18bb02d 100644
--- a/src/qlever/commands/index.py
+++ b/src/qlever/commands/index.py
@@ -30,19 +30,23 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name", "format"],
             "index": [
                 "input_files",
                 "cat_input_files",
+                "encode_as_id",
                 "multi_input_json",
                 "parallel_parsing",
                 "settings_json",
+                "materialized_views",
+                "vocabulary_type",
                 "index_binary",
                 "only_pso_and_pos_permutations",
                 "ulimit",
                 "use_patterns",
+                "add_has_word_triples",
                 "text_index",
                 "stxxl_memory",
                 "parser_buffer_size",
@@ -98,8 +102,7 @@ def get_input_options_for_json(self, args) -> str:
             # Check that `input_spec` is a dictionary.
             if not isinstance(input_spec, dict):
                 raise self.InvalidInputJson(
-                    f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
-                    "object",
+                    f"Element {i} in `MULTI_INPUT_JSON` must be a JSON object",
                     input_spec,
                 )
             # For each `input_spec`, we must have a command.
@@ -184,6 +187,7 @@ def execute(self, args) -> bool:
             index_cmd = (
                 f"{args.cat_input_files} | {args.index_binary}"
                 f" -i {args.name} -s {args.name}.settings.json"
+                f" --vocabulary-type {args.vocabulary_type}"
                 f" -F {args.format} -f -"
             )
             if args.parallel_parsing:
@@ -199,6 +203,7 @@ def execute(self, args) -> bool:
             index_cmd = (
                 f"{args.index_binary}"
                 f" -i {args.name} -s {args.name}.settings.json"
+                f" --vocabulary-type {args.vocabulary_type}"
                 f" {input_options}"
             )
         else:
@@ -212,17 +217,20 @@ def execute(self, args) -> bool:
             return False
 
         # Add remaining options.
+        if args.encode_as_id:
+            index_cmd += f" --encode-as-id {args.encode_as_id}"
         if args.only_pso_and_pos_permutations:
-            index_cmd += " --only-pso-and-pos-permutations --no-patterns"
-        if not args.use_patterns:
+            index_cmd += " --only-pso-and-pos-permutations"
+        if args.use_patterns == "no":
             index_cmd += " --no-patterns"
+        if args.add_has_word_triples:
+            index_cmd += " --add-has-word-triples"
         if args.text_index in [
             "from_text_records",
             "from_text_records_and_literals",
         ]:
             index_cmd += (
-                f" -w {args.name}.wordsfile.tsv"
-                f" -d {args.name}.docsfile.tsv"
+                f" -w {args.name}.wordsfile.tsv -d {args.name}.docsfile.tsv"
             )
         if args.text_index in [
             "from_literals",
@@ -233,7 +241,11 @@ def execute(self, args) -> bool:
             index_cmd += f" --stxxl-memory {args.stxxl_memory}"
         if args.parser_buffer_size:
             index_cmd += f" --parser-buffer-size {args.parser_buffer_size}"
-        index_cmd += f" | tee {args.name}.index-log.txt"
+        if args.materialized_views:
+            index_cmd += (
+                f" --materialized-views {shlex.quote(args.materialized_views)}"
+            )
+        index_cmd += f" 2>&1 | tee {args.name}.index-log.txt"
 
         # If the total file size is larger than 10 GB, set ulimit (such that a
         # large number of open files is allowed).
@@ -266,10 +278,8 @@ def execute(self, args) -> bool:
         if args.show:
             return True
 
-        # When running natively, check if the binary exists and works.
-        if args.system == "native":
-            if not binary_exists(args.index_binary, "index-binary"):
-                return False
+        if not binary_exists(args.index_binary, "index-binary", args):
+            return False
 
         # Check if all of the input files exist.
         for pattern in shlex.split(args.input_files):
@@ -300,8 +310,7 @@ def execute(self, args) -> bool:
         ):
             if Containerize.is_running(args.system, args.index_container):
                 log.info(
-                    "Another index process is running, trying to stop "
-                    "it ..."
+                    "Another index process is running, trying to stop it ..."
                 )
                 log.info("")
                 try:
diff --git a/src/qlever/commands/index_stats.py b/src/qlever/commands/index_stats.py
index b997b8c7..d1c96b76 100644
--- a/src/qlever/commands/index_stats.py
+++ b/src/qlever/commands/index_stats.py
@@ -9,6 +9,261 @@
 from qlever.util import get_total_file_size
 
 
+def compute_durations(
+    lines: list[str],
+    time_unit: str,
+    ignore_text_index: bool,
+) -> dict[str, tuple[float | None, str]]:
+    """
+    Parse index build log lines and compute the duration of each
+    indexing phase. Returns a dict mapping phase names (e.g.
+    "Parse input", "TOTAL time") to (duration, unit) tuples. The
+    duration is None if the phase timestamps are missing. Returns
+    an empty dict on error.
+    """
+
+    # Helper function that finds the next line matching the given `regex`,
+    # starting from `current_line`, and extracts the time. Returns a tuple
+    # of the time and the regex match object, or `(None, None)` if no
+    # matching line with a parsable timestamp is found.
+    #
+    # If a match is found, `current_line` is always advanced to the line
+    # after the match. If no match is found, `current_line` ends up one
+    # beyond the last line; with `update_current_line=False`, it is
+    # instead restored to its value before the call, so that the next
+    # search starts from the same position again.
+    current_line = 0
+
+    def find_next_line(regex: str, update_current_line: bool = True):
+        nonlocal lines
+        nonlocal current_line
+        current_line_backup = current_line
+        # Find starting from `current_line`.
+        while current_line < len(lines):
+            line = lines[current_line]
+            current_line += 1
+            timestamp_regex = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
+            timestamp_format = "%Y-%m-%d %H:%M:%S"
+            regex_match = re.search(regex, line)
+            if regex_match:
+                try:
+                    return datetime.strptime(
+                        re.match(timestamp_regex, line).group(),
+                        timestamp_format,
+                    ), regex_match
+                except Exception as e:
+                    log.error(
+                        f"Could not parse timestamp of form "
+                        f'"{timestamp_regex}" from line '
+                        f' "{line.rstrip()}" ({e})'
+                    )
+        # If we get here, we did not find a matching line.
+        if not update_current_line:
+            current_line = current_line_backup
+        return None, None
+
+    # Find the key lines of the index log (in the order in which the
+    # phases occur) and extract the time information from them.
+    overall_begin, _ = find_next_line(r"INFO:\s*Processing")
+    merge_begin, _ = find_next_line(r"INFO:\s*Merging partial vocab")
+    convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
+    perm_begin_and_info = []
+    while True:
+        # Find the next line that starts a permutation.
+        #
+        # NOTE: Should work for the old and new format of the index log
+        # file (old format: "Creating a pair" + names of permutations in
+        # line "Writing meta data for ..."; new format: name of
+        # permutations already in line "Creating permutations ...").
+        perm_begin, _ = find_next_line(
+            r"INFO:\s*Creating a pair", update_current_line=False
+        )
+        if perm_begin is None:
+            perm_begin, perm_info = find_next_line(
+                r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
+                update_current_line=False,
+            )
+        else:
+            _, perm_info = find_next_line(
+                r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
+                update_current_line=False,
+            )
+        if perm_info is None:
+            break
+        perm_begin_and_info.append((perm_begin, perm_info))
+    convert_end = (
+        perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None
+    )
+    normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
+    text_begin, _ = find_next_line(
+        r"INFO:\s*Adding text index", update_current_line=False
+    )
+    text_end, _ = find_next_line(
+        r"INFO:\s*Text index build comp", update_current_line=False
+    )
+    if ignore_text_index:
+        text_begin = text_end = None
+
+    # Check whether at least the first phase is done.
+    if overall_begin is None:
+        log.error("Missing line that index build has started")
+        return {}
+    if overall_begin and not merge_begin:
+        log.error(
+            "According to the log file, the index build "
+            "has started, but is still in its first "
+            "phase (parsing the input)"
+        )
+        return {}
+
+    def duration(
+        start_end_pairs: list[tuple[datetime | None, datetime | None]],
+    ) -> float | None:
+        """
+        Compute the total duration across all valid (start, end) pairs,
+        converted to `resolved_time_unit`. Returns None if no pair has
+        both timestamps available.
+        """
+        nonlocal resolved_time_unit
+        num_start_end_pairs = 0
+        diff_seconds = 0
+        for start, end in start_end_pairs:
+            if start and end:
+                diff_seconds += (end - start).total_seconds()
+                num_start_end_pairs += 1
+        if num_start_end_pairs > 0:
+            return diff_seconds / get_time_unit_factor(resolved_time_unit)
+        return None
+
+    # Determine the time unit based on the duration of the first phase
+    # (parsing), unless explicitly specified.
+    parse_duration = None
+    if merge_begin and overall_begin:
+        parse_duration = (merge_begin - overall_begin).total_seconds()
+    resolved_time_unit = get_time_unit(time_unit, parse_duration)
+
+    # Compute durations for each indexing phase. Each entry maps a
+    # phase name to (duration_in_time_unit, time_unit).
+    durations = {}
+    durations["Parse input"] = (
+        duration([(overall_begin, merge_begin)]),
+        resolved_time_unit,
+    )
+    durations["Build vocabularies"] = (
+        duration([(merge_begin, convert_begin)]),
+        resolved_time_unit,
+    )
+    durations["Convert to global IDs"] = (
+        duration([(convert_begin, convert_end)]),
+        resolved_time_unit,
+    )
+    for i in range(len(perm_begin_and_info)):
+        perm_begin, perm_info = perm_begin_and_info[i]
+        perm_end = (
+            perm_begin_and_info[i + 1][0]
+            if i + 1 < len(perm_begin_and_info)
+            else normal_end
+        )
+        perm_info_text = (
+            perm_info.group(1).replace(" and ", " & ")
+            if perm_info
+            else f"#{i + 1}"
+        )
+        perm_key = f"Permutation {perm_info_text}"
+        if perm_key in durations:
+            suffix = 2
+            while f"{perm_key} ({suffix})" in durations:
+                suffix += 1
+            perm_key = f"{perm_key} ({suffix})"
+        durations[perm_key] = (
+            duration([(perm_begin, perm_end)]),
+            resolved_time_unit,
+        )
+    durations["Text index"] = (
+        duration([(text_begin, text_end)]),
+        resolved_time_unit,
+    )
+    # TOTAL includes the text index time if it was built separately.
+    if text_begin and text_end:
+        durations["TOTAL time"] = (
+            duration([(overall_begin, normal_end), (text_begin, text_end)]),
+            resolved_time_unit,
+        )
+    elif normal_end:
+        durations["TOTAL time"] = (
+            duration([(overall_begin, normal_end)]),
+            resolved_time_unit,
+        )
+    return durations
+
+
+def get_time_unit(time_unit: str, parse_duration: float | None) -> str:
+    """
+    Resolve the time unit. If `time_unit` is not "auto", return it
+    as-is. Otherwise, pick a unit based on how long the parse phase
+    took (seconds if < 200s, minutes if < 1h, hours otherwise).
+    """
+    if time_unit != "auto":
+        return time_unit
+    time_unit = "h"
+    if parse_duration is not None:
+        if parse_duration < 200:
+            time_unit = "s"
+        elif parse_duration < 3600:
+            time_unit = "min"
+    return time_unit
+
+
+def get_time_unit_factor(time_unit: str) -> int:
+    """Return the number of seconds per `time_unit`."""
+    return {"s": 1, "min": 60, "h": 3600}[time_unit]
+
+
+def compute_sizes(
+    raw_sizes: dict[str, int], size_unit: str
+) -> dict[str, tuple[float, str]]:
+    """
+    Convert raw byte sizes into display-ready (size, unit) tuples.
+    `raw_sizes` maps category names ("index", "vocabulary", "text",
+    "total") to sizes in bytes. Returns a dict mapping display labels
+    (e.g. "Files index.*", "TOTAL size") to (converted_size, unit).
+    """
+    size_unit = get_size_unit(size_unit, raw_sizes["total"])
+    unit_factor = get_size_unit_factor(size_unit)
+    sizes = {k: v / unit_factor for k, v in raw_sizes.items()}
+
+    sizes_to_show = {}
+    sizes_to_show["Files index.*"] = (sizes["index"], size_unit)
+    sizes_to_show["Files vocabulary.*"] = (sizes["vocabulary"], size_unit)
+    if sizes["text"] > 0:
+        sizes_to_show["Files text.*"] = (sizes["text"], size_unit)
+    sizes_to_show["TOTAL size"] = (sizes["total"], size_unit)
+    return sizes_to_show
+
+
+def get_size_unit(size_unit: str, total_size: int) -> str:
+    """
+    Resolve the size unit. If `size_unit` is not "auto", return it
+    as-is. Otherwise, pick the largest unit that keeps the total
+    size >= 1 in that unit.
+    """
+    if size_unit != "auto":
+        return size_unit
+    size_unit = "TB"
+    if total_size < 1e6:
+        size_unit = "B"
+    elif total_size < 1e9:
+        size_unit = "MB"
+    elif total_size < 1e12:
+        size_unit = "GB"
+    return size_unit
+
+
+def get_size_unit_factor(size_unit: str) -> int | float:
+    """Return the number of bytes per `size_unit`."""
+    return {"B": 1, "MB": 1e6, "GB": 1e9, "TB": 1e12}[size_unit]
+
+
 class IndexStatsCommand(QleverCommand):
     """
     Class for executing the `index-stats` command.
@@ -23,7 +278,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"data": ["name"]}
 
     def additional_arguments(self, subparser) -> None:
@@ -58,9 +313,13 @@ def additional_arguments(self, subparser) -> None:
             help="The size unit",
         )
 
-    def execute_time(self, args, log_file_name) -> bool:
+    def execute_time(
+        self, args, log_file_name: str
+    ) -> dict[str, tuple[float | None, str]]:
         """
-        Part of `execute` that shows the time used.
+        Read the index build log file(s) and delegate to
+        `compute_durations` for the actual parsing and computation.
+        Returns an empty dict on I/O error or if `compute_durations` fails.
         """
 
         # Read the content of `log_file_name` into a list of lines.
@@ -69,213 +328,40 @@ def execute_time(self, args, log_file_name) -> bool:
                 lines = log_file.readlines()
         except Exception as e:
             log.error(f"Problem reading index log file {log_file_name}: {e}")
-            return False
+            return {}
         # If there is a separate `add-text-index-log.txt` file, append those
         # lines.
+        text_log_file_name = f"{args.name}.text-index-log.txt"
         try:
-            text_log_file_name = f"{args.name}.text-index-log.txt"
             if Path(text_log_file_name).exists():
                 with open(text_log_file_name, "r") as text_log_file:
                     lines.extend(text_log_file.readlines())
         except Exception as e:
             log.error(
-                f"Problem reading text index log file " f"{text_log_file_name}: {e}"
+                f"Problem reading text index log file "
+                f"{text_log_file_name}: {e}"
             )
-            return False
+            return {}
 
-        # Helper function that finds the next line matching the given `regex`,
-        # starting from `current_line`, and extracts the time. Returns a tuple
-        # of the time and the regex match object.
-        #
-        # If `update_current_line` is `False`, then `current_line` will not be
-        # updated by this call.
-        #
-        # Otherwise, and this is the default behavior, `current_line` will be
-        # updated to the line after the first match, or one beyond the last
-        # line if no match is found.
-        current_line = 0
-
-        def find_next_line(regex, update_current_line=True):
-            nonlocal lines
-            nonlocal current_line
-            current_line_backup = current_line
-            # Find starting from `current_line`.
-            while current_line < len(lines):
-                line = lines[current_line]
-                current_line += 1
-                timestamp_regex = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
-                timestamp_format = "%Y-%m-%d %H:%M:%S"
-                regex_match = re.search(regex, line)
-                if regex_match:
-                    try:
-                        return datetime.strptime(
-                            re.match(timestamp_regex, line).group(), timestamp_format
-                        ), regex_match
-                    except Exception as e:
-                        log.error(
-                            f"Could not parse timestamp of form "
-                            f'"{timestamp_regex}" from line '
-                            f' "{line.rstrip()}" ({e})'
-                        )
-            # If we get here, we did not find a matching line.
-            if not update_current_line:
-                current_line = current_line_backup
-            return None, None
-
-        # Find the lines matching the key_lines_regex and extract the time
-        # information from them.
-        overall_begin, _ = find_next_line(r"INFO:\s*Processing")
-        merge_begin, _ = find_next_line(r"INFO:\s*Merging partial vocab")
-        convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
-        perm_begin_and_info = []
-        while True:
-            # Find the next line that starts a permutation.
-            #
-            # NOTE: Should work for the old and new format of the index log
-            # file (old format: "Creating a pair" + names of permutations in
-            # line "Writing meta data for ..."; new format: name of
-            # permutations already in line "Creating permutations ...").
-            perm_begin, _ = find_next_line(
-                r"INFO:\s*Creating a pair", update_current_line=False
-            )
-            if perm_begin is None:
-                perm_begin, perm_info = find_next_line(
-                    r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
-                    update_current_line=False,
-                )
-            else:
-                _, perm_info = find_next_line(
-                    r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
-                    update_current_line=False,
-                )
-            if perm_info is None:
-                break
-            perm_begin_and_info.append((perm_begin, perm_info))
-        convert_end = (
-            perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None
-        )
-        normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
-        text_begin, _ = find_next_line(
-            r"INFO:\s*Adding text index", update_current_line=False
-        )
-        text_end, _ = find_next_line(
-            r"INFO:\s*Text index build comp", update_current_line=False
-        )
-        if args.ignore_text_index:
-            text_begin = text_end = None
-
-        # Check whether at least the first phase is done.
-        if overall_begin is None:
-            log.error("Missing line that index build has started")
-            return False
-        if overall_begin and not merge_begin:
-            log.error(
-                "According to the log file, the index build "
-                "has started, but is still in its first "
-                "phase (parsing the input)"
-            )
-            return False
-
-        # Helper function that shows the duration for a phase (if the start and
-        # end timestamps are available).
-        def show_duration(heading, start_end_pairs):
-            nonlocal time_unit
-            num_start_end_pairs = 0
-            diff_seconds = 0
-            for start, end in start_end_pairs:
-                if start and end:
-                    diff_seconds += (end - start).total_seconds()
-                    num_start_end_pairs += 1
-            if num_start_end_pairs > 0:
-                if time_unit == "h":
-                    diff = diff_seconds / 3600
-                elif time_unit == "min":
-                    diff = diff_seconds / 60
-                else:
-                    diff = diff_seconds
-                log.info(f"{heading:<21} : {diff:>6.1f} {time_unit}")
-
-        # Get the times of the various phases (hours or minutes, depending on
-        # how long the first phase took).
-        time_unit = args.time_unit
-        if time_unit == "auto":
-            time_unit = "h"
-            if merge_begin and overall_begin:
-                parse_duration = (merge_begin - overall_begin).total_seconds()
-                if parse_duration < 200:
-                    time_unit = "s"
-                elif parse_duration < 3600:
-                    time_unit = "min"
-        show_duration("Parse input", [(overall_begin, merge_begin)])
-        show_duration("Build vocabularies", [(merge_begin, convert_begin)])
-        show_duration("Convert to global IDs", [(convert_begin, convert_end)])
-        for i in range(len(perm_begin_and_info)):
-            perm_begin, perm_info = perm_begin_and_info[i]
-            perm_end = (
-                perm_begin_and_info[i + 1][0]
-                if i + 1 < len(perm_begin_and_info)
-                else normal_end
-            )
-            perm_info_text = (
-                perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}"
-            )
-            show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)])
-        show_duration("Text index", [(text_begin, text_end)])
-        if text_begin and text_end:
-            log.info("")
-            show_duration(
-                "TOTAL time", [(overall_begin, normal_end), (text_begin, text_end)]
-            )
-        elif normal_end:
-            log.info("")
-            show_duration("TOTAL time", [(overall_begin, normal_end)])
-        return True
+        return compute_durations(lines, args.time_unit, args.ignore_text_index)
 
-    def execute_space(self, args) -> bool:
+    def execute_space(self, args) -> dict[str, tuple[float, str]]:
         """
-        Part of `execute` that shows the space used.
+        Compute the disk space used by each group of index files. Returns
+        a dict mapping display labels (e.g. "Files index.*", "TOTAL size")
+        to (size, unit) tuples, where size is already converted to `unit`.
         """
-
-        # Get the sizes for the various groups of index files.
-        index_size = get_total_file_size([f"{args.name}.index.*"])
-        vocab_size = get_total_file_size([f"{args.name}.vocabulary.*"])
-        text_size = get_total_file_size([f"{args.name}.text.*"])
+        # Collect raw sizes in bytes.
+        sizes = {}
+        for size_type in ["index", "vocabulary", "text"]:
+            sizes[size_type] = get_total_file_size(
+                [f"{args.name}.{size_type}.*"]
+            )
         if args.ignore_text_index:
-            text_size = 0
-        total_size = index_size + vocab_size + text_size
-
-        # Determing the proper unit for the size.
-        size_unit = args.size_unit
-        if size_unit == "auto":
-            size_unit = "TB"
-            if total_size < 1e6:
-                size_unit = "B"
-            elif total_size < 1e9:
-                size_unit = "MB"
-            elif total_size < 1e12:
-                size_unit = "GB"
-
-        # Helper function for showing the size in a uniform way.
-        def show_size(heading, size):
-            nonlocal size_unit
-            if size_unit == "GB":
-                size /= 1e9
-            elif size_unit == "MB":
-                size /= 1e6
-            elif size_unit == "TB":
-                size /= 1e12
-            if size_unit == "B":
-                log.info(f"{heading:<21} :  {size:,} {size_unit}")
-            else:
-                log.info(f"{heading:<21} : {size:>6.1f} {size_unit}")
-
-        show_size("Files index.*", index_size)
-        show_size("Files vocabulary.*", vocab_size)
-        if text_size > 0:
-            show_size("Files text.*", text_size)
-        log.info("")
-        show_size("TOTAL size", total_size)
-        return True
+            sizes["text"] = 0
+        sizes["total"] = sum(sizes.values())
+
+        return compute_sizes(sizes, args.size_unit)
 
     def execute(self, args) -> bool:
         return_value = True
@@ -290,7 +376,17 @@ def execute(self, args) -> bool:
                 only_show=args.show,
             )
             if not args.show:
-                return_value &= self.execute_time(args, log_file_name)
+                durations = self.execute_time(args, log_file_name)
+                # Display each phase duration, skipping phases with
+                # missing timestamps (duration is None).
+                for heading, (duration, time_unit) in durations.items():
+                    if duration is not None:
+                        if heading == "TOTAL time":
+                            log.info("")
+                        log.info(
+                            f"{heading:<25} : {duration:>6.1f} {time_unit}"
+                        )
+                return_value &= len(durations) != 0
             if not args.only_time:
                 log.info("")
 
@@ -301,6 +397,15 @@ def execute(self, args) -> bool:
                 only_show=args.show,
             )
             if not args.show:
-                return_value &= self.execute_space(args)
+                sizes = self.execute_space(args)
+                # Display the disk space used by each group of index files.
+                for heading, (size, size_unit) in sizes.items():
+                    if heading == "TOTAL size":
+                        log.info("")
+                    if size_unit == "B":
+                        log.info(f"{heading:<25} :  {size:,} {size_unit}")
+                    else:
+                        log.info(f"{heading:<25} : {size:>6.1f} {size_unit}")
+                return_value &= len(sizes) != 0
 
         return return_value
diff --git a/src/qlever/commands/log.py b/src/qlever/commands/log.py
index 816072bc..34942ec0 100644
--- a/src/qlever/commands/log.py
+++ b/src/qlever/commands/log.py
@@ -20,7 +20,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"data": ["name"]}
 
     def additional_arguments(self, subparser) -> None:
diff --git a/src/qlever/commands/materialized_view.py b/src/qlever/commands/materialized_view.py
new file mode 100644
index 00000000..68e6d3f1
--- /dev/null
+++ b/src/qlever/commands/materialized_view.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import json
+import re
+import shlex
+import time
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import (
+    run_command,
+)
+
+
+class MaterializedViewCommand(QleverCommand):
+    """
+    Class for executing the `materialized-view` command.
+    """
+
+    def __init__(self):
+        self.materialized_view_name_regex = r"^[A-Za-z0-9-]+$"
+        pass
+
+    def description(self) -> str:
+        return "Create a materialized view from the given query"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {
+            "data": ["name"],
+            "server": ["host_name", "port", "access_token"],
+        }
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "view_name",
+            type=str,
+            help="Name of the materialized view",
+        )
+        subparser.add_argument(
+            "view_query",
+            type=str,
+            help="SPARQL query from which to create the materialized view",
+        )
+        subparser.add_argument(
+            "--sparql-endpoint",
+            type=str,
+            help="URL of the SPARQL endpoint (default: <host_name>:<port>)",
+        )
+
+    def execute(self, args) -> bool:
+        # SPARQL endpoint to use.
+        sparql_endpoint = (
+            args.sparql_endpoint
+            if args.sparql_endpoint is not None
+            else f"{args.host_name}:{args.port}"
+        )
+
+        # Check that the name of the materialized view is valid.
+        if not re.match(self.materialized_view_name_regex, args.view_name):
+            log.error(
+                f"The name for the materialized view must match "
+                f"the regex {self.materialized_view_name_regex}"
+            )
+            return False
+
+        # Command for building the materialized view.
+        url = (
+            f"{sparql_endpoint}"
+            f"?cmd=write-materialized-view"
+            f"&view-name={args.view_name}"
+        )
+        materialized_view_cmd = (
+            f"curl -s {shlex.quote(url)} "
+            f"-H 'Authorization: Bearer {args.access_token}' "
+            f"-H 'Content-type: application/sparql-query' "
+            f"-d {shlex.quote(args.view_query)}"
+        )
+        self.show(materialized_view_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        # Run the command (and time it).
+        time_start = time.monotonic()
+        try:
+            log.info("Creating the materialized view ... "
+                     "(this may take a while, depending on the complexity "
+                     "of the query and the size of the result)")
+            log.info("")
+            result = run_command(materialized_view_cmd, return_output=True)
+        except Exception as e:
+            log.error(f"Creating the materialized view failed: {e}")
+            return False
+        time_end = time.monotonic()
+        duration_seconds = round(time_end - time_start)
+
+        # Try to parse the result (should be JSON).
+        try:
+            result_json = json.loads(result)
+            view_name = result_json.get("materialized-view-written")
+            log.info(
+                f"Materialized view '{view_name}' created successfully "
+                f"in {duration_seconds:,} seconds"
+            )
+        except Exception as e:
+            log.error(f'Failed to parse JSON from "{result}": {e}')
+
+        return True
diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py
index c8c0d71f..fa727c4a 100644
--- a/src/qlever/commands/query.py
+++ b/src/qlever/commands/query.py
@@ -37,7 +37,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"server": ["host_name", "port", "access_token"]}
 
     def additional_arguments(self, subparser) -> None:
@@ -72,6 +72,7 @@ def additional_arguments(self, subparser) -> None:
                 "application/sparql-results+json",
                 "application/sparql-results+xml",
                 "application/qlever-results+json",
+                "application/octet-stream",
             ],
             default="text/tab-separated-values",
             help="Accept header for the SPARQL query",
@@ -94,7 +95,7 @@ def execute(self, args) -> bool:
         if args.pin_to_cache:
             args.accept = "application/qlever-results+json"
             curl_cmd_additions = (
-                f" --data pinresult=true --data send=0"
+                f" --data pin-result=true --data send=0"
                 f" --data access-token="
                 f"{shlex.quote(args.access_token)}"
                 f" | jq .resultsize | numfmt --grouping"
diff --git a/src/qlever/commands/rebuild_index.py b/src/qlever/commands/rebuild_index.py
new file mode 100644
index 00000000..b3bb7062
--- /dev/null
+++ b/src/qlever/commands/rebuild_index.py
@@ -0,0 +1,337 @@
+from __future__ import annotations
+
+import shlex
+import shutil
+import subprocess
+import time
+from pathlib import Path
+
+from termcolor import colored
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import (
+    get_existing_index_files,
+    run_command,
+)
+
+
+class RebuildIndexCommand(QleverCommand):
+    """
+    Class for executing the `rebuild-index` command.
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return "Rebuild the index from the current data (including updates)"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {
+            "data": ["name"],
+            "server": ["host_name", "port", "access_token"],
+            "runtime": ["server_container"],
+        }
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--new-index-dir",
+            type=str,
+            help="Target directory for the new index (default: not set, "
+            "move the old index instead; see `--old-index-dir`)",
+        )
+        subparser.add_argument(
+            "--old-index-dir",
+            type=str,
+            help="Directory where to move the current index once the rebuild "
+            "is finished (default: subdirectory `previous.YYYY-MM-DDTHH:MM`, "
+            "where the timestamp is the time of the earliest index file)",
+        )
+        subparser.add_argument(
+            "--new-index-dir-basename",
+            type=str,
+            default="rebuild.",
+            help="Basename prefix for the new index directory when "
+            "`--new-index-dir` is not specified (default: `rebuild.`)",
+        )
+        subparser.add_argument(
+            "--old-index-dir-basename",
+            type=str,
+            default="previous.",
+            help="Basename prefix for the old index directory when "
+            "`--old-index-dir` is not specified (default: `previous.`)",
+        )
+        subparser.add_argument(
+            "--keep-old-index-dirs",
+            choices=["all", "none", "oldest", "newest"],
+            default="oldest",
+            help="Which old index directories to keep: all (keep all), "
+            "none (delete all), oldest (keep only oldest), "
+            "newest (keep only newest) (default: oldest)",
+        )
+        subparser.add_argument(
+            "--index-name",
+            type=str,
+            help="Base name of the files of the new index (default: use "
+            "the same basename as for the current index)",
+        )
+        subparser.add_argument(
+            "--restart-when-finished",
+            action="store_true",
+            default=False,
+            help="When the rebuild is finished, stop the server with the old "
+            "index and start it again with the new index",
+        )
+
+    def execute(self, args) -> bool:
+        # Either `--new-index-dir` or `--old-index-dir`.
+        if args.new_index_dir is not None and args.old_index_dir is not None:
+            log.error(
+                "Please specify either --new-index-dir (the target directory "
+                "for the new index) or --old-index-dir (the directory where "
+                "to move the current index), but not both"
+            )
+            return False
+
+        # Get the list of all files from the current index and get the date of
+        # the earliest one (in UTC). Add the `Qleverfile` as well.
+        old_index_files = get_existing_index_files(
+            args.name, add_non_essential=True
+        )
+        old_index_date = time.strftime(
+            "%Y-%m-%dT%H:%M:%SZ",
+            time.gmtime(min(Path(f).stat().st_mtime for f in old_index_files)),
+        )
+        new_index_date = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+        old_index_files.append("Qleverfile")
+
+        # Default values for arguments.
+        #
+        # NOTE 1: When `--old-index-dir` is specified but not `--new-index-dir`,
+        # we nevertheless first build the new index in a temporary directory,
+        # and only when that is successful do we move the current index to the
+        # directory specified by `--old-index-dir` and move the new index to
+        # the current index directory. That way, if the rebuild fails, we still
+        # have the current index in its original location.
+        #
+        # NOTE 2: As a consequence of this logic, `args.new_index_dir` is
+        # always defined after this block, even when it was not specified on
+        # the command line.
+        if args.index_name is None:
+            args.index_name = args.name
+        if args.new_index_dir is None:
+            args.new_index_dir = (
+                f"{args.new_index_dir_basename}{new_index_date}.tmp"
+            )
+            if args.old_index_dir is None:
+                # Check if this is the first rebuild (no previous.* directories exist)
+                existing_previous_dirs = list(
+                    Path(".").glob(f"{args.old_index_dir_basename}*")
+                )
+                is_first_rebuild = not existing_previous_dirs
+
+                args.old_index_dir = (
+                    f"{args.old_index_dir_basename}{old_index_date}"
+                    + (".ORIGINAL" if is_first_rebuild else "")
+                )
+        if args.new_index_dir.endswith("/"):
+            args.new_index_dir = args.new_index_dir[:-1]
+
+        # Check that the new index directory either does not exist or is empty.
+        # Same for the old index directory, if specified.
+        new_index_path = Path(args.new_index_dir)
+        if new_index_path.exists() and any(new_index_path.iterdir()):
+            log.error(
+                f"The target directory '{args.new_index_dir}' for the new "
+                "index already exists and is not empty; please specify an "
+                "empty or non-existing directory"
+            )
+            return False
+        if args.old_index_dir is not None:
+            old_index_path = Path(args.old_index_dir)
+            if old_index_path.exists() and any(old_index_path.iterdir()):
+                log.error(
+                    f"The target directory '{args.old_index_dir}' for the "
+                    "old index already exists and is not empty; please "
+                    "specify an empty or non-existing directory"
+                )
+                return False
+
+        # Split `new_index_dir` into path and dir name. For example, if
+        # `new_index_dir` is `path/to/index`, then the path is `path/to` and
+        # the dir name is `index`.
+        #
+        # NOTE: We keep this separate because we can always create a
+        # subdirectory in the current directory (even when running in a
+        # container), but not necessarily a directory at an arbitrary path. If
+        # a path outside the current directory is desired, we move the index
+        # there after it has been built.
+        new_index_dir_path = str(Path(args.new_index_dir).parent)
+        new_index_dir_name = str(Path(args.new_index_dir).name)
+        log_file_name = f"{args.index_name}.rebuild-index-log.txt"
+
+        # Note which indexes we have to move when done.
+        move_new_index_when_done = new_index_dir_path != "."
+        move_old_index_when_done = args.old_index_dir is not None
+
+        # Command for rebuilding the index.
+        mkdir_cmd = (
+            f"mkdir -p {new_index_dir_name} && "
+            f"cp -a Qleverfile {new_index_dir_name}"
+        )
+        rebuild_index_cmd = (
+            f"curl -s {args.host_name}:{args.port} "
+            f"-d cmd=rebuild-index "
+            f"-d index-name={new_index_dir_name}/{args.index_name} "
+            f"-d access-token={args.access_token}"
+        )
+        move_new_index_cmd = f"mv {new_index_dir_name} {new_index_dir_path}"
+        move_old_index_cmd = (
+            f"mkdir -p {shlex.quote(args.old_index_dir)} && "
+            f"mv {' '.join(shlex.quote(f) for f in old_index_files)} "
+            f"{shlex.quote(args.old_index_dir)} && "
+            f"mv {shlex.quote(new_index_dir_name)}/* . && "
+            f"rmdir {shlex.quote(new_index_dir_name)}"
+        )
+        restart_server_cmd = "qlever stop && qlever start"
+        if not move_old_index_when_done:
+            # The new index stays in its own directory, so restart there.
+            cd_cmd = f"cd {args.new_index_dir}"
+            restart_server_cmd = f"{cd_cmd} && {restart_server_cmd}"
+
+        # Show the command lines.
+        cmds_to_show = [mkdir_cmd, rebuild_index_cmd]
+        if move_old_index_when_done:
+            cmds_to_show.append(move_old_index_cmd)
+        if move_new_index_when_done:
+            cmds_to_show.append(move_new_index_cmd)
+        if args.restart_when_finished:
+            cmds_to_show.append(restart_server_cmd)
+        self.show("\n".join(cmds_to_show), only_show=args.show)
+        if args.show:
+            return True
+
+        # Create the new index directory and copy the `Qleverfile` into it.
+        try:
+            run_command(mkdir_cmd)
+        except Exception as e:
+            log.error(f"Creating the index directory failed: {e}")
+            return False
+
+        # Show the server log while rebuilding the index.
+        #
+        # NOTE: This will only work satisfactorily when no other queries are
+        # being processed at the same time. It would be better if QLever
+        # logged the rebuild-index output to a separate log file.
+        tail_cmd = (
+            f"touch {new_index_dir_name}/{log_file_name} && "
+            f"exec tail -n 0 -f {new_index_dir_name}/{log_file_name}"
+        )
+        tail_proc = subprocess.Popen(tail_cmd, shell=True)
+
+        # Run the index rebuild command (and time it).
+        try:
+            time_start = time.monotonic()
+            try:
+                run_command(rebuild_index_cmd, show_output=False)
+            except Exception as e:
+                log.error(f"Rebuilding the index failed: {e}")
+                return False
+            time_end = time.monotonic()
+            duration_seconds = round(time_end - time_start)
+            log.info("")
+            rebuild_done_msg = f"Rebuilt index in {duration_seconds:,} seconds"
+            if new_index_dir_path == ".":
+                rebuild_done_msg += (
+                    f", in the new directory '{args.new_index_dir}'"
+                )
+            log.info(rebuild_done_msg)
+        finally:
+            tail_proc.terminate()
+            tail_proc.wait()
+
+        # Move the old index to the specified directory, if needed.
+        if move_old_index_when_done:
+            try:
+                log.info(f"Moving the old index to {args.old_index_dir}")
+                run_command(move_old_index_cmd)
+            except Exception as e:
+                log.error(f"Moving the old index failed: {e}")
+                return False
+
+        # Move the new index to the specified directory, if needed.
+        if move_new_index_when_done:
+            try:
+                log.info(f"Moving the new index to {args.new_index_dir}")
+                run_command(move_new_index_cmd)
+            except Exception as e:
+                log.error(f"Moving the new index failed: {e}")
+                return False
+
+        # Restart the server with the new index, if requested.
+        if args.restart_when_finished:
+            try:
+                log.info("Restarting the server with the new index ...")
+                log.info("")
+                log.info(colored("Command: start", attrs=["bold"]))
+                log.info("")
+                run_command(restart_server_cmd, show_output=True)
+            except Exception as e:
+                log.error(f"Restarting the server failed: {e}")
+                return False
+
+        # Clean up old index directories according to `--keep-old-index-dirs`.
+        # Find all subdirectories starting with `old_index_dir_basename`,
+        # ordered by `st_ctime` (on Unix: time of the last metadata change,
+        # not creation), and keep or delete them according to the policy.
+        if move_old_index_when_done:
+            old_index_dirs = sorted(
+                [
+                    old_dir
+                    for old_dir in Path(".").iterdir()
+                    if old_dir.is_dir()
+                    and old_dir.name.startswith(args.old_index_dir_basename)
+                ],
+                key=lambda old_dir: old_dir.stat().st_ctime,
+            )
+            if old_index_dirs:
+                log.info("")
+                log.info(
+                    colored(
+                        f"Iterate over old index directories (oldest to "
+                        f"newest), and check which ones to keep or delete "
+                        f"(keep_old_index_dirs = {args.keep_old_index_dirs}):",
+                        color="blue",
+                    )
+                )
+                for i, old_dir in enumerate(old_index_dirs):
+                    is_oldest = i == 0
+                    is_newest = i == len(old_index_dirs) - 1
+                    if args.keep_old_index_dirs == "all":
+                        action = "KEEP"
+                    elif args.keep_old_index_dirs == "none":
+                        action = "DELETE"
+                    elif args.keep_old_index_dirs == "oldest":
+                        action = "KEEP" if is_oldest else "DELETE"
+                    elif args.keep_old_index_dirs == "newest":
+                        action = "KEEP" if is_newest else "DELETE"
+
+                    log.info(f"  {old_dir.name:<50} {action}")
+
+                    # Actually perform the deletion
+                    if action == "DELETE":
+                        try:
+                            shutil.rmtree(old_dir)
+                            log.info(f"    → Deleted {old_dir.name}")
+                        except Exception as e:
+                            log.error(
+                                f"    → Failed to delete {old_dir.name}: {e}"
+                            )
+
+                log.info("")
+
+        return True
diff --git a/src/qlever/commands/reset_updates.py b/src/qlever/commands/reset_updates.py
new file mode 100644
index 00000000..93625d9e
--- /dev/null
+++ b/src/qlever/commands/reset_updates.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+import re
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+class ResetUpdatesCommand(QleverCommand):
+    """
+    Command `reset-updates`: send `cmd=clear-delta-triples` to the server.
+    """
+
+    def __init__(self):
+        pass  # This command keeps no state of its own.
+
+    def description(self) -> str:
+        return "Reset the updates on the server"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"server": ["host_name", "port", "access_token"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--sparql-endpoint",
+            help="URL of the QLever server, default is {host_name}:{port}",
+        )
+
+    def execute(self, args) -> bool:
+        """Show the `curl` command and, unless `--show` is given, run it."""
+        endpoint = args.sparql_endpoint or f"{args.host_name}:{args.port}"
+        reset_cmd = (
+            f"curl -s {endpoint}"
+            f' --data-urlencode "cmd=clear-delta-triples"'
+            f' --data-urlencode "access-token={args.access_token}"'
+        )
+        self.show(reset_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        # Let `curl` append the HTTP status code after the response body.
+        cmd_with_status = reset_cmd + ' -w " %{http_code}"'
+        try:
+            result = run_command(cmd_with_status, return_output=True)
+            parsed = re.match(r"^(.*) (\d+)$", result, re.DOTALL)
+            if parsed is None:
+                raise Exception(f"Unexpected output:\n{result}")
+            body, status_code = parsed.group(1).strip(), parsed.group(2)
+            if status_code != "200":
+                raise Exception(body)
+            log.info("Updates reset successfully")
+            return True
+        except Exception as e:
+            log.error(e)
+            return False
diff --git a/src/qlever/commands/settings.py b/src/qlever/commands/settings.py
index dcf52e37..3c19358d 100644
--- a/src/qlever/commands/settings.py
+++ b/src/qlever/commands/settings.py
@@ -6,6 +6,7 @@
 
 from qlever.command import QleverCommand
 from qlever.log import log
+from qlever.qleverfile import Qleverfile
 from qlever.util import run_command
 
 
@@ -23,36 +24,19 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"server": ["port", "host_name", "access_token"]}
 
     def additional_arguments(self, subparser) -> None:
-        all_keys = [
-            "always-multiply-unions",
-            "cache-max-num-entries",
-            "cache-max-size",
-            "cache-max-size-single-entry",
-            "cache-service-results",
-            "default-query-timeout",
-            "group-by-disable-index-scan-optimizations",
-            "group-by-hash-map-enabled",
-            "lazy-index-scan-max-size-materialization",
-            "lazy-index-scan-num-threads",
-            "lazy-index-scan-queue-size",
-            "lazy-result-max-cache-size",
-            "query-planning-budget",
-            "request-body-limit",
-            "service-max-value-rows",
-            "sort-estimate-cancellation-factor",
-            "throw-on-unbound-variables",
-            "use-binsearch-transitive-path",
-        ]
         subparser.add_argument(
-            "runtime_parameter",
-            nargs="?",
-            help="Set the given runtime parameter (key=value)"
-            "; if no argument is given, show all settings",
-        ).completer = lambda **kwargs: [f"{key}=" for key in all_keys]
+            "runtime_parameters",
+            nargs="*",
+            help="Space-separated list of runtime parameters to set "
+            "in the form `key=value`; afterwards shows all settings, "
+            "with the changed ones highlighted",
+        ).completer = lambda **kwargs: [
+            f"{key}=" for key in Qleverfile.SERVER_RUNTIME_PARAMETERS
+        ]
         subparser.add_argument(
             "--endpoint_url",
             type=str,
@@ -67,46 +51,65 @@ def execute(self, args) -> bool:
         else:
             endpoint_url = f"http://{args.host_name}:{args.port}"
 
-        # Construct the `curl` command for getting or setting.
-        if args.runtime_parameter:
-            try:
-                parameter_key, parameter_value = args.runtime_parameter.split(
-                    "="
+        # Construct the `curl` commands for setting and getting.
+        curl_cmds_setting = []
+        keys_set = set()
+        if args.runtime_parameters:
+            for key_value_pair in args.runtime_parameters:
+                try:
+                    key, value = key_value_pair.split("=")
+                except ValueError:
+                    log.error("Runtime parameter must be given as `key=value`")
+                    return False
+                curl_cmds_setting.append(
+                    f"curl -s {endpoint_url} -w %{{http_code}}"
+                    f' --data-urlencode "{key}={value}"'
+                    f' --data-urlencode "access-token={args.access_token}"'
                 )
-            except ValueError:
-                log.error("Runtime parameter must be given as `key=value`")
-                return False
-
-            curl_cmd = (
-                f"curl -s {endpoint_url}"
-                f' --data-urlencode "{parameter_key}={parameter_value}"'
-                f' --data-urlencode "access-token={args.access_token}"'
-            )
-        else:
-            curl_cmd = (
-                f"curl -s {endpoint_url}" f" --data-urlencode cmd=get-settings"
-            )
-            parameter_key, parameter_value = None, None
-        self.show(curl_cmd, only_show=args.show)
+                keys_set.add(key)
+        curl_cmd_getting = (
+            f"curl -s {endpoint_url} -w %{{http_code}}"
+            f" --data-urlencode cmd=get-settings"
+        )
+        self.show(
+            "\n".join(curl_cmds_setting + [curl_cmd_getting]),
+            only_show=args.show,
+        )
         if args.show:
             return True
 
-        # Execute the `curl` command. Note that the `get-settings` command
-        # returns all settings in both scencarios (that is, also when setting a
-        # parameter).
+        # Execute the `curl` commands for setting the key-value pairs if any.
+        for curl_cmd in curl_cmds_setting:
+            try:
+                curl_result = run_command(curl_cmd, return_output=True)
+                body, http_code = curl_result[:-3], curl_result[-3:]
+                if http_code != "200":
+                    raise Exception(body)
+            except Exception as e:
+                log.error(
+                    f"curl command for setting key-value pair failed: {e}"
+                )
+                return False
+
+        # Execute the `curl` commands for getting the settings.
         try:
-            settings_json = run_command(curl_cmd, return_output=True)
-            settings_dict = json.loads(settings_json)
+            curl_result = run_command(curl_cmd_getting, return_output=True)
+            body, http_code = curl_result[:-3], curl_result[-3:]
+            if http_code != "200":
+                raise Exception(body)
+            settings_dict = json.loads(body)
+            if isinstance(settings_dict, list):
+                settings_dict = settings_dict[0]
         except Exception as e:
-            log.error(f"setting command failed: {e}")
+            log.error(f"curl command for getting settings failed: {e}")
             return False
         for key, value in settings_dict.items():
             print(
                 colored(
                     f"{key:<45}: {value}",
-                    "blue"
-                    if parameter_key and key == parameter_key
-                    else None,
+                    "blue" if key in keys_set else None,
                 )
             )
+
+        # That's it.
         return True
diff --git a/src/qlever/commands/setup_config.py b/src/qlever/commands/setup_config.py
index 0eff3b23..1a53d604 100644
--- a/src/qlever/commands/setup_config.py
+++ b/src/qlever/commands/setup_config.py
@@ -26,7 +26,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {}
 
     def additional_arguments(self, subparser) -> None:
diff --git a/src/qlever/commands/start.py b/src/qlever/commands/start.py
index a6811c6f..f55abe6d 100644
--- a/src/qlever/commands/start.py
+++ b/src/qlever/commands/start.py
@@ -5,11 +5,13 @@
 
 from qlever.command import QleverCommand
 from qlever.commands.cache_stats import CacheStatsCommand
+from qlever.commands.settings import SettingsCommand
 from qlever.commands.status import StatusCommand
 from qlever.commands.stop import StopCommand
 from qlever.commands.warmup import WarmupCommand
 from qlever.containerize import Containerize
 from qlever.log import log
+from qlever.qleverfile import Qleverfile
 from qlever.util import binary_exists, is_qlever_server_alive, run_command
 
 
@@ -34,7 +36,7 @@ def construct_command(args) -> str:
         start_cmd += " --persist-updates"
     if args.only_pso_and_pos_permutations:
         start_cmd += " --only-pso-and-pos-permutations"
-    if not args.use_patterns:
+    if args.use_patterns == "no":
         start_cmd += " --no-patterns"
     if args.use_text_index == "yes":
         start_cmd += " -t"
@@ -45,7 +47,7 @@ def construct_command(args) -> str:
 # Kill existing server on the same port. Trust that StopCommand() works?
 # Maybe return StopCommand().execute(args) and handle it with a try except?
 def kill_existing_server(args) -> bool:
-    args.cmdline_regex = f"^ServerMain.* -p {args.port}"
+    args.cmdline_regex = f"^qlever-server.* -p {args.port}"
     args.no_containers = True
     if not StopCommand().execute(args):
         log.error("Stopping the existing server failed")
@@ -120,7 +122,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name", "description", "text_description"],
             "server": [
@@ -165,14 +167,25 @@ def additional_arguments(self, subparser) -> None:
             help="Run the server in the foreground "
             "(default: run in the background with `nohup`)",
         )
+        subparser.add_argument(
+            "runtime_parameters",
+            nargs="*",
+            help="Space-separated list of runtime parameters to set "
+            "(in the form `key=value`) once the server is running",
+        ).completer = lambda **kwargs: [
+            f"{key}=" for key in Qleverfile.SERVER_RUNTIME_PARAMETERS
+        ]
 
     def execute(self, args) -> bool:
+        # Set the endpoint URL.
+        args.endpoint_url = f"http://{args.host_name}:{args.port}"
+
         # Kill existing server with the same name if so desired.
         #
         # TODO: This is currently disabled because I never used it once over
         # the past weeks and it is not clear to me what the use case is.
         if False:  # or args.kill_existing_with_same_name:
-            args.cmdline_regex = f"^ServerMain.* -i {args.name}"
+            args.cmdline_regex = f"^qlever-server.* -i {args.name}"
             args.no_containers = True
             StopCommand().execute(args)
             log.info("")
@@ -200,17 +213,17 @@ def execute(self, args) -> bool:
         # Show the command line.
         self.show(start_cmd, only_show=args.show)
         if args.show:
+            if args.runtime_parameters:
+                log.info("")
+                SettingsCommand().execute(args)
             return True
 
-        # When running natively, check if the binary exists and works.
-        if args.system == "native":
-            if not binary_exists(args.server_binary, "server-binary"):
-                return False
+        if not binary_exists(args.server_binary, "server-binary", args):
+            return False
 
         # Check if a QLever server is already running on this port.
-        endpoint_url = f"http://{args.host_name}:{args.port}"
-        if is_qlever_server_alive(endpoint_url):
-            log.error(f"QLever server already running on {endpoint_url}")
+        if is_qlever_server_alive(args.endpoint_url):
+            log.error(f"QLever server already running on {args.endpoint_url}")
             log.info("")
             log.info(
                 "To kill the existing server, use `qlever stop` "
@@ -219,7 +232,7 @@ def execute(self, args) -> bool:
             )
 
             # Show output of status command.
-            args.cmdline_regex = f"^ServerMain.* -p *{args.port}"
+            args.cmdline_regex = f"^qlever-server.* -p *{args.port}"
             log.info("")
             StatusCommand().execute(args)
             return False
@@ -269,7 +282,7 @@ def execute(self, args) -> bool:
         log.info("")
         tail_cmd = f"exec tail -f {args.name}.server-log.txt"
         tail_proc = subprocess.Popen(tail_cmd, shell=True)
-        while not is_qlever_server_alive(endpoint_url):
+        while not is_qlever_server_alive(args.endpoint_url):
             time.sleep(1)
 
         # Set the description for the index and text.
@@ -302,9 +315,14 @@ def execute(self, args) -> bool:
         if not args.run_in_foreground:
             log.info("")
             args.detailed = False
-            args.server_url = None
+            args.sparql_endpoint = None
             CacheStatsCommand().execute(args)
 
+        # Apply settings if any.
+        if args.runtime_parameters:
+            log.info("")
+            SettingsCommand().execute(args)
+
         # With `--run-in-foreground`, wait until the server is stopped.
         if args.run_in_foreground:
             try:
diff --git a/src/qlever/commands/status.py b/src/qlever/commands/status.py
index a8efed54..f1683321 100644
--- a/src/qlever/commands/status.py
+++ b/src/qlever/commands/status.py
@@ -20,12 +20,12 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return False
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {}
 
     def additional_arguments(self, subparser) -> None:
         subparser.add_argument("--cmdline-regex",
-                               default="^(ServerMain|IndexBuilderMain)",
+                               default="^(qlever-server|qlever-index)",
                                help="Show only processes where the command "
                                     "line matches this regex")
 
diff --git a/src/qlever/commands/stop.py b/src/qlever/commands/stop.py
index a68d433b..bcd57f6d 100644
--- a/src/qlever/commands/stop.py
+++ b/src/qlever/commands/stop.py
@@ -38,14 +38,14 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"data": ["name"],
                 "server": ["port"],
                 "runtime": ["server_container"]}
 
     def additional_arguments(self, subparser) -> None:
         subparser.add_argument("--cmdline-regex",
-                               default="ServerMain.* -i [^ ]*%%NAME%%",
+                               default="qlever-server.* -i [^ ]*%%NAME%%",
                                help="Show only processes where the command "
                                     "line matches this regex")
         subparser.add_argument("--no-containers", action="store_true",
@@ -84,7 +84,7 @@ def execute(self, args) -> bool:
         message = "No matching process found" if args.no_containers else \
             "No matching process or container found"
         log.error(message)
-        args.cmdline_regex = "^ServerMain.* -i [^ ]*"
+        args.cmdline_regex = "^qlever-server.* -i [^ ]*"
         log.info("")
         StatusCommand().execute(args)
         return True
diff --git a/src/qlever/commands/system_info.py b/src/qlever/commands/system_info.py
index acf12ac8..e8b71a2b 100644
--- a/src/qlever/commands/system_info.py
+++ b/src/qlever/commands/system_info.py
@@ -48,7 +48,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"runtime": ["system", "image", "server_container"]}
 
     def additional_arguments(self, subparser) -> None:
diff --git a/src/qlever/commands/ui.py b/src/qlever/commands/ui.py
index fb49ee78..5c12e1d1 100644
--- a/src/qlever/commands/ui.py
+++ b/src/qlever/commands/ui.py
@@ -13,13 +13,16 @@
 
 # Return a YAML string for the given dictionary. Format values with
 # newlines using the "|" style.
-def dict_to_yaml(dictionary):
-    # Custom representer for yaml, which uses the "|" style only for
-    # multiline strings.
-    #
-    # NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style
-    # does not work as expected.
-    class MultiLineDumper(yaml.Dumper):
+def dict_to_yaml(dictionary: dict) -> str:
+    """
+    Custom representer for yaml, which uses the "|" style only for
+    multiline strings.
+
+    NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style
+    does not work as expected.
+    """
+
+    class MultiLineDumper(yaml.SafeDumper):
         def represent_scalar(self, tag, value, style=None):
             value = value.replace("\r\n", "\n")
             if isinstance(value, str) and "\n" in value:
@@ -30,6 +33,7 @@ def represent_scalar(self, tag, value, style=None):
     return yaml.dump(
         dictionary,
         sort_keys=False,
+        allow_unicode=True,
         Dumper=MultiLineDumper,
     )
 
@@ -48,7 +52,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name"],
             "server": ["host_name", "port"],
diff --git a/src/qlever/commands/update.py b/src/qlever/commands/update.py
new file mode 100644
index 00000000..dd3cf46c
--- /dev/null
+++ b/src/qlever/commands/update.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import shlex
+import time
+import traceback
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+class UpdateCommand(QleverCommand):
+    """
+    Class for executing a SPARQL UPDATE against a SPARQL endpoint.
+
+    The update is given either directly on the command line or via
+    --update-file (the command-line argument wins if both are given).
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return "Send an update to a SPARQL endpoint"
+
+    def should_have_qleverfile(self) -> bool:
+        return False
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"server": ["host_name", "port", "access_token"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "update",
+            type=str,
+            nargs="?",
+            default=None,
+            help="SPARQL UPDATE to send (use --update-file to send from a file)",
+        )
+        subparser.add_argument(
+            "--update-file",
+            type=str,
+            help="Path to a file containing the SPARQL UPDATE to send",
+        )
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+
+    def execute(self, args) -> bool:
+        """Build the `curl` command for the update, show it, and run it."""
+        sparql_endpoint = args.sparql_endpoint or f"{args.host_name}:{args.port}"
+
+        # Quote everything that is interpolated into the shell command, so that
+        # URLs with `&` or `?` and tokens with quotes cannot break or alter it.
+        auth_header = shlex.quote(f"Authorization: Bearer {args.access_token}")
+        curl_cmd = (
+            f"curl -s {shlex.quote(sparql_endpoint)} -X POST "
+            f"-H {auth_header} "
+            f"-H 'Content-Type: application/sparql-update' "
+        )
+
+        if args.update:
+            curl_cmd += f"--data-binary {shlex.quote(args.update)}"
+        elif args.update_file:
+            curl_cmd += f"--data-binary @{shlex.quote(args.update_file)}"
+        else:
+            log.error("No SPARQL UPDATE provided. Pass it as an argument or via --update-file.")
+            return False
+
+        # Show and exit if requested
+        self.show(curl_cmd, only_show=args.show)
+        if args.show:
+            return True
+
+        # Execute update
+        try:
+            start_time = time.time()
+            run_command(curl_cmd)
+            time_msecs = round(1000 * (time.time() - start_time))
+            if args.log_level != "NO_LOG":
+                log.info("")
+                log.info(f"Update processing time (end-to-end): {time_msecs:,d} ms")
+        except Exception as e:
+            if args.log_level == "DEBUG":
+                traceback.print_exc()
+            log.error(e)
+            return False
+
+        return True
+
diff --git a/src/qlever/commands/update_wikidata.py b/src/qlever/commands/update_wikidata.py
new file mode 100644
index 00000000..25650742
--- /dev/null
+++ b/src/qlever/commands/update_wikidata.py
@@ -0,0 +1,1306 @@
+from __future__ import annotations
+
+import glob
+import json
+import logging
+import os
+import re
+import signal
+import time
+from datetime import datetime, timezone
+from enum import Enum, auto
+from pathlib import Path
+from threading import Event
+
+import rdflib.term
+import requests_sse
+from rdflib import Graph
+from termcolor import colored
+from tqdm.contrib.logging import tqdm_logging_redirect
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+# Monkey patch `rdflib.term._castLexicalToPython` to avoid casting of literals
+# to Python types. We do not need it (all we want is to convert Turtle to
+# N-Triples), and we can speed up parsing by a factor of about 2.
+def custom_cast_lexical_to_python(lexical, datatype):
+    return None  # Never produce a Python value, whatever the literal or datatype
+
+
+rdflib.term._castLexicalToPython = custom_cast_lexical_to_python
+
+
+def connect_to_sse_stream(sse_stream_url, since=None, event_id=None):
+    """
+    Open a connection to an SSE stream.
+
+    If `event_id` is given, it is sent JSON-encoded in the `Last-Event-ID`
+    header and `since` is ignored. Otherwise, `since` (if given) is sent
+    as URL parameter of the same name.
+
+    Args:
+        sse_stream_url: URL of the SSE stream
+        since: ISO date string to start from (ignored if `event_id` is set)
+        event_id: Event ID to resume from (takes precedence over `since`)
+
+    Returns:
+        The `requests_sse.EventSource`, after `connect()` was called on it
+    """
+    # Headers common to both ways of specifying the start position.
+    request_headers = {
+        "Accept": "text/event-stream",
+        "User-Agent": "qlever update-wikidata",
+    }
+
+    # `params` is only passed when not resuming from an event ID.
+    extra_kwargs = {}
+    if event_id:
+        request_headers["Last-Event-ID"] = json.dumps(event_id)
+    else:
+        extra_kwargs["params"] = {"since": since} if since else {}
+
+    event_source = requests_sse.EventSource(
+        sse_stream_url, headers=request_headers, **extra_kwargs
+    )
+    event_source.connect()
+    return event_source
+
+
+def get_next_offset_from_endpoint(sparql_endpoint):
+    """Ask the SPARQL endpoint for the next offset of the update stream.
+
+    Args:
+        sparql_endpoint: URL of the SPARQL endpoint
+
+    Returns:
+        int: The offset value from the endpoint
+
+    Raises:
+        Exception: If the query fails or returns no results
+    """
+    # `MAX` yields a single value even if there are several offset triples.
+    offset_query = (
+        "PREFIX wikibase: <http://wikiba.se/ontology#> "
+        "SELECT (MAX(?offset) AS ?maxOffset) WHERE { "
+        "<http://wikiba.se/ontology#Dump> "
+        "wikibase:updateStreamNextOffset ?offset "
+        "}"
+    )
+    # `sed 1d` drops the first line of the output (the CSV header).
+    shell_cmd = (
+        f"curl -s {sparql_endpoint}"
+        f' -H "Accept: text/csv"'
+        f' -H "Content-type: application/sparql-query"'
+        f' --data "{offset_query}"'
+        f" | sed 1d"
+    )
+    csv_value = run_command(shell_cmd, return_output=True).strip()
+    if not csv_value:
+        raise Exception("Query returned no results")
+    return int(csv_value.strip('"'))
+
+
+class UpdateWikidataCommand(QleverCommand):
+    """
+    Class for executing the `update-wikidata` command.
+    """
+
+    def __init__(self):
+        # SPARQL query for the date until which the endpoint's updates are
+        # complete (the latest of the dates matched by the two UNION branches).
+        self.sparql_updates_complete_until_query = (
+            "PREFIX wikibase: <http://wikiba.se/ontology#> "
+            "PREFIX schema: <http://schema.org/> "
+            "SELECT * WHERE { "
+            "{ SELECT (MIN(?date_modified) AS ?updates_complete_until) { "
+            "wikibase:Dump schema:dateModified ?date_modified } } "
+            "UNION { wikibase:Dump wikibase:updatesCompleteUntil ?updates_complete_until } "
+            "} ORDER BY DESC(?updates_complete_until) LIMIT 1"
+        )
+        # URL of the Wikidata SSE stream (default for the `sse_stream_url` argument).
+        self.wikidata_update_stream_url = (
+            "https://stream.wikimedia.org/v2/"
+            "stream/rdf-streaming-updater.mutation.v2"
+        )
+        # Set by `handle_ctrl_c` when Ctrl+C is pressed, so we can exit gracefully.
+        self.ctrl_c_pressed = Event()
+        # Set to `True` when finished.
+        self.finished = False
+
+    def description(self) -> str:
+        return "Update from given SSE stream"  # short summary of this command
+
+    def should_have_qleverfile(self) -> bool:
+        return True  # presumably because the server settings below come from it
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"server": ["host_name", "port", "access_token"]}  # section -> names
+
+    def additional_arguments(self, subparser) -> None:
+        """Add the command-line arguments of this command to `subparser`."""
+        subparser.add_argument(
+            "sse_stream_url",
+            nargs="?",
+            type=str,
+            default=self.wikidata_update_stream_url,
+            help="URL of the SSE stream to update from",
+        )
+        subparser.add_argument(
+            "--batch-size",
+            type=int,
+            default=100000,
+            help="Group up to this many messages together into one update "
+            "(default: 100000); NOTE: this simply concatenates the "
+            "`rdf_added_data` and `rdf_deleted_data` fields, which is not 100%% "
+            "correct; as soon as chaining is supported, this will be fixed",
+        )
+        subparser.add_argument(
+            "--lag-seconds",
+            type=int,
+            default=1,
+            help="When a message is encountered that is within this many "
+            "seconds of the current time, finish the current batch "
+            "(and show a warning that this happened)",
+        )
+        subparser.add_argument(
+            "--since",
+            type=str,
+            help="Consume stream messages since this date "
+            "(default: determine automatically from the SPARQL endpoint)",
+        )
+        subparser.add_argument(
+            "--until",
+            type=str,
+            help="Stop consuming stream messages when reaching this date "
+            "(default: continue indefinitely)",
+        )
+        subparser.add_argument(
+            "--offset",
+            type=int,
+            help="Consume stream messages starting from this offset "
+            "(default: not set)",
+        )
+        subparser.add_argument(
+            "--topic",
+            type=str,
+            default="eqiad.rdf-streaming-updater.mutation",
+            help="The topic to consume from the SSE stream (default: "
+            "eqiad.rdf-streaming-updater.mutation)",
+        )
+        subparser.add_argument(
+            "--partition",
+            type=int,
+            default=0,
+            help="The partition to consume from the SSE stream (default: 0)",
+        )
+        subparser.add_argument(
+            "--wait-between-batches",
+            type=int,
+            default=5,
+            help="Wait this many seconds between batches that were "
+            "finished due to a message that is within `lag_seconds` of "
+            "the current time (default: 5 seconds)",
+        )
+        subparser.add_argument(
+            "--num-messages",
+            type=int,
+            help="Process exactly this many messages and then exit "
+            "(default: no bound on the number of messages)",
+        )
+        subparser.add_argument(
+            "--verbose",
+            choices=["no", "yes"],
+            default="yes",
+            help='Verbose logging, "yes" or "no" (default: "yes")',
+        )
+        subparser.add_argument(
+            "--use-cached-sparql-queries",
+            action="store_true",
+            help="Use cached SPARQL query files if they exist with matching "
+            "offset and target batch size (default: off)",
+        )
+        subparser.add_argument(
+            "--check-offset-before-each-batch",
+            choices=["yes", "no"],
+            default="yes",
+            help="Before each batch, verify that the stream offset matches the "
+            "offset from the endpoint (default: yes)",
+        )
+        subparser.add_argument(
+            "--rewind-to-earlier-offset",
+            choices=["yes", "no"],
+            default="yes",
+            help="When the stream offset is later than the offset from the "
+            "endpoint (e.g., after a server restart), rewind to the endpoint "
+            "offset and reprocess messages (default: yes)",
+        )
+        subparser.add_argument(
+            "--num-retries",
+            type=int,
+            default=10,
+            help="Number of retries for offset verification queries when they fail "
+            "(default: 10)",
+        )
+        subparser.add_argument(
+            "--keep-update-requests",
+            choices=["none", "all", "last", "last-three"],
+            default="last",
+            help="Which update request files (update.*.{sparql,meta,result}) to keep: "
+            "none (delete all), all (keep all), last (keep only the most recent), "
+            "last-three (keep the three most recent) (default: last)",
+        )
+
+    def retry_with_backoff(self, operation, operation_name, max_retries):
+        """
+        Call `operation()` up to `max_retries` times (but at least once) and return
+        its result. After a failed attempt, wait according to the backoff schedule
+        below (in seconds); the wait is cut short by Ctrl+C, which raises
+        `KeyboardInterrupt`. The exception of the last failed attempt is re-raised.
+        """
+        backoff_intervals = [5, 10, 30, 60, 300, 900, 1800, 3600]
+        # Always make at least one attempt; otherwise `max_retries <= 0` would
+        # silently return `None` without ever calling `operation`.
+        num_attempts = max(1, max_retries)
+
+        for attempt in range(num_attempts):
+            try:
+                return operation()
+            except Exception as e:
+                if self.ctrl_c_pressed.is_set():
+                    raise KeyboardInterrupt()
+                if attempt == num_attempts - 1:
+                    # This was the last attempt, re-raise the exception.
+                    raise
+                # Use the appropriate backoff interval (once we get to the end
+                # of the list, keep using the last interval).
+                interval_index = min(attempt, len(backoff_intervals) - 1)
+                retry_delay = backoff_intervals[interval_index]
+                # Show the delay as seconds, minutes, or hours.
+                if retry_delay >= 3600:
+                    delay_str = f"{retry_delay // 3600}h"
+                elif retry_delay >= 60:
+                    delay_str = f"{retry_delay // 60}min"
+                else:
+                    delay_str = f"{retry_delay}s"
+                log.warn(
+                    f"{operation_name} failed (attempt {attempt + 1}/{num_attempts}): {e}. "
+                    f"Retrying in {delay_str} ..."
+                )
+                # `wait` returns `True` if it ended because the flag was set.
+                if self.ctrl_c_pressed.wait(timeout=retry_delay):
+                    raise KeyboardInterrupt()
+
+    def handle_ctrl_c(self, signal_received, frame):
+        """
+        Handler for Ctrl+C: only set the `ctrl_c_pressed` flag, so that the
+        current batch can be finished before exiting; repeated presses are ignored.
+        """
+        if not self.ctrl_c_pressed.is_set():
+            self.ctrl_c_pressed.set()
+
+    def determine_batch_size_for_cached_update(self, offset: int, batch_size: int) -> int | None:
+        """Return the size from the name of a cached `update.<offset>.<size>.sparql`, else `None`."""
+        names = sorted(p.name for p in Path.cwd().glob(f"update.{offset}.*.sparql") if re.fullmatch(r"update\.\d+\.\d+\.sparql", p.name))
+        if not names:
+            log.warn("Found no cached SPARQL update. Continuing with update stream.")
+        elif len(names) > 1:
+            log.warn(f"Found {len(names)} candidates for cached SPARQL update. Using {names[0]}.")
+        return int(names[0].split(".")[2]) if names else None
+
+    def determine_next_cached_update(self, first_offset_in_batch: int, batch_size: int) -> tuple[str, int] | None:
+        """
+        Look for a cached SPARQL update for the batch that starts at
+        `first_offset_in_batch`.
+
+        Returns the name of the cached `.sparql` file together with the batch
+        size taken from that name, or `None` if there is no cached update.
+        """
+        cached_size = self.determine_batch_size_for_cached_update(first_offset_in_batch, batch_size)
+        if cached_size is None:
+            return None
+        stem = f"update.{first_offset_in_batch}.{cached_size}"
+        sparql_file, meta_file = f"{stem}.sparql", f"{stem}.meta"
+
+        # The `.meta` file is only used to add the date range to the log
+        # message below, so a failure to read it is deliberately ignored.
+        date_range = None
+        if os.path.exists(meta_file):
+            try:
+                with open(meta_file, "r") as f:
+                    date_range = f.read().strip()
+            except Exception:
+                pass
+        suffix = f" [date range: {date_range}]" if date_range else ""
+        log.debug(colored(f"Using cached SPARQL query file: {sparql_file}{suffix}", "cyan"))
+        return sparql_file, cached_size
+
+    def execute(self, args) -> bool:
+        # cURL command to get the date until which the updates of the
+        # SPARQL endpoint are complete.
+        sparql_endpoint = f"http://{args.host_name}:{args.port}"
+        curl_cmd_updates_complete_until = (
+            f"curl -s {sparql_endpoint}"
+            f' -H "Accept: text/csv"'
+            f' -H "Content-type: application/sparql-query"'
+            f' --data "{self.sparql_updates_complete_until_query}"'
+        )
+
+        # Construct the command and show it.
+        cmd_description = []
+        if args.since:
+            cmd_description.append(f"SINCE={args.since}")
+        else:
+            cmd_description.append(
+                f"SINCE=$({curl_cmd_updates_complete_until} | sed 1d)"
+            )
+        if args.until:
+            cmd_description.append(f"UNTIL={args.until}")
+        cmd_description.append(
+            f"Process SSE stream from {args.sse_stream_url} "
+            f"in batches of up to {args.batch_size:,} messages "
+        )
+        self.show("\n".join(cmd_description), only_show=args.show)
+        if args.show:
+            return True
+
+        # Compute the `since` date if not given.
+        if args.since:
+            since = args.since
+        else:
+            try:
+                since = run_command(
+                    f"{curl_cmd_updates_complete_until} | sed 1d",
+                    return_output=True,
+                ).strip()
+            except Exception as e:
+                log.error(
+                    f"Error running `{curl_cmd_updates_complete_until}`: {e}"
+                )
+                return False
+
+        # Special handling of Ctrl+C, see `handle_ctrl_c` above.
+        signal.signal(signal.SIGINT, self.handle_ctrl_c)
+        log.warn("Press Ctrl+C to finish and exit gracefully")
+        log.info("")
+
+        # If no `--offset` is provided, try to get the offset from
+        # the endpoint.
+        if args.offset is None:
+            try:
+                args.offset = get_next_offset_from_endpoint(sparql_endpoint)
+                log.info(f"Resuming from offset from endpoint: {args.offset}")
+            except Exception as e:
+                log.debug(
+                    f"Could not retrieve offset from endpoint: {e}. "
+                    f"Will determine offset from date instead."
+                )
+
+        # If the offset was neither provided via `--offset` nor could
+        # be retrieved from the endpoint, determine it by reading a
+        # single message from the SSE stream at the `since` date.
+        if args.offset is None:
+            try:
+                source = self.retry_with_backoff(
+                    lambda: connect_to_sse_stream(
+                        args.sse_stream_url, since=since
+                    ),
+                    "SSE stream connection",
+                    args.num_retries,
+                )
+                offset = None
+                for event in source:
+                    if event.type == "message" and event.data:
+                        event_data = json.loads(event.data)
+                        event_topic = event_data.get("meta").get("topic")
+                        if event_topic == args.topic:
+                            offset = event_data.get("meta").get("offset")
+                            log.debug(
+                                f"Determined offset from date: {since} -> {offset}"
+                            )
+                            break
+                source.close()
+                if offset is None:
+                    raise Exception(
+                        f"No event with topic {args.topic} found in stream"
+                    )
+                args.offset = offset
+            except KeyboardInterrupt:
+                log.warn(
+                    "\rCtrl+C pressed while determine current state, exiting"
+                )
+                return True
+            except Exception as e:
+                log.error(f"Error determining offset from stream: {e}")
+                return False
+
+        # Initialize all the statistics variables.
+        batch_count = 0
+        total_num_messages = 0
+        total_update_time = 0
+        start_time = time.perf_counter()
+        wait_before_next_batch = False
+        event_id_for_next_batch = (
+            [
+                {
+                    "topic": args.topic,
+                    "partition": args.partition,
+                    "offset": args.offset,
+                }
+            ]
+            if args.offset is not None
+            else None
+        )
+
+        # Track whether this is the first batch (to skip offset check)
+        first_batch = True
+
+        # Main event loop: Either resume from `event_id_for_next_batch` (if set),
+        # or start a new connection to `args.sse_stream_url` (with URL
+        # parameter `?since=`).
+        while True:
+            # Optionally wait before processing the next batch (make sure that
+            # the wait is interruptible by Ctrl+C).
+            if wait_before_next_batch:
+                log.info(
+                    f"Waiting {args.wait_between_batches} "
+                    f"second{'s' if args.wait_between_batches > 1 else ''} "
+                    f"before processing the next batch"
+                )
+                log.info("")
+                wait_before_next_batch = False
+                self.ctrl_c_pressed.wait(args.wait_between_batches)
+            if self.ctrl_c_pressed.is_set():
+                log.warn(
+                    "\rCtrl+C pressed while waiting in between batches, "
+                    "exiting"
+                )
+                break
+
+            # Start stream from either `event_id_for_next_batch` or `since`.
+            # We'll extract the offset for first_offset_in_batch later.
+            if event_id_for_next_batch:
+                event_id_json = json.dumps(event_id_for_next_batch)
+                if args.verbose == "yes":
+                    log.info(
+                        colored(
+                            f"Consuming stream from event ID: {event_id_json}",
+                            attrs=["dark"],
+                        )
+                    )
+            else:
+                if args.verbose == "yes":
+                    log.info(
+                        colored(
+                            f"Consuming stream from date: {since}",
+                            attrs=["dark"],
+                        )
+                    )
+
+            # Connect to the SSE stream with retry logic
+            try:
+                source = self.retry_with_backoff(
+                    lambda: connect_to_sse_stream(
+                        args.sse_stream_url,
+                        since=since if not event_id_for_next_batch else None,
+                        event_id=event_id_for_next_batch,
+                    ),
+                    "SSE stream connection for batch processing",
+                    args.num_retries,
+                )
+            except KeyboardInterrupt:
+                log.warn(
+                    "\rCtrl+C pressed while while connecting to stream, "
+                    "exiting"
+                )
+                break
+            except Exception as e:
+                log.error(
+                    f"Failed to connect to SSE stream after "
+                    f"{args.num_retries} retry attempts, last error: {e}"
+                )
+                break
+
+            # Next comes the inner loop, which processes exactly one "batch" of
+            # messages. The batch is completed (simply using `break`) when either
+            # `args.batch_size` messages have been processed, or when one of a
+            # variety of conditions occur (Ctrl+C pressed, message within
+            # `args.lag_seconds` of current time, delete operation followed by
+            # insert of triple with that entity as subject).
+
+            # Initialize all the batch variables.
+            current_batch_size = 0
+            # Extract the offset from the event ID to use as the starting offset
+            # for this batch. This is set before processing any messages.
+            if event_id_for_next_batch:
+                first_offset_in_batch = event_id_for_next_batch[0]["offset"]
+                event_id_for_next_batch = None
+            else:
+                # This should not happen since we now always determine the offset
+                # before starting, but keep as fallback
+                first_offset_in_batch = None
+
+            # Check that the stream offset matches the offset from the
+            # endpoint, unless disabled or this is the first batch.
+            if (
+                args.check_offset_before_each_batch == "yes"
+                and not first_batch
+                and first_offset_in_batch is not None
+            ):
+                # Verify offset with retry logic
+                try:
+                    endpoint_offset = self.retry_with_backoff(
+                        lambda: get_next_offset_from_endpoint(sparql_endpoint),
+                        "Offset verification",
+                        args.num_retries,
+                    )
+                except KeyboardInterrupt:
+                    log.warn(
+                        "\rCtrl+C pressed while while verifying state, exiting"
+                    )
+                    break
+                except Exception as e:
+                    log.error(
+                        f"Failed to retrieve offset from endpoint "
+                        f"after {args.num_retries} retries: {e}. "
+                        f"This might be the first update, or the offset triple is missing."
+                    )
+                    return False
+
+                if endpoint_offset < first_offset_in_batch:
+                    # Stream offset is LATER than endpoint offset
+                    if args.rewind_to_earlier_offset == "yes":
+                        log.info(
+                            colored(
+                                f"Stream offset {first_offset_in_batch} is later "
+                                f"than offset {endpoint_offset} from endpoint; "
+                                f"this can happen after a server restart; "
+                                f"rewinding to offset {endpoint_offset} from endpoint",
+                                "cyan",
+                            )
+                        )
+                        log.info("")
+                        # Reconnect from the endpoint offset
+                        event_id_for_next_batch = [
+                            {
+                                "topic": args.topic,
+                                "partition": args.partition,
+                                "offset": endpoint_offset,
+                            }
+                        ]
+                        continue  # Skip this batch and reconnect
+                    else:
+                        log.error(
+                            f"Offset mismatch: stream offset {first_offset_in_batch} "
+                            f"is later than offset {endpoint_offset} from endpoint; "
+                            f"rewind disabled by --rewind-to-earlier-offset=no"
+                        )
+                        return False
+                elif endpoint_offset > first_offset_in_batch:
+                    # Stream offset is EARLIER than endpoint offset - this is bad
+                    log.error(
+                        f"Offset mismatch: stream offset {first_offset_in_batch} "
+                        f"is earlier than offset {endpoint_offset} from endpoint; "
+                        f"this indicates that updates may have been applied "
+                        f"out of order or some updates are missing"
+                    )
+                    return False
+
+            date_list = []
+            delete_entity_ids = set()
+            delta_to_now_list = []
+            batch_assembly_start_time = time.perf_counter()
+            insert_triples = set()
+            delete_triples = set()
+
+            # Check if we can use a cached SPARQL query file
+            use_cached_file = False
+            cached_file_name = None
+            if (
+                args.use_cached_sparql_queries
+                and first_offset_in_batch is not None
+            ):
+                cached_update = self.determine_next_cached_update(first_offset_in_batch,
+                                                                     args.batch_size)
+                if cached_update is not None:
+                    cached_file_name, current_batch_size = cached_update
+                    use_cached_file = True
+
+            # Process one event at a time (unless using cached file).
+            if not use_cached_file:
+                with tqdm_logging_redirect(
+                    loggers=[logging.getLogger("qlever")],
+                    desc="Batch",
+                    total=args.batch_size,
+                    leave=False,
+                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}{postfix}",
+                ) as pbar:
+                    for event in source:
+                        # Skip events that are not of type `message` (should not
+                        # happen), have no field `data` (should not happen either), or
+                        # whose topic differs from `args.topic` (one topic by itself
+                        # should provide all relevant updates).
+                        if event.type != "message" or not event.data:
+                            continue
+                        event_data = json.loads(event.data)
+                        topic = event_data.get("meta").get("topic")
+                        if topic != args.topic:
+                            continue
+
+                        try:
+                            # Extract offset, topic, and partition from the message metadata
+                            # to construct a precise event ID for resuming.
+                            meta = event_data.get("meta")
+                            offset = meta.get("offset")
+                            topic = meta.get("topic")
+                            partition = meta.get("partition")
+
+                            # Get the date (rounded *down* to seconds).
+                            date = meta.get("dt")
+                            date = re.sub(r"\.\d*Z$", "Z", date)
+
+                            # Get the other relevant fields from the message.
+                            entity_id = event_data.get("entity_id")
+                            operation = event_data.get("operation")
+                            rdf_added_data = event_data.get("rdf_added_data")
+                            rdf_deleted_data = event_data.get(
+                                "rdf_deleted_data"
+                            )
+                            rdf_linked_shared_data = event_data.get(
+                                "rdf_linked_shared_data"
+                            )
+                            # rdf_unlinked_shared_data = event_data.get(
+                            #     "rdf_unlinked_shared_data"
+                            # )
+
+                            # Check batch completion conditions BEFORE processing the
+                            # data of this message. If any of the conditions is met,
+                            # we finish the batch and resume from the LAST PROCESSED
+                            # message (not the current one that triggered the break).
+                            #
+                            # NOTE: We will update event_id_for_next_batch AFTER
+                            # successfully processing each message (see below), so that
+                            # when we break, it contains the last processed event ID.
+                            since = None
+
+                            # Condition 1: Delete followed by insert for same entity.
+                            operation_adds_data = (
+                                rdf_added_data is not None
+                                or rdf_linked_shared_data is not None
+                            )
+                            if (
+                                operation_adds_data
+                                and entity_id in delete_entity_ids
+                            ):
+                                if args.verbose == "yes":
+                                    log.warn(
+                                        f"Encountered operation that adds data for "
+                                        f"an entity ID ({entity_id}) that was deleted "
+                                        f"earlier in this batch; finishing batch and "
+                                        f"resuming from this message in the next batch"
+                                    )
+                                break
+
+                            # Condition 2: Batch size or limit on number of
+                            # messages reached.
+                            if current_batch_size >= args.batch_size or (
+                                args.num_messages is not None
+                                and total_num_messages >= args.num_messages
+                            ):
+                                break
+
+                            # Condition 3: Message close to current time.
+                            date_obj = datetime.strptime(
+                                date, "%Y-%m-%dT%H:%M:%SZ"
+                            ).replace(tzinfo=timezone.utc)
+                            date_as_epoch_s = date_obj.timestamp()
+
+                            now_as_epoch_s = time.time()
+                            delta_to_now_s = now_as_epoch_s - date_as_epoch_s
+                            if (
+                                delta_to_now_s < args.lag_seconds
+                                and current_batch_size > 0
+                            ):
+                                if args.verbose == "yes":
+                                    log.warn(
+                                        f"Encountered message with date {date}, which is within "
+                                        f"{args.lag_seconds} "
+                                        f"second{'s' if args.lag_seconds > 1 else ''} "
+                                        f"of the current time, finishing the current batch"
+                                    )
+                                wait_before_next_batch = (
+                                    args.wait_between_batches is not None
+                                    and args.wait_between_batches > 0
+                                )
+                                break
+
+                            # Condition 4: Reached `--until` date and at least one
+                            # message was processed.
+                            if (
+                                args.until
+                                and date >= args.until
+                                and current_batch_size > 0
+                            ):
+                                log.warn(
+                                    f"Reached --until date {args.until} "
+                                    f"(message date: {date}), that's it folks"
+                                )
+                                self.finished = True
+                                break
+
+                            # Delete operations are postponed until the end of the
+                            # batch, so remember the entity ID here.
+                            if operation == "delete":
+                                delete_entity_ids.add(entity_id)
+
+                            # Replace each occurrence of `\\` by `\u005C\u005C`
+                            # (which is twice the Unicode for backslash).
+                            #
+                            # NOTE: Strictly speaking, it would be enough to do
+                            # this for two backslashes followed by a `u`, but
+                            # doing it for all double backslashes does no
+                            # harm. When parsing a SPARQL query, according
+                            # to the standard, all occurrences of `\uxxxx`
+                            # (where `xxxx` are four hex digits) are first replaced
+                            # by the corresponding Unicode character. That is a
+                            # problem when `\\uxxxx` occurs in a literal,
+                            # because then it would be replaced by `\` followed
+                            # by the Unicode character, which is invalid
+                            # SPARQL. The substitution avoids that problem.
+                            def node_to_sparql(node: rdflib.term.Node) -> str:
+                                # N3 form of `node`, each double backslash rewritten.
+                                n3_form = node.n3()
+                                return n3_form.replace("\\\\", "\\u005C\\u005C")
+
+                            # Process the to-be-deleted triples.
+                            #
+                            # NOTE: The triples from `rdf_unlinked_shared_data`
+                            # must not be deleted, because they are only
+                            # unlinked from the current entity, but may still
+                            # be linked from other entities. If they are not
+                            # linked from any other entity, they will be
+                            # orphaned, but we don't mind that.
+                            for rdf_to_be_deleted in (rdf_deleted_data,):
+                                if rdf_to_be_deleted is not None:
+                                    try:
+                                        rdf_to_be_deleted_data = (
+                                            rdf_to_be_deleted.get("data")
+                                        )
+                                        graph = Graph()
+                                        log.debug(
+                                            f"RDF to_be_deleted data: {rdf_to_be_deleted_data}"
+                                        )
+                                        graph.parse(
+                                            data=rdf_to_be_deleted_data,
+                                            format="turtle",
+                                        )
+                                        for s, p, o in graph:
+                                            triple = f"{s.n3()} {p.n3()} {node_to_sparql(o)}"
+                                            # NOTE: In case there was a previous `insert` of that
+                                            # triple, it is safe to remove that `insert`, but not
+                                            # the `delete` (in case the triple is contained in the
+                                            # original data).
+                                            if triple in insert_triples:
+                                                insert_triples.remove(triple)
+                                            delete_triples.add(triple)
+                                    except Exception as e:
+                                        log.error(
+                                            f"Error reading `rdf_to_be_deleted_data`: {e}"
+                                        )
+                                        return False
+
+                            # Process the to-be-added triples.
+                            for rdf_to_be_added in (
+                                rdf_added_data,
+                                rdf_linked_shared_data,
+                            ):
+                                if rdf_to_be_added is not None:
+                                    try:
+                                        rdf_to_be_added_data = (
+                                            rdf_to_be_added.get("data")
+                                        )
+                                        graph = Graph()
+                                        log.debug(  # f-string, like the delete path
+                                            f"RDF to be added data: {rdf_to_be_added_data}"
+                                        )
+                                        graph.parse(
+                                            data=rdf_to_be_added_data,
+                                            format="turtle",
+                                        )
+                                        for s, p, o in graph:
+                                            triple = f"{s.n3()} {p.n3()} {node_to_sparql(o)}"
+                                            # NOTE: In case there was a previous `delete` of that
+                                            # triple, it is safe to remove that `delete`, but not
+                                            # the `insert` (in case the triple is not contained in
+                                            # the original data).
+                                            if triple in delete_triples:
+                                                delete_triples.remove(triple)
+                                            insert_triples.add(triple)
+                                    except Exception as e:
+                                        log.error(
+                                            f"Error reading `rdf_to_be_added_data`: {e}"
+                                        )
+                                        return False
+
+                        except Exception as e:
+                            log.error(f"Error reading data from message: {e}")
+                            log.info(event)
+                            continue
+
+                        # Message was successfully processed, update batch tracking
+                        current_batch_size += 1
+                        total_num_messages += 1
+                        pbar_update_frequency = 100
+                        if (current_batch_size % pbar_update_frequency) == 0:
+                            pbar.set_postfix(
+                                {
+                                    "Time": date_obj.strftime(
+                                        "%Y-%m-%d %H:%M:%S"
+                                    )
+                                }
+                            )
+                            pbar.update(pbar_update_frequency)
+                        log.debug(
+                            f"DATE: {date_as_epoch_s:.0f} [{date}], "
+                            f"NOW: {now_as_epoch_s:.0f}, "
+                            f"DELTA: {now_as_epoch_s - date_as_epoch_s:.0f}"
+                        )
+                        date_list.append(date)
+                        delta_to_now_list.append(delta_to_now_s)
+
+                        # Update the event ID for the next batch. We increment the
+                        # offset by 1 so that the next batch starts with the next
+                        # message (not re-processing the current one).
+                        event_id_for_next_batch = [
+                            {
+                                "topic": topic,
+                                "partition": partition,
+                                "offset": offset + 1,
+                            }
+                        ]
+
+                        # Ctrl+C finishes the current batch (this should come at the
+                        # end of the inner event loop so that always at least one
+                        # message is processed).
+                        if self.ctrl_c_pressed.is_set():
+                            log.warn(
+                                "\rCtrl+C pressed while processing a batch, "
+                                "finishing it and exiting"
+                            )
+                            break
+            else:
+                # Using cached file - set batch size and calculate next offset
+                total_num_messages += current_batch_size
+                event_id_for_next_batch = [
+                    {
+                        "topic": args.topic,
+                        "partition": args.partition,
+                        "offset": first_offset_in_batch + current_batch_size,
+                    }
+                ]
+
+            # Process the current batch of messages (or skip if using cached).
+            batch_count += 1
+            if not use_cached_file:
+                batch_assembly_end_time = time.perf_counter()
+                batch_assembly_time_ms = int(
+                    1000
+                    * (batch_assembly_end_time - batch_assembly_start_time)
+                )
+                date_list.sort()
+                delta_to_now_list.sort()
+                min_delta_to_now_s = delta_to_now_list[0]
+                if min_delta_to_now_s < 10:
+                    min_delta_to_now_s = f"{min_delta_to_now_s:.1f}"
+                else:
+                    min_delta_to_now_s = f"{int(min_delta_to_now_s):,}"
+                log.info(
+                    f"Assembled batch #{batch_count}, "
+                    f"#messages: {current_batch_size:2,}, "
+                    f"date range: {date_list[0]} - {date_list[-1]}  "
+                    f"[assembly time: {batch_assembly_time_ms:3,}ms, "
+                    f"min delta to NOW: {min_delta_to_now_s}s]"
+                )
+
+                # Add the triples `wikibase:Dump wikibase:updatesCompleteUntil
+                # DATE` and `wikibase:Dump wikibase:updateStreamNextOffset
+                # OFFSET`.
+                insert_triples.add(
+                    f"<http://wikiba.se/ontology#Dump> "
+                    f"<http://wikiba.se/ontology#updatesCompleteUntil> "
+                    f'"{date_list[-1]}"'
+                    f"^^<http://www.w3.org/2001/XMLSchema#dateTime>"
+                )
+                insert_triples.add(
+                    "<http://wikiba.se/ontology#Dump> "
+                    "<http://wikiba.se/ontology#updateStreamNextOffset> "
+                    f'"{event_id_for_next_batch[0]["offset"]}"'
+                )
+
+                # Construct UPDATE operation.
+                delete_block = " . \n  ".join(delete_triples)
+                insert_block = " . \n  ".join(insert_triples)
+                delete_insert_operation = (
+                    f"DELETE {{\n  {delete_block} \n}} "
+                    f"INSERT {{\n  {insert_block} \n}} "
+                    f"WHERE {{ }}\n"
+                )
+
+                # If `delete_entity_ids` is non-empty, add a `DELETE WHERE`
+                # operation that deletes all triples that are associated with only
+                # those entities.
+                delete_entity_ids_as_values = " ".join(
+                    [f"wd:{qid}" for qid in delete_entity_ids]
+                )
+                if len(delete_entity_ids) > 0:
+                    delete_where_operation = (
+                        f"PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
+                        f"PREFIX wikibase: <http://wikiba.se/ontology#>\n"
+                        f"PREFIX wd: <http://www.wikidata.org/entity/>\n"
+                        f"DELETE {{\n"
+                        f"  ?s ?p ?o .\n"
+                        f"}} WHERE {{\n"
+                        f"  {{\n"
+                        f"    VALUES ?s {{ {delete_entity_ids_as_values} }}\n"
+                        f"    ?s ?p ?o .\n"
+                        f"  }} UNION {{\n"
+                        f"    VALUES ?_1 {{ {delete_entity_ids_as_values} }}\n"
+                        f"    ?_1 ?_2 ?s .\n"
+                        f"    ?s ?p ?o .\n"
+                        f"    ?s rdf:type wikibase:Statement .\n"
+                        f"  }}\n"
+                        f"}}\n"
+                    )
+                    delete_insert_operation += ";\n" + delete_where_operation
+
+            # Construct curl command. The operation is always read from a file
+            # (cached, or written below) and sent via `--data-binary`.
+            curl_cmd = (
+                f"curl -s -X POST"
+                f' "{sparql_endpoint}?access-token={args.access_token}"'
+                f" -H 'Content-Type: application/sparql-update'"
+            )
+            if use_cached_file:
+                # Use the cached file instead of writing a new one
+                update_arg_file_name = cached_file_name
+            else:
+                # Write the constructed SPARQL update to a file
+                update_arg_file_name = f"update.{first_offset_in_batch}.{current_batch_size}.sparql"
+                with open(update_arg_file_name, "w") as f:
+                    f.write(delete_insert_operation)
+                # Write metadata file with date range
+                meta_file_name = (
+                    f"update.{first_offset_in_batch}.{current_batch_size}.meta"
+                )
+                with open(meta_file_name, "w") as f:
+                    f.write(f"{date_list[0]} - {date_list[-1]}")
+            curl_cmd += f" --data-binary @{update_arg_file_name}"
+            if args.verbose == "yes":
+                log.info(colored(curl_cmd, "blue"))
+
+            # Send the UPDATE request. If it fails, reset to the beginning
+            # of this batch and retry in the next iteration of the outer
+            # loop. If this was a transient error, this makes sure that the
+            # batch is re-assembled and not lost. If the server has
+            # restarted, the offset check at the beginning of the next
+            # iteration will detect the mismatch and rewind.
+            try:
+                result = run_command(curl_cmd, return_output=True)
+            except Exception:
+                if self.ctrl_c_pressed.is_set():
+                    log.warn(
+                        "\r  \nCtrl+C pressed while executing update, exiting"
+                    )
+                    return True
+                else:
+                    log.warn(
+                        "\r  \nUpdate request failed; will reconnect and retry"
+                    )
+                    event_id_for_next_batch = [
+                        {
+                            "topic": args.topic,
+                            "partition": args.partition,
+                            "offset": first_offset_in_batch,
+                        }
+                    ]
+                    continue
+            result_file_name = (
+                f"update.{first_offset_in_batch}.{current_batch_size}.result"
+            )
+            with open(result_file_name, "w") as f:
+                f.write(result)
+
+            # Clean up old update request files according to --keep-update-requests
+            if args.keep_update_requests != "all":
+                # Find all update.*.{sparql,meta,result} files
+                update_files = {}
+                for ext in ["sparql", "meta", "result"]:
+                    for file_path in glob.glob(f"update.*.*.{ext}"):
+                        # Extract offset from filename (update.OFFSET.SIZE.ext)
+                        parts = Path(file_path).stem.split(".")
+                        if len(parts) >= 3:
+                            offset = parts[1]
+                            if offset not in update_files:
+                                update_files[offset] = []
+                            update_files[offset].append(file_path)
+
+                # Sort by offset (newest last)
+                sorted_offsets = sorted(
+                    update_files.keys(), key=lambda x: int(x)
+                )
+
+                # Determine which to keep
+                if args.keep_update_requests == "none":
+                    files_to_keep = []
+                elif args.keep_update_requests == "last":
+                    files_to_keep = (
+                        update_files[sorted_offsets[-1]]
+                        if sorted_offsets
+                        else []
+                    )
+                elif args.keep_update_requests == "last-three":
+                    files_to_keep = []
+                    for offset in sorted_offsets[-3:]:
+                        files_to_keep.extend(update_files[offset])
+
+                # Delete files not in the keep list
+                for offset, files in update_files.items():
+                    for file_path in files:
+                        if file_path not in files_to_keep:
+                            try:
+                                os.remove(file_path)
+                            except Exception:
+                                pass  # Ignore errors during cleanup
+
+            # Results should be a JSON, parse it.
+            try:
+                result = json.loads(result)
+            except Exception as e:
+                log.error(
+                    f"Error parsing JSON result: {e}. "
+                    f"The first 1000 characters are: {result[:1000]}"
+                )
+                return False
+
+            # Check if the result contains a QLever exception.
+            if "exception" in result:
+                error_msg = result["exception"]
+                log.error(f"QLever exception: {error_msg}")
+                log.info("")
+                continue
+
+            # Helper function for getting the value of `stats["time"][...]`
+            # without the "ms" suffix. If the extraction fails, return 0
+            # (and optionally log the failure) or re-raise the exception,
+            # depending on `failure_mode`.
+            class FailureMode(Enum):  # what `get_time_ms` does on failure
+                LOG_ERROR = auto()  # log an error, then return 0
+                SILENTLY_RETURN_ZERO = auto()  # return 0 without logging
+                THROW_EXCEPTION = auto()  # re-raise the caught exception
+
+            def get_time_ms(
+                stats, *keys: str, failure_mode=FailureMode.LOG_ERROR
+            ) -> int:
+                """Return stats["time"][*keys] as int; on failure, re-raise or give 0."""
+                try:
+                    node = stats["time"]
+                    for key in keys:
+                        node = node[key]
+                    return int(node)
+                except Exception:
+                    if failure_mode == FailureMode.THROW_EXCEPTION:
+                        raise
+                    if failure_mode == FailureMode.LOG_ERROR:
+                        log.error(
+                            f"Error extracting time from JSON statistics, "
+                            f"keys: {keys}"
+                        )
+                    return 0
+
+            # Check for old JSON format (no `operations` or `time` on top level).
+            old_json_message_template = (
+                "Result JSON does not contain `{}` field, you are "
+                "probably using an old version of QLever"
+            )
+            for field in ["operations", "time"]:
+                if field not in result:
+                    raise RuntimeError(old_json_message_template.format(field))
+
+            # Get the per-operation statistics.
+            for i, stats in enumerate(result["operations"]):
+                try:
+                    ins_after = stats["delta-triples"]["after"]["inserted"]
+                    del_after = stats["delta-triples"]["after"]["deleted"]
+                    ops_after = stats["delta-triples"]["after"]["total"]
+                    num_ins = int(
+                        stats["delta-triples"]["operation"]["inserted"]
+                    )
+                    num_del = int(
+                        stats["delta-triples"]["operation"]["deleted"]
+                    )
+                    num_ops = int(stats["delta-triples"]["operation"]["total"])
+                    time_op_total = get_time_ms(stats, "total")
+                    time_us_per_op = (
+                        int(1000 * time_op_total / num_ops)
+                        if num_ops > 0
+                        else 0
+                    )
+                    if args.verbose == "yes":
+                        log.info(
+                            colored(
+                                f"TRIPLES: {num_ops:+10,} -> {ops_after:10,}, "
+                                f"INS: {num_ins:+10,} -> {ins_after:10,}, "
+                                f"DEL: {num_del:+10,} -> {del_after:10,}, "
+                                f"TIME: {time_op_total:7,}ms, "
+                                f"TIME/TRIPLE: {time_us_per_op:6,}µs",
+                                attrs=["bold"],
+                            )
+                        )
+
+                    time_planning = get_time_ms(stats, "planning")
+                    time_compute_ids = get_time_ms(
+                        stats,
+                        "execution",
+                        "computeIds",
+                        "total",
+                    )
+                    time_where = get_time_ms(
+                        stats,
+                        "execution",
+                        "evaluateWhere",
+                    )
+                    time_metadata = get_time_ms(
+                        stats,
+                        "updateMetadata",
+                    )
+                    time_insert = get_time_ms(
+                        stats,
+                        "execution",
+                        "insertTriples",
+                        "total",
+                        failure_mode=FailureMode.SILENTLY_RETURN_ZERO,
+                    )
+                    time_delete = get_time_ms(
+                        stats,
+                        "execution",
+                        "deleteTriples",
+                        "total",
+                        failure_mode=FailureMode.SILENTLY_RETURN_ZERO,
+                    )
+                    time_unaccounted = time_op_total - (
+                        time_planning
+                        + time_compute_ids
+                        + time_where
+                        + time_metadata
+                        + time_delete
+                        + time_insert
+                    )
+                    if args.verbose == "yes":
+                        log.info(
+                            f"METADATA: {100 * time_metadata / time_op_total:2.0f}%, "
+                            f"PLANNING: {100 * time_planning / time_op_total:2.0f}%, "
+                            f"WHERE: {100 * time_where / time_op_total:2.0f}%, "
+                            f"IDS: {100 * time_compute_ids / time_op_total:2.0f}%, "
+                            f"DELETE: {100 * time_delete / time_op_total:2.0f}%, "
+                            f"INSERT: {100 * time_insert / time_op_total:2.0f}%, "
+                            f"UNACCOUNTED: {100 * time_unaccounted / time_op_total:2.0f}%",
+                        )
+
+                except Exception as e:
+                    log.warn(
+                        f"Error extracting statistics: {e}, "
+                        f"curl command was: {curl_cmd}"
+                    )
+                    # Show traceback for debugging.
+                    import traceback
+
+                    traceback.print_exc()
+                    log.info("")
+                    continue
+
+            # Get times for the whole request (not per operation).
+            time_parsing = get_time_ms(
+                result,
+                "parsing",
+            )
+            time_metadata = get_time_ms(
+                result,
+                "metadataUpdateForSnapshot",
+            )
+            time_snapshot = get_time_ms(
+                result,
+                "snapshotCreation",
+            )
+            time_writeback = get_time_ms(
+                result,
+                "diskWriteback",
+            )
+            time_operations = get_time_ms(
+                result,
+                "operations",
+            )
+            time_total = get_time_ms(
+                result,
+                "total",
+            )
+            time_unaccounted = time_total - (
+                time_parsing
+                + time_metadata
+                + time_snapshot
+                + time_writeback
+                + time_operations
+            )
+
+            # Update the totals.
+            total_update_time += time_total / 1000.0
+            total_elapsed_time = time.perf_counter() - start_time
+
+            # Show statistics for the completed batch.
+            if args.verbose == "yes":
+                log.info(
+                    colored(
+                        f"TOTAL UPDATE TIME SO FAR: {total_update_time:4.0f}s, "
+                        f"TOTAL ELAPSED TIME SO FAR: {total_elapsed_time:4.0f}s, "
+                        f"TOTAL TIME FOR THIS UPDATE REQUEST: {time_total:7,}ms, ",
+                        attrs=["bold"],
+                    )
+                )
+                log.info(  # `max(..., 1)` avoids ZeroDivisionError if total is 0
+                    f"PARSING: {100 * time_parsing / max(time_total, 1):2.0f}%, "
+                    f"OPERATIONS: {100 * time_operations / max(time_total, 1):2.0f}%, "
+                    f"METADATA: {100 * time_metadata / max(time_total, 1):2.0f}%, "
+                    f"SNAPSHOT: {100 * time_snapshot / max(time_total, 1):2.0f}%, "
+                    f"WRITEBACK: {100 * time_writeback / max(time_total, 1):2.0f}%, "
+                    f"UNACCOUNTED: {100 * time_unaccounted / max(time_total, 1):2.0f}%",
+                )
+                log.info("")
+
+            # Close the source connection (for each batch, we open a new one,
+            # either from `event_id_for_next_batch` or from `since`).
+            source.close()
+
+            # After the first batch is processed, enable offset checking for
+            # subsequent batches.
+            first_batch = False
+
+            # If Ctrl+C was pressed, we reached `--until`, or we processed
+            # exactly `--num-messages`, finish.
+            if (
+                self.ctrl_c_pressed.is_set()
+                or self.finished
+                or (
+                    args.num_messages is not None
+                    and total_num_messages >= args.num_messages
+                )
+            ):
+                break
+
+        # Final message after all batches have been processed.
+        log.info(
+            f"Processed {batch_count} "
+            f"{'batches' if batch_count > 1 else 'batch'} "
+            f"terminating update command"
+        )
+        return True
diff --git a/src/qlever/commands/warmup.py b/src/qlever/commands/warmup.py
index 49150262..12d2c376 100644
--- a/src/qlever/commands/warmup.py
+++ b/src/qlever/commands/warmup.py
@@ -20,7 +20,7 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {"server": ["port", "warmup_cmd"]}
 
     def additional_arguments(self, subparser) -> None:
diff --git a/src/qlever/config.py b/src/qlever/config.py
index a9b4bdf5..dd6d48a1 100644
--- a/src/qlever/config.py
+++ b/src/qlever/config.py
@@ -220,4 +220,14 @@ def add_qleverfile_option(parser):
                             "arguments on the command line. This is possible, "
                             "but not recommended.")
 
+        # Warn if the old binary names are still being used.
+        if "IndexBuilderMain" in getattr(args, "index_binary", ""):
+            log.warning("The index binary has been renamed from "
+                        "`IndexBuilderMain` to `qlever-index`. Please update "
+                        "your Qleverfile or other configuration.")
+        if "ServerMain" in getattr(args, "server_binary", ""):
+            log.warning("The server binary has been renamed from "
+                        "`ServerMain` to `qlever-server`. Please update "
+                        "your Qleverfile or other configuration.")
+
         return args
diff --git a/src/qlever/containerize.py b/src/qlever/containerize.py
index 8fb12343..11150ba7 100644
--- a/src/qlever/containerize.py
+++ b/src/qlever/containerize.py
@@ -66,7 +66,12 @@ def containerize_command(
 
         # Options for mounting volumes, setting ports, and setting the working
         # dir.
-        volume_options = "".join([f" -v {v1}:{v2}" for v1, v2 in volumes])
+        volume_options = "".join(
+            [
+                f' --mount type=bind,src="{v1}",target={v2}'
+                for v1, v2 in volumes
+            ]
+        )
         port_options = "".join([f" -p {p1}:{p2}" for p1, p2 in ports])
         working_directory_option = (
             f" -w {working_directory}" if working_directory is not None else ""
@@ -97,7 +102,8 @@ def is_running(container_system: str, container_name: str) -> bool:
         # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively.
         containers = (
             run_command(
-                f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True
+                f'{container_system} ps --format="{{{{.Names}}}}"',
+                return_output=True,
             )
             .strip()
             .splitlines()
@@ -105,7 +111,9 @@ def is_running(container_system: str, container_name: str) -> bool:
         return container_name in containers
 
     @staticmethod
-    def stop_and_remove_container(container_system: str, container_name: str) -> bool:
+    def stop_and_remove_container(
+        container_system: str, container_name: str
+    ) -> bool:
         """
         Stop the container with the given name using the given system. Return
         `True` if a container with that name was found and stopped, `False`
diff --git a/src/qlever/qlever_main.py b/src/qlever/qlever_main.py
index 5a16d5fa..c9d76fae 100644
--- a/src/qlever/qlever_main.py
+++ b/src/qlever/qlever_main.py
@@ -40,8 +40,7 @@ def main():
         if not command_successful:
             exit(1)
     except KeyboardInterrupt:
-        log.info("")
-        log.info("Ctrl-C pressed, exiting ...")
+        log.warning("\rCtrl-C pressed, exiting ...")
         log.info("")
         exit(1)
     except Exception as e:
diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py
index 803b971c..83ed8794 100644
--- a/src/qlever/qleverfile.py
+++ b/src/qlever/qleverfile.py
@@ -21,6 +21,41 @@ class Qleverfile:
     Qleverfile + functions for parsing.
     """
 
+    # Runtime parameters (for `settings` and `start` commands).
+    SERVER_RUNTIME_PARAMETERS = [
+        "cache-max-num-entries",
+        "cache-max-size",
+        "cache-max-size-single-entry",
+        "cache-service-results",
+        "default-query-timeout",
+        "division-by-zero-is-undef",
+        "enable-distributive-union",
+        "enable-prefilter-on-index-scans",
+        "group-by-disable-index-scan-optimizations",
+        "group-by-hash-map-enabled",
+        "lazy-index-scan-max-size-materialization",
+        "lazy-index-scan-num-threads",
+        "lazy-index-scan-queue-size",
+        "lazy-result-max-cache-size",
+        "permutation-writer-num-threads",
+        "query-planning-budget",
+        "request-body-limit",
+        "service-allowed-iri-prefixes",
+        "service-max-redirects",
+        "service-max-value-rows",
+        "sort-estimate-cancellation-factor",
+        "sort-in-memory-threshold",
+        "sparql-results-json-with-time",
+        "spatial-join-prefilter-max-size",
+        "spatial-join-max-num-threads",
+        "strip-columns",
+        "syntax-test-mode",
+        "throw-on-unbound-variables",
+        "treat-default-graph-as-named-graph",
+        "use-binsearch-transitive-path",
+        "websocket-updates-enabled",
+    ]
+
     @staticmethod
     def all_arguments():
         """
@@ -61,7 +96,7 @@ def arg(*args, **kwargs):
             "--text-description",
             type=str,
             default=None,
-            help="A concise description of the additional text data" " if any",
+            help="A concise description of the additional text data if any",
         )
         data_args["format"] = arg(
             "--format",
@@ -108,6 +143,15 @@ def arg(*args, **kwargs):
             default="{}",
             help="The `.settings.json` file for the index",
         )
+        index_args["materialized_views"] = arg(
+            "--materialized-views",
+            type=str,
+            default=None,
+            help="JSON to specify materialized views to be created at the "
+            'end of the index build, of the form `{ "view_name": '
+            '"SPARQL query", ... }`; default: do not create any '
+            "materialized views",
+        )
         index_args["ulimit"] = arg(
             "--ulimit",
             type=int,
@@ -116,10 +160,24 @@ def arg(*args, **kwargs):
             "files (default: 1048576 when the total size of the input files "
             "is larger than 10 GB)",
         )
+        index_args["vocabulary_type"] = arg(
+            "--vocabulary-type",
+            type=str,
+            choices=[
+                "on-disk-compressed",
+                "on-disk-uncompressed",
+                "in-memory-compressed",
+                "in-memory-uncompressed",
+                "on-disk-compressed-geo-split",
+            ],
+            default="on-disk-compressed",
+            help="The type of the vocabulary to use for the index "
+            " (default: `on-disk-compressed`)",
+        )
         index_args["index_binary"] = arg(
             "--index-binary",
             type=str,
-            default="IndexBuilderMain",
+            default="qlever-index",
             help="The binary for building the index (this requires "
             "that you have compiled QLever on your machine)",
         )
@@ -137,6 +195,14 @@ def arg(*args, **kwargs):
             "large enough to contain the end of at least one statement "
             "(default: 10M)",
         )
+        index_args["encode_as_id"] = arg(
+            "--encode-as-id",
+            type=str,
+            help="Space-separated list of IRI prefixes (without angle "
+            "brackets); IRIs that start with one of these prefixes, followed "
+            "by a sequence of digits, do not require a vocabulary entry but "
+            "are directly encoded in the ID (default: none)",
+        )
         index_args["only_pso_and_pos_permutations"] = arg(
             "--only-pso-and-pos-permutations",
             action="store_true",
@@ -145,12 +211,19 @@ def arg(*args, **kwargs):
         )
         index_args["use_patterns"] = arg(
             "--use-patterns",
-            action="store_true",
-            default=True,
-            help="Precompute so-called patterns needed for fast processing"
-            " of queries like SELECT ?p (COUNT(DISTINCT ?s) AS ?c) "
+            choices=["yes", "no"],
+            default="yes",
+            help="Whether to precompute the so-called patterns used for fast "
+            "processing of queries like SELECT ?p (COUNT(DISTINCT ?s) AS ?c) "
             "WHERE { ?s ?p [] ... } GROUP BY ?p",
         )
+        index_args["add_has_word_triples"] = arg(
+            "--add-has-word-triples",
+            action="store_true",
+            default=False,
+            help="Whether to add `ql:has-word` triples for text literals "
+            "(which can then be used for custom text search queries)",
+        )
         index_args["text_index"] = arg(
             "--text-index",
             choices=[
@@ -181,14 +254,13 @@ def arg(*args, **kwargs):
         server_args["server_binary"] = arg(
             "--server-binary",
             type=str,
-            default="ServerMain",
+            default="qlever-server",
             help="The binary for starting the server (this requires "
             "that you have compiled QLever on your machine)",
         )
         server_args["host_name"] = arg(
             "--host-name",
             type=str,
-            default="localhost",
             help="The name of the host on which the server listens for "
             "requests",
         )
@@ -261,10 +333,10 @@ def arg(*args, **kwargs):
         )
         server_args["use_patterns"] = arg(
             "--use-patterns",
-            action="store_true",
-            default=True,
-            help="Use the patterns precomputed during the index build"
-            " (see `qlever index --help` for their utility)",
+            choices=["yes", "no"],
+            default="yes",
+            help="Whether to use the patterns precomputed during the index "
+            "build (see `qlever index --help` for their utility)",
         )
         server_args["use_text_index"] = arg(
             "--use-text-index",
@@ -416,10 +488,15 @@ def read(qleverfile_path):
             server = config["server"]
         if index.get("text_index", "none") != "none":
             server["use_text_index"] = "yes"
+        if index.get("only_pso_and_pos_permutations", "false") == "true":
+            index["use_patterns"] = "no"
+        if index.get("use_patterns", None) == "no":
+            server["use_patterns"] = "no"
 
         # Add other non-trivial default values.
         try:
-            config["server"]["host_name"] = socket.gethostname()
+            if config["server"].get("host_name") is None:
+                config["server"]["host_name"] = socket.gethostname()
         except Exception:
             log.warning(
                 "Could not get the hostname, using `localhost` as default"
diff --git a/src/qlever/util.py b/src/qlever/util.py
index b5c8f35d..60006e40 100644
--- a/src/qlever/util.py
+++ b/src/qlever/util.py
@@ -76,7 +76,7 @@ def run_command(
         else:
             raise Exception(
                 f"Command failed with exit code {result.returncode}, "
-                f" nothing written to stderr"
+                f" nothing written to stderr (stdout: {result.stdout})"
             )
     # Optionally, return what was written to `stdout`.
     if return_output:
@@ -88,6 +88,7 @@ def run_curl_command(
     headers: dict[str, str] = {},
     params: dict[str, str] = {},
     result_file: Optional[str] = None,
+    max_time: int | None = None,
 ) -> str:
     """
     Run `curl` with the given `url`, `headers`, and `params`. If `result_file`
@@ -99,7 +100,7 @@ def run_curl_command(
     default_result_file = "/tmp/qlever.curl.result"
     actual_result_file = result_file if result_file else default_result_file
     curl_cmd = (
-        f'curl -s -o "{actual_result_file}"'
+        f'curl -Ls -o "{actual_result_file}"'
         f' -w "%{{http_code}}\n" {url}'
         + "".join([f' -H "{key}: {value}"' for key, value in headers.items()])
         + "".join(
@@ -109,6 +110,8 @@ def run_curl_command(
             ]
         )
     )
+    if max_time is not None:
+        curl_cmd += f" --max-time {int(max_time)}"
     result = subprocess.run(
         curl_cmd,
         shell=True,
@@ -154,17 +157,28 @@ def is_qlever_server_alive(endpoint_url: str) -> bool:
         return False
 
 
-def get_existing_index_files(basename: str) -> list[str]:
+def get_existing_index_files(basename: str, add_non_essential: bool = False) -> list[str]:
     """
     Helper function that returns a list of all index files for `basename` in
     the current working directory.
     """
+
+    # Essential index files.
     existing_index_files = []
     existing_index_files.extend(Path.cwd().glob(f"{basename}.index.*"))
+    existing_index_files.extend(Path.cwd().glob(f"{basename}.internal.index.*"))
     existing_index_files.extend(Path.cwd().glob(f"{basename}.text.*"))
     existing_index_files.extend(Path.cwd().glob(f"{basename}.vocabulary.*"))
     existing_index_files.extend(Path.cwd().glob(f"{basename}.meta-data.json"))
     existing_index_files.extend(Path.cwd().glob(f"{basename}.prefixes"))
+
+    # Non-essential index files.
+    if add_non_essential:
+        existing_index_files.extend(Path.cwd().glob(f"{basename}.view.*"))
+        existing_index_files.extend(Path.cwd().glob(f"{basename}.settings.json"))
+        existing_index_files.extend(Path.cwd().glob(f"{basename}.index-log.txt"))
+        existing_index_files.extend(Path.cwd().glob(f"{basename}.server-log.txt"))
+
     # Return only the file names, not the full paths.
     return [path.name for path in existing_index_files]
 
@@ -287,8 +301,9 @@ def stop_process_with_regex(cmdline_regex: str) -> list[bool] | None:
             )
             cmdline = " ".join(pinfo["cmdline"])
         except Exception as e:
+            # For some processes (e.g., zombies), getting info may fail.
             log.debug(f"Error getting process info: {e}")
-            return None
+            continue
         if re.search(cmdline_regex, cmdline):
             log.info(
                 f"Found process {pinfo['pid']} from user "
@@ -299,19 +314,46 @@ def stop_process_with_regex(cmdline_regex: str) -> list[bool] | None:
     return stop_process_results
 
 
-def binary_exists(binary: str, cmd_arg: str) -> bool:
+def binary_exists(binary: str, cmd_arg: str, args) -> bool:
     """
-    When a command is run natively, check if the binary exists on the system
+    Check if the binary exists on the user's system. If running inside a
+    container, check if the binary exists inside the container system.
     """
+    from qlever.containerize import Containerize
+
+    is_containerized = args.system in Containerize.supported_systems()
+    cmd = f"{binary} --help"
+    if is_containerized:
+        cmd = Containerize().containerize_command(
+            cmd,
+            args.system,
+            "run --rm",
+            args.image,
+            "qlever.check-binary",
+            volumes=[("$(pwd)", "/index")],
+            working_directory="/index",
+        )
+
     try:
-        run_command(f"{binary} --help")
+        run_command(cmd)
         return True
     except Exception as e:
-        log.error(
-            f'Running "{binary}" failed, '
-            f"set `--{cmd_arg}` to a different binary or "
-            f"set `--system to a container system`"
-        )
+        if is_containerized and (
+            binary == "qlever-index" or binary == "qlever-server"
+        ):
+            log.error(
+                f'Running "{binary}" failed. '
+                f"This might be because you are using a newer version of "
+                f"the `qlever` command-line tool together with an older "
+                f"Docker image; in that case update with "
+                f"`{args.system} pull {args.image}` "
+            )
+        else:
+            log.error(
+                f'Running "{binary}" failed, '
+                f"set `--{cmd_arg}` to a different binary or "
+                f"set `--system` to a container system"
+            )
         log.info("")
         log.info(f"The error message was: {e}")
         return False
diff --git a/src/qoxigraph/commands/benchmark_queries.py b/src/qoxigraph/commands/benchmark_queries.py
new file mode 100644
index 00000000..4f285520
--- /dev/null
+++ b/src/qoxigraph/commands/benchmark_queries.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from qlever.commands.benchmark_queries import (
+    BenchmarkQueriesCommand as QleverBenchmarkQueriesCommand,
+)
+
+
+class BenchmarkQueriesCommand(QleverBenchmarkQueriesCommand):
+    """
+    Run benchmark queries against the Oxigraph SPARQL endpoint.
+    Overrides the default endpoint to use Oxigraph's /query path.
+    """
+
+    def execute(self, args) -> bool:
+        if not args.sparql_endpoint:
+            args.sparql_endpoint = f"{args.host_name}:{args.port}/query"
+        return super().execute(args)
diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py
index 128b9a82..82135914 100644
--- a/src/qoxigraph/commands/index.py
+++ b/src/qoxigraph/commands/index.py
@@ -1,18 +1,48 @@
 from __future__ import annotations
 
-import glob
 import shlex
+import time
 from pathlib import Path
 
+import qlever.util as util
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
-from qlever.util import binary_exists, run_command
+
+
+def wrap_cmd_in_container(args, cmd: str, ulimit: int | None = None) -> str:
+    """
+    Wrap an indexing command in a container that is automatically removed
+    after the process exits (`--rm`). Use `use_bash=False` as the Oxigraph
+    image doesn't support a bash entrypoint.
+    """
+    run_subcommand = "run --rm"
+    if ulimit:
+        run_subcommand += f" --ulimit nofile={ulimit}:{ulimit}"
+    return Containerize().containerize_command(
+        cmd=cmd,
+        container_system=args.system,
+        run_subcommand=run_subcommand,
+        image_name=args.image,
+        container_name=args.index_container,
+        volumes=[("$(pwd)", "/opt")],
+        working_directory="/opt",
+        use_bash=False,
+    )
 
 
 class IndexCommand(QleverCommand):
+    """
+    Build an Oxigraph index for an RDF dataset. The indexing workflow is:
+    1. Run `oxigraph load` to import input files into a RocksDB store.
+    2. Optionally run `oxigraph optimize` to compact storage for read-only use.
+
+    For large datasets (>5 GB), the file descriptor ulimit is raised
+    automatically because RocksDB opens many .sst files concurrently.
+    """
+
     def __init__(self):
-        self.script_name = "qoxigraph"
+        pass
 
     def description(self) -> str:
         return "Build the index for a given RDF dataset"
@@ -20,90 +50,122 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name", "format"],
-            "index": ["input_files"],
+            "index": [
+                "input_files",
+                "ulimit",
+                "index_binary",
+                "lenient",
+                "extra_args",
+            ],
+            "server": ["read_only"],
             "runtime": ["system", "image", "index_container"],
         }
 
     def additional_arguments(self, subparser):
-        subparser.add_argument(
-            "--index-binary",
-            type=str,
-            default="oxigraph",
-            help=(
-                "The binary for building the index (default: oxigraph) "
-                "(this requires that you have oxigraph-cli installed "
-                "on your machine)"
-            ),
-        )
-
-    @staticmethod
-    def wrap_cmd_in_container(args, cmd: str) -> str:
-        return Containerize().containerize_command(
-            cmd=cmd,
-            container_system=args.system,
-            run_subcommand="run --rm",
-            image_name=args.image,
-            container_name=args.index_container,
-            volumes=[("$(pwd)", "/index")],
-            working_directory="/index",
-            use_bash=False,
-        )
+        pass
 
     def execute(self, args) -> bool:
-        index_cmd = f"load --location . --file {args.input_files}"
-        index_cmd += f" |& tee {args.name}.index-log.txt"
-
+        cmds_to_execute = []
         index_cmd = (
-            f"{args.index_binary} {index_cmd}"
-            if args.system == "native"
-            else self.wrap_cmd_in_container(args, index_cmd)
+            f"load {'--lenient ' if args.lenient == 'yes' else ''}"
+            f"--location {args.name}_index/ --file {args.input_files} "
+            f"{args.extra_args} |& tee {args.name}.index-log.txt"
+        )
+
+        ulimit = args.ulimit
+        # RocksDB opens many .sst files concurrently. For datasets larger
+        # than 5 GB, raise the file descriptor limit so the process does
+        # not hit the default OS soft limit.
+        total_file_size = util.get_total_file_size(
+            shlex.split(args.input_files)
         )
+        if not ulimit and total_file_size > 5e9:
+            ulimit = 500_000
+        if args.system in Containerize.supported_systems():
+            index_cmd = wrap_cmd_in_container(args, index_cmd, ulimit)
+        else:
+            index_cmd = f"{args.index_binary} {index_cmd}"
+            if ulimit:
+                index_cmd = f"ulimit -Sn {ulimit} && {index_cmd}"
+
+        cmds_to_execute.append(index_cmd)
+
+        # Compact the RocksDB storage for read-only serving. This reduces
+        # disk usage and speeds up queries but makes the index immutable.
+        optimize_cmd = None
+        if args.read_only == "yes":
+            optimize_cmd = f"optimize -l {args.name}_index/"
+            if args.system in Containerize.supported_systems():
+                optimize_cmd = wrap_cmd_in_container(args, optimize_cmd)
+            else:
+                optimize_cmd = f"{args.index_binary} {optimize_cmd}"
+            cmds_to_execute.append(optimize_cmd)
 
         # Show the command line.
-        self.show(index_cmd, only_show=args.show)
+        self.show("\n".join(cmds_to_execute), only_show=args.show)
         if args.show:
             return True
 
-        # Check if all of the input files exist.
-        for pattern in shlex.split(args.input_files):
-            if len(glob.glob(pattern)) == 0:
-                log.error(f'No file matching "{pattern}" found')
-                log.info("")
-                log.info(
-                    f"Did you call `{self.script_name} get-data`? If you did, "
-                    "check GET_DATA_CMD and INPUT_FILES in the Qleverfile"
-                )
-                return False
+        if not util.input_files_exist(args.input_files):
+            return False
 
         # When running natively, check if the binary exists and works.
-        if args.system == "native":
-            if not binary_exists(args.index_binary, "index-binary"):
-                return False
-        else:
+        if args.system in Containerize.supported_systems():
             if Containerize().is_running(args.system, args.index_container):
                 log.info(
                     f"{args.system} container {args.index_container} is still up, "
                     "which means that data loading is in progress. Please wait..."
                 )
                 return False
+        else:
+            if not util.binary_exists(args.index_binary, "index-binary", args):
+                return False
 
-        if len([p.name for p in Path.cwd().glob("*.sst")]) != 0:
+        # Abort if a previous index already exists. RocksDB .sst files in
+        # the index directory indicate an existing store.
+        if (
+            len([p.name for p in Path(f"{args.name}_index").glob("*.sst")])
+            != 0
+        ):
             log.error(
-                "Index files (*.sst) found in current directory "
+                f"Index files (*.sst) found in {args.name}_index directory "
                 "which shows presence of a previous index"
             )
             log.info("")
             log.info("Aborting the index operation...")
             return False
 
-        # Run the index command.
+        # Run the index command and record the elapsed time in the log
+        # file. Oxigraph's progress output is unreliable (may not print a
+        # final summary line when loading multiple files), so we measure
+        # the time externally.
+        log_file_name = f"{args.name}.index-log.txt"
         try:
-            run_command(index_cmd, show_output=True, show_stderr=True)
+            start_time = time.time()
+            util.run_command(index_cmd, show_output=True, show_stderr=True)
+            elapsed_s = time.time() - start_time
+            with open(log_file_name, "a") as f:
+                f.write(f"Total elapsed time: {elapsed_s:.0f}s\n")
         except Exception as e:
             log.error(f"Building the index failed: {e}")
             return False
 
+        if optimize_cmd:
+            try:
+                log.info("")
+                log.info("Optimizing read-only database storage:")
+                self.show(optimize_cmd)
+                util.run_command(
+                    optimize_cmd, show_output=True, show_stderr=True
+                )
+            except Exception as e:
+                log.error(f"Optimizing the database storage failed: {e}")
+                log.info(
+                    f"Please run manually: "
+                    f"{args.index_binary} optimize -l {args.name}_index/"
+                )
+
         return True
diff --git a/src/qoxigraph/commands/index_stats.py b/src/qoxigraph/commands/index_stats.py
new file mode 100644
index 00000000..672f4a53
--- /dev/null
+++ b/src/qoxigraph/commands/index_stats.py
@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+import re
+
+import qlever.util as util
+from qlever.commands.index_stats import (
+    IndexStatsCommand as QleverIndexStatsCommand,
+)
+from qlever.commands.index_stats import (
+    get_size_unit,
+    get_size_unit_factor,
+    get_time_unit,
+    get_time_unit_factor,
+)
+from qlever.log import log
+
+
+class IndexStatsCommand(QleverIndexStatsCommand):
+    """
+    Show index build time and disk space usage for an Oxigraph dataset.
+    Time is read from the "Total elapsed time" line appended to the
+    index log by the index command; space is the sum of all .sst files.
+    """
+
+    def execute_time(
+        self, args, log_file_name: str
+    ) -> dict[str, tuple[float | None, str]]:
+        """Parse total index build time from the index log file."""
+        try:
+            # Read the last few lines of the log file (the total time is
+            # always near the end).
+            log_text = util.run_command(
+                f"tail {log_file_name}", return_output=True
+            )
+        except Exception as e:
+            log.error(f"Problem reading index log file {log_file_name}: {e}")
+            return {}
+
+        stats = {}
+        # Pattern: "Total elapsed time: <number>s" (total time, always last)
+        total_pattern = re.compile(r"Total elapsed time: ([\d,]+)s$")
+
+        for line in log_text.splitlines():
+            match = total_pattern.search(line)
+            if not match:
+                continue
+
+            try:
+                value_s = float(match.group(1).replace(",", ""))
+            except (ValueError, TypeError):
+                continue
+
+            time_unit = get_time_unit(args.time_unit, value_s)
+            unit_factor = get_time_unit_factor(time_unit)
+
+            stats["TOTAL time"] = (value_s / unit_factor, time_unit)
+            break
+
+        return stats
+
+    def execute_space(self, args) -> dict[str, tuple[float, str]]:
+        """
+        Return the space used by the index files (*.sst) along with the unit.
+        """
+        index_size = util.get_total_file_size([f"{args.name}_index/*.sst"])
+
+        size_unit = get_size_unit(args.size_unit, index_size)
+        unit_factor = get_size_unit_factor(size_unit)
+
+        index_size /= unit_factor
+
+        return {"TOTAL size": (index_size, size_unit)}
diff --git a/src/qoxigraph/commands/log.py b/src/qoxigraph/commands/log.py
index a90d2228..401d2148 100644
--- a/src/qoxigraph/commands/log.py
+++ b/src/qoxigraph/commands/log.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from qlever import script_name
 from qlever.commands.log import LogCommand as QleverLogCommand
 from qlever.containerize import Containerize
 from qlever.log import log
@@ -7,10 +8,16 @@
 
 
 class LogCommand(QleverLogCommand):
+    """
+    Show server logs for Oxigraph. For native execution, tails the log
+    file as usual. For containers, uses `docker/podman logs` as it is
+    not possible to redirect oxigraph logs to a log file.
+    """
+
     def __init__(self):
-        self.script_name = "qoxigraph"
+        pass
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name"],
             "runtime": [
@@ -21,9 +28,12 @@ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
         }
 
     def execute(self, args) -> bool:
-        if args.system == "native":
+        if args.system not in Containerize.supported_systems():
             return super().execute(args)
 
+        # For containers, use the `docker/podman logs` command instead of
+        # `tail`, because there is no <args.name>.server-log.txt file for
+        # containerized execution.
         log_cmd = f"{args.system} logs "
 
         if not args.from_beginning:
@@ -40,7 +50,7 @@ def execute(self, args) -> bool:
 
         if not Containerize().is_running(args.system, args.server_container):
             log.error(f"No server container {args.server_container} found!\n")
-            log.info(f"Are you sure you called `{self.script_name} start`?")
+            log.info(f"Are you sure you called `{script_name} start`?")
             return False
 
         try:
diff --git a/src/qoxigraph/commands/query.py b/src/qoxigraph/commands/query.py
index 6518905f..bc3fb35c 100644
--- a/src/qoxigraph/commands/query.py
+++ b/src/qoxigraph/commands/query.py
@@ -4,6 +4,12 @@
 
 
 class QueryCommand(QleverQueryCommand):
+    """
+    Send a SPARQL query to the Oxigraph server. Extends the base query
+    command with Oxigraph's /query endpoint and supported result formats.
+    This class is used as the base QueryCommand by all the other new engines.
+    """
+
     def additional_arguments(self, subparser) -> None:
         subparser.add_argument(
             "query",
@@ -47,8 +53,10 @@ def additional_arguments(self, subparser) -> None:
         )
 
     def execute(self, args) -> bool:
+        # Oxigraph's SPARQL endpoint is at /query.
         if not args.sparql_endpoint:
-            args.sparql_endpoint = f"localhost:{args.port}/query"
+            args.sparql_endpoint = f"{args.host_name}:{args.port}/query"
+        # These QLever-specific options are not supported by Oxigraph.
         args.pin_to_cache = None
         args.access_token = None
-        super().execute(args)
+        return super().execute(args)
diff --git a/src/qoxigraph/commands/setup_config.py b/src/qoxigraph/commands/setup_config.py
index b6d9225b..ed286269 100644
--- a/src/qoxigraph/commands/setup_config.py
+++ b/src/qoxigraph/commands/setup_config.py
@@ -3,14 +3,24 @@
 from configparser import RawConfigParser
 from pathlib import Path
 
-from qlever.command import QleverCommand
+from qlever.commands.setup_config import (
+    SetupConfigCommand as QleverSetupConfigCommand,
+)
 from qlever.log import log
 from qlever.qleverfile import Qleverfile
 
 
-class SetupConfigCommand(QleverCommand):
+class SetupConfigCommand(QleverSetupConfigCommand):
+    """
+    Create a Qleverfile for Oxigraph from a dataset template in `src/qlever/Qleverfiles`.
+    Filters the template to keep only the relevant sections and adds Oxigraph-specific
+    defaults (read-only mode, query timeout).
+    This class is used as the base SetupConfigCommand by all the other new engines.
+    """
+
     IMAGE = "ghcr.io/oxigraph/oxigraph"
 
+    # Sections and keys to retain when filtering a Qleverfile template.
     FILTER_CRITERIA = {
         "data": [],
         "index": ["INPUT_FILES"],
@@ -19,91 +29,63 @@ class SetupConfigCommand(QleverCommand):
         "ui": ["UI_CONFIG"],
     }
 
-    def __init__(self):
-        self.qleverfiles_path = (
-            Path(__file__).parent.parent.parent / "qlever" / "Qleverfiles"
-        )
-        self.qleverfile_names = [
-            p.name.split(".")[1]
-            for p in self.qleverfiles_path.glob("Qleverfile.*")
-        ]
-
-    def description(self) -> str:
-        return "Get a pre-configured Qleverfile"
-
-    def should_have_qleverfile(self) -> bool:
-        return False
-
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
-        return {}
-
-    def additional_arguments(self, subparser) -> None:
-        subparser.add_argument(
-            "config_name",
-            type=str,
-            choices=self.qleverfile_names,
-            help="The name of the pre-configured Qleverfile to create",
-        )
+    @staticmethod
+    def construct_engine_specific_params(args) -> dict[str, dict[str, str]]:
+        """Return Oxigraph-specific defaults to inject into the Qleverfile."""
+        return {"server": {"READ_ONLY": "yes", "TIMEOUT": "60s"}}  # args unused
+
+    @staticmethod
+    def add_engine_specific_option_values(
+        qleverfile_parser: RawConfigParser,
+        engine_specific_params: dict[str, dict[str, str]],
+    ) -> None:
+        """Merge engine-specific parameters into the Qleverfile parser."""
+        for section, option_dict in engine_specific_params.items():
+            if qleverfile_parser.has_section(section):  # skip absent sections
+                for option, value in option_dict.items():
+                    qleverfile_parser.set(section, option, value)
 
-    def validate_qleverfile_setup(
-        self, args, qleverfile_path: Path
-    ) -> bool | None:
+    def execute(self, args) -> bool:
         # Construct the command line and show it.
+        template_path = (
+            self.qleverfiles_path / f"Qleverfile.{args.config_name}"
+        )
         setup_config_show = (
-            f"Creating Qleverfile for {args.config_name} using "
-            f"Qleverfile.{args.config_name} file in {self.qleverfiles_path}"
+            f"Qleverfile for {args.config_name} will be created using "
+            f"Qleverfile.{args.config_name} file in {template_path}"
         )
         self.show(setup_config_show, only_show=args.show)
         if args.show:
             return True
 
         # If there is already a Qleverfile in the current directory, exit.
-        if qleverfile_path.exists():
-            log.error("`Qleverfile` already exists in current directory")
-            log.info("")
-            log.info(
-                "If you want to create a new Qleverfile using "
-                "`qlever setup-config`, delete the existing Qleverfile "
-                "first"
-            )
+        if self.check_qleverfile_exists():
             return False
-        return None
 
-    def get_filtered_qleverfile_parser(
-        self, config_name: str
-    ) -> RawConfigParser:
-        qleverfile_config_path = (
-            self.qleverfiles_path / f"Qleverfile.{config_name}"
-        )
-        qleverfile_parser = Qleverfile.filter(
-            qleverfile_config_path, self.FILTER_CRITERIA
-        )
-        if qleverfile_parser.has_section("runtime"):
-            qleverfile_parser.set("runtime", "IMAGE", self.IMAGE)
-        return qleverfile_parser
-
-    def execute(self, args) -> bool:
         qleverfile_path = Path("Qleverfile")
-        exit_status = self.validate_qleverfile_setup(args, qleverfile_path)
-        if exit_status is not None:
-            return exit_status
 
-        qleverfile_parser = self.get_filtered_qleverfile_parser(
-            args.config_name
-        )
-        # Copy the Qleverfile to the current directory.
         try:
+            qleverfile_parser = Qleverfile.filter(
+                template_path, self.FILTER_CRITERIA
+            )
+            qleverfile_parser.set("runtime", "IMAGE", self.IMAGE)
+            params = self.construct_engine_specific_params(args)
+            self.add_engine_specific_option_values(qleverfile_parser, params)
+            for section, arg_name in self.override_args:
+                if arg_value := getattr(args, arg_name, None):
+                    qleverfile_parser.set(
+                        section, arg_name.upper(), str(arg_value)
+                    )
             with qleverfile_path.open("w") as f:
                 qleverfile_parser.write(f)
+
+            log.info(
+                f'Created Qleverfile for config "{args.config_name}"'
+                f" in current directory"
+            )
+            return True
         except Exception as e:
             log.error(
                 f'Could not copy "{qleverfile_path}" to current directory: {e}'
             )
             return False
-
-        # If we get here, everything went well.
-        log.info(
-            f'Created Qleverfile for config "{args.config_name}"'
-            f" in current directory"
-        )
-        return True
diff --git a/src/qoxigraph/commands/start.py b/src/qoxigraph/commands/start.py
index 8a038344..7fbfd81f 100644
--- a/src/qoxigraph/commands/start.py
+++ b/src/qoxigraph/commands/start.py
@@ -4,15 +4,65 @@
 import time
 from pathlib import Path
 
+from qlever import script_name
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
-from qlever.util import binary_exists, is_server_alive, run_command
+from qlever.util import (
+    binary_exists,
+    is_server_alive,
+    run_command,
+    tail_log_file,
+)
+from qoxigraph.commands.stop import StopCommand
+
+
+def timeout_supported(args, serve_ps: str) -> bool:
+    """Check whether the oxigraph server binary supports query timeouts."""
+    help_cmd = f"{serve_ps} --help"  # help text of the given serve process
+    if args.system in Containerize.supported_systems():  # containerized run
+        help_cmd = f"{args.system} run --rm {args.image} {help_cmd}"
+    else:  # otherwise call the configured server binary directly
+        help_cmd = f"{args.server_binary} {help_cmd}"
+    try:
+        help_output = run_command(help_cmd, return_output=True)
+        return "timeout-s" in help_output  # True iff the help mentions it
+    except Exception as e:  # any failure is logged and treated as unsupported
+        log.warning(
+            "Could not determine if query timeouts are supported by this version "
+            f"of Oxigraph! Falling back to no timeouts. Error: {e}",
+        )
+        return False
+
+
+def wrap_cmd_in_container(args, cmd: str) -> str:
+    """Wrap the server start command in a container with restart policy."""
+    run_subcommand = "run --restart=unless-stopped"
+    if not args.run_in_foreground:
+        run_subcommand += " -d"  # add -d only when not run in the foreground
+    return Containerize().containerize_command(
+        cmd=cmd,
+        container_system=args.system,
+        run_subcommand=run_subcommand,
+        image_name=args.image,
+        container_name=args.server_container,
+        volumes=[("$(pwd)", "/opt")],  # pairs the current directory with /opt
+        ports=[(args.port, args.port)],  # same port number on both sides
+        working_directory="/opt",  # same path as the volume target above
+        use_bash=False,
+    )
 
 
 class StartCommand(QleverCommand):
+    """
+    Start the Oxigraph SPARQL server for an already-indexed dataset.
+    Supports both native and containerized execution, with an option
+    to run in the foreground. Uses `serve-read-only` or `serve`
+    depending on the read_only setting.
+    """
+
     def __init__(self):
-        self.script_name = "qoxigraph"
+        pass
 
     def description(self) -> str:
         return (
@@ -23,10 +73,17 @@ def description(self) -> str:
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name"],
-            "server": ["host_name", "port"],
+            "server": [
+                "host_name",
+                "port",
+                "read_only",
+                "server_binary",
+                "timeout",
+                "extra_args",
+            ],
             "runtime": ["system", "image", "server_container"],
         }
 
@@ -40,50 +97,44 @@ def additional_arguments(self, subparser):
                 "(default: run in the background)"
             ),
         )
-        subparser.add_argument(
-            "--server-binary",
-            type=str,
-            default="oxigraph",
-            help=(
-                "The binary for starting the server (default: oxigraph) "
-                "(this requires that you have oxigraph-cli installed "
-                "on your machine)"
-            ),
-        )
-
-    @staticmethod
-    def wrap_cmd_in_container(args, cmd: str) -> str:
-        run_subcommand = "run --restart=unless-stopped"
-        if not args.run_in_foreground:
-            run_subcommand += " -d"
-        return Containerize().containerize_command(
-            cmd=cmd,
-            container_system=args.system,
-            run_subcommand=run_subcommand,
-            image_name=args.image,
-            container_name=args.server_container,
-            volumes=[("$(pwd)", "/index")],
-            ports=[(args.port, args.port)],
-            working_directory="/index",
-            use_bash=False,
-        )
 
     def execute(self, args) -> bool:
+        # Inside a container, bind to 0.0.0.0 so the port mapping is
+        # reachable from the host; natively, bind to the configured host.
         bind = (
-            f"{args.host_name}:{args.port}"
-            if args.system == "native"
-            else f"0.0.0.0:{args.port}"
+            f"0.0.0.0:{args.port}"
+            if args.system in Containerize.supported_systems() 
+            else f"{args.host_name}:{args.port}"
         )
-        start_cmd = f"serve-read-only --location . --bind={bind}"
-
-        if args.system == "native":
-            start_cmd = f"{args.server_binary} {start_cmd}"
-            if not args.run_in_foreground:
-                start_cmd = (
-                    f"nohup {start_cmd} > {args.name}.server-log.txt 2>&1 &"
+        process = "serve-read-only" if args.read_only == "yes" else "serve"
+        timeout_str = ""
+        if timeout_supported(args, process):
+            try:
+                timeout_s = int(args.timeout[:-1])
+            except ValueError as e:
+                log.warning(
+                    f"Invalid timeout value {args.timeout}. Error: {e}"
                 )
+                log.info("Setting timeout to 60s!")
+                timeout_s = 60
+            timeout_str = f"--timeout-s {timeout_s}"
         else:
-            start_cmd = self.wrap_cmd_in_container(args, start_cmd)
+            log.info(
+                f"Ignoring the set timeout value of {args.timeout} as your "
+                "version of Oxigraph doesn't currently support query timeouts!"
+            )
+
+        start_cmd = (
+            f"{process} --location {args.name}_index/ {args.extra_args} "
+            f"{timeout_str} --bind={bind}"
+        )
+
+        if args.system in Containerize.supported_systems():
+            start_cmd = wrap_cmd_in_container(args, start_cmd)
+        else:
+            start_cmd = f"{args.server_binary} {start_cmd} > {args.name}.server-log.txt 2>&1"
+            if not args.run_in_foreground:
+                start_cmd = f"nohup {start_cmd} &"
 
         # Show the command line.
         self.show(start_cmd, only_show=args.show)
@@ -93,35 +144,34 @@ def execute(self, args) -> bool:
         endpoint_url = f"http://{args.host_name}:{args.port}/query"
 
         # When running natively, check if the binary exists and works.
-        if args.system == "native":
-            if not binary_exists(args.server_binary, "server-binary"):
-                return False
-        else:
-            if Containerize().is_running(args.system, args.server_container):
-                log.error(
-                    f"Server container {args.server_container} already exists!\n"
-                )
-                log.info(
-                    f"To kill the existing server, use `{self.script_name} stop`"
-                )
+        if args.system not in Containerize.supported_systems():
+            if not binary_exists(args.server_binary, "server-binary", args):
                 return False
 
-        # Check if index files (*.sst) present in cwd
-        if len([p.name for p in Path.cwd().glob("*.sst")]) == 0:
+        # Check that index files (*.sst) are present in the index directory
+        if (
+            len([p.name for p in Path(f"{args.name}_index/").glob("*.sst")])
+            == 0
+        ):
             log.error(f"No Oxigraph index files for {args.name} found!\n")
             log.info(
-                f"Did you call `{self.script_name} index`? If you did, check "
-                "if .sst index files are present in current working directory."
+                f"Did you call `{script_name} index`? If you did, check "
+                "if .sst index files are present in index directory."
             )
             return False
 
+        # Check if a server from a previous run is alive at the endpoint URL
         if is_server_alive(url=endpoint_url):
             log.error(f"Oxigraph server already running on {endpoint_url}\n")
             log.info(
-                f"To kill the existing server, use `{self.script_name} stop`"
+                f"To kill the existing server, use `{script_name} stop`"
             )
             return False
 
+        # Remove old log file so that tail starts clean.
+        log_file = Path(f"{args.name}.server-log.txt")
+        log_file.unlink(missing_ok=True)
+
         try:
             process = run_command(
                 start_cmd,
@@ -145,12 +195,17 @@ def execute(self, args) -> bool:
                 " (Ctrl-C stops following the log, but NOT the server)"
             )
         log.info("")
-        if args.system == "native":
-            log_cmd = f"exec tail -f {args.name}.server-log.txt"
-        else:
+        # For containers, use `docker/podman logs -f` as Oxigraph doesn't
+        # support redirecting logs to a log file. A short delay ensures
+        # the container is up before attaching.
+        if args.system in Containerize.supported_systems():
             time.sleep(2)
             log_cmd = f"exec {args.system} logs -f {args.server_container}"
-        log_proc = subprocess.Popen(log_cmd, shell=True)
+            log_proc = subprocess.Popen(log_cmd, shell=True)
+        else:
+            log_proc = tail_log_file(log_file)
+            if log_proc is None:
+                return False
         while not is_server_alive(endpoint_url):
             time.sleep(1)
 
@@ -165,11 +220,15 @@ def execute(self, args) -> bool:
             log_proc.terminate()
 
         # With `--run-in-foreground`, wait until the server is stopped.
+        # On Ctrl-C, terminate the process and clean up the container.
         if args.run_in_foreground:
             try:
                 process.wait()
             except KeyboardInterrupt:
                 process.terminate()
+                if args.system in Containerize.supported_systems():
+                    args.cmdline_regex = StopCommand.DEFAULT_REGEX
+                    StopCommand().execute(args)
             log_proc.terminate()
 
         return True
diff --git a/src/qoxigraph/commands/status.py b/src/qoxigraph/commands/status.py
index eb2de86c..d73548dc 100644
--- a/src/qoxigraph/commands/status.py
+++ b/src/qoxigraph/commands/status.py
@@ -4,7 +4,9 @@
 
 
 class StatusCommand(QleverStatusCommand):
-    DEFAULT_REGEX = "oxigraph\\s+serve-read-only"
+    """Show Oxigraph server processes running on this machine."""
+
+    DEFAULT_REGEX = "oxigraph\\s+serve"
 
     def description(self) -> str:
         return "Show Oxigraph processes running on this machine"
diff --git a/src/qoxigraph/commands/stop.py b/src/qoxigraph/commands/stop.py
index dedd1ff2..47308284 100644
--- a/src/qoxigraph/commands/stop.py
+++ b/src/qoxigraph/commands/stop.py
@@ -2,30 +2,37 @@
 
 from qlever.command import QleverCommand
 from qlever.commands import stop as qlever_stop
+from qlever.containerize import Containerize
 from qlever.log import log
 from qlever.util import stop_process_with_regex
 from qoxigraph.commands.status import StatusCommand
 
 
 class StopCommand(QleverCommand):
+    """
+    Stop the Oxigraph server for a given dataset. For native execution,
+    finds and kills processes matching the dataset-name regex. For
+    containers, stops and removes the server container.
+    """
+
     # Override this with StatusCommand from child class for execute
     # method to work as intended
     STATUS_COMMAND = StatusCommand()
-    DEFAULT_REGEX = "oxigraph\\s+serve-read-only.*:%%PORT%%"
+    # %%NAME%% is replaced at runtime with the dataset name from the Qleverfile
+    DEFAULT_REGEX = "oxigraph\\s+serve.*%%NAME%%_index"
 
     def __init__(self):
         pass
 
     def description(self) -> str:
-        return "Stop Oxigraph server for a given dataset or port"
+        return "Stop Oxigraph server for a given dataset"
 
     def should_have_qleverfile(self) -> bool:
         return True
 
-    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
         return {
             "data": ["name"],
-            "server": ["port"],
             "runtime": ["system", "server_container"],
         }
 
@@ -38,18 +45,24 @@ def additional_arguments(self, subparser) -> None:
         )
 
     def execute(self, args) -> bool:
-        cmdline_regex = args.cmdline_regex.replace("%%PORT%%", str(args.port))
+        # Substitute the dataset name into the regex template so we only
+        # match the server running for this dataset.
+        cmdline_regex = args.cmdline_regex
+        if "%%NAME%%" in args.cmdline_regex and hasattr(args, "name"):
+            cmdline_regex = args.cmdline_regex.replace(
+                "%%NAME%%", str(args.name)
+            )
         description = (
-            f'Checking for processes matching "{cmdline_regex}"'
-            if args.system == "native"
-            else f"Checking for container with name {args.server_container}"
+            f"Checking for container with name {args.server_container}"
+            if args.system in Containerize.supported_systems()
+            else f'Checking for processes matching "{cmdline_regex}"'
         )
 
         self.show(description, only_show=args.show)
         if args.show:
             return True
 
-        if args.system == "native":
+        if args.system not in Containerize.supported_systems():
             stop_process_results = stop_process_with_regex(cmdline_regex)
             if stop_process_results is None:
                 return False
diff --git a/src/qoxigraph/qleverfile.py b/src/qoxigraph/qleverfile.py
new file mode 100644
index 00000000..467b77fa
--- /dev/null
+++ b/src/qoxigraph/qleverfile.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+
+def qleverfile_args(all_args: dict[str, dict[str, tuple]]) -> None:
+    """Define additional oxigraph specific Qleverfile parameters"""
+
+    def arg(*args, **kwargs):  # pack an option spec as an (args, kwargs) tuple
+        return (args, kwargs)
+
+    index_args = all_args["index"]  # mutated in place below
+    server_args = all_args["server"]  # mutated in place below
+
+    index_args["index_binary"] = arg(
+        "--index-binary",
+        type=str,
+        default="oxigraph",
+        help=(
+            "The binary for building the index (default: oxigraph) "
+            "(this requires that you have oxigraph-cli installed "
+            "on your machine)"
+        ),
+    )
+    index_args["lenient"] = arg(
+        "--lenient",
+        type=str,
+        choices=["yes", "no"],
+        default="no",
+        help="Attempt to keep loading even if the data file is invalid",
+    )
+    index_args["extra_args"] = arg(
+        "--extra-args",
+        type=str,
+        default="",
+        help=(
+            "Additional arguments to pass directly to the oxigraph load process. "
+            "This allows advanced users to specify options not exposed in "
+            "Qleverfile. The string is appended verbatim to the command."
+        ),
+    )
+
+    server_args["server_binary"] = arg(
+        "--server-binary",
+        type=str,
+        default="oxigraph",
+        help=(
+            "The binary for starting the server (default: oxigraph) "
+            "(this requires that you have oxigraph-cli installed "
+            "on your machine)"
+        ),
+    )
+    server_args["read_only"] = arg(
+        "--read-only",
+        type=str,
+        choices=["yes", "no"],
+        default="yes",
+        help=(
+            "The HTTP server will not permit mutation operations in "
+            "read-only mode"
+        ),
+    )
+    server_args["timeout"] = arg(
+        "--timeout",
+        type=str,
+        default="60s",  # a string with a unit suffix, not a bare number
+        help="The maximal time in seconds a query is allowed to run",
+    )
+    server_args["extra_args"] = arg(
+        "--extra-args",
+        type=str,
+        default="",
+        help=(
+            "Additional arguments to pass directly to the oxigraph "
+            "serve/serve-read-only. This allows advanced users to specify "
+            "options not exposed in Qleverfile. The string is appended "
+            "verbatim to the command."
+        ),
+    )
diff --git a/test/qlever/commands/test_benchmark_queries_methods.py b/test/qlever/commands/test_benchmark_queries_methods.py
new file mode 100644
index 00000000..9676f175
--- /dev/null
+++ b/test/qlever/commands/test_benchmark_queries_methods.py
@@ -0,0 +1,454 @@
+import pytest
+
+from qlever.commands.benchmark_queries import (
+    filter_queries,
+    get_result_size,
+    get_single_int_result,
+    parse_queries_tsv,
+    parse_queries_yml,
+    resolve_benchmark_metadata,
+    sparql_query_type,
+)
+
+MODULE = "qlever.commands.benchmark_queries"  # passed to mock_command below
+
+JSON_ACCEPT_HEADERS_AND_RESULT_FILES = [  # (accept header, result file)
+    ("application/sparql-results+json", "result.json"),
+    ("application/qlever-results+json", "result.json"),
+]
+
+ALL_ACCEPT_HEADERS_AND_RESULT_FILES = [  # (accept header, result file)
+    ("text/csv", "result.csv"),
+    ("text/tab-separated-values", "result.tsv"),
+    *JSON_ACCEPT_HEADERS_AND_RESULT_FILES,
+]
+
+
+@pytest.mark.parametrize("download_or_count", ["count", "download"])
+@pytest.mark.parametrize(
+    "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES
+)
+def test_empty_result_non_construct_describe(
+    mock_command,
+    download_or_count,
+    accept_header,
+    result_file,
+):
+    mock_path_stat = mock_command(MODULE, "Path.stat")
+    mock_path_stat.return_value.st_size = 0  # result file reports zero bytes
+    run_cmd_mock = mock_command(MODULE, "run_command")
+
+    size, err = get_result_size(
+        count_only=download_or_count == "count",
+        query_type="SELECT",
+        accept_header=accept_header,
+        result_file=result_file,
+    )
+
+    assert size == 0
+    assert err["short"] == "Empty result"
+    assert (
+        err["long"] == "curl returned with code 200, but the result is empty"
+    )
+    run_cmd_mock.assert_not_called()  # no command expected for empty result
+
+
+@pytest.mark.parametrize("download_or_count", ["count", "download"])
+@pytest.mark.parametrize(
+    "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES
+)
+@pytest.mark.parametrize("query_type", ["CONSTRUCT", "DESCRIBE"])
+def test_empty_result_construct_describe(
+    mock_command,
+    download_or_count,
+    query_type,
+    accept_header,
+    result_file,
+):
+    mock_path_stat = mock_command(MODULE, "Path.stat")
+    mock_path_stat.return_value.st_size = 0  # result file reports zero bytes
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "42"  # output of the mocked run_command
+
+    size, err = get_result_size(
+        count_only=download_or_count == "count",
+        query_type=query_type,
+        accept_header=accept_header,
+        result_file=result_file,
+    )
+
+    assert size == 42  # matches the mocked run_command output
+    assert err is None
+
+
+@pytest.mark.parametrize("download_or_count", ["count", "download"])
+@pytest.mark.parametrize(
+    "accept_header, result_file", ALL_ACCEPT_HEADERS_AND_RESULT_FILES
+)
+def test_count_and_download_success(
+    mock_command,
+    download_or_count,
+    accept_header,
+    result_file,
+):
+    mock_path_stat = mock_command(MODULE, "Path.stat")
+    mock_path_stat.return_value.st_size = 100  # non-empty result file
+
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "42"  # output of the mocked run_command
+
+    size, err = get_result_size(
+        count_only=download_or_count == "count",
+        query_type="SELECT",
+        accept_header=accept_header,
+        result_file=result_file,
+    )
+
+    run_cmd_mock.assert_called_once()
+    assert size == 42  # matches the mocked run_command output
+    assert err is None
+
+
+def test_download_turtle_success(mock_command):
+    mock_path_stat = mock_command(MODULE, "Path.stat")
+    mock_path_stat.return_value.st_size = 100  # non-empty result file
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "42"  # output of the mocked run_command
+
+    size, err = get_result_size(
+        count_only=False,
+        query_type="SELECT",
+        accept_header="text/turtle",  # not in the parametrized header lists
+        result_file="result.ttl",
+    )
+
+    run_cmd_mock.assert_called_once()
+    assert size == 42  # matches the mocked run_command output
+    assert err is None
+
+
+@pytest.mark.parametrize("download_or_count", ["count", "download"])
+@pytest.mark.parametrize(
+    "accept_header, result_file", JSON_ACCEPT_HEADERS_AND_RESULT_FILES
+)
+def test_download_and_count_json_malformed(
+    mock_command,
+    download_or_count,
+    accept_header,
+    result_file,
+):
+    mock_path_stat = mock_command(MODULE, "Path.stat")
+    mock_path_stat.return_value.st_size = 100  # non-empty result file
+
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.side_effect = Exception("jq failed")  # mocked command fails
+
+    size, err = get_result_size(
+        count_only=download_or_count == "count",
+        query_type="SELECT",
+        accept_header=accept_header,
+        result_file=result_file,
+    )
+
+    run_cmd_mock.assert_called_once()
+    assert size == 0
+    assert err["short"] == "Malformed JSON"
+    assert (
+        "curl returned with code 200, but the JSON is malformed: "
+        in err["long"]
+    )
+    assert "jq failed" in err["long"]  # exception text is part of the message
+
+
+def test_single_int_result_success(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "123"  # numeric string from the mocked command
+
+    single_int_result = get_single_int_result("result.json")
+
+    run_cmd_mock.assert_called_once()
+    assert single_int_result == 123  # returned as an int
+
+
+def test_single_int_result_non_int_fail(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "abc"  # output that is not an integer
+
+    single_int_result = get_single_int_result("result.json")
+
+    run_cmd_mock.assert_called_once()
+    assert single_int_result is None  # non-integer output yields None
+
+
+def test_single_int_result_failure(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.side_effect = Exception("jq failed")  # mocked command fails
+
+    single_int_result = get_single_int_result("result.json")
+
+    run_cmd_mock.assert_called_once()
+    assert single_int_result is None  # the exception is not propagated
+
+
+@pytest.mark.parametrize(
+    "query, expected",
+    [
+        # Basic types
+        ("SELECT ?x WHERE { ?x ?y ?z }", "SELECT"),
+        ("ASK { ?x ?y ?z }", "ASK"),
+        ("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }", "CONSTRUCT"),
+        ("DESCRIBE <http://example.org>", "DESCRIBE"),
+        # Case insensitivity
+        ("Select ?x WHERE { ?x ?y ?z }", "SELECT"),
+        ("ask { ?x ?y ?z }", "ASK"),
+        ("construct { ?s ?p ?o } WHERE { ?s ?p ?o }", "CONSTRUCT"),
+        ("Describe <http://example.org>", "DESCRIBE"),
+        # A PREFIX declaration before the query keyword
+        (
+            "PREFIX ex: <http://example.org/> SELECT ?x WHERE { ?x ex:p ?y }",
+            "SELECT",
+        ),
+        # First keyword wins when multiple present
+        ("SELECT ?x WHERE { ?x ?y ?z } CONSTRUCT { ?a ?b ?c }", "SELECT"),
+        # Unknown types
+        ("DELETE WHERE { ?x ?y ?z }", "UNKNOWN"),
+        ("", "UNKNOWN"),  # empty query string
+        ("SELECTED ?x WHERE { ?x ?y ?z }", "UNKNOWN"),  # SELECTED is not SELECT
+    ],
+)
+def test_sparql_query_type(query, expected):
+    assert sparql_query_type(query) == expected
+
+
+SAMPLE_QUERIES = [  # (name, description, query) tuples
+    ("q1", "cities query", "SELECT ?x WHERE { ?x a :City }"),
+    ("q2", "countries", "SELECT ?c WHERE { ?c a :Country }"),
+    ("q3", "people", "SELECT ?p WHERE { ?p a :Person }"),
+    ("q4", "rivers", "CONSTRUCT { ?r ?p ?o } WHERE { ?r a :River }"),
+    ("q5", "mountains", "ASK { ?m a :Mountain }"),
+]
+
+
+@pytest.mark.parametrize(
+    "query_ids, expected_names",
+    [
+        # Single ID
+        ("2", ["q2"]),
+        # Range
+        ("1-3", ["q1", "q2", "q3"]),
+        # $ as end of range
+        ("3-$", ["q3", "q4", "q5"]),
+        # $ as single value (last query)
+        ("$", ["q5"]),
+        # Comma-separated mixed
+        ("1,3,5", ["q1", "q3", "q5"]),
+        ("1-2,4-5", ["q1", "q2", "q4", "q5"]),
+        # All queries
+        ("1-$", ["q1", "q2", "q3", "q4", "q5"]),
+        # Out-of-range indices skipped
+        ("99", []),
+        ("4-7", ["q4", "q5"]),
+        # Leading/trailing commas (empty parts skipped)
+        (",1,2", ["q1", "q2"]),
+        ("1,2,", ["q1", "q2"]),
+        # Whitespace around parts
+        (" 1 , 2 ", ["q1", "q2"]),
+    ],
+)
+def test_filter_queries_by_ids(query_ids, expected_names):
+    result = filter_queries(SAMPLE_QUERIES, query_ids, None)  # no regex given
+    assert [name for name, _, _ in result] == expected_names
+
+
+@pytest.mark.parametrize(
+    "query_ids",
+    [
+        # Negative range start → int("") raises ValueError
+        "-2",
+        # Non-numeric
+        "abc",
+        # Duplicate via single IDs
+        "1,1,2",
+        "1-3,2,4,$",
+        # Duplicate via overlapping ranges
+        "1-3,2-4",
+    ],
+)
+def test_filter_queries_invalid_ids(query_ids):
+    assert filter_queries(SAMPLE_QUERIES, query_ids, None) == []  # all rejected
+
+
+def test_filter_queries_empty_input():
+    assert filter_queries([], "1-$", None) == []  # nothing to select from
+
+
+@pytest.mark.parametrize(
+    "query_regex, expected_names",
+    [
+        # Match on name
+        ("q1", ["q1"]),
+        # Match on description
+        ("cities", ["q1"]),
+        # Match on query body
+        ("CONSTRUCT", ["q4"]),
+        # Case-insensitive
+        ("CITIES", ["q1"]),
+        # Regex matching multiple queries
+        ("Country|Person", ["q2", "q3"]),
+        # No match
+        ("abcd", []),
+    ],
+)
+def test_filter_queries_by_regex(query_regex, expected_names):
+    result = filter_queries(SAMPLE_QUERIES, "1-$", query_regex)  # all IDs
+    assert [name for name, _, _ in result] == expected_names
+
+
+@pytest.mark.parametrize(
+    "query_ids, query_regex, expected_names",
+    [
+        ("1-3", "Country", ["q2"]),  # q2 is in the ID range and matches
+        ("2,3", "cities", []),  # only q1 matches, but it is not selected
+    ],
+)
+def test_filter_queries_ids_and_regex_combined(
+    query_ids, query_regex, expected_names
+):
+    result = filter_queries(SAMPLE_QUERIES, query_ids, query_regex)
+    assert [name for name, _, _ in result] == expected_names
+
+
+def test_filter_queries_invalid_regex():
+    assert filter_queries(SAMPLE_QUERIES, "1-$", "[invalid") == []
+
+
+VALID_YML = """\
+name: My Benchmark
+description: A test benchmark
+queries:
+  - name: q1
+    description: first query
+    query: SELECT ?x WHERE { ?x ?y ?z }
+  - name: q2
+    query: ASK { ?x ?y ?z }
+"""
+
+
+def test_parse_queries_yml_valid(tmp_path):
+    yml_file = tmp_path / "test.yml"
+    yml_file.write_text(VALID_YML)
+    name, description, queries = parse_queries_yml(str(yml_file))
+    assert name == "My Benchmark"
+    assert description == "A test benchmark"
+    assert queries == [
+        ("q1", "first query", "SELECT ?x WHERE { ?x ?y ?z }"),
+        ("q2", "", "ASK { ?x ?y ?z }"),
+    ]
+
+
+def test_parse_queries_yml_no_top_level_name(tmp_path):
+    yml_file = tmp_path / "test.yml"
+    yml_file.write_text("queries:\n  - name: q1\n    query: SELECT 1\n")
+    name, description, queries = parse_queries_yml(str(yml_file))
+    assert name is None
+    assert description is None
+    assert queries == [("q1", "", "SELECT 1")]
+
+
+@pytest.mark.parametrize(
+    "yml_content",
+    [
+        # Missing top-level 'queries' key
+        "name: test\n",
+        # 'queries' is not a list
+        "queries: not_a_list\n",
+        # Query item missing 'name'
+        "queries:\n  - query: SELECT 1\n",
+        # Query item missing 'query'
+        "queries:\n  - name: q1\n",
+        # Query item is not a dict
+        "queries:\n  - just a string\n",
+        # Not a dict at top level
+        "- item1\n- item2\n",
+        # Invalid YAML syntax
+        ":\n  bad: [yaml\n",
+    ],
+)
+def test_parse_queries_yml_invalid(tmp_path, yml_content):
+    yml_file = tmp_path / "test.yml"
+    yml_file.write_text(yml_content)
+    assert parse_queries_yml(str(yml_file)) == (None, None, [])
+
+
+def test_parse_queries_tsv_valid(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = (
+        "q1\tSELECT ?x WHERE { ?x ?y ?z }\nq2\tASK { ?x ?y ?z }\n"
+    )
+    result = parse_queries_tsv("cat queries.tsv")
+    assert result == [
+        ("q1", "", "SELECT ?x WHERE { ?x ?y ?z }"),
+        ("q2", "", "ASK { ?x ?y ?z }"),
+    ]
+
+
+def test_parse_queries_tsv_tab_in_query(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = "q1\tSELECT ?x\tWHERE { ?x ?y ?z }\n"
+    result = parse_queries_tsv("cat queries.tsv")
+    assert result == [("q1", "", "SELECT ?x\tWHERE { ?x ?y ?z }")]
+
+
+def test_parse_queries_tsv_empty_output(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.return_value = ""
+    assert parse_queries_tsv("cat queries.tsv") == []
+
+
+def test_parse_queries_tsv_command_failure(mock_command):
+    run_cmd_mock = mock_command(MODULE, "run_command")
+    run_cmd_mock.side_effect = Exception("command failed")
+    assert parse_queries_tsv("cat queries.tsv") == []
+
+
+@pytest.mark.parametrize(
+    "case",
+    [
+        pytest.param(
+            dict(cli=("CLI", "CLI Desc"), yml=("YML", "YML Desc"),
+                 dataset="wikidata", expected=("CLI", "CLI Desc")),
+            id="cli-takes-priority",
+        ),
+        pytest.param(
+            dict(cli=(None, None), yml=("YML", "YML Desc"),
+                 dataset="wikidata", expected=("YML", "YML Desc")),
+            id="yml-over-default",
+        ),
+        pytest.param(
+            dict(cli=(None, None), yml=(None, None),
+                 dataset="wikidata", expected=("Wikidata", "auto")),
+            id="default-from-dataset",
+        ),
+        pytest.param(
+            dict(cli=(None, None), yml=(None, None),
+                 dataset=None, expected=(None, None)),
+            id="all-none",
+        ),
+        pytest.param(
+            dict(cli=("CLI", None), yml=(None, "YML Desc"),
+                 dataset="wikidata", expected=("CLI", "YML Desc")),
+            id="cli-name-yml-desc",
+        ),
+    ],
+)
+def test_resolve_benchmark_metadata(case):
+    name, desc = resolve_benchmark_metadata(
+        *case["cli"], *case["yml"], case["dataset"]
+    )
+    exp_name, exp_desc = case["expected"]
+    assert name == exp_name
+    if exp_desc == "auto":
+        assert case["dataset"].capitalize() in desc
+        assert "benchmark-queries" in desc
+    else:
+        assert desc == exp_desc
diff --git a/test/qlever/commands/test_cache_stats_execute.py b/test/qlever/commands/test_cache_stats_execute.py
index 7dd716a0..3b6b5f6d 100644
--- a/test/qlever/commands/test_cache_stats_execute.py
+++ b/test/qlever/commands/test_cache_stats_execute.py
@@ -20,7 +20,7 @@ def test_execute_successful_basic_cache_stats(
     ):
         # Mock arguments for basic cache stats
         args = MagicMock()
-        args.server_url = None
+        args.sparql_endpoint = None
         args.host_name = "localhorst"
         args.port = 1234
         args.show = False
@@ -29,13 +29,13 @@ def test_execute_successful_basic_cache_stats(
         # Mock `subprocess.check_output` and `json.loads` as encoded bytes
         mock_check_output.side_effect = [
             # Mock cache_stats
-            b'{"pinned-size": 1e9, "non-pinned-size": 3e9}',
+            b'{"cache-size-pinned": 1e9, "cache-size-unpinned": 3e9}',
             # Mock cache_settings
             b'{"cache-max-size": "10 GB"}',
         ]
         # mock cache_stats_dict and cache_settings_dict as a dictionary
         mock_json_loads.side_effect = [
-            {"pinned-size": 1e9, "non-pinned-size": 3e9},
+            {"cache-size-pinned": 1e9, "cache-size-unpinned": 3e9},
             {"cache-max-size": "10 GB"},
         ]
 
@@ -77,20 +77,20 @@ def test_execute_detailed_cache_stats(
     ):
         # Mock arguments for detailed cache stats
         args = MagicMock()
-        args.server_url = "http://testlocalhost:1234"
+        args.sparql_endpoint = "http://testlocalhost:1234"
         args.show = False
         args.detailed = True
 
         # Mock the responses from `subprocess.check_output` and `json.loads`
         mock_check_output.side_effect = [
-            b'{"pinned-size": 2e9, "non-pinned-size": 1e9, "test-stat": 500}',
+            b'{"cache-size-pinned": 2e9, "cache-size-unpinned": 1e9, "test-stat": 500}',
             b'{"cache-max-size": "10 GB", "test-setting": 1000}',
         ]
         # CAREFUL: if value is float you will get an error in re.match
         mock_json_loads.side_effect = [
             {
-                "pinned-size": int(2e9),
-                "non-pinned-size": int(1e9),
+                "cache-size-pinned": int(2e9),
+                "cache-size-unpinned": int(1e9),
                 "test-stat": 500,
             },
             {"cache-max-size": "10 GB", "test-setting": 1000},
@@ -101,10 +101,10 @@ def test_execute_detailed_cache_stats(
 
         # Assertions
         expected_stats_call = (
-            f"curl -s {args.server_url} " f'--data-urlencode "cmd=cache-stats"'
+            f"curl -s {args.sparql_endpoint} " f'--data-urlencode "cmd=cache-stats"'
         )
         expected_settings_call = (
-            f"curl -s {args.server_url} "
+            f"curl -s {args.sparql_endpoint} "
             f'--data-urlencode "cmd=get-settings"'
         )
 
@@ -112,10 +112,10 @@ def test_execute_detailed_cache_stats(
         mock_check_output.assert_any_call(expected_settings_call, shell=True)
 
         # Verify that detailed stats and settings were logged as a table
-        mock_log.info.assert_any_call("pinned-size     : 2,000,000,000")
-        mock_log.info.assert_any_call("non-pinned-size : 1,000,000,000")
-        mock_log.info.assert_any_call("test-stat       : 500")
         mock_log.info.assert_any_call("cache-max-size : 10 GB")
+        mock_log.info.assert_any_call("cache-size-pinned   : 2,000,000,000")
+        mock_log.info.assert_any_call("cache-size-unpinned : 1,000,000,000")
+        mock_log.info.assert_any_call("test-stat           : 500")
         mock_log.info.assert_any_call("test-setting   : 1,000")
 
         self.assertTrue(result)
@@ -127,7 +127,7 @@ def test_execute_detailed_cache_stats(
     def test_execute_failed_cache_stats(self, mock_log, mock_check_output):
         # Mock arguments for basic cache stats
         args = MagicMock()
-        args.server_url = "http://testlocalhost:1234"
+        args.sparql_endpoint = "http://testlocalhost:1234"
         args.show = False
         args.detailed = False
 
@@ -153,7 +153,7 @@ def test_execute_invalid_cache_size_format(
     ):
         # Mock arguments for basic cache stats
         args = MagicMock()
-        args.server_url = None
+        args.sparql_endpoint = None
         args.port = 1234
         args.show = False
         args.detailed = False
@@ -189,18 +189,18 @@ def test_execute_empty_cache_size(
     ):
         # Mock arguments for basic cache stats
         args = MagicMock()
-        args.server_url = None
+        args.sparql_endpoint = None
         args.port = 1234
         args.show = False
         args.detailed = False
 
         # Mock the responses with empty cache size
         mock_check_output.side_effect = [
-            b'{"pinned-size": 0, "non-pinned-size": 0}',
+            b'{"cache-size-pinned": 0, "cache-size-unpinned": 0}',
             b'{"cache-max-size": "10 GB"}',
         ]
         mock_json_loads.side_effect = [
-            {"pinned-size": 0, "non-pinned-size": 0},
+            {"cache-size-pinned": 0, "cache-size-unpinned": 0},
             {"cache-max-size": "10 GB"},
         ]
 
diff --git a/test/qlever/commands/test_cache_stats_other_methods.py b/test/qlever/commands/test_cache_stats_other_methods.py
index e07bee6e..85f27eac 100644
--- a/test/qlever/commands/test_cache_stats_other_methods.py
+++ b/test/qlever/commands/test_cache_stats_other_methods.py
@@ -34,12 +34,12 @@ def test_additional_arguments(self):
 
         # Test that the default value for server-url is set correctly
         """Why is there no default="localhost:{port}"? """
-        self.assertEqual(args.server_url, None)
+        self.assertEqual(args.sparql_endpoint, None)
 
         # Test that the help text for server-url is correctly set
         argument_help = subparser._group_actions[-2].help
         self.assertEqual(
-            "URL of the QLever server, default is {host_name}:{port}",
+            "URL of the SPARQL endpoint, default is {host_name}:{port}",
             argument_help,
         )
 
diff --git a/test/qlever/commands/test_index_execute.py b/test/qlever/commands/test_index_execute.py
index 30484970..cff13ab9 100644
--- a/test/qlever/commands/test_index_execute.py
+++ b/test/qlever/commands/test_index_execute.py
@@ -35,16 +35,20 @@ def test_execute_successful_indexing_without_extras(
         args.only_pso_and_pos_permutations = False
         args.use_patterns = True
         args.parallel_parsing = False
+        args.add_has_word_triples = False
         args.text_index = "Test Index"
         args.stxxl_memory = False
         args.system = "native"
         args.show = False
         args.overwrite_existing = False
+        args.vocabulary_type = "on-disk-compressed"
         args.index_container = "test_container"
         args.image = "test_image"
         args.multi_input_json = False
         args.ulimit = None
+        args.encode_as_id = None
         args.parser_buffer_size = None
+        args.materialized_views = None
 
         # Mock glob, get_total_file_size, get_existing_index_files,
         # run_command and containerize
@@ -61,7 +65,8 @@ def test_execute_successful_indexing_without_extras(
         expected_index_cmd = (
             f"{args.cat_input_files} | {args.index_binary}"
             f" -i {args.name} -s {args.name}.settings.json"
-            f" -F {args.format} -f - | tee"
+            f" --vocabulary-type {args.vocabulary_type}"
+            f" -F {args.format} -f - 2>&1 | tee"
             f" {args.name}.index-log.txt"
         )
         index_cmd_call = call(expected_index_cmd, show_output=True)
@@ -113,6 +118,7 @@ def test_execute_indexing_with_already_existing_files(
         args.input_files = "*.nt"
         args.only_pso_and_pos_permutations = False
         args.use_patterns = True
+        args.add_has_word_triples = False
         args.text_index = None
         args.stxxl_memory = None
         args.system = "native"
@@ -121,6 +127,7 @@ def test_execute_indexing_with_already_existing_files(
         args.index_container = "test_container"
         args.image = "test_image"
         args.multi_input_json = False
+        args.materialized_views = None
 
         # Mock glob, get_total_file_size, get_existing_index_files,
         # run_command and containerize
@@ -174,6 +181,7 @@ def test_execute_fails_if_no_indexing_binary_is_found(
         args.input_files = "*.nt"
         args.only_pso_and_pos_permutations = False
         args.use_patterns = True
+        args.add_has_word_triples = False
         args.text_index = None
         args.stxxl_memory = None
         args.system = "native"
@@ -182,6 +190,7 @@ def test_execute_fails_if_no_indexing_binary_is_found(
         args.index_container = "test_container"
         args.image = "test_image"
         args.multi_input_json = False
+        args.materialized_views = None
 
         # Mock glob, get_total_file_size, get_existing_index_files,
         # run_command and containerize
@@ -238,16 +247,20 @@ def test_execute_total_file_size_greater_than_ten_gb(
         args.only_pso_and_pos_permutations = False
         args.use_patterns = True
         args.parallel_parsing = False
+        args.add_has_word_triples = False
         args.text_index = None
         args.stxxl_memory = None
         args.system = "native"
         args.show = False
         args.overwrite_existing = False
+        args.vocabulary_type = "on-disk-compressed"
         args.index_container = "test_container"
         args.image = "test_image"
         args.multi_input_json = False
         args.ulimit = None
+        args.encode_as_id = None
         args.parser_buffer_size = None
+        args.materialized_views = None
 
         # Mock glob, get_total_file_size, get_existing_index_files,
         # run_command and containerize
@@ -264,8 +277,9 @@ def test_execute_total_file_size_greater_than_ten_gb(
         expected_index_cmd = (
             f"ulimit -Sn 500000 && {args.cat_input_files} | {args.index_binary}"
             f" -i {args.name} -s {args.name}.settings.json"
+            f" --vocabulary-type {args.vocabulary_type}"
             f" -F {args.format} -f -"
-            f" | tee {args.name}.index-log.txt"
+            f" 2>&1 | tee {args.name}.index-log.txt"
         )
         mock_util_run_command.assert_called_once_with(
             f"{args.index_binary} --help"
@@ -344,15 +358,19 @@ def test_execute_successful_indexing_with_extras_and_show(
         args.multi_input_json = True
         args.cat_input_files = False
         args.only_pso_and_pos_permutations = True
-        args.use_patterns = False
+        args.use_patterns = "no"
+        args.add_has_word_triples = False
         args.text_index = "from_text_records_and_literals"
         args.stxxl_memory = True
         args.input_files = "*.nt"
         args.system = "native"
         args.settings_json = '{"example": "settings"}'
+        args.vocabulary_type = "on-disk-compressed"
         args.show = True
         args.ulimit = None
+        args.encode_as_id = None
         args.parser_buffer_size = None
+        args.materialized_views = None
 
         # Mock get_input_options_for_json
         mock_input_json.return_value = "test_input_stream"
@@ -364,13 +382,14 @@ def test_execute_successful_indexing_with_extras_and_show(
         expected_index_cmd = (
             f"{args.index_binary}"
             f" -i {args.name} -s {args.name}.settings.json"
+            f" --vocabulary-type {args.vocabulary_type}"
             f" {mock_input_json.return_value}"
-            f" --only-pso-and-pos-permutations --no-patterns"
+            f" --only-pso-and-pos-permutations"
             f" --no-patterns -w {args.name}.wordsfile.tsv"
             f" -d {args.name}.docsfile.tsv"
             f" --text-words-from-literals"
             f" --stxxl-memory {args.stxxl_memory}"
-            f" | tee {args.name}.index-log.txt"
+            f" 2>&1 | tee {args.name}.index-log.txt"
         )
         settings_json_cmd = (
             f"echo {shlex.quote(args.settings_json)} "
diff --git a/test/qlever/commands/test_index_other_methods.py b/test/qlever/commands/test_index_other_methods.py
index afc2b34c..2a808ad4 100644
--- a/test/qlever/commands/test_index_other_methods.py
+++ b/test/qlever/commands/test_index_other_methods.py
@@ -33,13 +33,17 @@ def test_relevant_qleverfile_arguments(self):
                 "index": [
                     "input_files",
                     "cat_input_files",
+                    "encode_as_id",
                     "multi_input_json",
                     "parallel_parsing",
                     "settings_json",
+                    "materialized_views",
+                    "vocabulary_type",
                     "index_binary",
                     "only_pso_and_pos_permutations",
                     "ulimit",
                     "use_patterns",
+                    "add_has_word_triples",
                     "text_index",
                     "stxxl_memory",
                     "parser_buffer_size",
diff --git a/test/qlever/commands/test_index_stats_methods.py b/test/qlever/commands/test_index_stats_methods.py
new file mode 100644
index 00000000..db27405b
--- /dev/null
+++ b/test/qlever/commands/test_index_stats_methods.py
@@ -0,0 +1,266 @@
+import pytest
+
+from qlever.commands.index_stats import (
+    compute_durations,
+    compute_sizes,
+    get_size_unit,
+    get_time_unit,
+)
+
+
+@pytest.mark.parametrize("explicit_unit", ["s", "min", "h"])
+@pytest.mark.parametrize("parse_duration", [None, 0, 50, 500, 5000])
+def test_get_time_unit_explicit(explicit_unit, parse_duration):
+    """Explicit time unit is returned as-is regardless of parse_duration."""
+    assert get_time_unit(explicit_unit, parse_duration) == explicit_unit
+
+
+@pytest.mark.parametrize(
+    "parse_duration, expected_unit",
+    [
+        (None, "h"),
+        (0, "s"),
+        (199, "s"),
+        (200, "min"),
+        (3599, "min"),
+        (3600, "h"),
+        (10000, "h"),
+    ],
+)
+def test_get_time_unit_auto(parse_duration, expected_unit):
+    """Auto mode picks unit based on parse_duration thresholds."""
+    assert get_time_unit("auto", parse_duration) == expected_unit
+
+
+@pytest.mark.parametrize("explicit_unit", ["B", "MB", "GB", "TB"])
+@pytest.mark.parametrize("total_size", [0, 500, int(1e7), int(1e13)])
+def test_get_size_unit_explicit(explicit_unit, total_size):
+    """Explicit size unit is returned as-is regardless of total_size."""
+    assert get_size_unit(explicit_unit, total_size) == explicit_unit
+
+
+@pytest.mark.parametrize(
+    "total_size, expected_unit",
+    [
+        (0, "B"),
+        (999_999, "B"),
+        (1_000_000, "MB"),
+        (999_999_999, "MB"),
+        (1_000_000_000, "GB"),
+        (999_999_999_999, "GB"),
+        (1_000_000_000_000, "TB"),
+        (5_000_000_000_000, "TB"),
+    ],
+)
+def test_get_size_unit_auto(total_size, expected_unit):
+    """Auto mode picks unit based on total_size thresholds."""
+    assert get_size_unit("auto", total_size) == expected_unit
+
+
+def test_compute_sizes_text_omitted_when_zero():
+    """Text index entry is excluded from result when text index size is zero."""
+    raw_sizes = {"index": 500, "vocabulary": 300, "text": 0, "total": 800}
+    result = compute_sizes(raw_sizes, "B")
+    assert "Files text.*" not in result
+    assert list(result.keys()) == [
+        "Files index.*",
+        "Files vocabulary.*",
+        "TOTAL size",
+    ]
+
+
+def test_compute_sizes_text_included_when_nonzero():
+    """Text index entry is included in result when text index size is nonzero."""
+    raw_sizes = {
+        "index": 500,
+        "vocabulary": 300,
+        "text": 200,
+        "total": 1000,
+    }
+    result = compute_sizes(raw_sizes, "B")
+    assert "Files text.*" in result
+    assert list(result.keys()) == [
+        "Files index.*",
+        "Files vocabulary.*",
+        "Files text.*",
+        "TOTAL size",
+    ]
+
+
+def test_compute_sizes_all_zero():
+    """All sizes zero: auto resolves to 'B', text index is omitted."""
+    raw_sizes = {"index": 0, "vocabulary": 0, "text": 0, "total": 0}
+    result = compute_sizes(raw_sizes, "auto")
+    assert result["Files index.*"] == (0, "B")
+    assert result["Files vocabulary.*"] == (0, "B")
+    assert result["TOTAL size"] == (0, "B")
+    assert "Files text.*" not in result
+
+
+@pytest.mark.parametrize(
+    "size_unit, divisor",
+    [("B", 1), ("MB", 1e6), ("GB", 1e9), ("TB", 1e12)],
+)
+def test_compute_sizes_conversion(size_unit, divisor):
+    """Raw byte sizes are correctly divided by the unit factor."""
+    raw_sizes = {
+        "index": 5_000_000_000,
+        "vocabulary": 1_000_000_000,
+        "text": 500_000_000,
+        "total": 6_500_000_000,
+    }
+    result = compute_sizes(raw_sizes, size_unit)
+    assert result["Files index.*"] == (5_000_000_000 / divisor, size_unit)
+    assert result["Files vocabulary.*"] == (1_000_000_000 / divisor, size_unit)
+    assert result["Files text.*"] == (500_000_000 / divisor, size_unit)
+    assert result["TOTAL size"] == (6_500_000_000 / divisor, size_unit)
+
+
+def test_compute_sizes_auto_unit_propagated():
+    """Auto-resolved unit is applied consistently to all entries."""
+    raw_sizes = {
+        "index": 2_000_000_000,
+        "vocabulary": 500_000_000,
+        "text": 100_000_000,
+        "total": 2_600_000_000,
+    }
+    result = compute_sizes(raw_sizes, "auto")
+    # total is 2.6e9 -> auto resolves to GB
+    for _, (_, unit) in result.items():
+        assert unit == "GB"
+
+
+def log_line(time: str, message: str) -> str:
+    """Build a timestamped log line matching the real log format."""
+    return f"2025-01-15 {time}.000 - INFO: {message}\n"
+
+
+# A complete log with all phases (new format)
+COMPLETE_LOG_LINES = [
+    log_line("10:00:00", "Processing triples from single input stream"),
+    log_line("10:01:00", "Merging partial vocabularies ..."),
+    log_line(
+        "10:02:00", "Converting triples from local IDs to global IDs ..."
+    ),
+    log_line("10:03:00", "Creating permutations SPO and SOP ..."),
+    log_line("10:05:00", "Creating permutations OSP and OPS ..."),
+    log_line("10:07:00", "Creating permutations PSO and POS ..."),
+    log_line("10:09:00", "Index build completed"),
+]
+
+
+def test_compute_durations_complete_build():
+    """Main phases all have durations; no text index; TOTAL is computed."""
+    result = compute_durations(COMPLETE_LOG_LINES, "s", False)
+    assert result["Parse input"] == (60.0, "s")
+    assert result["Build vocabularies"] == (60.0, "s")
+    assert result["Convert to global IDs"] == (60.0, "s")
+    assert result["Permutation SPO & SOP"] == (120.0, "s")
+    assert result["Permutation OSP & OPS"] == (120.0, "s")
+    assert result["Permutation PSO & POS"] == (120.0, "s")
+    assert result["Text index"] == (None, "s")
+    assert result["TOTAL time"] == (540.0, "s")
+
+
+def test_compute_durations_empty_lines():
+    """Empty input: no 'Processing' line found, returns empty dict."""
+    result = compute_durations([], "s", False)
+    assert result == {}
+
+
+def test_compute_durations_only_processing():
+    """Only 'Processing' line, no 'Merging': build still in first phase,
+    returns empty dict."""
+    lines = [
+        log_line("10:00:00", "Processing triples from single input stream")
+    ]
+    result = compute_durations(lines, "s", False)
+    assert result == {}
+
+
+def test_compute_durations_partial_build():
+    """Parse done and merge started, but nothing later logged: unfinished
+    phases have None durations and 'TOTAL time' is omitted."""
+    lines = [
+        log_line("10:00:00", "Processing triples from single input stream"),
+        log_line("10:01:00", "Merging partial vocabularies ..."),
+    ]
+    result = compute_durations(lines, "s", False)
+    assert result["Parse input"] == (60.0, "s")
+    assert result["Build vocabularies"] == (None, "s")
+    assert result["Convert to global IDs"] == (None, "s")
+    assert result["Text index"] == (None, "s")
+    assert "TOTAL time" not in result
+
+
+def test_compute_durations_with_text_index():
+    """Separate text index built after main build: TOTAL includes both."""
+    lines = COMPLETE_LOG_LINES + [
+        log_line("11:00:00", "Adding text index"),
+        log_line("11:10:00", "Text index build completed"),
+    ]
+    result = compute_durations(lines, "s", False)
+    assert result["Text index"] == (600.0, "s")
+    # TOTAL = main build (540s) + text index (600s)
+    assert result["TOTAL time"] == (540.0 + 600.0, "s")
+
+
+def test_compute_durations_ignore_text_index():
+    """ignore_text_index=True: text index duration is None, TOTAL excludes
+    text time."""
+    lines = COMPLETE_LOG_LINES + [
+        log_line("11:00:00", "Adding text index"),
+        log_line("11:10:00", "Text index build completed"),
+    ]
+    result = compute_durations(lines, "s", True)
+    assert result["Text index"] == (None, "s")
+    assert result["TOTAL time"] == (540.0, "s")
+
+
+def test_compute_durations_old_log_format():
+    """Old format uses 'Creating a pair' + 'Writing meta data for ...'
+    instead of 'Creating permutations ...'."""
+    lines = [
+        log_line("10:00:00", "Processing triples from single input stream"),
+        log_line("10:01:00", "Merging partial vocabularies ..."),
+        log_line(
+            "10:02:00", "Converting triples from local IDs to global IDs ..."
+        ),
+        log_line("10:03:00", "Creating a pair of permutations ..."),
+        log_line("10:03:30", "Writing meta data for SPO and SOP ..."),
+        log_line("10:05:00", "Creating a pair of permutations ..."),
+        log_line("10:05:30", "Writing meta data for OSP and OPS ..."),
+        log_line("10:07:00", "Index build completed"),
+    ]
+    result = compute_durations(lines, "s", False)
+    assert "Permutation SPO & SOP" in result
+    assert "Permutation OSP & OPS" in result
+    assert result["Permutation SPO & SOP"] == (120.0, "s")
+    assert result["Permutation OSP & OPS"] == (120.0, "s")
+
+
+def test_compute_durations_time_unit_conversion():
+    """Explicit time unit 'min': all durations converted from seconds."""
+    result = compute_durations(COMPLETE_LOG_LINES, "min", False)
+    assert result["Parse input"] == (1.0, "min")
+    assert result["TOTAL time"] == (9.0, "min")
+
+
+def test_compute_durations_auto_time_unit():
+    """Auto time unit resolved based on parse phase duration (60s < 200
+    -> 's')."""
+    result = compute_durations(COMPLETE_LOG_LINES, "auto", False)
+    # Parse phase is 60s which is < 200, so auto resolves to "s"
+    for _, (_, unit) in result.items():
+        assert unit == "s"
+
+
+def test_compute_durations_no_index_build_completed():
+    """Missing 'Index build completed' line: the last permutation has a
+    None duration and 'TOTAL time' is omitted."""
+    lines = COMPLETE_LOG_LINES[:-1]
+    result = compute_durations(lines, "s", False)
+    assert result["Permutation SPO & SOP"] == (120.0, "s")
+    assert result["Permutation OSP & OPS"] == (120.0, "s")
+    assert result["Permutation PSO & POS"] == (None, "s")
+    assert "TOTAL time" not in result
diff --git a/test/qlever/commands/test_start_execute.py b/test/qlever/commands/test_start_execute.py
index fac4357c..c5e23505 100644
--- a/test/qlever/commands/test_start_execute.py
+++ b/test/qlever/commands/test_start_execute.py
@@ -22,7 +22,7 @@ def test_construct_command_with_if():
     args.persist_updates = False
     args.access_token = True
     args.only_pso_and_pos_permutations = True
-    args.use_patterns = False
+    args.use_patterns = "no"
     args.use_text_index = "yes"
 
     # Execute the function
@@ -125,11 +125,12 @@ def test_check_binary_success(mock_run_cmd):
     # Setup args
     args = MagicMock()
     args.server_binary = "/test/path/server_binary"
+    args.system = "native"
     # mock run_cmd as successful
     mock_run_cmd.return_value = "Command works"
 
     # Execute the function
-    result = qlever.util.binary_exists(args.server_binary, "server-binary")
+    result = qlever.util.binary_exists(args.server_binary, "server-binary", args)
     # check if run_cmd was called once with
     mock_run_cmd.assert_called_once_with(f"{args.server_binary} --help")
     assert result
@@ -143,12 +144,13 @@ def test_check_binary_exception(mock_log, mock_run_cmd):
     # Setup args
     args = MagicMock()
     args.server_binary = "false_binary"
+    args.system = "native"
 
     # Simulate an exception when run_command is called
     mock_run_cmd.side_effect = Exception("Mocked command failure")
 
     # Execute the function
-    result = qlever.util.binary_exists(args.server_binary, "server-binary")
+    result = qlever.util.binary_exists(args.server_binary, "server-binary", args)
 
     # check if run_cmd was called once with
     mock_run_cmd.assert_called_once_with(f"{args.server_binary} --help")
@@ -331,7 +333,7 @@ def test_execute_kills_existing_server_on_same_port(
         args.persist_updates = False
         args.access_token = True
         args.only_pso_and_pos_permutations = True
-        args.use_patterns = False
+        args.use_patterns = "no"
         args.use_text_index = "yes"
 
         # Mock CacheStatsCommand
@@ -397,7 +399,7 @@ def test_execute_fails_due_to_existing_server(
         args.kill_existing_with_same_port = False
         args.port = "localhorst"
         args.port = 1234
-        args.cmdline_regex = f"^ServerMain.* -p {args.port}"
+        args.cmdline_regex = f"^qlever-server.* -p {args.port}"
         args.no_containers = True
         args.server_binary = "/test/path/server_binary"
         args.name = "TestName"
@@ -552,9 +554,7 @@ def test_execute_server_with_warmup(
         )
 
         # Check warmup was called
-        mock_run.assert_called_once_with(
-            args.warmup_cmd, shell=True, check=True
-        )
+        mock_run.assert_any_call(args.warmup_cmd, shell=True, check=True)
 
         # Assertions
         # Ensure the server status was checked
@@ -573,8 +573,10 @@ def test_execute_server_with_warmup(
     @patch("qlever.commands.start.Containerize.supported_systems")
     @patch("qlever.commands.start.wrap_command_in_container")
     @patch("qlever.commands.start.construct_command")
+    @patch("qlever.commands.start.binary_exists")
     def test_execute_containerize_and_description(
         self,
+        mock_binary_exists,
         mock_construct_cl,
         mock_run_containerize,
         mock_containerize,
@@ -623,6 +625,8 @@ def test_execute_containerize_and_description(
         # Mock Containerize
         mock_containerize.return_value = ["test1", "test2"]
 
+        mock_binary_exists.return_value = True
+
         # Instantiate the StartCommand
         sc = StartCommand()
 
diff --git a/test/qlever/commands/test_start_other_methods.py b/test/qlever/commands/test_start_other_methods.py
index d3c3efa3..2848dbb3 100644
--- a/test/qlever/commands/test_start_other_methods.py
+++ b/test/qlever/commands/test_start_other_methods.py
@@ -65,7 +65,7 @@ def test_additional_arguments(self):
 
         # Test that the help text for
         # --kill-existing-with-same-port is correctly set
-        argument_help = subparser._group_actions[-3].help
+        argument_help = subparser._group_actions[-4].help
         self.assertEqual(
             argument_help,
             "If a QLever server is already running "
@@ -77,5 +77,5 @@ def test_additional_arguments(self):
         self.assertEqual(args.no_warmup, False)
 
         # Test that the help text for --no-warmup is correctly set
-        argument_help = subparser._group_actions[-2].help
+        argument_help = subparser._group_actions[-3].help
         self.assertEqual(argument_help, "Do not execute the warmup command")
diff --git a/test/qlever/commands/test_status_execute.py b/test/qlever/commands/test_status_execute.py
index 7f993c81..56a2edb3 100644
--- a/test/qlever/commands/test_status_execute.py
+++ b/test/qlever/commands/test_status_execute.py
@@ -9,7 +9,7 @@
 
 def get_mock_args(only_show):
     args = MagicMock()
-    args.cmdline_regex = "^(ServerMain|IndexBuilderMain)"
+    args.cmdline_regex = "^(qlever-server|qlever-index)"
     args.show = only_show
     return [args, args.cmdline_regex, args.show]
 
diff --git a/test/qlever/commands/test_status_other_methods.py b/test/qlever/commands/test_status_other_methods.py
index c1954000..430a53bc 100644
--- a/test/qlever/commands/test_status_other_methods.py
+++ b/test/qlever/commands/test_status_other_methods.py
@@ -31,7 +31,7 @@ def test_additional_arguments(self):
         args = parser.parse_args([])
 
         # Test that the default value is set correctly
-        self.assertEqual(args.cmdline_regex, "^(ServerMain|IndexBuilderMain)")
+        self.assertEqual(args.cmdline_regex, "^(qlever-server|qlever-index)")
 
         # Test that the help text is correctly set
         argument_help = subparser._group_actions[-1].help
diff --git a/test/qlever/commands/test_stop_execute.py b/test/qlever/commands/test_stop_execute.py
index 41090172..b037d3fb 100644
--- a/test/qlever/commands/test_stop_execute.py
+++ b/test/qlever/commands/test_stop_execute.py
@@ -20,7 +20,7 @@ def test_execute_no_matching_processes_or_containers(
     ):
         # Setup args
         args = MagicMock()
-        args.cmdline_regex = "ServerMain.* -i [^ ]*%%NAME%%"
+        args.cmdline_regex = "qlever-server.* -i [^ ]*%%NAME%%"
         args.name = "TestName"
         args.no_containers = True
         args.server_container = "test_container"
@@ -61,7 +61,7 @@ def test_execute_with_matching_process(
     ):
         # Setup args
         args = MagicMock()
-        args.cmdline_regex = "ServerMain.* -i [^ ]*%%NAME%%"
+        args.cmdline_regex = "qlever-server.* -i [^ ]*%%NAME%%"
         args.name = "TestName"
         args.no_containers = True
         args.server_container = "test_container"
@@ -75,7 +75,7 @@ def test_execute_with_matching_process(
         # to test with real psutil.process objects use this:
 
         mock_process.as_dict.return_value = {
-            "cmdline": ["ServerMain", "-i", "/some/path/TestName"],
+            "cmdline": ["qlever-server", "-i", "/some/path/TestName"],
             "pid": 1234,
             "username": "test_user",
         }
@@ -115,7 +115,7 @@ def test_execute_with_containers(
     ):
         # Setup args
         args = MagicMock()
-        args.cmdline_regex = "ServerMain.* -i [^ ]*%%NAME%%"
+        args.cmdline_regex = "qlever-server.* -i [^ ]*%%NAME%%"
         args.name = "TestName"
         args.no_containers = False
         args.server_container = "test_container"
@@ -157,7 +157,7 @@ def test_execute_with_no_containers_and_no_matching_process(
     ):
         # Setup args
         args = MagicMock()
-        args.cmdline_regex = "ServerMain.* -i [^ ]*%%NAME%%"
+        args.cmdline_regex = "qlever-server.* -i [^ ]*%%NAME%%"
         args.name = "TestName"
         args.no_containers = False
         args.server_container = "test_container"
@@ -204,7 +204,7 @@ def test_execute_with_error_killing_process(
     ):
         # Setup args
         args = MagicMock()
-        args.cmdline_regex = "ServerMain.* -i [^ ]*%%NAME%%"
+        args.cmdline_regex = "qlever-server.* -i [^ ]*%%NAME%%"
         args.name = "TestName"
         args.no_containers = True
         args.server_container = "test_container"
@@ -216,7 +216,7 @@ def test_execute_with_error_killing_process(
         # Creating mock psutil.Process objects with necessary attributes
         mock_process = MagicMock()
         mock_process.as_dict.return_value = {
-            "cmdline": ["ServerMain", "-i", "/some/path/TestName"],
+            "cmdline": ["qlever-server", "-i", "/some/path/TestName"],
             "pid": 1234,
             "create_time": 1234567890,
             "memory_info": MagicMock(rss=1024 * 1024 * 512),
diff --git a/test/qlever/commands/test_stop_other_methods.py b/test/qlever/commands/test_stop_other_methods.py
index 7675788b..b69d7b40 100644
--- a/test/qlever/commands/test_stop_other_methods.py
+++ b/test/qlever/commands/test_stop_other_methods.py
@@ -38,7 +38,7 @@ def test_additional_arguments(self):
         args = parser.parse_args([])
 
         # Test that the default value for cmdline_regex is set correctly
-        self.assertEqual(args.cmdline_regex, "ServerMain.* -i [^ ]*%%NAME%%")
+        self.assertEqual(args.cmdline_regex, "qlever-server.* -i [^ ]*%%NAME%%")
 
         # Test that the help text for cmdline_regex is correctly set
         argument_help = subparser._group_actions[-2].help
diff --git a/test/qlever/conftest.py b/test/qlever/conftest.py
new file mode 100644
index 00000000..9f825cb1
--- /dev/null
+++ b/test/qlever/conftest.py
@@ -0,0 +1,16 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+
+@pytest.fixture
+def mock_command(monkeypatch):
+    def _mock(module_name: str, function_name: str, override=None):
+        # Patch `module_name.function_name` with `override` (or a fresh
+        # MagicMock when none is given) and return the installed object.
+        if override is None:
+            override = MagicMock(name=f"{function_name}_mock")
+        monkeypatch.setattr(f"{module_name}.{function_name}", override)
+        return override
+
+    return _mock

From 833e06f1434770fbdf20a9339d0d57d04a44280c Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Wed, 1 Apr 2026 13:34:34 +0200
Subject: [PATCH 4/7] Removed extract_queries.py

---
 src/qoxigraph/commands/extract_queries.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 120000 src/qoxigraph/commands/extract_queries.py

diff --git a/src/qoxigraph/commands/extract_queries.py b/src/qoxigraph/commands/extract_queries.py
deleted file mode 120000
index 5667cc52..00000000
--- a/src/qoxigraph/commands/extract_queries.py
+++ /dev/null
@@ -1 +0,0 @@
-../../qlever/commands/extract_queries.py
\ No newline at end of file

From 274fff973c726f4376a8de285387e3e891d9a3ac Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Wed, 8 Apr 2026 14:36:53 +0200
Subject: [PATCH 5/7] Add memory monitor to qoxigraph along with separate time
 log for load and optimization.

---
 src/qlever/commands/index.py          |  10 +-
 src/qlever/memory_monitor.py          | 164 ++++++++++++++++++++++++++
 src/qoxigraph/commands/index.py       |  62 ++++++----
 src/qoxigraph/commands/index_stats.py |  47 +++++---
 4 files changed, 246 insertions(+), 37 deletions(-)
 create mode 100644 src/qlever/memory_monitor.py

diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py
index d47f616f..8b2415d7 100644
--- a/src/qlever/commands/index.py
+++ b/src/qlever/commands/index.py
@@ -8,6 +8,7 @@
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
+from qlever.memory_monitor import MemoryMonitor
 from qlever.util import (
     binary_exists,
     get_existing_index_files,
@@ -322,7 +323,14 @@ def execute(self, args) -> bool:
 
         # Run the index command.
         try:
-            run_command(index_cmd, show_output=True)
+            with MemoryMonitor(
+                engine="qlever",
+                dataset=args.name,
+                cmdline_regex=args.index_binary,
+                container=args.index_container,
+                system=args.system,
+            ):
+                run_command(index_cmd, show_output=True)
         except Exception as e:
             log.error(f"Building the index failed: {e}")
             return False
diff --git a/src/qlever/memory_monitor.py b/src/qlever/memory_monitor.py
new file mode 100644
index 00000000..60442625
--- /dev/null
+++ b/src/qlever/memory_monitor.py
@@ -0,0 +1,164 @@
+from __future__ import annotations
+
+import json
+import re
+import threading
+import time
+from datetime import datetime
+from pathlib import Path
+
+import psutil
+
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import format_size, run_command
+
+
+def parse_container_mem_usage(usage: str) -> int:
+    """
+    Parse a memory usage string from `docker stats` / `podman stats`
+    like "4.2GiB", "150MiB", "512KiB" into bytes.
+    """
+    usage = usage.strip()
+    units = {
+        "TIB": 1024**4,
+        "GIB": 1024**3,
+        "MIB": 1024**2,
+        "KIB": 1024,
+        "B": 1,
+    }
+    for suffix, multiplier in units.items():
+        if usage.upper().endswith(suffix):
+            number = float(usage[: len(usage) - len(suffix)])
+            return int(number * multiplier)
+    return 0
+
+
+class MemoryMonitor:
+    """
+    Monitor memory usage of an index-building process. Works in both
+    native mode (via psutil) and container mode (via docker/podman stats).
+
+    Usage as a context manager:
+
+        with MemoryMonitor(engine="qlever", dataset="wikidata",
+                           cmdline_regex=r"qlever-index"):
+            run_command(cmd, show_output=True)
+
+        # For container mode:
+        with MemoryMonitor(engine="qlever", dataset="wikidata",
+                           cmdline_regex=r"qlever-index",
+                           container="qlever.index.wikidata",
+                           system="docker"):
+            run_command(cmd, show_output=True)
+    """
+
+    def __init__(
+        self,
+        engine: str,
+        dataset: str,
+        cmdline_regex: str,
+        container: str | None = None,
+        system: str | None = None,
+        interval: float = 1.0,
+        output_dir: Path = Path.cwd(),
+    ):
+        self.engine = engine
+        self.dataset = dataset
+        self.cmdline_regex = cmdline_regex
+        self.container = container
+        self.system = system
+        self.interval = interval
+        self.output_dir = Path(output_dir)
+        self.peak_rss = 0
+        self.samples = []
+        self.stop_event = threading.Event()
+        self.thread = None
+        self.start_time = 0
+
+    def sample_native(self) -> int:
+        """
+        Sum the RSS of the first child whose cmdline matches `cmdline_regex`
+        plus its descendants (0 if none); exited/inaccessible ones are skipped.
+        """
+        for child in psutil.Process().children(recursive=True):
+            try:
+                cmdline = " ".join(child.cmdline())
+                if not re.search(self.cmdline_regex, cmdline):
+                    continue
+                procs = [child, *child.children(recursive=True)]
+            except (psutil.NoSuchProcess, psutil.AccessDenied):
+                continue
+            rss = 0
+            for proc in procs:
+                try:
+                    rss += proc.memory_info().rss
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    pass
+            return rss
+        return 0
+
+    def sample_container(self) -> int:
+        """
+        Ask the container runtime (`<system> stats`) how much memory the
+        index container uses, in bytes; any failure is reported as 0.
+        """
+        stats_cmd = (
+            f"{self.system} stats --no-stream"
+            f" --format '{{{{.MemUsage}}}}' {self.container}"
+        )
+        try:
+            raw = run_command(stats_cmd, return_output=True)
+            used, _, _limit = raw.strip().partition("/")
+            return parse_container_mem_usage(used.strip())
+        except Exception:
+            return 0
+
+    def run_loop(self):
+        sample = (
+            self.sample_container
+            if self.system in Containerize.supported_systems()
+            else self.sample_native
+        )
+        while not self.stop_event.is_set():
+            rss = sample()
+            self.peak_rss = max(self.peak_rss, rss)
+            elapsed = time.monotonic() - self.start_time
+            self.samples.append((elapsed, rss))
+            self.stop_event.wait(self.interval)
+
+    def save(self):
+        path = (
+            self.output_dir / f"{self.engine}.{self.dataset}.memory-log.json"
+        )
+        data = {
+            "engine": self.engine,
+            "dataset": self.dataset,
+            "start_time": datetime.fromtimestamp(
+                time.time() - (time.monotonic() - self.start_time)
+            ).isoformat(timespec="seconds"),
+            "peak_rss_bytes": self.peak_rss,
+            "peak_rss_human": format_size(self.peak_rss),
+            "elapsed_s": (
+                round(self.samples[-1][0], 1) if self.samples else 0
+            ),
+            "samples": [
+                {"elapsed_s": round(t, 1), "rss_bytes": r}
+                for t, r in self.samples
+            ],
+        }
+        with open(path, "w") as f:
+            json.dump(data, f, indent=2)
+
+    def __enter__(self):
+        self.start_time = time.monotonic()
+        self.thread = threading.Thread(target=self.run_loop, daemon=True)
+        self.thread.start()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.stop_event.set()
+        self.thread.join()
+        self.save()
+        log.info(f"Peak memory usage: {format_size(self.peak_rss)}")
+        return False
diff --git a/src/qoxigraph/commands/index.py b/src/qoxigraph/commands/index.py
index 82135914..e3915ec9 100644
--- a/src/qoxigraph/commands/index.py
+++ b/src/qoxigraph/commands/index.py
@@ -8,6 +8,7 @@
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
+from qlever.memory_monitor import MemoryMonitor
 
 
 def wrap_cmd_in_container(args, cmd: str, ulimit: int | None = None) -> str:
@@ -142,30 +143,51 @@ def execute(self, args) -> bool:
         # file. Oxigraph's progress output is unreliable (may not print a
         # final summary line when loading multiple files), so we measure
         # the time externally.
+        #
+        # The MemoryMonitor wraps both the load and optimize steps so
+        # that peak RSS is tracked across the entire indexing workflow.
         log_file_name = f"{args.name}.index-log.txt"
-        try:
-            start_time = time.time()
-            util.run_command(index_cmd, show_output=True, show_stderr=True)
-            elapsed_s = time.time() - start_time
-            with open(log_file_name, "a") as f:
-                f.write(f"Total elapsed time: {elapsed_s:.0f}s\n")
-        except Exception as e:
-            log.error(f"Building the index failed: {e}")
-            return False
-
-        if optimize_cmd:
+        with MemoryMonitor(
+            engine="qoxigraph",
+            dataset=args.name,
+            cmdline_regex=args.index_binary,
+            container=args.index_container,
+            system=args.system,
+        ):
             try:
-                log.info("")
-                log.info("Optimizing read-only database storage:")
-                self.show(optimize_cmd)
+                load_start = time.time()
                 util.run_command(
-                    optimize_cmd, show_output=True, show_stderr=True
+                    index_cmd, show_output=True, show_stderr=True
                 )
+                load_s = time.time() - load_start
             except Exception as e:
-                log.error(f"Optimizing the database storage failed: {e}")
-                log.info(
-                    f"Please run manually: "
-                    f"{args.index_binary} optimize -l {args.name}_index/"
-                )
+                log.error(f"Building the index failed: {e}")
+                return False
+
+            optimize_s = 0.0
+            if optimize_cmd:
+                try:
+                    log.info("")
+                    log.info("Optimizing read-only database storage:")
+                    self.show(optimize_cmd)
+                    optimize_start = time.time()
+                    util.run_command(
+                        optimize_cmd, show_output=True, show_stderr=True
+                    )
+                    optimize_s = time.time() - optimize_start
+                except Exception as e:
+                    log.error(f"Optimizing the database storage failed: {e}")
+                    log.info(
+                        f"Please run manually: "
+                        f"{args.index_binary} optimize -l {args.name}_index/"
+                    )
+
+        with open(log_file_name, "a") as f:
+            f.write(f"Load time: {load_s:.0f}s\n")
+            if optimize_cmd:
+                f.write(f"Optimize time: {optimize_s:.0f}s\n")
+            f.write(
+                f"Total elapsed time: {load_s + optimize_s:.0f}s\n"
+            )
 
         return True
diff --git a/src/qoxigraph/commands/index_stats.py b/src/qoxigraph/commands/index_stats.py
index 672f4a53..5e386bf6 100644
--- a/src/qoxigraph/commands/index_stats.py
+++ b/src/qoxigraph/commands/index_stats.py
@@ -25,9 +25,9 @@ class IndexStatsCommand(QleverIndexStatsCommand):
     def execute_time(
         self, args, log_file_name: str
     ) -> dict[str, tuple[float | None, str]]:
-        """Parse total index build time from the index log file."""
+        """Parse index build times from the index log file."""
         try:
-            # Read the last few lines of the log file (the total time is
+            # Read the last few lines of the log file (the times are
             # always near the end).
             log_text = util.run_command(
                 f"tail {log_file_name}", return_output=True
@@ -36,25 +36,40 @@ def execute_time(
             log.error(f"Problem reading index log file {log_file_name}: {e}")
             return {}
 
-        stats = {}
-        # Pattern: "Total elapsed time: <number>s" (total time, always last)
-        total_pattern = re.compile(r"Total elapsed time: ([\d,]+)s$")
+        patterns = {
+            "Load time": re.compile(r"Load time: ([\d,]+)s$"),
+            "Optimize time": re.compile(r"Optimize time: ([\d,]+)s$"),
+            "TOTAL time": re.compile(r"Total elapsed time: ([\d,]+)s$"),
+        }
 
+        raw_seconds = {}
         for line in log_text.splitlines():
-            match = total_pattern.search(line)
-            if not match:
-                continue
+            for name, pattern in patterns.items():
+                match = pattern.search(line)
+                if match:
+                    try:
+                        raw_seconds[name] = float(
+                            match.group(1).replace(",", "")
+                        )
+                    except (ValueError, TypeError):
+                        pass
+
+        if not raw_seconds:
+            return {}
 
-            try:
-                value_s = float(match.group(1).replace(",", ""))
-            except (ValueError, TypeError):
-                continue
+        # Pick a time unit based on the total time.
+        total_s = raw_seconds.get("TOTAL time")
+        time_unit = get_time_unit(args.time_unit, total_s)
+        unit_factor = get_time_unit_factor(time_unit)
 
-            time_unit = get_time_unit(args.time_unit, value_s)
-            unit_factor = get_time_unit_factor(time_unit)
+        stats = {}
+        for name in ["Load time", "Optimize time", "TOTAL time"]:
+            if name in raw_seconds:
+                stats[name] = (raw_seconds[name] / unit_factor, time_unit)
 
-            stats["TOTAL time"] = (value_s / unit_factor, time_unit)
-            break
+        # If there was no optimize step, Load and TOTAL are identical
+        if "Optimize time" not in stats:
+            stats.pop("Load time", None)
 
         return stats
 

From 17e66ca7a0556b706c6504f6c4579262645378df Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Wed, 8 Apr 2026 18:18:44 +0200
Subject: [PATCH 6/7] Take new memory monitor changes

---
 src/qlever/commands/index.py |  1 -
 src/qlever/memory_monitor.py | 48 +++++++++++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/qlever/commands/index.py b/src/qlever/commands/index.py
index 8b2415d7..5f0cb4b5 100644
--- a/src/qlever/commands/index.py
+++ b/src/qlever/commands/index.py
@@ -324,7 +324,6 @@ def execute(self, args) -> bool:
         # Run the index command.
         try:
             with MemoryMonitor(
-                engine="qlever",
                 dataset=args.name,
                 cmdline_regex=args.index_binary,
                 container=args.index_container,
diff --git a/src/qlever/memory_monitor.py b/src/qlever/memory_monitor.py
index 60442625..ea33df2c 100644
--- a/src/qlever/memory_monitor.py
+++ b/src/qlever/memory_monitor.py
@@ -9,6 +9,7 @@
 
 import psutil
 
+from qlever import engine_name
 from qlever.containerize import Containerize
 from qlever.log import log
 from qlever.util import format_size, run_command
@@ -16,15 +17,20 @@
 
 def parse_container_mem_usage(usage: str) -> int:
     """
-    Parse a memory usage string from `docker stats` / `podman stats`
-    like "4.2GiB", "150MiB", "512KiB" into bytes.
+    Parse a memory usage string from ``docker stats`` or ``podman stats``
+    into bytes.  Docker reports binary units (GiB, MiB) while Podman
+    reports decimal units (GB, MB).
     """
     usage = usage.strip()
     units = {
         "TIB": 1024**4,
+        "TB": 1000**4,
         "GIB": 1024**3,
+        "GB": 1000**3,
         "MIB": 1024**2,
+        "MB": 1000**2,
         "KIB": 1024,
+        "KB": 1000,
         "B": 1,
     }
     for suffix, multiplier in units.items():
@@ -41,13 +47,12 @@ class MemoryMonitor:
 
     Usage as a context manager:
 
-        with MemoryMonitor(engine="qlever", dataset="wikidata",
-                           cmdline_regex=r"qlever-index"):
+        with MemoryMonitor(dataset="wikidata", cmdline_regex="qlever-index"):
             run_command(cmd, show_output=True)
 
         # For container mode:
-        with MemoryMonitor(engine="qlever", dataset="wikidata",
-                           cmdline_regex=r"qlever-index",
+        with MemoryMonitor(dataset="wikidata",
+                           cmdline_regex="qlever-index",
                            container="qlever.index.wikidata",
                            system="docker"):
             run_command(cmd, show_output=True)
@@ -55,7 +60,6 @@ class MemoryMonitor:
 
     def __init__(
         self,
-        engine: str,
         dataset: str,
         cmdline_regex: str,
         container: str | None = None,
@@ -63,7 +67,21 @@ def __init__(
         interval: float = 1.0,
         output_dir: Path = Path.cwd(),
     ):
-        self.engine = engine
+        """
+        Args:
+            dataset:        Name of the dataset being indexed.
+            cmdline_regex:  Regex matched against child process command
+                            lines to identify the index process (native
+                            mode only).
+            container:      Container name to query for memory stats.
+                            When set together with ``system``, sampling
+                            uses ``docker/podman stats`` instead of
+                            psutil.
+            system:         Container runtime ("docker" or "podman").
+            interval:       Seconds between samples (default 1.0).
+            output_dir:     Directory for the JSON memory log file.
+        """
+        self.engine = engine_name
         self.dataset = dataset
         self.cmdline_regex = cmdline_regex
         self.container = container
@@ -115,6 +133,11 @@ def sample_container(self) -> int:
             return 0
 
     def run_loop(self):
+        """
+        Polling loop that runs on a background thread. Selects the
+        appropriate sampling method (native or container) and collects
+        (elapsed_seconds, rss_bytes) tuples until the stop event is set.
+        """
         sample = (
             self.sample_container
             if self.system in Containerize.supported_systems()
@@ -128,8 +151,13 @@ def run_loop(self):
             self.stop_event.wait(self.interval)
 
     def save(self):
+        """
+        Write all collected samples and metadata to a JSON file at
+        ``<output_dir>/<engine>.<dataset>.memory-log.json``.
+        """
         path = (
-            self.output_dir / f"{self.engine}.{self.dataset}.memory-log.json"
+            self.output_dir
+            / f"{self.engine.lower()}.{self.dataset.lower()}.memory-log.json"
         )
         data = {
             "engine": self.engine,
@@ -151,12 +179,14 @@ def save(self):
             json.dump(data, f, indent=2)
 
     def __enter__(self):
+        """Start the background sampling thread."""
         self.start_time = time.monotonic()
         self.thread = threading.Thread(target=self.run_loop, daemon=True)
         self.thread.start()
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Stop sampling, persist results, and log peak memory usage."""
         self.stop_event.set()
         self.thread.join()
         self.save()

From bb59bc9de51a0bd41af69e33a02985aa409e95f9 Mon Sep 17 00:00:00 2001
From: tanmay-9 <tanmaygarg9879@gmail.com>
Date: Wed, 8 Apr 2026 18:26:17 +0200
Subject: [PATCH 7/7] Remove redundant example_queries and change symlinked
 get-data to a simple import

---
 src/qoxigraph/commands/example_queries.py | 12 ------------
 src/qoxigraph/commands/get_data.py        |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)
 delete mode 100644 src/qoxigraph/commands/example_queries.py
 mode change 120000 => 100644 src/qoxigraph/commands/get_data.py

diff --git a/src/qoxigraph/commands/example_queries.py b/src/qoxigraph/commands/example_queries.py
deleted file mode 100644
index 4ef76c24..00000000
--- a/src/qoxigraph/commands/example_queries.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import annotations
-
-from qlever.commands.example_queries import (
-    ExampleQueriesCommand as QleverExampleQueriesCommand,
-)
-
-
-class ExampleQueriesCommand(QleverExampleQueriesCommand):
-    def execute(self, args) -> bool:
-        if not args.sparql_endpoint:
-            args.sparql_endpoint = f"{args.host_name}:{args.port}/query"
-        return super().execute(args)
diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py
deleted file mode 120000
index 4900dbb8..00000000
--- a/src/qoxigraph/commands/get_data.py
+++ /dev/null
@@ -1 +0,0 @@
-../../qlever/commands/get_data.py
\ No newline at end of file
diff --git a/src/qoxigraph/commands/get_data.py b/src/qoxigraph/commands/get_data.py
new file mode 100644
index 00000000..29bba0e2
--- /dev/null
+++ b/src/qoxigraph/commands/get_data.py
@@ -0,0 +1 @@
+from qlever.commands.get_data import GetDataCommand  # noqa