diff --git a/.gitignore b/.gitignore
index fee070a..648a984 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,13 +3,17 @@
 .ipynb_checkpoints/
 */*.egg-info/*
 .idea/
-
+build/
+logs/
 __pycache__/
 docs/
 cache/
 data/
-datalake/
+plugins/
+
+minio/
+neo4j/
 neo4j_data/
-postgress_storage/
-storage/
+chromadb/
+postgres/
 
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b053566
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM apache/airflow:2.10.0
+
+# Set the working directory
+WORKDIR /app
+
+# Switch to airflow user to run the application
+USER airflow
+
+# Copy the requirements file and install dependencies
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Set the entrypoint to Airflow
+ENTRYPOINT ["airflow"]
diff --git a/Makefile b/Makefile
index 417c95d..72b4428 100644
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,118 @@
-.PHONY: server etl test
+.PHONY: setup install start stop profile server etl test build clean
 
-# Default port for the server
-PORT ?= 8000
+.PHONY: install
+install:
+	@echo "Installing requirements..."
+	@pip install -r requirements.txt
 
-# Path for ETL documents
-ETL_PATH ?= /path/to/docspecs
-FORCE ?= true
+.PHONY: setup
+setup: install
+	@if [ ! -f .env ]; then \
+		echo "Generating .env file with FERNET_KEY..."; \
+		echo "Generating .env file with FERNET_KEY..."; \
+		python3 -c "from cryptography.fernet import Fernet; \
+		fernet_key = Fernet.generate_key().decode(); \
+		template = 'FERNET_KEY={fernet_key}\\nAIRFLOW_UID=50000'; \
+		print(template.format(fernet_key=fernet_key)); \
+		print('_AIRFLOW_WWW_USER_USERNAME=airflow'); \
+		print('_AIRFLOW_WWW_USER_PASSWORD=airflow'); " > .env; \
+  		echo ".env file generated."; \
+	else \
+		echo ".env file already exists. Skipping FERNET_KEY generation."; \
+	fi
+	@if [ ! -f config/local/.env ]; then \
+		echo "Copying .env file to config/local..."; \
+		cp .env config/local/; \
+		echo ".env file copied."; \
+	else \
+		echo "config/local/.env file already exists. Skipping config/local copy."; \
+	fi
+	@echo "Launching minio..."
+	@docker-compose up -d minio
+	@echo "Waiting for minio to start..."
+	@until docker-compose exec minio mc ready local; do \
+		echo "Minio is not healthy yet. Retrying in 5 seconds..."; \
+		sleep 5; \
+	done
+	@echo "Setting up local alias..."
+	@docker-compose exec minio mc alias set minio http://localhost:9000 minioadmin minioadmin
+	@echo "Checking if bucket 'legal' exists..."
+	@if ! docker-compose exec minio mc ls minio/legal; then \
+  		echo "Creating bucket 'legal'..."; \
+  		docker-compose exec minio mc mb minio/legal; \
+ 	else \
+  		echo "Bucket 'legal' already exists. Skipping creation."; \
+	fi
+	@echo "Checking if bucket 'airflow-logs' exists..."
+	@if ! docker-compose exec minio mc ls minio/airflow-logs; then \
+  		echo "Creating bucket 'airflow-logs'..."; \
+  		docker-compose exec minio mc mb minio/airflow-logs; \
+ 	else \
+  		echo "Bucket 'airflow-logs' already exists. Skipping creation."; \
+	fi
+	@echo "Minio setup complete. Stopping minio..."
+	@docker-compose stop minio
+	@echo "Initializing Airflow..."
+	@docker-compose up -d airflow-webserver
+	@echo "Waiting for Airflow to start..."
+	@until docker-compose exec airflow-webserver airflow db check; do \
+		echo "Airflow is not healthy yet. Retrying in 5 seconds..."; \
+		sleep 5; \
+	done
+	@echo "Creating Airflow user..."
+	@docker-compose exec airflow-webserver airflow users create -u airflow -p airflow -r Admin --verbose -f air -l flow -e airflow@airflow.air
+	@if  docker-compose exec airflow-webserver airflow connections get minio; then \
+		echo "Connection 'minio' already exists. Skipping creation."; \
+	else \
+		echo "Creating connection 'minio'..."; \
+		docker-compose exec airflow-webserver airflow connections add --conn-login minioadmin --conn-password minioadmin --conn-host minio --conn-port 9000 --conn-schema http --conn-extra '{"endpoint_url": "http://minio:9000"}' --conn-type aws minio; \
+	fi
+	@echo "Stopping Airflow..."
+	@docker-compose stop airflow-webserver
+	@echo "Setup complete."
+
+.PHONY: start
+start:
+	@echo "Detecting virtual environment..."
+	@if [ -n "$$VIRTUAL_ENV" ]; then \
+		echo "Virtual environment detected at $$VIRTUAL_ENV"; \
+		export VIRTUAL_ENV_PATH=$$VIRTUAL_ENV; \
+	else \
+		echo "No virtual environment detected."; \
+	fi
+	@echo "Starting up the microservices..."
+	@docker-compose up -d
+	@echo "Done."
+	@echo "\nFrontends are available at the following links:"
+	@echo "ChromaDB: http://localhost:3000/collections/legal-database"
+	@echo "Neo4j: http://localhost:7474"
+	@echo "Minio: http://localhost:9000"
+	@echo "Airflow: http://localhost:8080"
+
+.PHONY: stop
+stop:
+	@echo "Stopping the microservices..."
+	@docker-compose down
+
+
+.PHONY: profile
+profile:
+	@py-spy record -o profile.svg -- python dags/jurisprudencia.py
+
+# Docker build
+.PHONY: build
+build:
+	@docker build -t semantic_airflow .
 
 # Run the server
 server:
 	@echo "Running the server on port $(PORT)..."
-	@semantic server --port $(PORT)
+	@verdictnet server --port $(PORT)
 
 # Run the ETL pipeline
 etl:
 	@echo "Running the ETL pipeline with path $(ETL_PATH) and force $(FORCE)..."
-	@semantic etl run --path $(ETL_PATH) --force $(FORCE)
+	@verdictnet etl run --path $(ETL_PATH) --force $(FORCE)
 
 # Run tests
 test:
@@ -25,4 +122,4 @@ test:
 # Clean the vector database
 clean:
 	@echo "Cleaning the vector database..."
-	@semantic etl clean
\ No newline at end of file
+	@verdictnet etl clean
\ No newline at end of file
diff --git a/README.md b/README.md
index 4459095..6043221 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,49 @@
-# Semantic Graph Search Project
+# VerdictNet: Legal Semantic Search Engine
 
 This project is a semantic graph search system designed to manage and query data.
 Currently, the interface is a simple command-line interface (CLI) tool and a web server.
 The system supports data ingestion, cleaning, querying, and running a server for frontend interactions.
 
 
-## Quick Start
-You should be able to kickstart the project by launching the docker compose setup and then starting the server:
+## Development Quick Start
+It is strongly recommended to use a virtual environment to run the project. You should be able to kickstart the project by running the following commands:
 ```sh
-  $ docker-compose up
+  $ make setup
 ```
-This will launch the following services:
+
+This will
+- Install the package requirements in the current python environment (python 3.12 recommended)
+- Create an `.env` file with the necessary environment variables if it does not exist. Copy this `.env` file to the `config/local` directory if it does not already exist. 
+- Create the `datalake` and `airflow-logs` buckets in the Minio object storage.
+- Create the `airflow` user in Airflow.
+- Create the `minio` Airflow connection.
+- Install development dependencies.
+
+After this, you can start the development environment by running:
+```sh
+  $ make start
+```
+The first launch will take some time because it will build the docker images.
+
+When done, the following services will be up and running:
 - [ChromaDB Browser: `http://localhost:3000/collections/legal-database`](http://localhost:3000/collections/legal-database). This is the vector Database used to run semantic queries.
 - [Neo4J Browser: `http://localhost:7474`](http://localhost:7474). This is a GUI to the graph database that will hold the relationships between the different documents indexed in the ChromaDB.
 - [Airflow: `http://localhost:8080`](http://localhost:8080). This is the scheduler used to run daily data mining tasks.
 - [Minio Console: `http://localhost:9001`](http://localhost:9001). This is the object storage used to store the documents in local develpment envs.
 
+The Postgress database is used by Airflow and is persisted to a `postgress_service`. This is useful if you want to do a clean start and not lose the data in the database.
+
+
+
+### Running the ETL pipeline
+To run the ETL pipeline, you can run the following command:
+```sh
+  $ make etl
+```
+
 Finally, run
 ```sh
-  $ semantic server
+  $ verdictnet server
 ````
 to launch the frontend interface, accessible through
 - [Frontend: `http://localhost:8000`](http://localhost:8000)
@@ -33,7 +58,7 @@ Run data pipelines to ingest and process documents.
 
 #### Usage:
 ```sh
-$ semantic etl [--path PATH] [--force FORCE] {clean,run}
+$ verdictnet etl [--path PATH] [--force FORCE] {clean,run}
 
 --path PATH: Path where to look for document specs.
 --force FORCE: Force download of documents.
@@ -46,13 +71,13 @@ run: Ingest data into the vector database.
 Query the data stored in the system.
 Usage:
 ```sh
-    $ semantic query [--query QUERY] [--n_results N_RESULTS] [--interactive]
+    $ verdictnet query [--query QUERY] [--n_results N_RESULTS] [--interactive]
 ```
 ### Server
 Run the server to provide a frontend interface.
 Usage:
 ```sh
-    $ semantic server [-p PORT]
+    $ verdictnet server [-p PORT]
     
     -p, --port PORT: Port to run the frontend on (default: 8000).
 ```
@@ -60,16 +85,16 @@ Usage:
 ## Example usage
 ```sh
 # Clean the vector database
-semantic etl clean
+verdictnet etl clean
 
 # Run the ETL pipeline
-semantic etl run --path /path/to/docspecs --force true
+verdictnet etl run --path /path/to/docspecs --force true
 
 # Query the data
-semantic query --query "example query" --n_results 5
+verdictnet query --query "example query" --n_results 5
 
 # Run the server
-semantic server --port 8080
+verdictnet server --port 8080
 ```
 
 ## Configuration
diff --git a/config.ini b/config.ini
index cd2617e..05f70ad 100644
--- a/config.ini
+++ b/config.ini
@@ -1,3 +1,6 @@
+# This is the config file meant for running the application in the host machine
+# It uses the minio storage
+
 [storage]
 type: s3
 bucket: legal
@@ -17,6 +20,7 @@ user: neo4j
 password: neo4jtest
 
 [embedding]
+# Lightweight, fast model
 model_name_or_path: paraphrase-mpnet-base-v2
 cache: cache/
 
diff --git a/config/local/airflow.cfg b/config/local/airflow.cfg
new file mode 100644
index 0000000..b396ebe
--- /dev/null
+++ b/config/local/airflow.cfg
@@ -0,0 +1,12 @@
+[logging]
+remote_logging = True
+remote_base_log_folder = s3://airflow-logs
+remote_log_conn_id = minio
+encrypt_s3_logs = False
+
+[webserver]
+default_dag_run_display_number = 250
+expose_config = True
+
+[celery]
+worker_concurrency = 2
diff --git a/config/local/config.ini b/config/local/config.ini
new file mode 100644
index 0000000..ac523d4
--- /dev/null
+++ b/config/local/config.ini
@@ -0,0 +1,30 @@
+[storage]
+type: s3
+bucket: legal
+collection: legal-database
+raw: datalake/raw/
+refined: datalake/refined/
+html: datalake/html/
+
+[chroma]
+type: http
+host: chromadb
+port: 8000
+
+[neo4j]
+url: bolt://neo4j:7687
+user: neo4j
+password: neo4jtest
+
+[embedding]
+model_name_or_path: paraphrase-mpnet-base-v2
+# use this because this is the mounting point in the docker compose
+cache: /cache
+
+[rag]
+n_results: 5
+
+[s3]
+key = minioadmin
+secret = minioadmin
+endpoint_url = http://minio:9000
\ No newline at end of file
diff --git a/dags/codigo_civil_penal.py b/dags/codigo_civil_penal.py
new file mode 100644
index 0000000..2500960
--- /dev/null
+++ b/dags/codigo_civil_penal.py
@@ -0,0 +1,94 @@
+import pendulum
+from airflow import DAG
+from airflow.decorators import task_group
+from airflow.operators.empty import EmptyOperator
+from airflow.operators.python import PythonOperator
+from datetime import timedelta
+
+from slugify import slugify
+
+from verdictnet.config import get_config
+from verdictnet.etl import get_docspecs, download_doc, refine, render_html, ingest
+from verdictnet.ingestion.documentspec import DocumentSpec
+
+# Define the default arguments
+default_args = {
+    'owner': 'airflow',
+    'depends_on_past': False,
+    'start_date': pendulum.today('UTC'),  # Start date 8 weeks ago
+    'email_on_failure': False,
+    'email_on_retry': False,
+    'retries': 1,
+    'retry_delay': timedelta(minutes=5),
+}
+
+
+def lazy_download_doc(docspec):
+    conf = get_config()
+    download_doc(docspec, conf, force_download=False)
+
+
+def lazy_refine(docspec):
+    conf = get_config()
+    refine(docspec, conf)
+
+
+def lazy_render_html(docspec):
+    conf = get_config()
+    render_html(docspec, conf)
+
+
+def lazy_ingest(docspec):
+    conf = get_config()
+    ingest(docspec, conf)
+
+
+def group(name, docspec):
+    """
+    Process a specific document
+    """
+    download_task = PythonOperator(
+        task_id=f'download_{name}',
+        python_callable=lazy_download_doc,
+        op_args=[docspec],
+    )
+
+    refine_task = PythonOperator(
+        task_id=f'refine_{name}',
+        python_callable=lazy_refine,
+        op_args=[docspec],
+    )
+
+    render_task = PythonOperator(
+        task_id=f'render_{name}',
+        python_callable=lazy_render_html,
+        op_args=[docspec],
+    )
+
+    ingest_task = PythonOperator(
+        task_id=f'ingest_{name}',
+        python_callable=lazy_ingest,
+        op_args=[docspec],
+    )
+
+    download_task >> refine_task >> render_task >> ingest_task
+
+    return download_task
+
+# Define the DAG
+with DAG(
+        'download_codigos',
+        default_args=default_args,
+        description='Download Codigo Civil y Penal',
+        schedule="@once",
+        catchup=False,
+):
+    filenames = get_docspecs()
+    docspecs = [DocumentSpec.load(filename) for filename in filenames]
+
+    start = EmptyOperator(task_id='start_task')
+
+    for docspec in docspecs:
+        name = slugify(docspec.name)
+
+        start >> group(name, docspec)
diff --git a/dags/jurisprudencia.py b/dags/jurisprudencia.py
index de06499..febb97a 100644
--- a/dags/jurisprudencia.py
+++ b/dags/jurisprudencia.py
@@ -1,68 +1,89 @@
+import pendulum
 from airflow import DAG
-from airflow.operators.python_operator import PythonOperator
-from airflow.utils.dates import days_ago
-from datetime import datetime, timedelta
-import requests
-import json
-import os
+from airflow.operators.python import PythonOperator
+from datetime import timedelta, datetime
 
 # Define the default arguments
 default_args = {
     'owner': 'airflow',
     'depends_on_past': False,
-    'start_date': days_ago(8 * 7),  # Start date 8 weeks ago
+    'start_date': pendulum.today('UTC').add(days=-8 * 7),  # Start date 8 weeks ago
     'email_on_failure': False,
     'email_on_retry': False,
     'retries': 1,
     'retry_delay': timedelta(minutes=5),
 }
 
+
+def get_item_pagination_task(date: str):
+    from verdictnet.ingestion.downloader import get_item_pagination
+    return get_item_pagination(datetime.strptime(date, "%Y-%m-%d"))
+
+
+def refine_item_pagination_task(date: str):
+    from verdictnet.ingestion.downloader import refine_item_pagination
+    return refine_item_pagination(datetime.strptime(date, "%Y-%m-%d"))
+
+
+def download_pdfs_task(date: str):
+    from verdictnet.ingestion.downloader import download_pdfs
+    return download_pdfs(datetime.strptime(date, "%Y-%m-%d"))
+
+
+def parse_pdfs_task(date: str):
+    from verdictnet.ingestion.downloader import parse_pdfs
+    return parse_pdfs(datetime.strptime(date, "%Y-%m-%d"))
+
+
+def ingest_pdfs_task(date: str):
+    from verdictnet.ingestion.downloader import ingest_pdfs
+    from verdictnet.storage.transaction_manager import TransactionManager
+    from verdictnet.config import get_config
+
+    transaction_manager = TransactionManager.get_transaction_manager(get_config())
+    dataset_uuid = transaction_manager.init_dataset("Jurisprudencia")
+    return ingest_pdfs(date=datetime.strptime(date, "%Y-%m-%d"),
+                transaction_manager=transaction_manager,
+                dataset_uuid=dataset_uuid)
+
+
 # Define the DAG
-dag = DAG(
+with DAG(
     'query_poderjudicial',
     default_args=default_args,
     description='Query www.poderjudicial.es and store results in JSON',
-    schedule_interval='@weekly',
+    schedule='@daily',
     catchup=True,
-)
-
-# Define the Python function to query the API and save results
-def query_poderjudicial(ds, **kwargs):
-    date_from = (datetime.strptime(ds, '%Y-%m-%d') - timedelta(days=7)).strftime('%Y-%m-%d')
-    date_to = ds
-    #
-    # url = 'https://www.poderjudicial.es/search/search.action'
-    # payload = {
-    #     "action": "query",
-    #     "sort": "IN_FECHARESOLUCION:decreasing",
-    #     "recordsPerPage": "10",
-    #     "databasematch": "AN",
-    #     "start": "1",
-    #     "FECHARESOLUCIONDESDE": date_from,
-    #     "FECHARESOLUCIONHASTA": date_to,
-    #     "TIPOINTERES_ACTUAL": "Actualidad",
-    #     "TIPOORGANOPUB": "|11|12|13|14|15|16|"
-    # }
-    # headers = {
-    #     'Content-Type': 'application/json'
-    # }
-    #
-    # response = requests.post(url, json=payload, headers=headers)
-    # response.raise_for_status()
-    #
-    # results = response.json()
-    # output_path = f'/path/to/output/results_{date_from}_to_{date_to}.json'
-    #
-    # with open(output_path, 'w') as f:
-    #     json.dump(results, f)
-
-# Define the task
-query_task = PythonOperator(
-    task_id='query_poderjudicial_task',
-    provide_context=True,
-    python_callable=query_poderjudicial,
-    dag=dag,
-)
-
-# Set the task in the DAG
-query_task
+):
+    item_pagination_task = PythonOperator(
+        task_id='get_item_pagination',
+        python_callable=get_item_pagination_task,
+        op_kwargs={'date': "{{ ds }}"},
+    )
+
+    refine_pagination_task = PythonOperator(
+        task_id='refine_item_pagination',
+        python_callable=refine_item_pagination_task,
+        op_kwargs={'date': "{{ ds }}"},
+    )
+
+    download_pdfs = PythonOperator(
+        task_id='download_pdfs',
+        python_callable=download_pdfs_task,
+        op_kwargs={'date': "{{ ds }}"},
+    )
+
+    parse_pdfs = PythonOperator(
+        task_id='parse_pdfs',
+        python_callable=parse_pdfs_task,
+        op_kwargs={'date': "{{ ds }}"},
+    )
+
+    ingest_pdfs = PythonOperator(
+        task_id='ingest_pdfs',
+        python_callable=ingest_pdfs_task,
+        op_kwargs={'date': "{{ ds }}"},
+    )
+
+    item_pagination_task >> refine_pagination_task >> download_pdfs >> parse_pdfs >> ingest_pdfs
+
diff --git a/dags/profiler.py b/dags/profiler.py
new file mode 100644
index 0000000..3e00ffe
--- /dev/null
+++ b/dags/profiler.py
@@ -0,0 +1,10 @@
+import line_profiler
+
+
+@line_profiler.profile
+def execute():
+    import jurisprudencia
+
+
+if __name__ == "__main__":
+    execute()
diff --git a/docker-compose.yml b/docker-compose.yml
index 4f7dffe..b679f6a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,4 +1,110 @@
-version: '3.8'
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
+#
+# WARNING: This configuration is for local development. Do not use it in a production deployment.
+#
+# This configuration supports basic configuration using environment variables or an .env file
+# The following variables are supported:
+#
+# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
+#                                Default: apache/airflow:2.10.4
+# AIRFLOW_UID                  - User ID in Airflow containers
+#                                Default: 50000
+# AIRFLOW_PROJ_DIR             - Base path to which all the files will be volumed.
+#                                Default: .
+# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode
+#
+# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account (if requested).
+#                                Default: airflow
+# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account (if requested).
+#                                Default: airflow
+# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers.
+#                                Use this option ONLY for quick checks. Installing requirements at container
+#                                startup is done EVERY TIME the service is started.
+#                                A better way is to build a custom image or extend the official image
+#                                as described in https://airflow.apache.org/docs/docker-stack/build.html.
+#                                Default: ''
+#
+# Feel free to modify this file to suit your needs.
+---
+x-airflow-common:
+  &airflow-common
+  # In order to add custom dependencies or upgrade provider packages you can use your extended image.
+  # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
+  # and uncomment the "build" line below, Then run `docker-compose build` to build the images.
+  # image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.10.4}
+  build: .
+  env_file:
+    - config/local/.env
+  environment:
+    &airflow-common-env
+    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
+    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
+    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
+    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
+    AIRFLOW__CORE__FERNET_KEY: ${FERNET_KEY}
+    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
+    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
+    AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
+    _AIRFLOW_WWW_USER_CREATE: 'true'
+    _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
+    _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}
+    AIRFLOW_CONN_MINIO: '{
+      "conn_type": "s3",
+      "login": "minioadmin",
+      "password": "minioadmin",
+      "host": "minio",
+      "port": 9000,
+      "schema": "http",
+      "extra": {"endpoint_url": "http://minio:9000"}
+    }'
+    # yamllint disable rule:line-length
+    # Use simple http server on scheduler for health checks
+    # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server
+    # yamllint enable rule:line-length
+    AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
+    # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks
+    # for other purpose (development, test and especially production usage) build/extend Airflow image.
+    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
+    # The following line can be used to set a custom config file, stored in the local config folder
+    # If you want to use it, outcomment it and replace airflow.cfg with the name of your config file
+    AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg'
+    PYTHONPATH: /app:${PYTHONPATH:-}  # Append custom path to existing PYTHONPATH
+  volumes:
+    - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
+    # TODO: need to figure out what happens with dag_processor_manager and scheduler logs, that don't go to minio
+    #- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
+    - ${AIRFLOW_PROJ_DIR:-.}/config/local/:/opt/airflow/config
+    # TODO: Not needed ATM, but might be useful in the future
+    #- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
+    - ${PWD}/config/local/airflow.cfg:/opt/airflow/config/airflow.cfg
+    - ./src/verdictnet:/app/verdictnet  # Mount local package directory to container
+    - ${PWD}/config/local/config.ini:/app/verdictnet/config.ini  # Mount local config file to container
+  user: "${AIRFLOW_UID:-50000}:0"
+  depends_on:
+    &airflow-common-depends-on
+    redis:
+      condition: service_healthy
+    postgres:
+      condition: service_healthy
+
 
 services:
   chromadb:
@@ -7,7 +113,7 @@ services:
     ports:
       - "8000:8000"
     volumes:
-      - ${PWD}/chromadb_storage:/data
+      - ${PWD}/chromadb:/data
 
   chromadb-admin:
     image: fengzhichao/chromadb-admin:latest
@@ -25,27 +131,162 @@ services:
       POSTGRES_USER: airflow
       POSTGRES_PASSWORD: airflow
       POSTGRES_DB: airflow
+    healthcheck:
+      test: [ "CMD", "pg_isready", "-U", "airflow" ]
+      interval: 10s
+      retries: 5
+      start_period: 5s
+    restart: always
     volumes:
-      - ${PWD}/postgress_storage:/var/lib/postgresql/data
+      - ${PWD}/postgres:/var/lib/postgresql/data
+
+  redis:
+    # Redis is limited to 7.2-bookworm due to licencing change
+    # https://redis.io/blog/redis-adopts-dual-source-available-licensing/
+    image: redis:7.2-bookworm
+    expose:
+      - 6379
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 30s
+      retries: 50
+      start_period: 30s
+    restart: always
+
 
   airflow-webserver:
-    image: apache/airflow:2.6.1
-    container_name: airflow-webserver
-    environment:
-      - AIRFLOW__CORE__EXECUTOR=LocalExecutor
-      - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
-      - AIRFLOW__CORE__FERNET_KEY=${FERNET_KEY}
-      - AIRFLOW__WEBSERVER__RBAC=True
+    <<: *airflow-common
+    command: webserver
     ports:
       - "8080:8080"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+    restart: always
+    depends_on:
+      <<: *airflow-common-depends-on
+      airflow-init:
+        condition: service_completed_successfully
+
+  airflow-scheduler:
+    <<: *airflow-common
+    command: scheduler
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+    restart: always
+    depends_on:
+      <<: *airflow-common-depends-on
+      airflow-init:
+        condition: service_completed_successfully
+
+  airflow-worker:
+    <<: *airflow-common
+    command: celery worker
+    healthcheck:
+      # yamllint disable rule:line-length
+      test:
+        - "CMD-SHELL"
+        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+    environment:
+      <<: *airflow-common-env
+      # Required to handle warm shutdown of the celery workers properly
+      # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
+      DUMB_INIT_SETSID: "0"
+    restart: always
+    volumes:
+      - ${PWD}/cache:/cache
+    depends_on:
+      <<: *airflow-common-depends-on
+      airflow-init:
+        condition: service_completed_successfully
+
+  airflow-triggerer:
+    <<: *airflow-common
+    command: triggerer
+    healthcheck:
+      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+    restart: always
     depends_on:
-      - postgres
-    command: >
-      bash -c "airflow db init && airflow webserver"
-    env_file:
-      - .env
+      <<: *airflow-common-depends-on
+      airflow-init:
+        condition: service_completed_successfully
+
+  airflow-init:
+    <<: *airflow-common
+    entrypoint: /bin/bash
+    # yamllint disable rule:line-length
+    command:
+      - -c
+      - |
+        if [[ -z "${AIRFLOW_UID}" ]]; then
+          echo
+          echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m"
+          echo "If you are on Linux, you SHOULD follow the instructions below to set "
+          echo "AIRFLOW_UID environment variable, otherwise files will be owned by root."
+          echo "For other operating systems you can get rid of the warning with manually created .env file:"
+          echo "    See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user"
+          echo
+        fi
+        one_meg=1048576
+        mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg))
+        cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat)
+        disk_available=$$(df / | tail -1 | awk '{print $$4}')
+        warning_resources="false"
+        if (( mem_available < 4000 )) ; then
+          echo
+          echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m"
+          echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))"
+          echo
+          warning_resources="true"
+        fi
+        if (( cpus_available < 2 )); then
+          echo
+          echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m"
+          echo "At least 2 CPUs recommended. You have $${cpus_available}"
+          echo
+          warning_resources="true"
+        fi
+        if (( disk_available < one_meg * 10 )); then
+          echo
+          echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m"
+          echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))"
+          echo
+          warning_resources="true"
+        fi
+        if [[ $${warning_resources} == "true" ]]; then
+          echo
+          echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m"
+          echo "Please follow the instructions to increase amount of resources available:"
+          echo "   https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin"
+          echo
+        fi
+        mkdir -p /sources/logs /sources/dags /sources/plugins
+        chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins}
+        exec airflow db migrate
+    # yamllint enable rule:line-length
+    environment:
+      <<: *airflow-common-env
+      _AIRFLOW_DB_MIGRATE: 'true'
+      _PIP_ADDITIONAL_REQUIREMENTS: ''
+    user: "0:0"
     volumes:
-      - ${PWD}/dags:/opt/airflow/dags  # Mount the dags folder
+      - ${AIRFLOW_PROJ_DIR:-.}:/sources
 
   minio:
     image: minio/minio:latest
@@ -58,7 +299,7 @@ services:
       - "9000:9000"  # API
       - "9001:9001"  # Console
     volumes:
-      - ${PWD}/data/:/data
+      - ${PWD}/minio/:/data
     command: server /data --console-address ":9001"
 
   neo4j:
@@ -71,7 +312,3 @@ services:
       - "7687:7687"  # Bolt protocol
     volumes:
       - ${PWD}/neo4j_data:/data
-
-volumes:
-  chromadb_data:
-  postgres_data:
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..050e475
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+log_level=INFO
+log_cli=true
\ No newline at end of file
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..ce3e9fc
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+line_profiler
diff --git a/requirements.txt b/requirements.txt
index d11ad23..b32ce8b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,29 +1,362 @@
-pathlib~=1.0.1
-fastapi~=0.115.5
-starlette~=0.40.0
-Werkzeug~=3.0.3
-requests~=2.32.0
-beautifulsoup4~=4.12.3
-tika~=2.6.0
-tqdm~=4.66.4
-setuptools~=70.0.0
-pytest~=8.2.0
-websocket-client~=1.8.0
-uvicorn~=0.29.0
-websockets~=12.0
-playwright~=1.43.0
-PyPDF2~=3.0.1
-openparse~=0.7.0
-chromadb~=0.5.20
-numpy~=2.1.3
-torch~=2.5.1
-slugify~=0.0.1
-configparser~=7.1.0
-python-slugify~=8.0.4
-fsspec~=2024.10.0
-semantic_pdf~=0.0.1
-pdfplumber~=0.11.4
-pandas~=2.2.3
-neomodel~=5.4.1
-neo4j~=5.26.0
-pytest-mock~=3.14.0
+acres==0.1.0
+aiobotocore==2.15.2
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.4
+aiohttp==3.11.10
+aioitertools==0.12.0
+aiosignal==1.3.1
+aiosqlite==0.20.0
+alembic==1.14.1
+amqp==5.3.1
+annotated-types==0.7.0
+anyio==4.6.2.post1
+apache-airflow==2.10.4
+apache-airflow-providers-celery==3.8.5
+apache-airflow-providers-common-compat==1.3.0
+apache-airflow-providers-common-io==1.5.0
+apache-airflow-providers-common-sql==1.21.0
+apache-airflow-providers-fab==1.5.2
+apache-airflow-providers-ftp==3.12.0
+apache-airflow-providers-google==12.0.0
+apache-airflow-providers-http==5.0.0
+apache-airflow-providers-imap==3.8.0
+apache-airflow-providers-smtp==1.9.0
+apache-airflow-providers-sqlite==4.0.0
+apispec==6.8.1
+argcomplete==3.5.3
+asgiref==3.8.1
+attrs==24.2.0
+babel==2.16.0
+backoff==2.2.1
+bcrypt==4.2.1
+beautifulsoup4==4.12.3
+billiard==4.2.1
+blinker==1.9.0
+botocore==1.35.36
+build==1.2.2.post1
+cachelib==0.9.0
+cachetools==5.5.0
+captcha-solver==0.1.5
+cattrs==24.1.2
+celery==5.4.0
+certifi==2024.8.30
+cffi==1.17.1
+chardet==5.2.0
+charset-normalizer==3.4.0
+chroma-hnswlib==0.7.6
+chromadb==0.5.20
+ci-info==0.3.0
+click==8.1.7
+click-didyoumean==0.3.1
+click-plugins==1.1.1
+click-repl==0.3.0
+clickclick==20.10.2
+colorama==0.4.6
+coloredlogs==15.0.1
+colorlog==6.9.0
+configobj==5.0.9
+configparser==7.1.0
+ConfigUpdater==3.2
+connexion==2.14.2
+contourpy==1.3.1
+cron-descriptor==1.4.5
+croniter==6.0.0
+cryptography==44.0.0
+cycler==0.12.1
+dataclasses-json==0.6.7
+db-dtypes==1.4.0
+decorator==5.1.1
+Deprecated==1.2.15
+dill==0.3.9
+dirtyjson==1.0.8
+distro==1.9.0
+dnspython==2.7.0
+docstring_parser==0.16
+durationpy==0.9
+email_validator==2.2.0
+etelemetry==0.3.1
+fastapi==0.115.5
+fastapi-cli==0.0.5
+filelock==3.16.1
+filetype==1.2.0
+fitz==0.0.1.dev2
+Flask==2.2.5
+Flask-AppBuilder==4.5.2
+Flask-Babel==2.0.0
+Flask-Caching==2.3.0
+Flask-JWT-Extended==4.7.1
+Flask-Limiter==3.10.1
+Flask-Login==0.6.3
+Flask-Session==0.5.0
+Flask-SQLAlchemy==2.5.1
+Flask-WTF==1.2.2
+flatbuffers==24.3.25
+flower==2.0.1
+fonttools==4.55.0
+frozenlist==1.5.0
+fsspec
+gcloud-aio-auth==5.3.2
+gcloud-aio-bigquery==7.1.0
+gcloud-aio-storage==9.3.0
+gcsfs
+google-ads==25.1.0
+google-analytics-admin==0.23.3
+google-api-core==2.24.0
+google-api-python-client==2.159.0
+google-auth==2.36.0
+google-auth-httplib2==0.2.0
+google-auth-oauthlib==1.2.1
+google-cloud-aiplatform==1.78.0
+google-cloud-alloydb==0.4.1
+google-cloud-appengine-logging==1.5.0
+google-cloud-audit-log==0.3.0
+google-cloud-automl==2.15.0
+google-cloud-batch==0.17.33
+google-cloud-bigquery==3.29.0
+google-cloud-bigquery-datatransfer==3.18.0
+google-cloud-bigtable==2.28.1
+google-cloud-build==3.29.0
+google-cloud-compute==1.23.0
+google-cloud-container==2.55.0
+google-cloud-core==2.4.1
+google-cloud-datacatalog==3.24.1
+google-cloud-dataflow-client==0.8.15
+google-cloud-dataform==0.5.14
+google-cloud-dataplex==2.6.0
+google-cloud-dataproc==5.16.0
+google-cloud-dataproc-metastore==1.17.0
+google-cloud-dlp==3.26.0
+google-cloud-kms==3.2.2
+google-cloud-language==2.16.0
+google-cloud-logging==3.11.3
+google-cloud-memcache==1.11.0
+google-cloud-monitoring==2.26.0
+google-cloud-orchestration-airflow==1.16.1
+google-cloud-os-login==2.16.0
+google-cloud-pubsub==2.27.3
+google-cloud-redis==2.17.0
+google-cloud-resource-manager==1.14.0
+google-cloud-run==0.10.14
+google-cloud-secret-manager==2.22.1
+google-cloud-spanner==3.51.0
+google-cloud-speech==2.30.0
+google-cloud-storage==2.19.0
+google-cloud-storage-transfer==1.15.0
+google-cloud-tasks==2.18.0
+google-cloud-texttospeech==2.24.0
+google-cloud-translate==3.19.0
+google-cloud-videointelligence==2.15.0
+google-cloud-vision==3.9.0
+google-cloud-workflows==1.16.0
+google-crc32c==1.6.0
+google-re2==1.1.20240702
+google-resumable-media==2.7.2
+googleapis-common-protos==1.66.0
+greenlet==3.0.3
+grpc-google-iam-v1==0.14.0
+grpc-interceptor==0.15.4
+grpcio==1.70.0
+grpcio-gcp==0.2.2
+grpcio-status==1.70.0
+gunicorn==23.0.0
+h11==0.14.0
+httpcore==1.0.7
+httplib2==0.22.0
+httptools==0.6.4
+httpx==0.27.2
+huggingface-hub==0.26.2
+humanfriendly==10.0
+humanize==4.11.0
+idna==3.10
+immutabledict==4.2.1
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+inflection==0.5.1
+iniconfig==2.0.0
+isodate==0.6.1
+itsdangerous==2.2.0
+Jinja2==3.1.4
+jiter==0.8.0
+jmespath==1.0.1
+joblib==1.4.2
+json-merge-patch==0.2
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kiwisolver==1.4.7
+kombu==5.4.2
+kubernetes==31.0.0
+lazy-object-proxy==1.10.0
+limits==4.0.1
+linkify-it-py==2.0.3
+llama-cloud==0.1.6
+llama-index==0.12.5
+llama-index-agent-openai==0.4.0
+llama-index-cli==0.4.0
+llama-index-core==0.12.5
+llama-index-embeddings-openai==0.3.1
+llama-index-indices-managed-llama-cloud==0.6.3
+llama-index-legacy==0.9.48.post4
+llama-index-llms-openai==0.3.10
+llama-index-multi-modal-llms-openai==0.4.0
+llama-index-program-openai==0.3.1
+llama-index-question-gen-openai==0.3.0
+llama-index-readers-file==0.4.1
+llama-index-readers-llama-parse==0.4.0
+llama-parse==0.5.17
+lockfile==0.12.2
+looker-sdk==25.0.0
+looseversion==1.3.0
+lxml==5.3.0
+Mako==1.3.8
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+marshmallow==3.23.1
+marshmallow-oneofschema==3.1.1
+marshmallow-sqlalchemy==0.28.2
+matplotlib==3.9.2
+mdit-py-plugins==0.4.2
+mdurl==0.1.2
+methodtools==0.4.7
+mmh3==5.0.1
+monotonic==1.6
+more-itertools==10.6.0
+mpmath==1.3.0
+multidict==6.1.0
+mypy-extensions==1.0.0
+neo4j==5.26.0
+neomodel==5.4.1
+nest-asyncio==1.6.0
+networkx==3.4.2
+nibabel==5.3.2
+nipype==1.9.1
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.2.2
+onnxruntime==1.20.1
+openai==1.57.2
+openparse==0.7.0
+ordered-set==4.1.0
+orjson==3.10.12
+overrides==7.7.0
+packaging==24.2
+pandas==2.1.4
+pandas-gbq==0.26.1
+pathlib==1.0.1
+pathspec==0.12.1
+pdf2image==1.17.0
+pdfminer.six==20231228
+pdfplumber==0.11.4
+pendulum==3.0.0
+pillow==11.0.0
+playwright==1.43.0
+pluggy==1.5.0
+posthog==3.7.3
+prison==0.2.1
+prometheus_client==0.21.1
+prompt_toolkit==3.0.50
+propcache==0.2.1
+proto-plus==1.25.0
+protobuf==5.29.0
+prov==2.0.1
+psutil==6.1.1
+puremagic==1.28
+pyarrow==18.1.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.0
+pycparser==2.22
+pydantic==2.10.2
+pydantic_core==2.27.1
+pydata-google-auth==1.9.1
+pydot==3.0.2
+pyee==11.1.0
+Pygments==2.18.0
+PyJWT==2.10.1
+PyMuPDF==1.24.14
+pyOpenSSL==25.0.0
+pyparsing==3.2.0
+pypdf==5.1.0
+PyPDF2==3.0.1
+pypdfium2==4.30.0
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+pytesseract==0.3.13
+pytest==8.2.2
+pytest-mock==3.14.0
+python-daemon==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-nvd3==0.16.0
+python-slugify==8.0.4
+pytz==2024.2
+pyxnat==1.6.2
+PyYAML==6.0.2
+rdflib==6.3.2
+redis==5.2.1
+referencing==0.36.2
+regex==2024.11.6
+reportlab==4.2.5
+requests==2.32.3
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+rfc3339-validator==0.1.4
+rich==13.9.4
+rich-argparse==1.6.0
+rpds-py==0.22.3
+rsa==4.9
+s3fs==2024.10.0
+safetensors==0.4.5
+scikit-learn==1.5.2
+scipy==1.14.1
+sentence-transformers==3.3.1
+setproctitle==1.3.4
+setuptools==70.0.0
+shapely==2.0.6
+shellingham==1.5.4
+simplejson==3.19.3
+six==1.16.0
+slugify==0.0.1
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==1.4.54
+sqlalchemy-bigquery==1.12.1
+SQLAlchemy-JSONField==1.0.2
+sqlalchemy-spanner==1.8.0
+SQLAlchemy-Utils==0.41.2
+sqlparse==0.5.3
+starlette==0.40.0
+striprtf==0.0.26
+sympy==1.13.1
+tabulate==0.9.0
+tenacity==8.5.0
+termcolor==2.5.0
+text-unidecode==1.3
+threadpoolctl==3.5.0
+tika==2.6.0
+tiktoken==0.8.0
+time-machine==2.16.0
+tokenizers==0.20.3
+torch==2.5.1
+tornado==6.4.2
+tqdm==4.66.6
+traits==6.4.3
+transformers==4.46.3
+typer==0.13.1
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.2
+uc-micro-py==1.0.3
+universal_pathlib==0.2.6
+uritemplate==4.1.1
+urllib3==2.2.3
+uvicorn==0.29.0
+uvloop==0.21.0
+vine==5.1.0
+watchfiles==1.0.0
+wcwidth==0.2.13
+websocket-client==1.8.0
+websockets==12.0
+wirerope==1.0.0
+wrapt==1.17.0
+WTForms==3.2.1
+yarl==1.18.3
+zipp==3.21.0
diff --git a/setup.cfg b/setup.cfg
index 09a51e2..8a6db70 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-name = semantic
+name = verdictnet
 version = 0.0.1
 description = A tool for running semantic queries on Spanish Law
 author = Alex Monras
@@ -7,7 +7,7 @@ license = MIT
 
 [options]
 package_dir=
-    =src
+    =verdictnet
 packages = find:
 install_requires =
     numpy
@@ -19,11 +19,18 @@ install_requires =
     matplotlib
     pyarrow
     PyMuPDF
-    wordcloud
+
+[options.package_data]
+verdictnet.ingestion =
+    resources/*.json
+verdictnet.frontend =
+    static/css/*
+    static/js/*
+    templates/*
 
 [options.packages.find]
 where=src
 
 [options.entry_points]
 console_scripts =
-    semantic = cli:main
+    verdictnet = verdictnet.cli:main
diff --git a/setup.py b/setup.py
index 726ea4c..8339b5a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,11 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='semantic_pdf',
+    name='verdictnet',
     version='0.0.1',
-    description='A tool to do semantic_pdf queries on large documents',
+    description='A tool to do verdictnet queries on large documents',
     author='Alex Monras',
     license='MIT',
-    packages=find_packages('src')
+    packages=find_packages(where='src'),
+    package_dir={'': 'src'}
 )
diff --git a/src/config.py b/src/config.py
deleted file mode 100644
index 39a6583..0000000
--- a/src/config.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import configparser
-import logging
-import os
-from pathlib import Path
-from typing import Optional
-
-import fsspec
-
-logging.basicConfig(
-    level=logging.INFO,
-    # Define the format of the logs, include the function name and line number
-    format="%(asctime)s - %(name)s.%(funcName)s [%(levelname)s]: %(message)s",
-)
-
-
-def root_path():
-    return Path(__file__).parent.parent
-
-
-def get_config():
-    # Create a ConfigParser instance
-    config = configparser.ConfigParser()
-
-    # Load the configuration file
-    config.read(root_path() / 'config.ini')
-
-    return config
-
-
-def configure_fsspec():
-    config = get_config()
-
-    s3_config = {
-        "key": os.getenv("AWS_ACCESS_KEY_ID", config.get('s3', 'key')),
-        "secret": os.getenv("AWS_SECRET_ACCESS_KEY", config.get('s3', 'secret')),
-        "client_kwargs": {
-            "endpoint_url": os.getenv("S3_ENDPOINT", config.get('s3', 'endpoint_url'))
-        }
-    }
-
-    fsspec.config.conf = {
-        "s3": s3_config
-    }
-
-
-# Call the function to configure fsspec
-configure_fsspec()
-
-
-def get_fs(conf: Optional[configparser.ConfigParser] = None):
-    conf = conf or get_config()
-    if conf['storage']['type'] == 's3':
-        fs = fsspec.filesystem("s3")
-    else:
-        fs = fsspec.filesystem("file")
-    return fs
diff --git a/src/etl.py b/src/etl.py
deleted file mode 100644
index c5b755b..0000000
--- a/src/etl.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import os
-from pathlib import Path
-from typing import List
-
-import requests
-from slugify import slugify
-from bs4 import BeautifulSoup
-
-from config import get_config, root_path, get_fs
-from ingestion.documentspec import DocumentSpec
-from ingestion.paths import refined_path
-from models.node import Node
-from ingestion.parsers.html_parser import parse
-from storage.chroma_storage import ChromaStorage
-from storage.hybrid_storage import HybridStorage
-from storage.transaction_manager import TransactionManager
-
-
-def get_docspecs(path: Path = None) -> List[Path]:
-    """
-    Return all the docspecs in the resources folder
-    """
-    if path is None:
-        path = Path(__file__).parent / 'ingestion' / 'resources/'
-    filenames = []
-    for root, subdirs, files in os.walk(path):
-        for file in files:
-            if file.endswith('.json'):
-                filenames.append(Path(root + '/' + file))
-
-    if not filenames:
-        print(f"No document Spec filed found in provided folder {path}")
-
-    return filenames
-
-
-def get_files(path: Path, extension: str = None, subfolders=False) -> List[Path]:
-    """
-    Return all the files in the resources folder
-    """
-    filenames = []
-    for root, subdirs, files in os.walk(path):
-        for file in files:
-            if file.endswith(extension):
-                filenames.append(Path(root + '/' + file))
-        if not subfolders:
-            break
-
-    if not filenames:
-        print(f"No files found in provided folder {path}")
-
-    return filenames
-
-
-def download(docspec: DocumentSpec) -> str:
-    # Hacer la solicitud HTTP
-    response = requests.get(docspec.url)
-    if response.status_code != 200:
-        raise Exception(f"No se pudo acceder al texto consolidado. Código de estado: {response.status_code}")
-
-    return response.text
-
-
-def get_document_structure(text, docspec: DocumentSpec) -> List[Node]:
-    soup = BeautifulSoup(text, "html.parser")
-    tags = soup.findAll(docspec.tags)
-    parsed = parse(tags, docspec=docspec, levels=docspec.wraps or [docspec.head])
-    if len(parsed) > 1:
-        parsed = [Node(level=docspec.head, content=docspec.name, children=parsed)]
-    return parsed
-
-
-def ingest(main_node, docspec: DocumentSpec, storage: TransactionManager):
-    all_nodes = main_node.get_all(level=docspec.embed_level)
-
-    storage.store_with_transaction(main_node)
-
-
-def clean():
-    print("This will empty both databases. Are you sure you want to continue? (y/n)")
-    response = input()
-    if response.lower() != 'y':
-        print("Aborted.")
-        return
-
-    conf = get_config()
-
-    store = HybridStorage.get_hybrid_storage(conf)
-    store.delete_all()
-    print(f"Cleared all databases.")
-
-
-def run(force_download=False, path=None):
-    conf = get_config()
-
-    transaction_manager = TransactionManager.get_transaction_manager(conf)
-
-    # Load Docspecs
-    filenames = get_docspecs(path)
-    docspecs = [DocumentSpec.load(filename) for filename in filenames]
-
-    for docspec in docspecs:
-
-        slug_name = slugify(docspec.name)
-
-        # Download documents
-        target_filename = f'{slug_name}.html'
-        raw_path = root_path() / conf['storage']['raw'] / target_filename
-
-        if force_download or not os.path.exists(raw_path):
-            print(f"Downloading document `{docspec.name}`...")
-            text = download(docspec)
-
-            os.makedirs(os.path.dirname(raw_path), exist_ok=True)
-            with open(raw_path, 'w') as file:
-                file.write(text)
-        else:
-            with open(raw_path, 'r') as file:
-                text = file.read()
-
-        # Refining documents
-        target = refined_path() + f'{slug_name}.json'
-
-        main_node = get_document_structure(text, docspec=docspec)
-
-        main_node[0].save(target)
-        print(f"Saved refined in '{target_filename}'.")
-
-        # Saving json
-        html_path = root_path() / conf['storage']['html'] / f'{slug_name}.html'
-        os.makedirs(os.path.dirname(html_path), exist_ok=True)
-        with open(html_path, 'w', encoding='utf-8') as file:
-            file.write(main_node[0].html(
-                preamble="""
-                <html lang="es"><head><meta charset="utf-8" /></head>
-                """
-            ))
-        print(f"HTML saved to '{slug_name}.html'.")
-
-        # Ingesting into vector database
-        ingest(main_node[0], docspec, storage=transaction_manager)
-
-
-if __name__ == "__main__":
-    run()
diff --git a/src/__init__.py b/src/verdictnet/__init__.py
similarity index 100%
rename from src/__init__.py
rename to src/verdictnet/__init__.py
diff --git a/src/cli.py b/src/verdictnet/cli.py
similarity index 92%
rename from src/cli.py
rename to src/verdictnet/cli.py
index 2ef9e44..d1db693 100644
--- a/src/cli.py
+++ b/src/verdictnet/cli.py
@@ -1,6 +1,6 @@
 import argparse
 
-import query
+from verdictnet import query
 
 
 def main():
@@ -40,13 +40,13 @@ def main():
 
 
 def handle_etl(args):
-    import etl
+    from verdictnet import etl
     if args.subcommand == "clean":
         etl.clean()
     elif args.subcommand == "run":
         etl.run(force_download=args.force, path=args.path)
     else:
-        print("No {args.subcommand} subcommand found.")
+        print(f"No {args.subcommand} subcommand found.")
 
 
 def handle_query(args):
@@ -54,7 +54,7 @@ def handle_query(args):
 
 
 def handle_server(args):
-    from frontend.server import server
+    from verdictnet.frontend.server import server
     server.main()
 
 
diff --git a/src/verdictnet/config.py b/src/verdictnet/config.py
new file mode 100644
index 0000000..cbe587c
--- /dev/null
+++ b/src/verdictnet/config.py
@@ -0,0 +1,80 @@
+import configparser
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+import fsspec
+from airflow.hooks.base import BaseHook
+
+logging.basicConfig(
+    level=logging.INFO,
+    # Define the format of the logs, include the function name and line number
+    format="%(asctime)s - %(name)s.%(funcName)s [%(levelname)s]: %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+_fsspec_configured = False
+
+
+def root_path():
+    return Path(__file__).parent
+
+
+def get_config():
+    # Create a ConfigParser instance
+    config = configparser.ConfigParser()
+
+    # Load the configuration file from the current folder, or from the package root folder
+    files = config.read(filenames=['config.ini', root_path() / 'config.ini'])
+    logger.info("Successfully loaded config files: %s", files)
+
+    return config
+
+
+def configure_fsspec():
+    global _fsspec_configured
+    if _fsspec_configured:
+        return
+
+    config = get_config()
+
+    # Check if running within Airflow
+    if 'AIRFLOW_HOME' in os.environ:
+        # Retrieve the connection details from Airflow
+        connection = BaseHook.get_connection('minio')
+        s3_config = {
+            "key": connection.login,
+            "secret": connection.password,
+            "client_kwargs": {
+                "endpoint_url": connection.extra_dejson.get('endpoint_url')
+            }
+        }
+    else:
+        s3_config = {
+            "key": os.getenv("AWS_ACCESS_KEY_ID", config.get('s3', 'key')),
+            "secret": os.getenv("AWS_SECRET_ACCESS_KEY", config.get('s3', 'secret')),
+            "client_kwargs": {
+                "endpoint_url": os.getenv("S3_ENDPOINT", config.get('s3', 'endpoint_url'))
+            }
+        }
+
+    fsspec.config.conf = {
+        "s3": s3_config
+    }
+
+    # TODO: Warning. This is potentially logging sensitive passwords. Password obfuscation should be implemented.
+    logger.info("fsspec configured with: %s", json.dumps(fsspec.config.conf))
+    _fsspec_configured = True
+
+
+def get_fs(conf: Optional[configparser.ConfigParser] = None):
+    configure_fsspec()
+    conf = conf or get_config()
+    if conf['storage']['type'] == 's3':
+        fs = fsspec.filesystem("s3")
+    else:
+        fs = fsspec.filesystem("file")
+    return fs
diff --git a/src/embedding.py b/src/verdictnet/embedding.py
similarity index 69%
rename from src/embedding.py
rename to src/verdictnet/embedding.py
index 281c23f..2fd3a5f 100644
--- a/src/embedding.py
+++ b/src/verdictnet/embedding.py
@@ -1,26 +1,25 @@
-import argparse
 import configparser
-from typing import List, Union, Tuple, Optional
+from typing import List, Union, Optional
 
 from numpy import ndarray
-from sentence_transformers import SentenceTransformer
-from torch import Tensor
 
-from config import root_path, get_config
-from models.node import Node
+from verdictnet.config import root_path, get_config
+from verdictnet.models.node import Node
 
 
 class Embedding:
     def __init__(self, conf: Optional[configparser.ConfigParser] = None):
+        from sentence_transformers import SentenceTransformer
+
         self.conf = conf or get_config()
         # Load a pre-trained model
-        self.model = SentenceTransformer(  # Lightweight, fast model
+        self.model = SentenceTransformer(
             self.conf.get('embedding', 'model_name_or_path'),
-            cache_folder=root_path() / self.conf.get('embedding', 'cache')
+            cache_folder=self.conf.get('embedding', 'cache')
         )
 
     def embed_nodes(self, nodes: List[Node]) -> tuple[
-        List[Union[List[Tensor], ndarray, Tensor]],
+        List[Union[List, ndarray]],
         List[str],
         List[dict]
     ]:
@@ -43,5 +42,5 @@ def embed_nodes(self, nodes: List[Node]) -> tuple[
 
         return embeddings, documents, metadata
 
-    def embed_string(self, text: str) -> Tensor:
+    def embed_string(self, text: str):
         return self.model.encode(text).tolist()
diff --git a/src/verdictnet/etl.py b/src/verdictnet/etl.py
new file mode 100644
index 0000000..92660f1
--- /dev/null
+++ b/src/verdictnet/etl.py
@@ -0,0 +1,183 @@
+import os
+from configparser import ConfigParser
+from pathlib import Path
+from typing import List
+
+import requests
+from slugify import slugify
+from bs4 import BeautifulSoup
+
+from verdictnet.config import get_config, root_path, logging, get_fs
+from verdictnet.ingestion.documentspec import DocumentSpec
+from verdictnet.ingestion.paths import refined_path, raw_path, html_path
+from verdictnet.models.node import Node
+from verdictnet.ingestion.parsers.html_parser import parse
+from verdictnet.storage.hybrid_storage import HybridStorage
+from verdictnet.storage.transaction_manager import TransactionManager
+
+logger = logging.getLogger(__name__)
+
+
+def get_docspecs(path: Path = None) -> List[Path]:
+    """
+    Return all the docspecs in the resources folder
+    """
+    if path is None:
+        path = Path(__file__).parent / 'ingestion' / 'resources/'
+    filenames = []
+    for root, subdirs, files in os.walk(path):
+        for file in files:
+            if file.endswith('.json'):
+                filenames.append(Path(root + '/' + file))
+
+    if not filenames:
+        logger.warning(f"No document Spec files found in provided folder {path}")
+    else:
+        logger.info(f"Found {len(filenames)} document specs: %s", ",".join([str(f) for f in filenames]))
+
+    return filenames
+
+
+def get_files(path: Path, extension: str = None, subfolders=False) -> List[Path]:
+    """
+    Return all the files in the resources folder
+    """
+    filenames = []
+    for root, subdirs, files in os.walk(path):
+        for file in files:
+            if file.endswith(extension):
+                filenames.append(Path(root + '/' + file))
+        if not subfolders:
+            break
+
+    if not filenames:
+        print(f"No files found in provided folder {path}")
+
+    return filenames
+
+
+def download(docspec: DocumentSpec) -> str:
+    # Hacer la solicitud HTTP
+    response = requests.get(docspec.url)
+    if response.status_code != 200:
+        raise Exception(f"No se pudo acceder al texto consolidado. Código de estado: {response.status_code}")
+
+    return response.text
+
+
+def get_document_structure(text, docspec: DocumentSpec) -> List[Node]:
+    soup = BeautifulSoup(text, "html.parser")
+    tags = soup.findAll(docspec.tags)
+    parsed = parse(tags, docspec=docspec, levels=docspec.wraps or [docspec.head])
+    if len(parsed) > 1:
+        parsed = [Node(level=docspec.head, content=docspec.name, children=parsed)]
+    return parsed
+
+
+def download_doc(docspec: DocumentSpec, conf, force_download=False):
+    """
+    Download the document and save it to the raw folder in html format
+    """
+    fs = get_fs(conf)
+
+    slug_name = slugify(docspec.name)
+
+    target_path = raw_path() + f'{slug_name}.html'
+
+    # Download documents
+    if force_download or not os.path.exists(target_path):
+        logger.info(f"Downloading document `{docspec.name}`...")
+        text = download(docspec)
+
+        os.makedirs(os.path.dirname(target_path), exist_ok=True)
+        with fs.open(target_path, 'w') as file:
+            file.write(text)
+
+
+def refine(docspec, conf):
+    """
+    Take the document in html format and refine it to a json format
+    """
+    fs = get_fs(conf)
+
+    slug_name = slugify(docspec.name)
+
+    source_filename = f'{slug_name}.html'
+    soure_path = raw_path() + source_filename
+
+    with fs.open(soure_path, 'r') as file:
+        text = file.read()
+
+    target = refined_path() + f'{slug_name}.json'
+
+    main_node = get_document_structure(text, docspec=docspec)
+
+    main_node[0].save(target)
+    logger.info(f"Saved refined in '{target}'.")
+
+
+def render_html(docspec: DocumentSpec, conf: ConfigParser):
+    fs = get_fs(conf)
+
+    slug_name = slugify(docspec.name)
+    main_node_path = refined_path() + f'{slug_name}.json'
+    main_node = Node.load(main_node_path)
+
+    html_file = html_path() + f'{slug_name}.html'
+    with fs.open(html_file, 'w', encoding='utf-8') as file:
+        file.write(main_node.html(
+            preamble="""
+            <html lang="es"><head><meta charset="utf-8" /></head>
+            """
+        ))
+    logger.info(f"HTML saved to '{html_file}'.")
+
+
+def ingest(docspec: DocumentSpec, conf: ConfigParser):
+    slug_name = slugify(docspec.name)
+    main_node_path = refined_path() + f'{slug_name}.json'
+    main_node = Node.load(main_node_path)
+
+    storage = TransactionManager.get_transaction_manager(conf)
+    # all_nodes = main_node.get_all(level=docspec.embed_level)
+
+    storage.store_with_transaction(main_node)
+
+
+def clean():
+    print("This will empty both databases. Are you sure you want to continue? (y/n)")
+    response = input()
+    if response.lower() != 'y':
+        print("Aborted.")
+        return
+
+    conf = get_config()
+
+    store = HybridStorage.get_hybrid_storage(conf)
+    store.delete_all()
+    print(f"Cleared all databases.")
+
+
+def run(force_download=False, path=None):
+    conf = get_config()
+
+    # Load Docspecs
+    filenames = get_docspecs(path)
+    docspecs = [DocumentSpec.load(filename) for filename in filenames]
+
+    for docspec in docspecs:
+        # Download document
+        download_doc(docspec, conf, force_download)
+
+        # Refining documents
+        refine(docspec, conf)
+
+        # Render html
+        render_html(docspec, conf)
+
+        # Ingesting into vector database
+        ingest(docspec, conf)
+
+
+if __name__ == "__main__":
+    run()
diff --git a/src/frontend/__init__.py b/src/verdictnet/frontend/__init__.py
similarity index 100%
rename from src/frontend/__init__.py
rename to src/verdictnet/frontend/__init__.py
diff --git a/src/frontend/custom_logger.py b/src/verdictnet/frontend/custom_logger.py
similarity index 100%
rename from src/frontend/custom_logger.py
rename to src/verdictnet/frontend/custom_logger.py
diff --git a/src/frontend/paths.py b/src/verdictnet/frontend/paths.py
similarity index 100%
rename from src/frontend/paths.py
rename to src/verdictnet/frontend/paths.py
diff --git a/src/frontend/server/__init__.py b/src/verdictnet/frontend/server/__init__.py
similarity index 100%
rename from src/frontend/server/__init__.py
rename to src/verdictnet/frontend/server/__init__.py
diff --git a/src/frontend/server/app.py b/src/verdictnet/frontend/server/app.py
similarity index 100%
rename from src/frontend/server/app.py
rename to src/verdictnet/frontend/server/app.py
diff --git a/src/ingestion/__init__.py b/src/verdictnet/frontend/server/dto/__init__.py
similarity index 100%
rename from src/ingestion/__init__.py
rename to src/verdictnet/frontend/server/dto/__init__.py
diff --git a/src/frontend/server/dto/websocket.py b/src/verdictnet/frontend/server/dto/websocket.py
similarity index 100%
rename from src/frontend/server/dto/websocket.py
rename to src/verdictnet/frontend/server/dto/websocket.py
diff --git a/src/frontend/server/server.py b/src/verdictnet/frontend/server/server.py
similarity index 88%
rename from src/frontend/server/server.py
rename to src/verdictnet/frontend/server/server.py
index 0fa4907..3c667ad 100644
--- a/src/frontend/server/server.py
+++ b/src/verdictnet/frontend/server/server.py
@@ -9,14 +9,14 @@
 from starlette.staticfiles import StaticFiles
 from starlette.websockets import WebSocketDisconnect
 
-from config import get_config
-from etl import get_files
-from frontend import paths
-from ragagent import RAGAgent
-from frontend.server.websocket import Connection
-from frontend.server.dto.websocket import ConnectionId, DisplayDocuments
-from render.html import HTMLRenderer
-from storage.hybrid_storage import HybridStorage
+from verdictnet.config import get_config
+from verdictnet.etl import get_files
+from verdictnet.frontend import paths
+from verdictnet.ragagent import RAGAgent
+from verdictnet.frontend.server.websocket import Connection
+from verdictnet.frontend.server.dto.websocket import ConnectionId, DisplayDocuments
+from verdictnet.render.html import HTMLRenderer
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 conf = get_config()
 
diff --git a/src/frontend/server/websocket.py b/src/verdictnet/frontend/server/websocket.py
similarity index 94%
rename from src/frontend/server/websocket.py
rename to src/verdictnet/frontend/server/websocket.py
index 3d3cc3b..7823c9d 100644
--- a/src/frontend/server/websocket.py
+++ b/src/verdictnet/frontend/server/websocket.py
@@ -5,9 +5,9 @@
 from PyPDF2 import PdfFileReader
 from starlette.websockets import WebSocket
 
-from frontend.paths import uploads
-from ragagent import RAGAgent
-from frontend.server.dto.websocket import WebSocketMessage, ChatQueryMessage, FileUploaded, ChatResponseMessage, \
+from verdictnet.frontend.paths import uploads
+from verdictnet.ragagent import RAGAgent
+from verdictnet.frontend.server.dto.websocket import WebSocketMessage, ChatQueryMessage, FileUploaded, ChatResponseMessage, \
     UnfoldNodes
 
 
diff --git a/src/frontend/static/css/document_tree.css b/src/verdictnet/frontend/static/css/document_tree.css
similarity index 100%
rename from src/frontend/static/css/document_tree.css
rename to src/verdictnet/frontend/static/css/document_tree.css
diff --git a/src/frontend/static/css/style.css b/src/verdictnet/frontend/static/css/style.css
similarity index 100%
rename from src/frontend/static/css/style.css
rename to src/verdictnet/frontend/static/css/style.css
diff --git a/src/frontend/static/js/main.js b/src/verdictnet/frontend/static/js/main.js
similarity index 100%
rename from src/frontend/static/js/main.js
rename to src/verdictnet/frontend/static/js/main.js
diff --git a/src/frontend/templates/index.html b/src/verdictnet/frontend/templates/index.html
similarity index 100%
rename from src/frontend/templates/index.html
rename to src/verdictnet/frontend/templates/index.html
diff --git a/src/ingestion/README.md b/src/verdictnet/ingestion/README.md
similarity index 98%
rename from src/ingestion/README.md
rename to src/verdictnet/ingestion/README.md
index 239f454..fef01c9 100644
--- a/src/ingestion/README.md
+++ b/src/verdictnet/ingestion/README.md
@@ -2,7 +2,7 @@
 
 There are currently two ingestion processes in place:
 ```sh
-$ semantic erl run
+$ verdictnet erl run
 ```
 ingests the documents specified in `src/ingestion/resources/`, namely:
 - Código Civil
diff --git a/src/ingestion/parsers/__init__.py b/src/verdictnet/ingestion/__init__.py
similarity index 100%
rename from src/ingestion/parsers/__init__.py
rename to src/verdictnet/ingestion/__init__.py
diff --git a/src/ingestion/documentspec.py b/src/verdictnet/ingestion/documentspec.py
similarity index 100%
rename from src/ingestion/documentspec.py
rename to src/verdictnet/ingestion/documentspec.py
diff --git a/src/ingestion/downloader.py b/src/verdictnet/ingestion/downloader.py
similarity index 96%
rename from src/ingestion/downloader.py
rename to src/verdictnet/ingestion/downloader.py
index 529de2a..82c7df3 100644
--- a/src/ingestion/downloader.py
+++ b/src/verdictnet/ingestion/downloader.py
@@ -9,11 +9,11 @@
 from bs4 import BeautifulSoup
 from tqdm import tqdm
 
-from ingestion.parsers.pdf_parser import extract_paragraphs
-from ingestion.paths import raw_path, refined_path, fsspec_walk
-from config import get_config, logging, get_fs
-from models.node import Node
-from storage.transaction_manager import TransactionManager
+from verdictnet.ingestion.parsers.pdf_parser import extract_paragraphs
+from verdictnet.ingestion.paths import raw_path, refined_path, fsspec_walk
+from verdictnet.config import get_config, logging, get_fs
+from verdictnet.models.node import Node
+from verdictnet.storage.transaction_manager import TransactionManager
 
 logger = logging.getLogger(__name__)
 
@@ -88,6 +88,7 @@ def create_session():
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:132.0) Gecko/20100101 Firefox/132.0"
     }
 
+    logger.info("Creating session: %s", f"{BASE_URL}/search/")
     response = session.get(f"{BASE_URL}/search/", headers=headers_dict)
 
     if response.status_code != 200:
@@ -307,7 +308,6 @@ def ingest_pdfs(date: datetime, transaction_manager: TransactionManager, force=F
     transaction_manager.store_with_transaction(nodes, parent_uuid=dataset_uuid)
 
 
-
 if __name__ == "__main__":
     start_date = datetime.today() - timedelta(days=20)
     end_date = datetime.today() - timedelta(days=1)
diff --git a/src/render/__init__.py b/src/verdictnet/ingestion/parsers/__init__.py
similarity index 100%
rename from src/render/__init__.py
rename to src/verdictnet/ingestion/parsers/__init__.py
diff --git a/src/ingestion/parsers/html_parser.py b/src/verdictnet/ingestion/parsers/html_parser.py
similarity index 96%
rename from src/ingestion/parsers/html_parser.py
rename to src/verdictnet/ingestion/parsers/html_parser.py
index 30db5f0..f152bc8 100644
--- a/src/ingestion/parsers/html_parser.py
+++ b/src/verdictnet/ingestion/parsers/html_parser.py
@@ -1,5 +1,5 @@
-from ingestion.documentspec import DocumentSpec
-from models.node import Node
+from verdictnet.ingestion.documentspec import DocumentSpec
+from verdictnet.models.node import Node
 
 
 def next_class(tags):
diff --git a/src/ingestion/parsers/pdf_parser.py b/src/verdictnet/ingestion/parsers/pdf_parser.py
similarity index 96%
rename from src/ingestion/parsers/pdf_parser.py
rename to src/verdictnet/ingestion/parsers/pdf_parser.py
index 5c69312..0876e1e 100644
--- a/src/ingestion/parsers/pdf_parser.py
+++ b/src/verdictnet/ingestion/parsers/pdf_parser.py
@@ -1,6 +1,6 @@
 import pdfplumber
 
-from models.node import Node
+from verdictnet.models.node import Node
 
 
 def extract_paragraphs(pdf_path) -> Node:
diff --git a/src/ingestion/paths.py b/src/verdictnet/ingestion/paths.py
similarity index 74%
rename from src/ingestion/paths.py
rename to src/verdictnet/ingestion/paths.py
index b0a4224..7d2e06f 100644
--- a/src/ingestion/paths.py
+++ b/src/verdictnet/ingestion/paths.py
@@ -2,7 +2,7 @@
 
 import fsspec
 
-from config import root_path, get_config, logging
+from verdictnet.config import root_path, get_config, logging
 
 logger = logging.getLogger(__name__)
 
@@ -43,3 +43,14 @@ def refined_path():
         return root_path() / conf['storage']['bucket'] / conf['storage']['refined']
     elif conf['storage']['type'] == 's3':
         return f"s3://{conf['storage']['bucket']}/{conf['storage']['refined']}"
+
+
+def html_path():
+    """
+    Return the path where we store refined objects as JSON files ready to be ingested
+    into the database
+    """
+    if conf['storage']['type'] == 'local':
+        return root_path() / conf['storage']['bucket'] / conf['storage']['html']
+    elif conf['storage']['type'] == 's3':
+        return f"s3://{conf['storage']['bucket']}/{conf['storage']['html']}"
diff --git a/src/ingestion/resources/codigo_civil.json b/src/verdictnet/ingestion/resources/codigo_civil.json
similarity index 100%
rename from src/ingestion/resources/codigo_civil.json
rename to src/verdictnet/ingestion/resources/codigo_civil.json
diff --git a/src/ingestion/resources/codigo_penal.json b/src/verdictnet/ingestion/resources/codigo_penal.json
similarity index 100%
rename from src/ingestion/resources/codigo_penal.json
rename to src/verdictnet/ingestion/resources/codigo_penal.json
diff --git a/src/verdictnet/models/__init__.py b/src/verdictnet/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/models/node.py b/src/verdictnet/models/node.py
similarity index 91%
rename from src/models/node.py
rename to src/verdictnet/models/node.py
index c1ee23a..dd2b24e 100644
--- a/src/models/node.py
+++ b/src/verdictnet/models/node.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass, field
 from typing import List
 
-from config import get_fs
+from verdictnet.config import get_fs
 
 
 class AutoIncrement:  # pylint: disable=too-few-public-methods
@@ -83,13 +83,16 @@ def save(self, path):
         with fs.open(path, 'w', encoding='utf8') as file:
             json.dump(self.json(), file, indent=4, ensure_ascii=False)
 
-    def load(self, path):
+    @classmethod
+    def load(cls, path):
         """
         Load the node from a file
         """
-        with open(path, 'r', encoding='utf8') as file:
-            data = json.load(file, ensure_ascii=False)
-        return Node(**data)
+        fs = get_fs()
+
+        with fs.open(path, 'r', encoding='utf8') as file:
+            data = json.load(file)
+        return Node.from_dict(data)
 
     @classmethod
     def from_dict(cls, data):
@@ -99,6 +102,7 @@ def from_dict(cls, data):
         return Node(
             id=data['id'],
             level=data['level'],
+            uuid=data['uuid'],
             content=data['content'],
             children=[cls.from_dict(child) for child in data['children']]
         )
diff --git a/src/query.py b/src/verdictnet/query.py
similarity index 90%
rename from src/query.py
rename to src/verdictnet/query.py
index 142c794..3763911 100644
--- a/src/query.py
+++ b/src/verdictnet/query.py
@@ -1,9 +1,9 @@
 import os
 import textwrap
 
-from render.plain_text import PlainTextRenderer
-from storage.chroma_storage import ChromaStorage
-from storage.hybrid_storage import HybridStorage
+from verdictnet.render.plain_text import PlainTextRenderer
+from verdictnet.storage.chroma_storage import ChromaStorage
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 
 def query(q_string: str, n_results: int = 3):
diff --git a/src/ragagent.py b/src/verdictnet/ragagent.py
similarity index 84%
rename from src/ragagent.py
rename to src/verdictnet/ragagent.py
index 6afdb49..7bb6424 100644
--- a/src/ragagent.py
+++ b/src/verdictnet/ragagent.py
@@ -2,11 +2,9 @@
 import logging
 from typing import Optional, List
 
-import chromadb
-
-from models.node import Node
-from query import print_results
-from storage.hybrid_storage import HybridStorage
+from verdictnet.models.node import Node
+from verdictnet.query import print_results
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
diff --git a/src/verdictnet/render/__init__.py b/src/verdictnet/render/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/render/html.py b/src/verdictnet/render/html.py
similarity index 90%
rename from src/render/html.py
rename to src/verdictnet/render/html.py
index 8e377e2..f6efb5f 100644
--- a/src/render/html.py
+++ b/src/verdictnet/render/html.py
@@ -1,5 +1,5 @@
-from models.node import Node
-from render.node_renderer import NodeRenderer
+from verdictnet.models.node import Node
+from verdictnet.render.node_renderer import NodeRenderer
 
 
 class HTMLRenderer(NodeRenderer):
diff --git a/src/render/node_renderer.py b/src/verdictnet/render/node_renderer.py
similarity index 85%
rename from src/render/node_renderer.py
rename to src/verdictnet/render/node_renderer.py
index 27166a4..4be823f 100644
--- a/src/render/node_renderer.py
+++ b/src/verdictnet/render/node_renderer.py
@@ -1,4 +1,4 @@
-from models.node import Node
+from verdictnet.models.node import Node
 
 
 class NodeRenderer:
diff --git a/src/render/plain_text.py b/src/verdictnet/render/plain_text.py
similarity index 86%
rename from src/render/plain_text.py
rename to src/verdictnet/render/plain_text.py
index e7498e4..55c2cbc 100644
--- a/src/render/plain_text.py
+++ b/src/verdictnet/render/plain_text.py
@@ -1,5 +1,5 @@
-from models.node import Node
-from render.node_renderer import NodeRenderer
+from verdictnet.models.node import Node
+from verdictnet.render.node_renderer import NodeRenderer
 
 
 class PlainTextRenderer(NodeRenderer):
diff --git a/src/verdictnet/storage/__init__.py b/src/verdictnet/storage/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/storage/adapters.py b/src/verdictnet/storage/adapters.py
similarity index 99%
rename from src/storage/adapters.py
rename to src/verdictnet/storage/adapters.py
index 3b5686b..1d8ecd2 100644
--- a/src/storage/adapters.py
+++ b/src/verdictnet/storage/adapters.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from models.node import Node
+from verdictnet.models.node import Node
 
 
 class NodeAdapter:
diff --git a/src/storage/chroma_storage.py b/src/verdictnet/storage/chroma_storage.py
similarity index 95%
rename from src/storage/chroma_storage.py
rename to src/verdictnet/storage/chroma_storage.py
index 159668a..7638b99 100644
--- a/src/storage/chroma_storage.py
+++ b/src/verdictnet/storage/chroma_storage.py
@@ -4,11 +4,11 @@
 import chromadb
 import neo4j
 
-import config
-from embedding import Embedding
-from models.node import Node
+from verdictnet import config
+from verdictnet.embedding import Embedding
+from verdictnet.models.node import Node
 
-from config import logging
+from verdictnet.config import logging
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/storage/graph_storage.py b/src/verdictnet/storage/graph_storage.py
similarity index 98%
rename from src/storage/graph_storage.py
rename to src/verdictnet/storage/graph_storage.py
index 74fa688..9876423 100644
--- a/src/storage/graph_storage.py
+++ b/src/verdictnet/storage/graph_storage.py
@@ -6,9 +6,9 @@
 import numpy as np
 from neo4j import Driver, GraphDatabase
 
-import config
-from models.node import Node
-from storage.adapters import NodeAdapter
+from verdictnet import config
+from verdictnet.models.node import Node
+from verdictnet.storage.adapters import NodeAdapter
 
 logger = config.logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
diff --git a/src/storage/hybrid_storage.py b/src/verdictnet/storage/hybrid_storage.py
similarity index 89%
rename from src/storage/hybrid_storage.py
rename to src/verdictnet/storage/hybrid_storage.py
index 0e26417..0196ca2 100644
--- a/src/storage/hybrid_storage.py
+++ b/src/verdictnet/storage/hybrid_storage.py
@@ -1,10 +1,10 @@
 import configparser
 from typing import Optional, List
 
-import config
-from models.node import Node
-from storage.chroma_storage import ChromaStorage
-from storage.graph_storage import GraphStorage
+from verdictnet import config
+from verdictnet.models.node import Node
+from verdictnet.storage.chroma_storage import ChromaStorage
+from verdictnet.storage.graph_storage import GraphStorage
 
 
 class HybridStorage:
@@ -39,7 +39,7 @@ def flatten_hierarchy(self, root_node: Node) -> List[Node]:
 
     def query(self, query_string: str, n_results: Optional[int] = None):
         """
-        Perform a semantic search in ChromaDB and return the results.
+        Perform a verdictnet search in ChromaDB and return the results.
         """
         return self.chroma_storage.query(query_string, n_results)
 
diff --git a/src/storage/transaction_manager.py b/src/verdictnet/storage/transaction_manager.py
similarity index 95%
rename from src/storage/transaction_manager.py
rename to src/verdictnet/storage/transaction_manager.py
index 3fdd7b3..6bdd9a1 100644
--- a/src/storage/transaction_manager.py
+++ b/src/verdictnet/storage/transaction_manager.py
@@ -1,9 +1,9 @@
 import configparser
 from typing import Optional, List
 
-from models.node import Node
-from storage.hybrid_storage import HybridStorage
-from config import logging, get_config
+from verdictnet.models.node import Node
+from verdictnet.storage.hybrid_storage import HybridStorage
+from verdictnet.config import logging, get_config
 
 logger = logging.getLogger(__name__)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 7b527d6..50b9128 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,24 +1,27 @@
+import configparser
 from pathlib import Path
+from unittest.mock import patch
 
 from bs4 import BeautifulSoup
 from pytest import fixture
 
-from config import root_path
-from ingestion.documentspec import DocumentSpec
-from ingestion.parsers.html_parser import parse
 
-resources = Path(__file__).parent / "resources"
+@fixture(autouse=True)
+def mock_config():
+    def mock_get_config():
+        config = configparser.ConfigParser()
+        success = config.read(['resources/config.ini', 'tests/resources/config.ini'])
+        assert success is not [], "Could not read mock config file"
+        return config
 
+    with patch('verdictnet.config.get_config', mock_get_config):
+        yield
 
-@fixture
-def static_files():
-    return root_path() / "src/frontend/static/css"
 
+from verdictnet.ingestion.documentspec import DocumentSpec
+from verdictnet.ingestion.parsers.html_parser import parse
 
-@fixture
-def css_code(static_files):
-    with open(static_files / 'document_tree.css', 'r') as css_file:
-        yield css_file.read()
+resources = Path(__file__).parent / "resources"
 
 
 @fixture
diff --git a/tests/ingestion/test_documentspec.py b/tests/ingestion/test_documentspec.py
index f3fc49a..b954bb3 100644
--- a/tests/ingestion/test_documentspec.py
+++ b/tests/ingestion/test_documentspec.py
@@ -3,7 +3,7 @@
 from pytest import fixture
 
 from conftest import resources
-from ingestion.documentspec import DocumentSpec
+from verdictnet.ingestion.documentspec import DocumentSpec
 
 
 @fixture
diff --git a/tests/ingestion/test_ingest.py b/tests/ingestion/test_ingest.py
index d8610a1..3b4e71c 100644
--- a/tests/ingestion/test_ingest.py
+++ b/tests/ingestion/test_ingest.py
@@ -2,7 +2,7 @@
 from unittest.mock import patch
 import pytest
 
-from etl import get_docspecs
+from verdictnet.etl import get_docspecs
 
 
 @pytest.fixture
diff --git a/tests/ingestion/test_parser.py b/tests/ingestion/test_parser.py
index eb28839..260db44 100644
--- a/tests/ingestion/test_parser.py
+++ b/tests/ingestion/test_parser.py
@@ -2,7 +2,7 @@
 from bs4 import BeautifulSoup
 
 from conftest import codigo_civil_spec
-from ingestion.parsers.html_parser import parse
+from verdictnet.ingestion.parsers.html_parser import parse
 
 
 @fixture(scope='class')
diff --git a/tests/ingestion/test_pdf_parser.py b/tests/ingestion/test_pdf_parser.py
index b6feae1..fbc1a0e 100644
--- a/tests/ingestion/test_pdf_parser.py
+++ b/tests/ingestion/test_pdf_parser.py
@@ -2,8 +2,8 @@
 import os
 
 from conftest import resources
-from models.node import Node
-from ingestion.parsers.pdf_parser import extract_paragraphs
+from verdictnet.models.node import Node
+from verdictnet.ingestion.parsers.pdf_parser import extract_paragraphs
 
 
 @pytest.fixture
diff --git a/tests/models/test_node.py b/tests/models/test_node.py
new file mode 100644
index 0000000..1246596
--- /dev/null
+++ b/tests/models/test_node.py
@@ -0,0 +1,128 @@
+import json
+import tempfile
+import webbrowser
+
+from _pytest.fixtures import fixture
+from bs4 import BeautifulSoup
+
+
+from verdictnet.models.node import Node
+
+
+@fixture
+def static_files():
+    from verdictnet.config import root_path
+    return root_path() / "frontend/static/css"
+
+
+@fixture
+def css_code(static_files):
+    with open(static_files / 'document_tree.css', 'r') as css_file:
+        yield css_file.read()
+
+
+class TestNode:
+    def test_render(self, node_titulo):
+        text = node_titulo.render()
+        assert "      2. Carecerán de validez las disposiciones que  contradigan otra de rango superior." in text
+
+    def test_html(self, node_titulo, css_code):
+        html = node_titulo.html()
+
+        # Insert the CSS link into the HTML content
+        html_with_css = f'<html><head><style>{css_code}</style></head><body>{html}</body></html>'
+
+        # open html_text in a browser to see the result
+        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
+            url = 'file://' + f.name
+            f.write(html_with_css)
+
+        # ensure html is correctly formatted
+        try:
+            assert BeautifulSoup(html_with_css, 'html.parser')
+        except Exception as e:
+            webbrowser.open(url)
+            raise e
+
+    def test_save(self):
+        node = Node(level="1", content="Test Node")
+        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.json') as f:
+            path = f.name
+        node.save(path)
+
+        with open(path, 'r', encoding='utf8') as file:
+            data = json.load(file)
+
+        assert data['level'] == "1"
+        assert data['content'] == "Test Node"
+        assert 'uuid' in data
+
+    def test_load(self):
+        node_data = {
+            "id": 1,
+            "uuid": "1234",
+            "level": "1",
+            "content": "Test Node",
+            "children": []
+        }
+        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.json') as f:
+            path = f.name
+            json.dump(node_data, f)
+
+        loaded_node = Node.load(path)
+
+        assert loaded_node.level == "1"
+        assert loaded_node.content == "Test Node"
+        assert loaded_node.uuid == "1234"
+        assert loaded_node.children == []
+
+    def test_save_with_children(self):
+        child_node = Node(level="2", content="Child Node")
+        parent_node = Node(level="1", content="Parent Node", children=[child_node])
+
+        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.json') as f:
+            path = f.name
+        parent_node.save(path)
+
+        with open(path, 'r', encoding='utf8') as file:
+            data = json.load(file)
+
+        assert data['level'] == "1"
+        assert data['content'] == "Parent Node"
+        assert 'uuid' in data
+        assert len(data['children']) == 1
+        assert data['children'][0]['level'] == "2"
+        assert data['children'][0]['content'] == "Child Node"
+        assert 'uuid' in data['children'][0]
+
+    def test_load_with_children(self):
+        node_data = {
+            "id": 1,
+            "uuid": "1234",
+            "level": "1",
+            "content": "Parent Node",
+            "children": [
+                {
+                    "id": 2,
+                    "uuid": "5678",
+                    "level": "2",
+                    "content": "Child Node",
+                    "children": []
+                }
+            ]
+        }
+
+        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.json') as f:
+            path = f.name
+            json.dump(node_data, f)
+
+        loaded_node = Node.load(path)
+
+        assert loaded_node.level == "1"
+        assert loaded_node.content == "Parent Node"
+        assert loaded_node.uuid == "1234"
+        assert len(loaded_node.children) == 1
+        assert loaded_node.children[0].level == "2"
+        assert loaded_node.children[0].content == "Child Node"
+        assert loaded_node.children[0].uuid == "5678"
+        assert loaded_node.children[0].children == []
diff --git a/tests/resources/config.ini b/tests/resources/config.ini
new file mode 100644
index 0000000..5782e55
--- /dev/null
+++ b/tests/resources/config.ini
@@ -0,0 +1,30 @@
+[storage]
+# s3 or file
+type: file
+bucket: legal
+collection: legal-database
+raw: datalake/raw/
+refined: datalake/refined/
+html: datalake/html/
+
+[chroma]
+type: http
+host: localhost
+port: 8000
+
+[neo4j]
+url: bolt://localhost:7687
+user: none
+password: nopwd
+
+[embedding]
+model_name_or_path: paraphrase-mpnet-base-v2
+cache: cache/
+
+[rag]
+n_results: 5
+
+[s3]
+key = none
+secret = nopwd
+endpoint_url = http://localhost:9000
\ No newline at end of file
diff --git a/tests/storage/conftest.py b/tests/storage/conftest.py
index d984c8f..97e59dc 100644
--- a/tests/storage/conftest.py
+++ b/tests/storage/conftest.py
@@ -1,7 +1,6 @@
 import pytest
 
-from storage.graph_storage import GraphStorage
-from storage.hybrid_storage import HybridStorage
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 
 @pytest.fixture
diff --git a/tests/storage/test_adapter.py b/tests/storage/test_adapter.py
index 5e45abe..099286a 100644
--- a/tests/storage/test_adapter.py
+++ b/tests/storage/test_adapter.py
@@ -1,5 +1,5 @@
-from models.node import Node
-from storage.adapters import NodeAdapter
+from verdictnet.models.node import Node
+from verdictnet.storage.adapters import NodeAdapter
 
 
 def test_to_neo4j_with_relationships_single_node():
diff --git a/tests/storage/test_chroma_storage.py b/tests/storage/test_chroma_storage.py
index cc9685e..46c805d 100644
--- a/tests/storage/test_chroma_storage.py
+++ b/tests/storage/test_chroma_storage.py
@@ -1,9 +1,9 @@
 import chromadb
 import pytest
 from unittest.mock import MagicMock
-from models.node import Node
-from storage.chroma_storage import ChromaStorage
-from embedding import Embedding
+from verdictnet.models.node import Node
+from verdictnet.storage.chroma_storage import ChromaStorage
+from verdictnet.embedding import Embedding
 
 
 @pytest.fixture
diff --git a/tests/storage/test_graph_storage.py b/tests/storage/test_graph_storage.py
index c2d275f..673c0bd 100644
--- a/tests/storage/test_graph_storage.py
+++ b/tests/storage/test_graph_storage.py
@@ -5,8 +5,8 @@
 
 from neo4j import Result
 
-from models.node import Node
-from storage.graph_storage import GraphStorage
+from verdictnet.models.node import Node
+from verdictnet.storage.graph_storage import GraphStorage
 
 
 @pytest.fixture
diff --git a/tests/storage/test_hybrid_storage.py b/tests/storage/test_hybrid_storage.py
index 095351f..c7e3100 100644
--- a/tests/storage/test_hybrid_storage.py
+++ b/tests/storage/test_hybrid_storage.py
@@ -1,7 +1,7 @@
 import pytest
 from unittest.mock import MagicMock
-from models.node import Node
-from storage.hybrid_storage import HybridStorage
+from verdictnet.models.node import Node
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 
 @pytest.fixture
diff --git a/tests/storage/test_transaction_manager.py b/tests/storage/test_transaction_manager.py
index 51adc15..78d19cc 100644
--- a/tests/storage/test_transaction_manager.py
+++ b/tests/storage/test_transaction_manager.py
@@ -1,8 +1,8 @@
 import pytest
 from unittest.mock import MagicMock, call
-from models.node import Node
-from storage.transaction_manager import TransactionManager
-from storage.hybrid_storage import HybridStorage
+from verdictnet.models.node import Node
+from verdictnet.storage.transaction_manager import TransactionManager
+from verdictnet.storage.hybrid_storage import HybridStorage
 
 
 @pytest.fixture
diff --git a/tests/test_node.py b/tests/test_node.py
deleted file mode 100644
index da6614a..0000000
--- a/tests/test_node.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import tempfile
-import webbrowser
-
-from bs4 import BeautifulSoup
-
-
-class TestNode:
-    def test_render(self, node_titulo):
-        text = node_titulo.render()
-        assert "      2. Carecerán de validez las disposiciones que  contradigan otra de rango superior." in text
-
-    def test_html(self, node_titulo, css_code):
-        html = node_titulo.html()
-
-        # Insert the CSS link into the HTML content
-        html_with_css = f'<html><head><style>{css_code}</style></head><body>{html}</body></html>'
-
-        # open html_text in a browser to see the result
-        with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
-            url = 'file://' + f.name
-            f.write(html_with_css)
-
-        # ensure html is correctly formatted
-        try:
-            assert BeautifulSoup(html_with_css, 'html.parser')
-        except Exception as e:
-            webbrowser.open(url)
-            raise e
-