datagato · swapnilashtekar · Feb 16, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,53 @@
+# syntax=docker/dockerfile:1.6
+
+############################
+# Builder: install deps with uv
+############################
+FROM python:3.11-slim AS builder
+
+# Build-time system packages (they stay in this stage and never reach runtime):
+#   build-essential  - compiler toolchain for packages without prebuilt wheels
+#   ca-certificates  - TLS verification for package downloads
+#   git              - needed for dependencies installed from git URLs
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install uv from PyPI using the pip that ships with the base image
+RUN pip install --no-cache-dir uv
+
+# Copy only dependency metadata first for better layer caching.
+# uv.lock is required: the sync step below installs straight from it.
+COPY pyproject.toml uv.lock ./
+
+# Create a virtualenv and install the locked dependencies into it
+# --active: sync into the activated /opt/venv instead of a project-local .venv
+# --frozen: install exactly what uv.lock records, without re-resolving and
+#           without checking that it is up to date with pyproject
+#           (use --locked instead if the build should fail on a stale lockfile)
+# --no-dev: skip the development dependency group
+# --no-install-project: install dependencies only; the project source is not
+#           present in this stage (the runtime stage exposes it via PYTHONPATH)
+RUN uv venv /opt/venv && \
+    . /opt/venv/bin/activate && \
+    uv sync --active --frozen --no-dev --no-install-project
+
+############################
+# Runtime: copy venv + code
+############################
+FROM python:3.11-slim AS runtime
+
+# Runtime environment, declared in one place:
+#   VIRTUAL_ENV / PATH  - put the copied virtualenv first on PATH
+#   PYTHONUNBUFFERED    - unbuffered Python output
+#   PYTHONPATH          - make sure imports work from the repo root
+ENV VIRTUAL_ENV=/opt/venv \
+    PATH="/opt/venv/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app
+
+WORKDIR /app
+
+# Bring over the dependency virtualenv produced by the builder stage
+COPY --from=builder /opt/venv /opt/venv
+
+# Add the repository contents (the whole build context) under /app
+COPY . /app
+
+# Default command: print a usage hint
+CMD ["python", "-c", "print('Container ready. Try: python scripts/measure_similarity.py')"]
diff --git a/README.md b/README.md
@@ -203,3 +203,50 @@ The following table provides an overview of all scripts in the `scripts/` folder
 6. **ResNet** - He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)*, 770-778. https://doi.org/10.1109/CVPR.2016.90
 
 7. **Foldseek** - van Kempen, M., Kim, S.S., Tumescheit, C., Mirdita, M., Lee, J., Gilchrist, C.L.M., Söding, J., & Steinegger, M. (2024). Fast and accurate protein structure search with Foldseek. *Nature Biotechnology*, 42, 243–246. https://doi.org/10.1038/s41587-023-01773-0
+
+
+# Docker Guide (uv based)
+
+This repo is dockerized to run the scripts in the `scripts/` folder (e.g. `measure_similarity.py`, `create_proteograms.py`), which live at `/app/scripts` inside the container.
+
+## Prerequisites
+- Docker installed
+- `uv.lock` present in repo root (required: the Dockerfile copies it and installs dependencies from it)
+
+---
+
+## Build the Docker image
+
+From the repo root (the folder that contains `Dockerfile`, `pyproject.toml`, `uv.lock`):
+
+```
+sudo docker build -t proteogram:dev .
+```
+
+## Verify the image
+
+Verify Python and package import
+```
+docker run --rm proteogram:dev python -c "import proteogram; print('import ok')"
+```
+
+Verify scripts inside the container
+```
+docker run --rm proteogram:dev python scripts/measure_similarity.py
+```
+
+Open an interactive shell in the container to inspect its contents and confirm the expected files are present:
+```
+docker run --rm -it proteogram:dev bash
+```
+
+### Mount the datasets 
+Note: `-v` bind mounts are applied **only at container run time**. The data is
+not stored in the image and will not be present unless you start the container
+with the `-v` flag.
+```
+sudo docker run --rm -it \
+  -v "$(pwd)/scripts/data/pdbstyle-2.08:/app/scripts/data/pdbstyle-2.08" \
+  proteogram:dev \
+  bash
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,38 +1,28 @@
 [project]
 name = "proteogram"
-version = "0.1.0"
-description = "Protein structure to image - manipulation and analysis."
+version = "0.0.3"
+description = "Protein structure to image manipulation and analysis."
 requires-python = ">=3.11"
-dependencies = [
-    "biopython~=1.8",
-    "kmeans-pytorch~=0.3",
-    "matplotlib~=3.10",
-    "MDAnalysis[analysis,extra_formats,parallel]==2.10.0",
-    "numpy~=1.26",
-    "openmm~=8.4",
-    "pandas~=3.0",
-    "pdbfixer @ git+https://github.com/openmm/pdbfixer@94cfa4c",
-    "pillow~=12.1",
-    "torch~=2.2",
-    "torchvision~=0.17",
-    "tqdm~=4.67",
-]
+dynamic = ["dependencies"]
+
+[tool.setuptools.dynamic]
+# Runtime dependencies are read from requirements.txt at build time.
+# NOTE(review): the `file` directive for `dependencies` presumably needs a newer
+# setuptools than the `>=61.0` floor in [build-system] (62.6+ per the setuptools
+# docs) — confirm and raise the floor if so.
+dependencies = {file = ["requirements.txt"]}
 
 [project.optional-dependencies]
+cuda12 = [
+  "nvidia-cuda-nvcc-cu12==12.9.86",
+  "openmm-cuda-12==8.4.0.post2"
+]
 test = ["pytest", "pytest-cov"]
 notebook = [
   "jupyterlab>=4.2.5",
   "nglview>=3.1.4",
   "rcsbsearchapi>=1.5.1"
 ]
-cuda12 = [
-  "nvidia-cuda-nvcc-cu12",
-  "openmm-cuda-12~=8.4"
-]
 
 [tool.setuptools.packages.find]
 # Exclude patterns should match the full package name
-exclude = ["assets","notebooks", "scripts", "tmp"]
+exclude = ["assets", "paper", "notebooks", "scripts", "tmp"] 
 
 [build-system]
 requires = ["setuptools>=61.0"]

diff --git a/pyproject_poetry.toml b/pyproject_poetry.toml
@@ -0,0 +1,29 @@
+# Poetry-based project metadata for proteogram.
+[tool.poetry]
+name = "proteogram"
+version = "0.0.2"
+description = "Protein structure to image manipulation and analysis."
+authors = ["Micheleen Harris"]
+# NOTE(review): this looks like a file name rather than a license identifier;
+# Poetry presumably expects the license name / SPDX ID here — confirm.
+license = "LICENSE"
+readme = "README.md"
+
+# Runtime dependencies (caret ranges, exact pins, and one git dependency).
+[tool.poetry.dependencies]
+python = "^3.11"
+nglview = "^3.1.2"
+biopython = "^1.8"
+pandas = "^2.2.2"
+matplotlib = "^3.6"
+rcsbsearchapi = "1.5.1"
+ase = "3.23.0"
+numpy = "^1"
+torch = "^2.2"
+torchvision = "^0.17"
+torchsummary = "^1.5"
+kmeans-pytorch = "^0.3"
+pyrotein = { git = "https://github.com/carbonscott/pyrotein.git", branch = "main" }
+
+# NOTE(review): `dev-dependencies` is the legacy table name; newer Poetry
+# releases use `[tool.poetry.group.dev.dependencies]` — confirm which Poetry
+# version this file targets.
+[tool.poetry.dev-dependencies]
+jupyterlab = "4.2.5"
+
+# Build backend: poetry-core (unpinned).
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"