Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# syntax=docker/dockerfile:1.6

############################
# Builder: install deps with uv
############################
FROM python:3.11-slim AS builder

# Build-time system packages. They are installed in this stage only; the
# runtime stage below starts again from the plain base image.
# NOTE(review): git and build-essential are presumably here for dependencies
# that are fetched from git or compiled from source - confirm before trimming.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
git \
build-essential \
&& rm -rf /var/lib/apt/lists/*


WORKDIR /app

# Install uv from PyPI with pip.
RUN pip install --no-cache-dir uv

# Copy only dependency metadata first for better layer caching
COPY pyproject.toml ./
# uv.lock is required, not optional: this COPY fails if the file is missing,
# and the sync step below installs from it.
COPY uv.lock ./

# Create a virtualenv and sync deps from the lockfile.
# --active: install into the activated /opt/venv instead of a project-local .venv
# --frozen: install exactly what uv.lock records, without checking that the
#           lockfile is up to date with pyproject.toml (use --locked to fail
#           on a stale lockfile instead)
# --no-dev: skip the development dependency group
RUN uv venv /opt/venv && \
. /opt/venv/bin/activate && \
uv sync --active --frozen --no-dev

############################
# Runtime: slim image with the prebuilt venv and the repo
############################
FROM python:3.11-slim AS runtime

# Put the virtualenv's executables first on PATH and keep Python output unbuffered.
# /opt/venv is spelled out in PATH because variables set in one ENV instruction
# cannot be referenced by other assignments in that same instruction.
ENV VIRTUAL_ENV=/opt/venv \
    PATH="/opt/venv/bin:$PATH" \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Bring over the dependency environment created in the builder stage
COPY --from=builder /opt/venv /opt/venv

# Add the build context (the repo) to the working directory, /app
COPY . .

# Let Python resolve imports relative to the repo root
ENV PYTHONPATH=/app

# Default command: print a usage hint
CMD ["python", "-c", "print('Container ready. Try: python scripts/measure_similarity.py')"]
47 changes: 47 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,50 @@ The following table provides an overview of all scripts in the `scripts/` folder
6. **ResNet** - He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)*, 770-778. https://doi.org/10.1109/CVPR.2016.90

7. **Foldseek** - van Kempen, M., Kim, S.S., Tumescheit, C., Mirdita, M., Lee, J., Gilchrist, C.L.M., Söding, J., & Steinegger, M. (2024). Fast and accurate protein structure search with Foldseek. *Nature Biotechnology*, 42, 243–246. https://doi.org/10.1038/s41587-023-01773-0


# Docker Guide (uv based)

This repo is dockerized so that the scripts under `scripts/` (e.g. `measure_similarity.py`, `create_proteograms.py`) can be run inside a container.

## Prerequisites
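- Docker installed (prefix the `docker` commands below with `sudo` if your user is not allowed to talk to the Docker daemon directly)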
- `uv.lock` present in the repo root (required: the Dockerfile copies it and installs dependencies from it, which keeps builds reproducible)

---

## Build the Docker image

From the repo root (the folder that contains `Dockerfile`, `pyproject.toml`, `uv.lock`):

```
sudo docker build -t proteogram:dev .
```

## Verify the image

Verify Python and package import
```
docker run --rm proteogram:dev python -c "import proteogram; print('import ok')"
```

Verify scripts inside the container
```
docker run --rm proteogram:dev python scripts/measure_similarity.py
```

Start an interactive shell in the container to inspect its contents and confirm the expected files are present.
```
docker run --rm -it proteogram:dev bash
```

### Mount the datasets
Note: `-v` bind mounts are applied **only at container run time**. The data is
not stored in the image and will not be present unless you start the container
with the `-v` flag.
```
sudo docker run --rm -it \
-v "$(pwd)/scripts/data/pdbstyle-2.08:/app/scripts/data/pdbstyle-2.08" \
proteogram:dev \
bash
```
32 changes: 11 additions & 21 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,38 +1,28 @@
[project]
name = "proteogram"
version = "0.1.0"
description = "Protein structure to image - manipulation and analysis."
version = "0.0.3"
description = "Protein structure to image manipulation and analysis."
requires-python = ">=3.11"
dependencies = [
"biopython~=1.8",
"kmeans-pytorch~=0.3",
"matplotlib~=3.10",
"MDAnalysis[analysis,extra_formats,parallel]==2.10.0",
"numpy~=1.26",
"openmm~=8.4",
"pandas~=3.0",
"pdbfixer @ git+https://github.com/openmm/pdbfixer@94cfa4c",
"pillow~=12.1",
"torch~=2.2",
"torchvision~=0.17",
"tqdm~=4.67",
]
dynamic = ["dependencies"]

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[project.optional-dependencies]
cuda12 = [
"nvidia-cuda-nvcc-cu12==12.9.86",
"openmm-cuda-12==8.4.0.post2"
]
test = ["pytest", "pytest-cov"]
notebook = [
"jupyterlab>=4.2.5",
"nglview>=3.1.4",
"rcsbsearchapi>=1.5.1"
]
cuda12 = [
"nvidia-cuda-nvcc-cu12",
"openmm-cuda-12~=8.4"
]

[tool.setuptools.packages.find]
# Exclude patterns should match the full package name
exclude = ["assets","notebooks", "scripts", "tmp"]
exclude = ["assets", "paper", "notebooks", "scripts", "tmp"]

[build-system]
requires = ["setuptools>=61.0"]
Expand Down
29 changes: 29 additions & 0 deletions pyproject_poetry.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Poetry-format project definition for proteogram.

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "proteogram"
version = "0.0.2"
description = "Protein structure to image manipulation and analysis."
authors = ["Micheleen Harris"]
# NOTE(review): "LICENSE" looks like a file name; Poetry documents this field
# as the license name/identifier - confirm the intended value.
license = "LICENSE"
readme = "README.md"

# Runtime requirements: the interpreter constraint first, then packages A-Z.
[tool.poetry.dependencies]
python = "^3.11"
ase = "3.23.0"
biopython = "^1.8"
kmeans-pytorch = "^0.3"
matplotlib = "^3.6"
nglview = "^3.1.2"
numpy = "^1"
pandas = "^2.2.2"
# Installed from the main branch of the upstream git repository.
pyrotein = { git = "https://github.com/carbonscott/pyrotein.git", branch = "main" }
rcsbsearchapi = "1.5.1"
torch = "^2.2"
torchsummary = "^1.5"
torchvision = "^0.17"

# Development-only tooling.
# NOTE(review): Poetry 1.2+ deprecates this section in favour of
# [tool.poetry.group.dev.dependencies] - confirm before migrating.
[tool.poetry.dev-dependencies]
jupyterlab = "4.2.5"
Loading