From 4d77f8e77d592cc87a9f4048219864f6f9683633 Mon Sep 17 00:00:00 2001 From: Zoey Zhang Date: Tue, 16 Jun 2026 15:53:57 -0700 Subject: [PATCH 1/5] remove all final mentions of bionemo framework2 Signed-off-by: Zoey Zhang --- .github/ISSUE_TEMPLATE/bug-report.yml | 4 +- .github/ISSUE_TEMPLATE/feature-request.yml | 2 +- .github/nv-slack-bot.yaml | 8 +- docs/docs/SUMMARY.md | 2 +- docs/docs/main/about/SUMMARY.md | 2 - docs/docs/main/about/background/SUMMARY.md | 2 - .../about/background/megatron_datasets.md | 80 ---- docs/docs/main/about/background/nemo2.md | 178 -------- docs/docs/main/about/releasenotes-fw.md | 432 ------------------ .../Writing Documentation/mkdocs.md | 2 +- docs/docs/main/datasets/index.md | 6 +- docs/docs/main/getting-started/pre-reqs.md | 8 +- docs/docs/main/index.md | 21 - docs/docs/models/ESM-2/index.md | 10 +- docs/docs/models/evo2.md | 2 +- .../examples/evo2_gfmbench_recipe.ipynb | 4 +- 16 files changed, 24 insertions(+), 739 deletions(-) delete mode 100644 docs/docs/main/about/background/SUMMARY.md delete mode 100644 docs/docs/main/about/background/megatron_datasets.md delete mode 100644 docs/docs/main/about/background/nemo2.md delete mode 100644 docs/docs/main/about/releasenotes-fw.md delete mode 100644 docs/docs/main/index.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index bda7707e31..14dff2d3b5 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -13,7 +13,7 @@ body: - type: input id: version attributes: - label: BioNeMo Framework Version + label: BioNeMo Recipes Version description: | **What version or commit hash of the framework are you using?** @@ -89,7 +89,7 @@ body: id: docker-image-info attributes: label: Docker Image - description: If the issue occurred in a container, provide the docker image name. 
Visit [BioNeMo Framework NGC website](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara/containers/bionemo-framework/tags) for available images. + description: If the issue occurred in a container, provide the docker image name. Visit [BioNeMo Recipes NGC website](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara/containers/bionemo-framework/tags) for available images. placeholder: e.g., nvcr.io/nvidia/clara/bionemo-framework:2.2 validations: required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml index 629af53fb0..f251fcf3f5 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -17,7 +17,7 @@ body: - type: input id: version attributes: - label: BioNeMo Framework Version + label: BioNeMo Recipes Version description: What version or commit hash of the framework are you using? Please, specify a commit hash or version tag. Do not use 'latest', 'ToT' or 'nightly' as a reference. placeholder: commit-hash or version tag, ie v1.2.3. 
diff --git a/.github/nv-slack-bot.yaml b/.github/nv-slack-bot.yaml index e103f928b5..eb97ffd5f1 100644 --- a/.github/nv-slack-bot.yaml +++ b/.github/nv-slack-bot.yaml @@ -1,8 +1,8 @@ $schema: https://public.gha-runners.nvidia.com/nv-slack-bot/schemas/config-v1.json enabled: true notifications: - # Send a slack notification when BioNeMo Framework nightly CI fails - - name: "BioNeMo Framework Nightly CI Failed" + # Send a slack notification when BioNeMo Recipes nightly CI fails + - name: "BioNeMo Recipes Nightly CI Failed" event: workflow_run slack: nvidia: @@ -10,11 +10,11 @@ notifications: - id: C0832RNTR09 # bionemo-fw-ci-alerts match: > workflow_run.conclusion = "failure" - and workflow_run.name = "BioNeMo Framework CI" + and workflow_run.name = "BioNeMo Recipes CI" and workflow_run.event = "schedule" message: body: | - <{{url}}|BioNeMo Framework nightly CI "{{name}}"> failed + <{{url}}|BioNeMo Recipes nightly CI "{{name}}"> failed vars: url: workflow_run.html_url name: workflow_run.name diff --git a/docs/docs/SUMMARY.md b/docs/docs/SUMMARY.md index fa2d566f7b..c19dc6ad74 100644 --- a/docs/docs/SUMMARY.md +++ b/docs/docs/SUMMARY.md @@ -1,3 +1,3 @@ - [Home](index.md) -- [BioNeMo Framework](main/) +- [BioNeMo Recipes](main/) - [Models](models/) diff --git a/docs/docs/main/about/SUMMARY.md b/docs/docs/main/about/SUMMARY.md index 7bbe5b1e47..630d441870 100644 --- a/docs/docs/main/about/SUMMARY.md +++ b/docs/docs/main/about/SUMMARY.md @@ -1,3 +1 @@ - [Overview](overview.md) -- [Background](background/) -- [Release Notes](releasenotes-fw.md) diff --git a/docs/docs/main/about/background/SUMMARY.md b/docs/docs/main/about/background/SUMMARY.md deleted file mode 100644 index ae7c154ab4..0000000000 --- a/docs/docs/main/about/background/SUMMARY.md +++ /dev/null @@ -1,2 +0,0 @@ -- [NeMo2](nemo2.md) -- [Megatron Dataset Considerations](megatron_datasets.md) diff --git a/docs/docs/main/about/background/megatron_datasets.md b/docs/docs/main/about/background/megatron_datasets.md 
deleted file mode 100644 index d63cb7756c..0000000000 --- a/docs/docs/main/about/background/megatron_datasets.md +++ /dev/null @@ -1,80 +0,0 @@ -# Writing Megatron-LM Compatible Datamodules - -[Megatron-LM](https://github.com/NVIDIA/Megatron-LM) relies on determinism in the training dataset classes to ensure -that input tensors are initialized correctly across model-parallel ranks (see [NeMo2 Parallelism](./nemo2.md)). As a -consequence, ensure that the new dataset classes preserve the required determinism. Common operations such as data -augmentation and masking can cause `dataset[i]` to return random results for a given index, breaking this megatron -contract. - -## Multi-Epoch Training - -One training regime where this limitation is most apparent is multi-epoch training, where standard training recipes -would apply different random masks or different data augmentation strategies each time the data is encountered. BioNeMo -provides some utilities that make multi-epoch training easier, while obeying the determinism requirements of -megatron. - -The [MultiEpochDatasetResampler][bionemo.common.data.multi_epoch_dataset.MultiEpochDatasetResampler] class simplifies the -process of multi-epoch training, where the data should both be re-shuffled each epoch with different random effects -applied each time the data is seen. To be compatible with this resampler, the provided dataset class's `__getitem__` -method should accept a [EpochIndex][bionemo.common.data.multi_epoch_dataset.EpochIndex] tuple that contains both an epoch -and index value. Random effects can then be performed by setting the torch random seed based on the epoch value: - -```python -class MyDataset: - def __getitem__(self, idx: EpochIndex): - rng = torch.Generator() - rng.manual_seed(idx.epoch) - ... -``` - -!!! bug "Avoid `torch.manual_seed`" - -``` -Megatron-LM handles torch seeding internally. Calling `torch.cuda.manual_seed` inside the user-provided dataset -can cause issues with model parallelism. 
See [megatron/core/tensor_parallel/random.py#L198-L199]( -https://github.com/NVIDIA/Megatron-LM/blob/dddecd19/megatron/core/tensor_parallel/random.py#L198-L199) for more -details. -``` - -For deterministic datasets that still want to train for multiple epochs with epoch-level shuffling, the -[IdentityMultiEpochDatasetWrapper][bionemo.common.data.multi_epoch_dataset.IdentityMultiEpochDatasetWrapper] class can -simplify this process by wrapping a dataset that accepts integer indices and passes along the -[EpochIndex][bionemo.common.data.multi_epoch_dataset.EpochIndex] index values from the resampled dataset. - -```python -class MyDeterministicDataset: - def __getitem__(self, index: int): ... - - -dataset = IdentityMultiEpochDatasetWrapper(MyDeterministicDataset()) -for sample in MultiEpochDatasetResampler(dataset, num_epochs=3, shuffle=True): - ... -``` - -## Training Resumption - -To ensure identical behavior with and without job interruption, Megatron datamodules must manage sample-exact training -resumption. When writing your own datamodule, preserve these constraints: - -- Persist enough dataloader state (e.g. the global step or sample index) so training resumes from the correct position - rather than restarting from index 0. -- Distinguish between train, validation, and test dataloaders explicitly. Only the training dataloader should resume - from a saved sample index — validation and test dataloaders should always start from the beginning. -- Update the global step immediately before returning each dataloader so the resume position is accurate. - -See the `evo2_megatron` and `eden_megatron` recipes in `BioNeMo Recipes` for working examples of Megatron datamodule -implementations with training resumption. - -## Testing Datasets for Megatron Compatibility - -The key invariant for Megatron-compatible datasets is determinism: repeated calls with the same index must yield the -same sample. 
When writing tests for your dataset, confirm that: - -- Repeated indexing with the same index returns identical results. -- Epoch-aware randomization is driven only by the epoch component of the index (via a local `torch.Generator`, not - the global seed). -- `torch.manual_seed` is not called inside dataset `__getitem__` paths, as Megatron-LM manages torch seeding - internally for model parallelism. - -Recipe-local tests in `BioNeMo Recipes` (e.g. in the `evo2_megatron` recipe) are the best reference for how to -validate these assumptions. diff --git a/docs/docs/main/about/background/nemo2.md b/docs/docs/main/about/background/nemo2.md deleted file mode 100644 index 841ab196cb..0000000000 --- a/docs/docs/main/about/background/nemo2.md +++ /dev/null @@ -1,178 +0,0 @@ -# NeMo2 - -## Checkpointing - -In NeMo, there are two distinct mechanisms for continuing training from a checkpoint: resuming from a training -directory and restoring from a checkpoint. - -> **Note**: If both `--result-dir` and `--ckpt-dir` are provided, checkpoints in `--result-dir` take precedence. -> The `--ckpt-dir` is only used if `--result-dir` contains no checkpoints. - -**1. Resuming Training from a Directory** - -When a training job runs, NeMo saves checkpoints in a designated results directory specified with the `--result-dir` -flag. If the same job is restarted and a checkpoint exists in that directory, the most recent checkpoint is -automatically loaded and training continues from the exact step and optimizer state stored there. - -If no checkpoint is found in the results directory: - -- No checkpoint directory specified → training starts from scratch. -- Checkpoint dir is specified by `--ckpt-dir` → NeMo attempts to restore from that checkpoint (see "Restoring from a - Checkpoint" below). - -**2. 
Restoring from a Checkpoint** - -To start a new training run initialized from a checkpoint in a different directory, the restore configuration can be -set to point to that checkpoint via the `--ckpt-dir` flag. NeMo will begin training from that checkpoint’s weights and -optimizer state. After the initial restoration, subsequent runs of the same job follow the standard resuming flow - -loading from the results directory — without repeating the restore step. - -``` - +-------------------------+ - | Start Training Job | - +-----------+-------------+ - | - +-------------------+-------------------+ - | | -Results dir has Results dir empty -checkpoint → Resume | - v - +----------------------------+ - | Checkpoint dir specified? | - +-------------+--------------+ - | - +----------------+----------------+ - | | - No → Start from scratch Yes → Restore - from checkpoint - ↓ - Resume flow -``` - -## Parallelism - -NeMo2 represents tools and utilities to extend the capabilities of `pytorch-lightning` to support training and inference -with megatron models. While pytorch-lightning supports parallel abstractions sufficient for LLMs that fit on single GPUs -(distributed data parallel, aka DDP) and even somewhat larger architectures that need to be sharded across small -clusters of GPUs (Fully Sharded Data Parallel, aka FSDP), when you get to very large architectures and want the most -efficient pretraining and inference possible, megatron-supported parallelism is a great option. - -So in other words, NeMo2 adds the Megatron strategy in addition to the standard DDP and FSDP strategies. - -Many downstream constraints and conventions are driven by the underlying constraints of megatron. - -### Deeper Background on Megatron - -#### Other Options for Parallelizing Smaller Models - -Megatron is a system for supporting advanced varieties of model parallelism. 
While vanilla models can be executed -in parallel with systems, such as distributed data parallel (DDP), or moderately large models can be trained with Meta's -Fully Sharded Data Parallel (FSDP/FSDP2), when you work with larger models and want to train them with maximal -efficiency, it is ideal to use some variant of megatron. - -#### DDP Background - -DDP is the best option **when you can fit the entire model on every GPU** in your cluster. With DDP, you can -parallelize your `global batch` across multiple GPUs by splitting it into smaller `mini-batches`, one for each GPU. -Each GPU computes the forward and backward pass independently for its subset of data, allowing for maximal utilization. -Synchronization of gradients occurs after the backward pass is complete for each batch, followed by a weight update -that ensures all GPUs have synchronized parameters for the next iteration. Here is an example of how this might appear -on your cluster with a small model: - -![Data Parallelism Diagram](../../../assets/images/megatron_background/data_parallelism.png) - -#### FSDP Background - -FSDP extends DDP by sharding (splitting) model weights across GPUs in your cluster to optimize memory usage. -While data is still split across GPUs in the same way as DDP, FSDP strategically synchronizes and broadcasts -the necessary shards of model weights to all GPUs just-in-time for computation during the forward pass. - -For example, when a layer is needed for computation, the owning GPU sends that shard of weights to the other GPUs, -which then perform the forward computation on that layer. After the computation is complete, FSDP frees the memory for -that layer on all GPUs except the one that owns the shard. This process continues iteratively for each layer until the -entire model has been executed on the data. - -**Note:** This process parallelizes the storage in a way that enables too large models to be executed (assuming a single -layer is not too large to fit on a GPU). 
Megatron (next) co-locates both storage and compute. - -The following two figures show two steps through the forward pass of a model that has been sharded with FSDP. -![FSDP Diagram Step 1](../../../assets/images/megatron_background/fsdp_slide1.png) -![FSDP Diagram Step 2](../../../assets/images/megatron_background/fsdp_slide2.png) - -#### Model Parallelism - -Model parallelism is the catch-all term for the variety of different parallelism strategies -that could be applied to parallelizing your model across a cluster. Below we explain several varieties of model -parallelism that are implemented in megatron. As mentioned in the previous section, one key advantage to the -megatron-specific parallelism types described next are that they co-locate storage and compute of the layers. Inefficiencies -caused by naive scheduler implementations are also addressed (discussed in the section on schedulers). - -##### Pipeline Parallelism - -Pipeline parallelism is similar to FSDP, but the model blocks that are sharded are also computed in parallel on the -nodes that own the model weight in question. You can think of this as a larger simulated GPU that happens to be spread -across several child GPUs. Examples of this include `parallel_state.is_pipeline_last_stage()`, which is commonly -used to tell if a particular node is on last pipeline stage, where you compute the final head outputs or loss. -![Pipeline Parallelism](../../../assets/images/megatron_background/pipeline_parallelism.png) - -Similarly, there are convenience -environmental lookups for the first pipeline stage (where you compute the embedding for example) -`parallel_state.is_pipeline_first_stage()`. - -##### Tensor Parallelism - -Tensor parallelism represents splitting single layers across GPUs. This can also solve the problem where some individual -layers could in theory be too large to fit on a single GPU, where FSDP would not be possible. 
This would still work -since individual layer weights (and computations) are distributed. Examples of this in megatron include `RowParallelLinear` and -`ColumnParallelLinear` layers. -![Tensor Parallelism](../../../assets/images/megatron_background/tensor_parallelism.png) - -##### Sequence Parallelism - -In megatron, "sequence parallelism" refers to the parallelization of the dropout, and layernorm blocks of a transformer. -The idea is roughly as follows. First, remember that in a typical transformer architecture, the `embedding_dimension` is -the only dimension that `LayerNorm` is applied over. Similarly, Dropout (outside of the attention block) is an operation -that is applied on the last embedding dimension. These two layers are independent over the sequence dimension, so they -can be processed in blocks on separate GPUs. As can be seen in the following figure, the initial `LayerNorm` in a -multi-headed transformer block is executed in parallel. Next the results are gathered for the self attention and linear -layers (which are typically set up for tensor parallelism). Next the result from those layers is scattered back to -sequence parallel nodes which execute dropout, do a residual connection from the previous sequence parallel output, and -a layernorm. Next those results are again gathered for the final FFN and activation layers prior to a final scattering -across sequence parallel GPUs for the output of that transformer block. -![Sequence Parallelism](../../../assets/images/megatron_background/sp_korthikanti_2022_fig5.png) - -As a user, if you know that your transformer is executed in parallel and you have custom losses or downstream layers, -you need to make sure that the appropriate gather operations are occurring for your loss computation etc. 
- -##### Context Parallelism - -[Context parallelism](https://docs.nvidia.com/megatron-core/developer-guide/latest/user-guide/features/context_parallel.html) -extends sequence parallelism by also parallelizing the attention mechanism itself, similar to -[Ring Attention](https://arxiv.org/abs/2310.01889). In general, if you are using a transformer, context parallelism is -going to perform better than sequence parallelism for very long input sequences. That said, due to the necessity of -all-gather and reduce scatter operations throughout the architecture, the general advice that you should avoid these -kinds of parallelism if a micro-batch fits on a single device still holds. Splitting across elements in a global batch -represent the fewest necessary communications between GPUs on your cluster, so standard DDP should run the fastest if -you can get your training loop for a micro batch to fit on one GPU. - -##### Mixing Parallelism Strategies - -You can mix different kinds of parallelism together to achieve a better result. In general, experimentation -should be done to identify the optimal mix of parallelism. See this -[YouTube tutorial from Jared Casper](https://youtu.be/gHaNUcS1_O4) for more background on megatron parallelism -strategies. - -The figure below demonstrates how mixing strategies results in larger "virtual GPUs", which similarly means you have -fewer distinct micro-batches in flight across your cluster. Note that the number of virtual GPUs is multiplicative -so if you have `TP=2` and `PP=2`, then you are creating a larger virtual GPU out of `2*2=4` GPUs, so your cluster size -needs to be a multiple of 4 in this case. 
-![Mixing Tensor and Pipeline Parallelism](../../../assets/images/megatron_background/tensor_and_pipeline_parallelism.png) - -##### Scheduling Model Parallelism - -You can improve on naive schedules by splitting up micro-batches into smaller pieces, executing many stages of the -model on single GPUs, and starting computing the backwards pass of one micro-batch while another is going through forward. -These optimizations allow for better cluster GPU utilization to be achieved. For example, the following figure shows -how more advanced splitting techniques in megatron (for example, the interleaved scheduler) offer better utilization when model -parallelism is used. As best possible, we don't recommend using model parallelism (DDP). -![Execution Schedulers](../../../assets/images/megatron_background/execution_schedulers.png) diff --git a/docs/docs/main/about/releasenotes-fw.md b/docs/docs/main/about/releasenotes-fw.md deleted file mode 100644 index a1a04afd81..0000000000 --- a/docs/docs/main/about/releasenotes-fw.md +++ /dev/null @@ -1,432 +0,0 @@ -# Release Notes - -## BioNeMo Framework v2.7 - -### Updates & Improvements - -- Evo2 model improvements: - - - Context, tensor and data parallelism support in the prediction endpoint as well as support for context lengths over 8192 https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/1123. Fixes https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/910 and https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/1048. - - - LoRA fine-tuning by @gabenavarro: https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/980. Note: internal CI coverage of LoRA convergence is still a work in progress; therefore, we cannot guarantee convergence. 
- - - Fix a 2x memory-usage issue during Evo2 generation: https://github.com/NVIDIA/NeMo/pull/14515 - - - Add flash-decode support in inference: https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/1000 - - - Update Rotary Embedding and sequence-length defaults to address incorrect checkpoint conversion: https://github.com/NVIDIA/NeMo/pull/14514 - - - Improvements to tag masking in the Evo2 loss: https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/1008 - - - Support for [Spike-no-more](https://arxiv.org/abs/2312.16903) to improve training stability: https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/1011 - -- Added a header to SCDL archives, providing improved provenance tracking and supporting future releases. It also adds tracking of AnnData API coverage in SCDL tests. - This header stores metadata about the archive and its composite arrays, including a version; the array lengths and data types; and information about the RowFeatureIndexes. This adds the features necessary to fix https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/999 as well as to implement simple bit-packing of the rowptr, colptr, and data arrays. It should also make SCDL more secure, enable strict compatibility checking, and open the door to further performance improvements: https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/1030 - -- `bionemo-geometric` has been deprecated and removed. The molecular-featurization tooling in this package has moved to [cuik-molmaker](https://github.com/NVIDIA-Digital-Bio/cuik-molmaker). - -### Known Issues - -- We have removed `libtiff` from the container due to a known vulnerability, [CVE-2025-9900](https://ubuntu.com/security/CVE-2025-9900). `libtiff` isn't directly used in any BioNeMo code; however, users might face issues with e.g. Pillow or other common image-manipulation libraries inside this container. - -## BioNeMo Framework v2.6.3 - -### Updates & Improvements - -- Fixes numerous issues with Evo2 model: - 1. 
Inference/Generation issues resolved. https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/890 - 2. FP8 training resumption issues resolved. https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/973 - 3. Bug in inference script that concerns checkpoint loading is fixed. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/950 -- ESM2 LoRA model inference issue resolved. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/996 -- Added experimental evo2-mamba model. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/888 -- Updated base Docker image to [nvidia-pytorch 25.06-py3](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags) -- NCCL issue in ESM2 pretraining resolved. https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/970 - -### What's Changed - -- Fix test_train_evo2_stops test by @balvisio in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/965 -- Enable test_train_evo2_stop_at_max_steps_and_continue. by @balvisio in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/966 -- automated benchmarks: esm2 650M training analogous to bionemo-recipes by @dorotat-nv in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/975 -- Fix database path in esm2_pretrain_recipes by @pstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/978 -- Add fp8 stop and go test for evo2 by @jwilber in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/974 -- Update Docs Banner for GitHub Pages-hosted Docs by @tshimko-nv in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/981 -- Add release notes for v2.6.2 (25.06) by @trvachov in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/971 -- Evo2 Generation fixes and necessary base dependency and container updates. Large change. 
by @jwilber in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/949 -- Point NeMo submodule back to main repo by @trvachov in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/984 -- Use new b2b kernels in evo2 jet tests by @jwilber in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/985 -- change where dtype is found in checkpoint export by @pstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/989 -- Evo2 Mamba by @jstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/888 -- Adding inference CDS length tests by @jstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/991 -- Fix PIL CVE by @trvachov in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/992 -- (BIONEMO-2334) Patch TE to fix Evo2 stop and go training by @balvisio in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/987 -- Fix bug in evo2-mamba train and add test by @jstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/994 -- Fix esm2 lora inference by @yzhang123 in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/996 -- Reset parameters for the ESM-2 contact head on HF export by @pstjohn in https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/983 - -## BioNeMo Framework v2.6.2 - -### Updates & Improvements - -- Fixes numerous ESM2 model issues: - 1. Finetuning metric for token classification is fixed. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/946 - 2. Losses for finetuning were fixed for data and model parallelism. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/959 - 3. Bug in inference script that concerns checkpoint loading is fixed. https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/950 -- Updated base Docker image to [nvidia-pytorch 25.04-py3](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags) - -### Known Issues - -- Evo2 generation is broken (i.e. `bionemo-evo2/src/bionemo/evo2/run/infer.py`). 
See issue https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/890. A workaround exists on branch https://github.com/NVIDIA-BioNeMo/bionemo-framework/pull/949 and we are working to fix this issue for the July release. -- There is a NCCL communication issue on certain A100 multi-node environments. In our internal testing, we were not able to reproduce the issue reliably across environments. If end users see the following error, please report in issue https://github.com/NVIDIA-BioNeMo/bionemo-framework/issues/970 : - -``` -[rank9]: torch.distributed.DistBackendError: NCCL error in: /opt/pytorch/pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, internal error - please report this issue to the NCCL developers, NCCL version 2.26.3 -``` - -## BioNeMo Framework v2.6.1 - -### Updates & Improvements - -- Fixes around ESM2 pretraining and finetuning checkpoints. -- Added sanity dataset for AMPLIFY testing. -- Tested against A100 [brev](https://developer.nvidia.com/brev) instances. -- Update `tornado` package to `>6.5.0` to fix container CVEs. - -## BioNeMo Framework v2.6 - -### New Features - -- Adds support for AMPLIFY [doi:10.1101/2024.09.23.614603](https://doi.org/10.1101/2024.09.23.614603) pre-training and inference, offering a 70% speedup over the xformers-based attention backend with similar final perplexity values at 1M pre-training steps. (4.23 for 120M, 3.05 for 350M). The model is fully compatible with existing weights on HuggingFace. -- Adds alpha support for [LoRA fine-tuning to for ESM2 models](../../../models/ESM-2/#lora-fine-tuning-performace). Inference and fine-tuning are enabled along with resumption from a checkpoint. - -### Updates & Improvements - -- Blackwell support, tested on B200 systems. -- Fixed Grace CPU support, released ARM compatible container. 
- -## BioNeMo Framework v2.5 - -### New Features - -- Adding the Evo2 model training workflow, including data preprocessing, pre-training, fine-tuning and inference with bf16 and fp8 support. - -### Updates & Improvements - -- Supporting/upgrading federated learning examples of BioNeMo in [NVFlare](https://github.com/NVIDIA/NVFlare/tree/2.6.0rc1/examples/advanced/bionemo) -- Upgrade bionemo-moco to v0.0.2 -- Brev.dev launchable tutorials - -#### Known Issues - -- Partial test failures on ARM CPUs. - -## BioNeMo Framework v2.4.1 - -### Updates & Improvements - -- Applies fixes to ESM2 metric logging that result in NotImplementedError while using Model Parallelism. - -## BioNeMo Framework v2.4 - -### New Features - -- Draft implementation of Evo2 with support for Hyena operators -- bionemo-moco v0.0.1 released for building diffusion-like generative models. - -### Known Issues - -- Partial test failures on ARM CPUs. - -### Updates & Improvements - -- ESM2 fine-tuning script with CLI (finetune_esm2) that supports sequence-level/token-level classification/regression using a CSV dataset. -- Brev.dev launchable fine-tuning tutorial for ESM2 (archived) - -## BioNeMo Framework v2.3 - -### New Features - -- Distributed Inference Support for ESM2 and Geneformer - - Enables linear inference throughput as GPU number is increased - - [See ESM2 inference notebook](https://github.com/NVIDIA-BioNeMo/bionemo-framework/blob/release-v2.3/docs/docs/user-guide/examples/bionemo-esm2/inference.ipynb) and use `--num-gpus` parameter. - -### Updates & Improvements - -- Prior Geneformer inference on H100 accuracy regression fixed. -- Base image updated to `nvcr.io/nvidia/pytorch:24.12-py3`; python updated to 3.12 among other core dependency upgrades ([base container release notes here](https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-12.html#rel-24-12)). 
- -## BioNeMo Framework v2.2 - -### New Features - -- Small Molecule Featurization - - Implemented elementary and advanced atom, bond, and full molecule featurizers. -- GH200 Support for BioNeMo - - Added a `Dockerfile.arm` that builds a BioNeMo container that runs on GH200 machines. - - Publish a version of the BioNeMo container that supports multiple architectures to NGC. - -### Updates & Improvements - -- Single-Cell Dataloader (SCDL) - - Changed metadata storage to `parquet` files, which creates a 30x speed up when iterating over a large dataset. - - Added functionality to concatenate several `anndata` files without doubling disk memory usage. -- ESM2 - - Added support for `SIGTERM` preemption checkpoint saving. - - Moved ESM-2 and Geneformer training scripts to new executables, `train_esm2` and `train_geneformer`, respectively. - - Moved inference script to a new executable `infer_esm2`, and deprecated the inference example in the fine-tuning tutorial. - - Added new Jupyter notebook tutorials for inference and zero-shot protein design. These notebooks can be deployed on the cloud resources as a [brev.dev](https://www.brev.dev/) launchable. - -### Known Issues: - -- Loading a checkpoint for Geneformer inference on H100 has a known regression in accuracy. Work is in progress to resolve by next release. - -## BioNeMo Framework v2.1 - -### New Features: - -- ESM2 Implementation - - Updated the ESM-2 Model Card with detailed performance benchmarks comparing BioNeMo2 training against vanilla pytorch. - - Added ESM-2 inference endpoint for evaluating pre-trained models -- Size-Aware Batching - - Added SizeAwareBatchSampler, a pytorch data sampler that batches elements of varying sizes while ensuring that the total size of each batch does not exceed a specified maximum. 
- - Added BucketBatchSampler, another pytorch data sampler that groups elements of varying sizes based on predefined bucket ranges, and create batches with elements from each bucket to ensure that each batch has elements with homogeneous sizes. -- CLI Support - - Added pydantic interface for pretraining jobs via parsing JSON configuration files that enables passing customized Model and DataModules classes. - - Implemented pydantic configuration for Geneformer and ESM2 pretraining and finetuning. - - Added 'recipes' for generating validated JSON files to be used with pydantic interface. - - Added installable scripts for 2/3 respectively, bionemo-esm2-recipe, bionemo-esm2-train, bionemo-geneformer-recipe, bionemo-geneformer-train. -- Geneformer support in BioNeMo2: - - Tested pre-training scripts and fine-tuning example scripts that can be used as a starting point for users to create custom derivative models. - - Geneformer 10M and 106M checkpoints ported from BioNeMo v1 into BioNeMo v2 available and included in documentation. - - Added inference scripts -- Documentation - - Cell type classification example notebook which covers the process of converting anndata into our internal format, and running inference on that data with a geneformer checkpoint, as well as making use of the inference results. - - Updated Getting Started guide, ESM-2 tutorials - - Added Frequently Asked Questions (FAQ) page - -## BioNeMo Framework v2.0 - -### New Features: - -- ESM-2 implementation - - State of the art training performance and equivalent accuracy to the reference implementation - - 650M, and 3B scale checkpoints available which mirror the reference model - - Flexible fine-tuning examples that can be copied and modified to accomplish a wide variety of downstream tasks -- First version of our NeMo v2 based reference implementation which re-imagines bionemo as a repository of megatron models, dataloaders, and training recipes which make use of NeMo v2 for training loops. 
- - Modular design and permissible Apache 2 OSS licenses enables the import and use of our framework in proprietary applications. - - NeMo2 training abstractions allows the user to focus on the model implementation while the training strategy handles distribution and model parallelism. -- Documentation and documentation build system for BioNeMo 2. - -### Known Issues: - -- PEFT support is not yet fully functional. -- Partial implementation of Geneformer is present, use at your own risk. It will be optimized and officially released in the future. -- Command line interface is currently based on one-off training recipes and scripts. We are working on a configuration based approach that will be released in the future. -- Fine-tuning workflow is implemented for BERT based architectures and could be adapted for others, but it requires you to inherit from the biobert base model config. You can follow similar patterns in the short term to load weights from an old checkpoint partially into a new model, however in the future we will have a more direct API which is easier to follow. -- Slow memory leak occurs during ESM-2 pretraining, which can cause OOM during long pretraining runs. Training with a - microbatch size of 48 on 40 A100s raised an out-of-memory error after 5,800 training steps. - - Possible workarounds include calling `gc.collect(); torch.cuda.empty_cache()` at every ~1,000 steps, which appears - to reclaim the consumed memory; or training with a lower microbatch size and re-starting training from a saved - checkpoint periodically. - -## BioNeMo Framework v1.9 - -### New Features - -- [Documentation] Updated, executable ESM-2nv notebooks demonstrating: Data preprocessing and model training with custom datasets, Fine-tuning on FLIP data, Inference on OAS sequences, Pre-training from scratch and continuing training -- [Documentation] New notebook demonstrating Zero-Shot Protein Design Using ESM-2nv. 
Thank you to @awlange from A-Alpha Bio for contributing the original version of this recipe! - -### Bug fixes and Improvements - -- [Geneformer] Fixed bug in preprocessing due to a relocation of dependent artifacts. -- [Geneformer] Fixes bug in finetuning to use the newer preprocessing constructor. - -## BioNeMo Framework v1.8 - -### New Features - -- [Documentation] Updated, executable MolMIM notebooks demonstrating: Training on custom data, Inference and downstream prediction, ZINC15 dataset preprocesing, and CMA-ES optimization -- [Dependencies] Upgraded the framework to [NeMo v1.23](https://github.com/NVIDIA/NeMo/tree/v1.23.0), which updates PyTorch to version 2.2.0a0+81ea7a4 and CUDA to version 12.3. - -### Bug fixes and Improvements - -- [ESM2] Fixed a bug in gradient accumulation in encoder fine-tuning -- [MegaMolBART] Make MegaMolBART encoder finetuning respect random seed set by user -- [MegaMolBART] Finetuning with val_check_interval=1 bug fix - -### Known Issues - -- Minor training speed regression observed for models DNABERT, Geneformer, MolMIM -- Two known critical CVEs GHSA-cgwc-qvrx-rf7f, GHSA-mr7h-w2qc-ffc2. The vulnerabilities arise within a package that's installed by lightning by default. We do not use that package in bionemo framework container. we are also unable to remove the package in question as it's installed as a side-effect of installing lightning. -- Two known High CVEs from pytorch : GHSA-pg7h-5qx3-wjr3, GHSA-5pcm-hx3q-hm94. - -## BioNeMo Framework v1.7 - -### New Models - -- [DSMBind](https://www.biorxiv.org/content/10.1101/2023.12.10.570461v1), developed under the BioNeMo framework, is a model which can produce comparative values for ranking protein-ligand binding affinities. This release features the capability to perform inference using a newly trained checkpoint. - -### New Features - -- [EquiDock] Remove steric clashes as a post-processing step after equidock inference. 
-- [Documentation] Updated Getting Started section which sequentially describes prerequisites, BioNeMo Framework access, startup instructions, and next steps. - -### Known Issues - -- There is a known security vulnerability with NLTK that can allow for arbitrary code execution via pickle files that are external assets downloaded via nltk.download() (https://github.com/nltk/nltk/issues/3266). BioNeMo itself does not use this dependency in any way, however parts of NeMo text-to-speech (nemo.collections.tts) does use this vulnerable codepath. Since NeMo is installed in the BioNeMo release containers, users are urged to exercise caution when using nemo.collections.tts or nltk. - -## BioNeMo Framework v1.6 - -### New Features - -- [Model Fine-tuning] `model.freeze_layers` fine-tuning config parameter added to freeze a specified number of layers. Thank you to github user [@nehap25](https://github.com/nehap25)! -- [ESM2] Loading pre-trained ESM-2 weights and continue pre-training on the MLM objective on a custom FASTA dataset is now supported. -- [OpenFold] MLPerf feature 3.2 bug (mha_fused_gemm) fix has merged. -- [OpenFold] MLPerf feature 3.10 integrated into bionemo framework. -- [DiffDock] Updated data loading module for DiffDock model training, changing from sqlite3 backend to webdataset. - -## BioNeMo Framework v1.5 - -### New Models - -- [Geneformer](https://www.nature.com/articles/s41586-023-06139-9) is out of **Beta** status. This release includes newly trained checkpoints and benchmarks, including a variant based on the publication with 10M parameters, and the largest variant of geneformer publically available to date with 106M parameters. - -## BioNeMo Framework v1.4 - -### New Models - -- **Beta** [Geneformer](https://www.nature.com/articles/s41586-023-06139-9) a foundation model for single-cell data that encodes each cell as represented by an ordered list of differentially expressed genes for that cell. 
- -### New Features - -- **Beta** Geneformer pretraining with custom datasets -- Low-Rank Adaptation (LoRA) finetuning for ESM2 - -### Bug fixes and Improvements - -- OpenFold training improved benchmarks and validation of optimizations - -### Known Issues - -- BioNeMo Framework v24.04 container is vulnerable to [GHSA-whh8-fjgc-qp73](https://github.com/advisories/GHSA-whh8-fjgc-qp73) in onnx 1.14.0. Users are advised not to open untrusted onnx files with this image. Restrict your mount point to minimize directory traversal impact. A fix for this is scheduled in the 24.05 (May) release. - -## BioNeMo Framework v1.3 - -### New Models - -- MolMIM implementation under BioNeMo framework, [a small molecule model developed at NVIDIA](https://arxiv.org/abs/2208.09016) which can be used to produce embeddings and novel molecules. - -### New Features - -- [MolMIM](https://developer.nvidia.com/blog/new-models-molmim-and-diffdock-power-molecule-generation-and-molecular-docking-in-bionemo/) re-trained on more data is now available in the framework and achieves state-of-the-art performance. -- MolMIM property guided tutorial notebook covering property guided optimization using our new framework model. -- MolMIM training tutorial available walking users through either training from scratch or from an existing checkpoint on your own data. -- MolMIM tutorial notebook covering molecular sampling and property prediction is also now available. -- Numerous optimizations from [NVIDIA's entry to the MLPerf competition](https://developer.nvidia.com/blog/optimizing-openfold-training-for-drug-discovery/) have been added to OpenFold. Documentation and detailed benchmarks are works in progress and will be published in upcoming releases. 
This release contains the following performance optimizations: - - Fused GEMMs in multi-head attention (MHA) - - Non-blocking data pipeline - - BF16 precision training - - Fused MHA gating - - Inductor Compiled LayerNorm - - OpenAI Triton LayerNorm kernels - - OpenAI Triton MHA - -### Bug fixes and Improvements - -- NeMo upgraded to v1.22 ([see NeMo release notes](https://github.com/NVIDIA/NeMo/releases)), -- PyTorch Lightning upgraded to 2.0.7 -- [NGC CLI](https://org.ngc.nvidia.com/setup/installers/cli) has been removed from the release container. If users - download models from inside the container (e.g. using `bionemo_data_download` or via running specific unit tests), - the NGC CLI will be auto-installed to pull the models from NGC. - -### Known Issues - -- BioNeMo Framework v24.03 container is vulnerable to [GHSA-whh8-fjgc-qp73](https://github.com/advisories/GHSA-whh8-fjgc-qp73) in onnx 1.14.0. Users are advised not to open untrusted onnx files with this image. Restrict your mount point to minimize directory traversal impact. - -## BioNeMo Framework v1.2 - -## New Models - -- OpenFold implementation under BioNeMo framework, derived from public OpenFold and DeepMind AlphaFold-2. -- DNABERT implementation for computing embeddings for each nucleotide in the input DNA sequence. - -### New Features - -- Training recipes for DNABERT and OpenFold, including automated data processing and full configuration for training. -- Example tutorials for running inference using OpenFold. -- Splice Prediction downstream task example for DNABERT. -- Wrapper scripts for DNABERT and OpenFold to launch jobs on BCP. - -### Bug fixes and Improvements - -- Interface improvements for ESM-2 data ingestion and pre-processing. The interface allows for explicit specification of training, validation, and test sets. 
The user may set `config.model.data.default_dataset_path` to maintain prior behavior, or set `config.model.data.train.dataset_path`, `config.model.data.val.dataset_path`, `config.model.data.test.dataset_path` which may all be unique. - -### Known Issues - -- OpenFold training speed does not yet include [MLPerf optimizations](https://blogs.nvidia.com/blog/scaling-ai-training-mlperf/), and these will be released in the subsequent release. - -## BioNeMo Framework v1.1 - -## New Models - -- EquiDock for protein-protein docking pose prediction -- DiffDock for protein-ligand blind docking pose generation - -### New Features - -- Training recipes for EquiDock and DiffDock, including automated data processing and full configuration for training. -- Accelerated inference and training for DiffDock via fast tensor-product kernels. -- Example tutorials for running inference using EquiDock and DiffDock. -- Recipes for running EquiDock and DiffDock on BCP and Slurm. -- Pipeline parallel supported for ESM-2nv. -- Migration of inference notebooks to using pytriton. - -### Bug fixes and Improvements - -- Faster pre-processing of data on BCP. -- Refactor of download_models.sh to download_models.py for easier CLI use. -- Refactor of install structure to move from /opt/nvidia to /workspace/bionemo. The environment variable $BIONEMO_HOME now points to the repo base and is required to be set for tests to pass. - -### Security Notice - -SchedMD Slurm in the release container is shipped with a security vulnerability, [CVE-2022-29501](https://ubuntu.com/security/CVE-2022-29501), and therefore this version of Slurm should not be used to run a Slurm cluster (specifically, the processes `slurmdbd`, `slurmctld`, and `slurmd`. - -In general, the BioNeMo Framework release is designed to ship code and an environment that would be executed on local workstations, or deployed on clusters for large scale training jobs. This container is not designed to run as a service with public facing APIs. 
A full summary of security vulnerabilities can be found [here](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara/containers/bionemo-framework/security). - -## BioNeMo Framework v1.0 - -## New Models - -- ESM-2nv for protein sequence representations, pretrained weights of ESM-2 650M and ESM-2 3B converted from HF checkpoint available. - -### New Features - -- Pre-training recipes for ESM-2nv, including automated data processing and full configuration for training -- Fine-tuning of ESM-2nv with encoder frozen or trainable -- Downstream task finetuning support for single-value classification (e.g. subcellular localization), single-value regression (e.g. meltome) and per-token classification (e.g. secondary structure) -- Validation in loop to evaluate performance on downstream tasks during training -- Example tutorials for pre-training, fine tuning, and downstream tasks - -## BioNeMo Framework v0.4.0 - -### New Models - -- ESM-1nv for protein sequence representations, pretrained weights available -- ProtT5nv for protein sequence representation and sequence-to-sequence tasks, pretrained weights available - -### New Features - -- Pre-training for all models, including automated data processing and full configuration for training -- Fine-tuning of MegaMolBART, ESM-1nv, and ProtT5nv with encoder frozen or trainable -- Downstream task example applications – secondary structure prediction for ESM-1nv and ProtT5nv, physchem prediction (lipophilicity, FreeSolv, ESOL) and retrosynthesis prediction for MegaMolBART -- Validation in loop to evaluate performance on downstream tasks during training: physchem prediction (MegaMolBART) and secondary structure prediction (ESM-1nv and ProtT5nv). -- Pipeline parallelism supported as a beta feature. Not fully tested. -- Example notebooks for pre-training, fine tuning, and downstream tasks - -### Known Issues - -- Data preprocessing on DGX Cloud is slow. Faster to do it on a local machine. 
- -### New APIs - -- BioNeMoDataModule - Encapsulates dataset instantiation in bionemo models so that many different datasets can be used with the same model -- EncoderFineTuning - Base class to facilitate implementation of downstream tasks built on embeddings from other models diff --git a/docs/docs/main/contributing/Writing Documentation/mkdocs.md b/docs/docs/main/contributing/Writing Documentation/mkdocs.md index 45d165ec47..d412941239 100644 --- a/docs/docs/main/contributing/Writing Documentation/mkdocs.md +++ b/docs/docs/main/contributing/Writing Documentation/mkdocs.md @@ -2,6 +2,6 @@ ## Build system -BioNeMo Framework uses [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) to build its documentation. +BioNeMo Recipes uses [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) to build its documentation. Docstrings are converted to automatically-generated API reference pages using `mkdocstrings`, and can be linked from markdown pages using [paths](https://mkdocstrings.github.io/usage/). diff --git a/docs/docs/main/datasets/index.md b/docs/docs/main/datasets/index.md index ff03da4b00..929654ded7 100644 --- a/docs/docs/main/datasets/index.md +++ b/docs/docs/main/datasets/index.md @@ -1,10 +1,10 @@ -# BioNeMo Framework: Available Datasets +# BioNeMo Recipes: Available Datasets -The BioNeMo Framework provides access to a variety of high-quality datasets for bioinformatics and cheminformatics research. These datasets cover a range of biological and chemical modalities, supporting various research applications. The following table lists the currently available datasets: +BioNeMo Recipes provides access to a variety of high-quality datasets for bioinformatics and cheminformatics research. These datasets cover a range of biological and chemical modalities, supporting various research applications.
The following table lists the currently available datasets: | **Dataset** | **Modality** | **Uses** | | --------------------------- | ------------ | -------------------------------------- | | [CELLxGENE](./CELLxGENE.md) | Single Cell | Single-Cell Gene Expression | | [UniProt](./uniprot.md) | Protein | Protein Sequence and Function Analysis | -For more information about the datasets included in the BioNeMo Framework, refer to the Dataset Cards linked in the table above or the original sources referenced in the respective dataset descriptions. +For more information about the datasets included in BioNeMo Recipes, refer to the Dataset Cards linked in the table above or the original sources referenced in the respective dataset descriptions. diff --git a/docs/docs/main/getting-started/pre-reqs.md b/docs/docs/main/getting-started/pre-reqs.md index 2f0b117ab6..96252f30d8 100644 --- a/docs/docs/main/getting-started/pre-reqs.md +++ b/docs/docs/main/getting-started/pre-reqs.md @@ -1,11 +1,11 @@ -# Hardware and Software Prerequisites for BioNeMo Framework +# Hardware and Software Prerequisites for BioNeMo Recipes -Before you begin using the BioNeMo Framework, ensure the hardware and software prerequisites outlined below are +Before you begin using BioNeMo Recipes, ensure the hardware and software prerequisites outlined below are met. ## Hardware Prerequisites -The BioNeMo Framework is compatible with environments that have access to NVIDIA GPUs. `bfloat16` precision requires an +BioNeMo Recipes is compatible with environments that have access to NVIDIA GPUs. `bfloat16` precision requires an Ampere generation GPU or higher ([Compute Capability ≥8.0](https://developer.nvidia.com/cuda-gpus)). You may be able to run BioNeMo on GPUs without `bfloat16`, but this use-case is not supported by the development team.
@@ -31,7 +31,7 @@ The following datacenter and desktop GPUs have Compute Capability ≥8.0 and are ## Software Prerequisites -The BioNeMo Framework is supported on x86 Linux systems. +BioNeMo Recipes is supported on x86 Linux systems. Ensure that the following are installed in your desired execution environment: diff --git a/docs/docs/main/index.md b/docs/docs/main/index.md deleted file mode 100644 index 3f2db04b98..0000000000 --- a/docs/docs/main/index.md +++ /dev/null @@ -1,21 +0,0 @@ -# What is BioNeMo? - -BioNeMo is a software ecosystem produced by NVIDIA for the development and deployment of life sciences-oriented artificial intelligence models. BioNeMo provides a set of tools to help researchers build, train, and deploy AI models for various biological applications. The main components of BioNeMo are: - -- **BioNeMo Framework**: a free-to-use collection of programming tools and packages offering access to optimized, pre-trained biomolecular models and workflows. The framework enables building and customizing models, including training and fine-tuning. Capabilities span various workloads and therapeutic modalities, such as molecular generation, protein structure prediction, protein-ligand, and representation learning. - -- **BioNeMo NIMs**: easy-to-use, enterprise-ready _inference_ microservices with built-in API endpoints. NIMs are engineered for scalable, self- or cloud-hosted deployment of optimized, production-grade biomolecular foundation models. Check out the growing list of BioNeMo NIMs [here](https://build.nvidia.com/explore/biology). - -When choosing between the BioNeMo Framework and BioNeMo NIMs, consider your project's specific requirements. The Framework is ideal for scenarios that require model training, fine-tuning, or customization, offering a comprehensive suite of tools and packages. In contrast, NIMs are optimized for inference-only workflows, providing easy-to-use, enterprise-ready microservices with built-in API endpoints.
As a rule, use the Framework for custom model development or high-control modeling, and NIMs for inference against existing models. - -Get notified of new releases, bug fixes, critical security updates, and more for biopharma. [Subscribe.](https://www.nvidia.com/en-us/clara/biopharma/product-updates/) - -## BioNeMo User Success Stories - -[Enhancing Biologics Discovery and Development With Generative AI](https://www.nvidia.com/en-us/case-studies/amgen-biologics-discovery-and-development/) - Amgen leverages BioNeMo and DGX Cloud to train large language models (LLMs) on proprietary protein sequence data, predicting protein properties and designing biologics with enhanced capabilities. By using BioNeMo, Amgen achieved faster training and up to 100X faster post-training analysis, accelerating the drug discovery process. - -[Cognizant to apply generative AI to enhance drug discovery for pharmaceutical clients with NVIDIA BioNeMo](https://investors.cognizant.com/news-and-events/news/news-details/2024/Cognizant-to-apply-generative-AI-to-enhance-drug-discovery-for-pharmaceutical-clients-with-NVIDIA-BioNeMo/default.aspx) - Cognizant leverages BioNeMo to enhance drug discovery for pharmaceutical clients using generative AI technology. This collaboration enables researchers to rapidly analyze vast datasets, predict interactions between drug compounds, and create new development pathways, aiming to improve productivity, reduce costs, and accelerate the development of life-saving treatments. - -[Cadence and NVIDIA Unveil Groundbreaking Generative AI and Accelerated Compute-Driven Innovations](https://www.cadence.com/en_US/home/company/newsroom/press-releases/pr/2024/cadence-and-nvidia-unveil-groundbreaking-generative-ai-and.html) - Cadence's Orion molecular design platform will integrate with BioNeMo generative AI tool to accelerate therapeutic design and shorten time to trusted results in drug discovery. 
The combined platform will enable pharmaceutical companies to quickly generate and assess design hypotheses across various therapeutic modalities using on-demand GPU access. - -Find more user stories on NVIDIA's [Customer Stories](https://www.nvidia.com/en-us/case-studies/?industries=Healthcare%20%26%20Life%20Sciences&page=1) and [Technical Blog](https://developer.nvidia.com/blog/search-posts/?q=bionemo) sites. diff --git a/docs/docs/models/ESM-2/index.md b/docs/docs/models/ESM-2/index.md index bd7dd07fcf..4b66bde324 100644 --- a/docs/docs/models/ESM-2/index.md +++ b/docs/docs/models/ESM-2/index.md @@ -6,7 +6,7 @@ ESM-2 is a pre-trained, bi-directional encoder (BERT-style model) over amino acid sequences. ESM-2 models provide embeddings for amino acids that have led to state-of-the-art performance on downstream tasks such as structure and -function prediction. ESM-2 has been trained at a number of different model sizes. BioNeMo Framework includes converted +function prediction. ESM-2 has been trained at a number of different model sizes. BioNeMo Recipes includes converted checkpoints for the 650M and 3B parameter variants. The 650M model has 33 layers, 20 attention heads, and a hidden space dimension of 1280. The 3B model has 36 layers, 40 attention heads, and a hidden space dimension of 2,560. @@ -105,7 +105,7 @@ A validation set of 328,360 UniRef50 representative sequences were randomly sele Dataset](../../main/datasets/uniprot.md)). This validation set was used to ensure that the output of BioNeMo-converted checkpoints is consistent with their outputs when evaluated with the HuggingFace Transformers library. 
-| Checkpoint | HuggingFace | BioNeMo Framework | Lin *et al.* 2023 | +| Checkpoint | HuggingFace | BioNeMo Recipes | Lin *et al.* 2023 | | ---------- | ----------- | ----------------- | ----------------------------------- | | 650M | 7.001 | 7.002 | 6.95 :material-information-outline: | | 3B | 6.003 | 6.004 | 6.49 :material-information-outline: | @@ -113,7 +113,7 @@ checkpoints is consistent with their outputs when evaluated with the HuggingFace !!! info "Different Validation Sets" ``` -The HuggingFace and converted BioNeMo Framework checkpoints were evaluated on a newly curated validation set. Perplexities +The HuggingFace and converted BioNeMo Recipes checkpoints were evaluated on a newly curated validation set. Perplexities from Lin *et al.* 2023 are reported for comparison, but the original train/test splits are not available. ``` @@ -131,9 +131,9 @@ at the ESM2-650M model size. The BioNeMo implementation could handle batch sizes ![ESM-2 Model Scaling](../../assets/images/esm2/esm2_model_scaling.png) -Training ESM-2 at the 650M, 3B, and 15B model variants show improved performance with the BioNeMo Framework over the +Training ESM-2 at the 650M, 3B, and 15B model variants shows improved performance with BioNeMo Recipes over the pure-PyTorch baseline. These experiments were conducted on 16x NVIDIA A100 or 16x NVIDIA H100 GPUs split across two -nodes. \**Note:* 15B model variants were trained on 64 GPUs with the BioNeMo Framework. +nodes. \**Note:* 15B model variants were trained on 64 GPUs with BioNeMo Recipes.
#### Device Scaling diff --git a/docs/docs/models/evo2.md b/docs/docs/models/evo2.md index b08a9037a9..a822df653e 100644 --- a/docs/docs/models/evo2.md +++ b/docs/docs/models/evo2.md @@ -159,7 +159,7 @@ Evo2 NIM: - H200 (1 and 2 GPU configurations, 144 GB each) - H100 (2 GPU configuration, 80 GB each) -BioNeMo Framework: +BioNeMo Recipes: - A100 (1, 8, ..., 1024 GPU configurations) - H100 (1, 8, ..., 2048 GPU configurations) diff --git a/recipes/evo2_megatron/examples/evo2_gfmbench_recipe.ipynb b/recipes/evo2_megatron/examples/evo2_gfmbench_recipe.ipynb index ceffc2109b..5e9a824a7e 100644 --- a/recipes/evo2_megatron/examples/evo2_gfmbench_recipe.ipynb +++ b/recipes/evo2_megatron/examples/evo2_gfmbench_recipe.ipynb @@ -6,7 +6,7 @@ "source": [ "# Evaluating Evo2 with GFMBench-API (BioNeMo Recipe)\n", "\n", - "This notebook evaluates the **Evo2** genomic foundation model using the **BioNeMo framework** (`bionemo-evo2` + NeMo/Megatron) on the **[GFMBench-API](https://github.com/NVIDIA/GFMBench-api)** benchmark suite.\n", + "This notebook evaluates the **Evo2** genomic foundation model using the **BioNeMo Recipes** (`bionemo-evo2` + NeMo/Megatron) on the **[GFMBench-API](https://github.com/NVIDIA/GFMBench-api)** benchmark suite.\n", "\n", "**GFMBench-API** is a framework for evaluating genomic foundation models. It exposes a single, model-agnostic API that decouples model development from benchmark tasks and metrics: \n", "you implement inference methods once, and the same interface drives zero-shot scoring, supervised evaluation, and reporting across all tasks. See the [bioRxiv preprint](https://www.biorxiv.org/content/10.64898/2026.02.19.706811v1) for the full benchmark design and task definitions.\n", @@ -363,7 +363,7 @@ "source": [ "### 2.3 Load Evo2 Model via BioNeMo\n", "\n", - "We use the **BioNeMo framework** (`bionemo-evo2`) to load Evo2. The notebook-compatible approach:\n", + "We use the **BioNeMo Recipes** (`bionemo-evo2`) to load Evo2. 
The notebook-compatible approach:\n", "1. Initializes **Megatron parallel state manually** (single-GPU, no NeMo Trainer)\n", "2. Creates the model from NeMo's `HYENA_MODEL_OPTIONS` / `MAMBA_MODEL_OPTIONS` configs\n", "3. Loads checkpoint weights via **Megatron `dist_checkpointing`** (torch_dist format)\n", From 00a03e7cec52703581cfc1c8ed44c6d50de38abd Mon Sep 17 00:00:00 2001 From: Zoey Zhang Date: Tue, 16 Jun 2026 16:18:28 -0700 Subject: [PATCH 2/5] fixing some broken links and linting Signed-off-by: Zoey Zhang --- docs/docs/main/references/FAQ.md | 6 +----- docs/docs/models/ESM-2/index.md | 6 +++--- recipes/context_parallel.md | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/docs/docs/main/references/FAQ.md b/docs/docs/main/references/FAQ.md index 71ae6c7e6f..9b115024f9 100644 --- a/docs/docs/main/references/FAQ.md +++ b/docs/docs/main/references/FAQ.md @@ -11,11 +11,7 @@ while also providing a high degree of freedom and flexibility for users. ## How do I install BioNeMo Recipes? You can install individual recipe directories from within BioNeMo Recipes by following the corresponding -README pages the [BioNeMo Recipes GitHub](https://github.com/NVIDIA-BioNeMo/bionemo-framework). Please note that this is a -beta feature and may require some additional effort to install seamlessly. We are actively working on testing this -functionality and expect it will be a fully supported feature in future releases. You can review our -[release notes](../about/releasenotes-fw.md) to stay up to -date on our releases. +README pages on the [BioNeMo Recipes GitHub](https://github.com/NVIDIA-BioNeMo/bionemo-framework). ## What are the system requirements for BioNeMo Recipes? diff --git a/docs/docs/models/ESM-2/index.md b/docs/docs/models/ESM-2/index.md index 4b66bde324..cfba69983b 100644 --- a/docs/docs/models/ESM-2/index.md +++ b/docs/docs/models/ESM-2/index.md @@ -106,9 +106,9 @@ Dataset](../../main/datasets/uniprot.md)).
This validation set was used to ensur checkpoints is consistent with their outputs when evaluated with the HuggingFace Transformers library. | Checkpoint | HuggingFace | BioNeMo Recipes | Lin *et al.* 2023 | -| ---------- | ----------- | ----------------- | ----------------------------------- | -| 650M | 7.001 | 7.002 | 6.95 :material-information-outline: | -| 3B | 6.003 | 6.004 | 6.49 :material-information-outline: | +| ---------- | ----------- | --------------- | ----------------------------------- | +| 650M | 7.001 | 7.002 | 6.95 :material-information-outline: | +| 3B | 6.003 | 6.004 | 6.49 :material-information-outline: | !!! info "Different Validation Sets" diff --git a/recipes/context_parallel.md b/recipes/context_parallel.md index 350d2bff5f..8907ee323c 100644 --- a/recipes/context_parallel.md +++ b/recipes/context_parallel.md @@ -18,7 +18,7 @@ The core idea behind CP is to partition the data into various chunks, with each In BioNeMo, we've created some abstractions to partition the data for you. There exists a [ContextParallelDataLoaderWrapper](esm2_native_te/collator.py) that will shard the CP data for you and send it to each device. This dataloader operates on Sequence Packed (THD) data [link](https://docs.nvidia.com/nemo-framework/user-guide/24.12/nemotoolkit/features/optimizations/sequence_packing.html). This `ContextParallelDataLoaderWrapper` will take as arguments your CP group and local CP rank. This dataloader wrapper will call its underlying dataloader to generate a unique piece of data and then shard those unique sequences across your CP groups. This is beneficial because you won't need to maintain a deterministic data pipeline because unique data is only being generated across the non CP groups, and it is replicated across the CP groups. More details below. 
-Alternatively, one could utilize any DataLoader such as the canonical [PyTorch DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader), however, you would have to ensure that your dataset is synchronized across CP ranks. In some cases, if you have a non-deterministic data pipeline, even if you attempt to get the same data from a dataloader it may be different due to non-deterministic preprocessing stages such as masking. For more information on preserving determinism in your datasets, please see [MegatronLMDataModule](../docs/docs/main/about/background/megatron_datasets.md). +Alternatively, one could utilize any DataLoader such as the canonical [PyTorch DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader), however, you would have to ensure that your dataset is synchronized across CP ranks. In some cases, if you have a non-deterministic data pipeline, even if you attempt to get the same data from a dataloader it may be different due to non-deterministic preprocessing stages such as masking. 
### Context Parallelism Sharding Example From 3086a344cfc3313842b1fd761f4356ef6d1419f3 Mon Sep 17 00:00:00 2001 From: Zoey Zhang Date: Tue, 16 Jun 2026 16:30:34 -0700 Subject: [PATCH 3/5] removing API references, moving FAQ up a level Signed-off-by: Zoey Zhang --- docs/docs/main/{references => }/FAQ.md | 6 +-- docs/docs/main/SUMMARY.md | 2 +- .../Writing Documentation/mkdocs.md | 2 - .../main/references/API_reference/index.md | 3 -- docs/mkdocs.yml | 14 ------ docs/requirements.txt | 1 - docs/scripts/gen_ref_pages.py | 43 +------------------ 7 files changed, 5 insertions(+), 66 deletions(-) rename docs/docs/main/{references => }/FAQ.md (93%) delete mode 100644 docs/docs/main/references/API_reference/index.md diff --git a/docs/docs/main/references/FAQ.md b/docs/docs/main/FAQ.md similarity index 93% rename from docs/docs/main/references/FAQ.md rename to docs/docs/main/FAQ.md index 9b115024f9..20a288e793 100644 --- a/docs/docs/main/references/FAQ.md +++ b/docs/docs/main/FAQ.md @@ -16,7 +16,7 @@ README pages the [BioNeMo Recipes GitHub](https://github.com/NVIDIA-BioNeMo/bion ## What are the system requirements for BioNeMo Recipes? Generally, BioNeMo Recipes should run on any NVIDIA GPU with Compute Capability ≥8.0. For a full list of supported -hardware, refer to the [Hardware and Software Prerequisites](../getting-started/pre-reqs.md). +hardware, refer to the [Hardware and Software Prerequisites](getting-started/pre-reqs.md). ## Can I contribute code or models to BioNeMo Recipes? @@ -29,8 +29,8 @@ tokenizers, custom architecture blocks, and other reusable features over end-to- consider end-to-end model implementations on a case-by-case basis. 
If you're interested in this contribution of this kind, we recommend [reaching out to us](https://www.nvidia.com/en-us/industries/healthcare/contact-sales/) first -For more information about external contributions, refer to the [Contributing](../contributing/contributing.md) and -[Code Review](../contributing/code-review.md) pages. +For more information about external contributions, refer to the [Contributing](contributing/contributing.md) and +[Code Review](contributing/code-review.md) pages. ## How do I report bugs or suggest new features? diff --git a/docs/docs/main/SUMMARY.md b/docs/docs/main/SUMMARY.md index f1ecbe77fd..ee74cb3120 100644 --- a/docs/docs/main/SUMMARY.md +++ b/docs/docs/main/SUMMARY.md @@ -4,4 +4,4 @@ - [Tutorials](examples/) - [Data Sets](datasets/) - [Contributing](contributing/) -- [References](references/) +- [Frequently Asked Questions](FAQ.md) diff --git a/docs/docs/main/contributing/Writing Documentation/mkdocs.md b/docs/docs/main/contributing/Writing Documentation/mkdocs.md index d412941239..2de09969bc 100644 --- a/docs/docs/main/contributing/Writing Documentation/mkdocs.md +++ b/docs/docs/main/contributing/Writing Documentation/mkdocs.md @@ -3,5 +3,3 @@ ## Build system BioNeMo Recipes uses [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) to build its documentation. -Docstrings are converted to automatically-generated API reference pages using `mkdocstrings`, and can be linked from -markdown pages using [paths](https://mkdocstrings.github.io/usage/). diff --git a/docs/docs/main/references/API_reference/index.md b/docs/docs/main/references/API_reference/index.md deleted file mode 100644 index dde92196f1..0000000000 --- a/docs/docs/main/references/API_reference/index.md +++ /dev/null @@ -1,3 +0,0 @@ -# API reference - -The API reference contains detailed descriptions of all public functions and objects. It's the best place to look if you need information on a specific function. 
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 88d9b25100..18d217029b 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -61,20 +61,6 @@ plugins: version_selector: true css_dir: "assets/css" javascript_dir: "assets/js" - - mkdocstrings: - handlers: - python: - paths: - - ../models/*/src - - ../recipes/*/src - - ../interpretability/*/*/src - - ../interpretability/*/*/*/src - - ../interpretability/*/*/*/*/src - options: - docstring_options: - warnings: false - warn_missing_types: false - warn_unknown_params: false - mkdocs-jupyter: theme: auto include: ["*.ipynb"] diff --git a/docs/requirements.txt b/docs/requirements.txt index a4e21d6114..4b2dd5add6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,6 @@ mkdocs-material mkdocs-macros-plugin mkdocs-minify-plugin -mkdocstrings[python] mkdocs-gen-files pymdown-extensions # Pinned: 0.26.0 (released 2026-03-23) breaks the build with diff --git a/docs/scripts/gen_ref_pages.py b/docs/scripts/gen_ref_pages.py index 43785ec52f..9165811930 100644 --- a/docs/scripts/gen_ref_pages.py +++ b/docs/scripts/gen_ref_pages.py @@ -14,7 +14,6 @@ # limitations under the License. 
"""Generate reference pages and copy docs from framework packages and recipes.""" -import itertools import json import logging import os @@ -522,43 +521,6 @@ def copy_support_files(source_dir: Path, dest_dir: Path, root: Path, log_prefix: write_support_directory_indexes(dest_dir, copied_files, explicit_index_dirs) -def generate_api_reference() -> None: - """Generate API reference documentation for import-light model and interpretability packages.""" - root = Path(__file__).parent.parent.parent - source_roots = [ - (src, ()) - for src in itertools.chain((root / "models").rglob("src"), (root / "interpretability").rglob("src")) - if "src" not in src.relative_to(root).parts[:-1] - ] - source_roots.append((root / "recipes" / "evo2_megatron" / "src", ("bionemo", "common"))) - - for src, required_prefix in source_roots: - # Process Python files - for path in sorted(src.rglob("*.py")): - module_path = path.relative_to(src).with_suffix("") - doc_path = path.relative_to(src).with_suffix(".md") - full_doc_path = Path("main/references/API_reference") / doc_path - parts = tuple(module_path.parts) - - if required_prefix and parts[: len(required_prefix)] != required_prefix: - continue - - if parts[-1] in ("__init__", "__main__"): - continue - - with mkdocs_gen_files.open(full_doc_path, "w") as fd: - identifier = ".".join(parts) - print("::: " + identifier, file=fd) - - mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root)) - - # Process Markdown files - for path in sorted(src.rglob("*.md")): - doc_path = path.relative_to(src) - full_doc_path = Path("main/references/API_reference") / doc_path - copy_text_file(path, full_doc_path, root, f"Added Markdown file: {full_doc_path}") - - def get_recipes_readmes(recipes_dir: Path, root: Path) -> None: """Copy README files from root recipe directories to the docs recipes directory. 
@@ -695,7 +657,7 @@ def get_recipes_assets(recipes_dir: Path, root: Path) -> None: def generate_pages() -> None: """Generate pages for documentation. - This function orchestrates API references, notebooks, and README files for recipes. + This function orchestrates notebooks and README files for recipes. Returns: None @@ -708,9 +670,6 @@ def generate_pages() -> None: with mkdocs_gen_files.open("versions.json", "w") as f: json.dump([{"version": "main", "title": "main", "aliases": ["latest"]}], f) - # Generate API docs for recipe and model packages. - generate_api_reference() - # Process recipes write_generated_tutorials_index() get_recipes_assets(recipes_dir, root) From 92958934124b1e5250edccf3e73435c6c369945f Mon Sep 17 00:00:00 2001 From: Zoey Zhang Date: Tue, 16 Jun 2026 16:48:13 -0700 Subject: [PATCH 4/5] fixing grammar Signed-off-by: Zoey Zhang --- docs/docs/main/datasets/index.md | 4 ++-- docs/docs/main/getting-started/pre-reqs.md | 6 +++--- docs/docs/models/ESM-2/index.md | 4 ++-- docs/docs/models/index.md | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/docs/main/datasets/index.md b/docs/docs/main/datasets/index.md index 929654ded7..065c50cbfb 100644 --- a/docs/docs/main/datasets/index.md +++ b/docs/docs/main/datasets/index.md @@ -1,10 +1,10 @@ # BioNeMo Recipes: Available Datasets -The BioNeMo Recipes provides access to a variety of high-quality datasets for bioinformatics and cheminformatics research. These datasets cover a range of biological and chemical modalities, supporting various research applications. The following table lists the currently available datasets: +BioNeMo Recipes provides access to a variety of high-quality datasets for bioinformatics and cheminformatics research. These datasets cover a range of biological and chemical modalities, supporting various research applications. 
The following table lists the currently available datasets: | **Dataset** | **Modality** | **Uses** | | --------------------------- | ------------ | -------------------------------------- | | [CELLxGENE](./CELLxGENE.md) | Single Cell | Single-Cell Gene Expression | | [UniProt](./uniprot.md) | Protein | Protein Sequence and Function Analysis | -For more information about the datasets included in the BioNeMo Recipes, refer to the Dataset Cards linked in the table above or the original sources referenced in the respective dataset descriptions. +For more information about the datasets included in BioNeMo Recipes, refer to the Dataset Cards linked in the table above or the original sources referenced in the respective dataset descriptions. diff --git a/docs/docs/main/getting-started/pre-reqs.md b/docs/docs/main/getting-started/pre-reqs.md index 96252f30d8..b5cd7ea1ee 100644 --- a/docs/docs/main/getting-started/pre-reqs.md +++ b/docs/docs/main/getting-started/pre-reqs.md @@ -1,11 +1,11 @@ # Hardware and Software Prerequisites for BioNeMo Recipes -Before you begin using the BioNeMo Recipes, ensure the hardware and software prerequisites outlined below are +Before you begin using BioNeMo Recipes, ensure the hardware and software prerequisites outlined below are met. ## Hardware Prerequisites -The BioNeMo Recipes is compatible with environments that have access to NVIDIA GPUs. `bfloat16` precision requires an +BioNeMo Recipes is compatible with environments that have access to NVIDIA GPUs. `bfloat16` precision requires an Ampere generation GPU or higher ([Compute Capability ≥8.0](https://developer.nvidia.com/cuda-gpus)). You may be able to run BioNeMo on GPUs without `bfloat16`, but this use-case is not supported by the development team. @@ -31,7 +31,7 @@ The following datacenter and desktop GPUs have Compute Capability ≥8.0 and are ## Software Prerequisites -The BioNeMo Recipes is supported on x86 Linux systems. +BioNeMo Recipes is supported on x86 and ARM Linux systems.
Ensure that the following are installed in your desired execution environment: diff --git a/docs/docs/models/ESM-2/index.md b/docs/docs/models/ESM-2/index.md index cfba69983b..deadc07a55 100644 --- a/docs/docs/models/ESM-2/index.md +++ b/docs/docs/models/ESM-2/index.md @@ -131,9 +131,9 @@ at the ESM2-650M model size. The BioNeMo implementation could handle batch sizes ![ESM-2 Model Scaling](../../assets/images/esm2/esm2_model_scaling.png) -Training ESM-2 at the 650M, 3B, and 15B model variants show improved performance with the BioNeMo Recipes over the +Training ESM-2 at the 650M, 3B, and 15B model variants shows improved performance with BioNeMo Recipes over the pure-PyTorch baseline. These experiments were conducted on 16x NVIDIA A100 or 16x NVIDIA H100 GPUs split across two -nodes. \**Note:* 15B model variants were trained on 64 GPUs with the BioNeMo Recipes. +nodes. \**Note:* 15B model variants were trained on 64 GPUs with BioNeMo Recipes. #### Device Scaling diff --git a/docs/docs/models/index.md b/docs/docs/models/index.md index 71b2129284..5cab372cd1 100644 --- a/docs/docs/models/index.md +++ b/docs/docs/models/index.md @@ -1,6 +1,6 @@ # BioNeMo Recipes: Available Models -State-of-the-art models are continually integrated into the BioNeMo Recipes. The BioNeMo Recipes currently offers the following pre-trained models: +State-of-the-art models are continually integrated into BioNeMo Recipes.
BioNeMo Recipes currently offers the following pre-trained models: | **Model** | **Modality** | **Uses** | **Training Location** | | ----------------------------- | ------------ | ----------------------- | -------------------------------------------------------------------------------- | From 8c1e7caff62891591afba02c702602a0bf944c14 Mon Sep 17 00:00:00 2001 From: Zoey Zhang Date: Tue, 16 Jun 2026 16:49:52 -0700 Subject: [PATCH 5/5] adding docker image change Signed-off-by: Zoey Zhang --- .github/ISSUE_TEMPLATE/bug-report.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 14dff2d3b5..1dc91d894c 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -89,8 +89,8 @@ body: id: docker-image-info attributes: label: Docker Image - description: If the issue occurred in a container, provide the docker image name. Visit [BioNeMo Recipes NGC website](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara/containers/bionemo-framework/tags) for available images. - placeholder: e.g., nvcr.io/nvidia/clara/bionemo-framework:2.2 + description: If the issue occurred in a container, provide the docker image name. Visit the [PyTorch NGC Container Registry](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) for available images. + placeholder: e.g., nvcr.io/nvidia/pytorch:26.05-py3 validations: required: false