From eaca9966c0a0ed440f4ac5c89d916152f78fd156 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:00:03 -0500 Subject: [PATCH 01/12] chore: Add issue templates, labels, and workflows --- .github/ISSUE_TEMPLATE/bug_report.yml | 62 +++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 19 +++++++ .github/dependabot.yml | 5 ++ .github/labels.yml | 66 +++++++++++++++++++++++ .github/workflows/labeler.yml | 20 +++++++ .github/workflows/test.yml | 32 +++++++++-- 6 files changed, 200 insertions(+), 4 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/labels.yml create mode 100644 .github/workflows/labeler.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..a1bbad5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,62 @@ +name: Bug Report +description: File a bug report +title: "[Bug]: " +labels: ["bug"] +assignees: + - octocat +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! + - type: input + id: contact + attributes: + label: Contact Details + description: How can we get in touch with you if we need more info? + placeholder: ex. email@example.com + validations: + required: false + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + placeholder: Tell us what you see! + value: "A bug happened!" + validations: + required: true + + - type: textarea + id: version + attributes: + label: Version + description: What version of our software are you running? + validations: + required: true + + - type: dropdown + id: platform + attributes: + label: What platform are you working on? + multiple: true + options: + - Mac Intel + - Linux + - Windows + - Mac M + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output in trace log mode. This will be automatically formatted into code, so no need for backticks. + render: bash + + - type: checkboxes + id: terms + attributes: + label: Code of Conduct + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/ylab-hi/ScanNLS/blob/main/CODE_OF_CONDUCT.md) + options: + - label: I agree to follow this project's Code of Conduct + required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..2bc5d5f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: "" +labels: "" +assignees: "" +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 32e2534..5a848ff 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,3 +8,8 @@ updates: # Prefix all commit messages with "deps: ", which should be # accepted as a conventional commit and trigger release-please prefix: "deps" + + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: daily diff --git a/.github/labels.yml b/.github/labels.yml new file mode 100644 index 0000000..f7f83aa --- /dev/null +++ b/.github/labels.yml @@ -0,0 +1,66 @@ +--- +# Labels names are important as they are used by Release Drafter to decide +# regarding where to record them in changelog or if to skip them. +# +# The repository labels will be automatically configured using this file and +# the GitHub Action https://github.com/marketplace/actions/github-labeler. +- name: breaking + description: Breaking Changes + color: bfd4f2 +- name: bug + description: Something isn't working + color: d73a4a +- name: build + description: Build System and Dependencies + color: bfdadc +- name: ci + description: Continuous Integration + color: 4a97d6 +- name: dependencies + description: Pull requests that update a dependency file + color: 0366d6 +- name: documentation + description: Improvements or additions to documentation + color: 0075ca +- name: duplicate + description: This issue or pull request already exists + color: cfd3d7 +- name: enhancement + description: New feature or request + color: a2eeef +- name: github_actions + description: Pull requests that update Github_actions code + color: "000000" +- name: good first issue + description: Good for newcomers + color: 7057ff +- name: help wanted + description: Extra attention is needed + color: 008672 +- name: invalid + description: This doesn't seem right + color: e4e669 +- name: performance + description: Performance + color: "016175" +- name: python + description: Pull requests that update Python code + color: 2b67c6 +- name: question + description: Further information is requested + color: d876e3 +- name: refactoring + description: Refactoring + color: ef67c4 +- name: removal + description: Removals and Deprecations + color: 9ae7ea +- name: style + description: Style + color: c120e5 +- name: testing + description: Testing + color: b1fc6f +- name: wontfix + description: This will not be worked on + color: ffffff diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000..9054eec --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,20 @@ +name: Labeler + +on: + push: + branches: + - main + +jobs: + labeler: + runs-on: ubuntu-latest + steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Labeler + uses: crazy-max/ghaction-github-labeler@v5.0.0 + with: + skip-delete: true diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3995e60..14536d7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,9 +2,28 @@ name: CI on: push: - branches: [master] + branches: + - main + paths: + - src/** + - examples/** + - Cargo.toml + - .github/workflows/test.yml + pull_request: - branches: [master] + paths: + - src/** + - examples/** + - Cargo.toml + - .github/workflows/test.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +#ref https://github.com/pola-rs/polars/blob/main/.github/workflows/test-rust.yml +env: + RUSTFLAGS: -C debuginfo=0 # Do not produce debug symbols to keep memory usage down jobs: Formatting: @@ -12,6 +31,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v2 + de - name: Install stable toolchain uses: actions-rs/toolchain@v1 @@ -63,7 +83,9 @@ jobs: rust: stable steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + fetch-depth: 1 - uses: actions-rs/toolchain@v1 with: @@ -115,7 +137,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + fetch-depth: 1 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 From 93f948ed6425f1143732f60273660001ab0782e9 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:00:23 -0500 Subject: [PATCH 02/12] build: Update pre-commit hooks to latest versions --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd60306..c5790bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-yaml stages: [commit] @@ -16,7 +16,7 @@ repos: stages: [commit] - repo: https://github.com/crate-ci/committed - rev: v1.0.18 + rev: v1.0.20 hooks: - id: committed stages: [commit-msg] From cf10435a44365fd5d634a8c28eda6ee07d8275bd Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:04:52 -0500 Subject: [PATCH 03/12] chore: update GitHub Actions workflow --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 14536d7..94d7f6b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,7 +31,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v2 - de + with: + fetch-depth: 1 - name: Install stable toolchain uses: actions-rs/toolchain@v1 From d17196352a7d45a58d191c3e5e5b2df90db37714 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:12:41 -0500 Subject: [PATCH 04/12] chore: Update actions/checkout to v4 --- .github/workflows/test.yml | 31 ++-------------- README.md | 74 +++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 65 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 94d7f6b..b652676 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -112,7 +112,9 @@ jobs: MSRV_VERSION: 1.62.0 steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + fetch-depth: 1 - name: Install MSRV toolchain uses: actions-rs/toolchain@v1 @@ -132,30 +134,3 @@ jobs: with: command: test args: --all --no-fail-fast - - Coverage: - needs: Formatting - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - - - name: Install and run cargo-tarpaulin - uses: actions-rs/tarpaulin@v0.1 - with: - version: "0.26.0" - args: "--workspace --all-features --run-types Tests,Doctests --out Lcov --timeout 300" - - - name: Upload coverage - uses: coverallsapp/github-action@v1 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ./lcov.info diff --git a/README.md b/README.md index 11205a0..c246976 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - # COITrees: Cache Oblivious Interval Trees COITrees implements a data structure for very fast overlap queries of a @@ -29,10 +28,12 @@ remains highly efficient. # Trying Out +The Minimum Supported Rust Version (MSRV) is 1.62.0. This is primary a library for use in other programs, but for benchmarking purposes it includes a program for intersecting BED files. To try out, just clone this repo and run: + ```shell cargo run --release --example bed-intersect -- test1.bed test2.bed > intersections.bed ``` @@ -45,51 +46,50 @@ million lines of `B`. ## Intervals in sorted order -| | A vs B | B vs A | A vs A | B' vs B' | -| ----------------------------------- | ---------: | ---------: | -------: | ---------: | -| coitrees AVX | 11.8s | **3.7s** | 0.7 | 5.3s | -| coitrees AVX (`--sorted`) | 6.4s | 4.2s | **0.6s** | **0.5s** | -| coitrees | 11.4s | 5.2s | 0.8s | 8.3s | -| coitrees (`--sorted`) | **5.8s** | 5.4s | **0.6s** | **0.5s** | -| cgranges (`bedcov-cr -c`) | 35.4s | 6.6s | 2.0s | 17.6s | -| AIList | 13.8s | 10.1s | 1.1s | 18.4s | -| CITree | 20.1s | 13.5s | 1.6s | 45.7s | -| NCList | 22.5s | 16.8s | 1.9s | 39.8s | -| AITree | 23.8s | 26.3s | 2.1s | 63.4s | -| `bedtools coverage -counts -sorted` | 257.5s | 295.6s | 71.6s | 2130.9s | -| `bedtools coverage -counts` | 322.4s | 378.5s | 75.0s | 3595.9s | +| | A vs B | B vs A | A vs A | B' vs B' | +| ----------------------------------- | -------: | -------: | -------: | -------: | +| coitrees AVX | 11.8s | **3.7s** | 0.7 | 5.3s | +| coitrees AVX (`--sorted`) | 6.4s | 4.2s | **0.6s** | **0.5s** | +| coitrees | 11.4s | 5.2s | 0.8s | 8.3s | +| coitrees (`--sorted`) | **5.8s** | 5.4s | **0.6s** | **0.5s** | +| cgranges (`bedcov-cr -c`) | 35.4s | 6.6s | 2.0s | 17.6s | +| AIList | 13.8s | 10.1s | 1.1s | 18.4s | +| CITree | 20.1s | 13.5s | 1.6s | 45.7s | +| NCList | 22.5s | 16.8s | 1.9s | 39.8s | +| AITree | 23.8s | 26.3s | 2.1s | 63.4s | +| `bedtools coverage -counts -sorted` | 257.5s | 295.6s | 71.6s | 2130.9s | +| `bedtools coverage -counts` | 322.4s | 378.5s | 75.0s | 3595.9s | ### With coverage -| | A vs B | B vs A | A vs A | B' vs B' | -| ----------------------------------- | ---------: | ---------: | -------: | ---------: | -| coitrees AVX | 18.2s | **4.8s** | 1.1s | 16.0s | -| coitrees | **14.6s** | 5.7s | **1.0s** | **12.0s** | -| cgranges | 38.4s | 8.1s | 2.2s | 31.0s | -| CITree | 23.2s | 25.6s | 2.0s | 160.4s | +| | A vs B | B vs A | A vs A | B' vs B' | +| ------------ | --------: | -------: | -------: | --------: | +| coitrees AVX | 18.2s | **4.8s** | 1.1s | 16.0s | +| coitrees | **14.6s** | 5.7s | **1.0s** | **12.0s** | +| cgranges | 38.4s | 8.1s | 2.2s | 31.0s | +| CITree | 23.2s | 25.6s | 2.0s | 160.4s | ## Intervals in randomized order -| | A vs B | B vs A | A vs A | B' vs B' | -| ----------------------------------- | ---------: | ---------: | -------: | --------: | -| coitrees AVX | **23.9s** | **7.2s** | **1.6s** | **6.1s** | -| coitrees | 24.2s | 8.9s | 1.9s | 9.4s | -| cgranges (`bedcov-cr -c`) | 55.7s | 11.1s | 3.3s | 19.6s | -| AIList | 31.2s | 18.2s | 2.3s | 19.3s | -| CITree | 39.4s | 19.0s | 2.9s | 47.1s | -| NCList | 42.7s | 23.8s | 3.4s | 44.0s | -| AITree | 225.3s | 134.8s | 14.7s | 921.6s | -| `bedtools coverage -counts` | 1160.4s | 849.6s | 104.5s | 9254.6s | +| | A vs B | B vs A | A vs A | B' vs B' | +| --------------------------- | --------: | -------: | -------: | -------: | +| coitrees AVX | **23.9s** | **7.2s** | **1.6s** | **6.1s** | +| coitrees | 24.2s | 8.9s | 1.9s | 9.4s | +| cgranges (`bedcov-cr -c`) | 55.7s | 11.1s | 3.3s | 19.6s | +| AIList | 31.2s | 18.2s | 2.3s | 19.3s | +| CITree | 39.4s | 19.0s | 2.9s | 47.1s | +| NCList | 42.7s | 23.8s | 3.4s | 44.0s | +| AITree | 225.3s | 134.8s | 14.7s | 921.6s | +| `bedtools coverage -counts` | 1160.4s | 849.6s | 104.5s | 9254.6s | ### With coverage -| | A vs B | B vs A | A vs A | B' vs B' | -| ----------------------------------- | ---------: | ---------: | -------: | ---------: | -| coitrees AVX | 34.3s | **8.8s** | **2.2s** | 16.3s | -| coitrees | **29.6s** | 9.7s | 2.3s | **13.1s** | -| cgranges | 57.6s | 12.5s | 3.6s | 32.6s | -| CITree | 50.0s | 32.5s | 3.8s | 170.4s | - +| | A vs B | B vs A | A vs A | B' vs B' | +| ------------ | --------: | -------: | -------: | --------: | +| coitrees AVX | 34.3s | **8.8s** | **2.2s** | 16.3s | +| coitrees | **29.6s** | 9.7s | 2.3s | **13.1s** | +| cgranges | 57.6s | 12.5s | 3.6s | 32.6s | +| CITree | 50.0s | 32.5s | 3.8s | 170.4s | All benchmarks run on a ryzen 5950x. From a526248a4bee63aa8b5815c52996107644fcd38a Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:39:58 -0500 Subject: [PATCH 05/12] chore: Update MSRV_VERSION to 1.63.0 --- .github/workflows/test.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b652676..5402069 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -109,7 +109,7 @@ jobs: needs: Formatting runs-on: ubuntu-latest env: - MSRV_VERSION: 1.62.0 + MSRV_VERSION: 1.63.0 steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/README.md b/README.md index c246976..f639cb9 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ remains highly efficient. # Trying Out -The Minimum Supported Rust Version (MSRV) is 1.62.0. +The Minimum Supported Rust Version (MSRV) is 1.63.0. This is primary a library for use in other programs, but for benchmarking purposes it includes a program for intersecting BED files. From b740d96fdd736856fb09eadcf9788b9b7578efce Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 25 Oct 2023 23:52:03 -0500 Subject: [PATCH 06/12] chore: Update MSRV version to 1.62.0 --- .github/workflows/test.yml | 6 +----- README.md | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5402069..9fa6cea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -109,7 +109,7 @@ jobs: needs: Formatting runs-on: ubuntu-latest env: - MSRV_VERSION: 1.63.0 + MSRV_VERSION: 1.62.0 steps: - name: Checkout repository uses: actions/checkout@v4 @@ -125,10 +125,6 @@ jobs: - name: check if README matches MSRV defined here run: grep $MSRV_VERSION README.md - - name: pin dependency versions for MSRV - run: | - cargo update -p indexmap --precise 1.8.2 - - name: Run tests uses: actions-rs/cargo@v1 with: diff --git a/README.md b/README.md index f639cb9..c246976 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ remains highly efficient. # Trying Out -The Minimum Supported Rust Version (MSRV) is 1.63.0. +The Minimum Supported Rust Version (MSRV) is 1.62.0. This is primary a library for use in other programs, but for benchmarking purposes it includes a program for intersecting BED files. From 78426d3cc22b7d838605b1f5b33b075837e112e2 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 26 Oct 2023 00:24:04 -0500 Subject: [PATCH 07/12] feat(py-coitrees): lanuch py-coitrees layout --- .github/workflows/release-python.yml | 74 ++++++++++++++++++++++++++++ .gitignore | 73 +++++++++++++++++++++++++++ py-coitrees/Cargo.toml | 12 +++++ py-coitrees/LICENSE | 1 + py-coitrees/README.md | 0 py-coitrees/pyproject.toml | 31 ++++++++++++ py-coitrees/src/lib.rs | 14 ++++++ 7 files changed, 205 insertions(+) create mode 100644 .github/workflows/release-python.yml create mode 100644 py-coitrees/Cargo.toml create mode 100644 py-coitrees/LICENSE create mode 100644 py-coitrees/README.md create mode 100644 py-coitrees/pyproject.toml create mode 100644 py-coitrees/src/lib.rs diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml new file mode 100644 index 0000000..9a3eda3 --- /dev/null +++ b/.github/workflows/release-python.yml @@ -0,0 +1,74 @@ +name: CI + +# ref https://github.com/pola-rs/polars/blob/main/.github/workflows/release-python.yml +on: + push: + branches: + - release + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + manylinux: auto + command: build + args: --release --sdist -o dist --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + command: build + args: --release -o dist --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - uses: PyO3/maturin-action@v1 + with: + command: build + args: --release -o dist --universal2 --find-interpreter + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [macos, windows, linux] + steps: + - uses: actions/download-artifact@v3 + with: + name: wheels + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --skip-existing * diff --git a/.gitignore b/.gitignore index 3038700..ff14de3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,76 @@ bench/random-bench.csv bench/random-cov-bench.csv bench/*.bed data/ + + +# Python binding ignore +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/py-coitrees/Cargo.toml b/py-coitrees/Cargo.toml new file mode 100644 index 0000000..c134654 --- /dev/null +++ b/py-coitrees/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "py-coitrees" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "coitrees" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.20.0", features = ["extension-module"] } diff --git a/py-coitrees/LICENSE b/py-coitrees/LICENSE new file mode 100644 index 0000000..4f88f81 --- /dev/null +++ b/py-coitrees/LICENSE @@ -0,0 +1 @@ +../LICENSE diff --git a/py-coitrees/README.md b/py-coitrees/README.md new file mode 100644 index 0000000..e69de29 diff --git a/py-coitrees/pyproject.toml b/py-coitrees/pyproject.toml new file mode 100644 index 0000000..3353e92 --- /dev/null +++ b/py-coitrees/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["maturin>=0.14,<0.15"] +build-backend = "maturin" + +[project] +name = "coitrees" +description = "Blazingly fast interval tree library" +authors = [ + { name = "Yangyang Li", email = "yangyang.li@northwestern.edu" }, + { name = "Daniel C. Jones", email = "djones3@fredhutch.org" }, +] +readme = "README.md" +requires-python = ">=3.8" +license = { file = "LICENSE" } +keywords = ["interval tree", "find overlap", "rust", "blazingly fast"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Rust", + "Topic :: Scientific/Engineering", +] diff --git a/py-coitrees/src/lib.rs b/py-coitrees/src/lib.rs new file mode 100644 index 0000000..ade05bc --- /dev/null +++ b/py-coitrees/src/lib.rs @@ -0,0 +1,14 @@ +use pyo3::prelude::*; + +/// Formats the sum of two numbers as string. +#[pyfunction] +fn sum_as_string(a: usize, b: usize) -> PyResult { + Ok((a + b).to_string()) +} + +/// A Python module implemented in Rust. +#[pymodule] +fn coitrees(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; + Ok(()) +} From 2ad39118dee3d02e95a22c0f49c7a269b6ab24ab Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 26 Oct 2023 00:35:52 -0500 Subject: [PATCH 08/12] refactor: format files vis pre-commit --- .pre-commit-config.yaml | 16 ++++++++++------ README.md | 2 +- bench/CITree | 1 - bench/CITree-cov | 1 - bench/bedtools | 1 - bench/bedtools-sorted | 1 - bench/cgranges | 1 - bench/cgranges-cov | 1 - bench/coitrees | 1 - bench/coitrees-cov | 1 - bench/coitrees-sorted | 1 - bench/coitrees-tvt | 1 - src/avx.rs | 4 ++-- src/lib.rs | 2 +- src/neon.rs | 4 ++-- src/nosimd.rs | 6 +++--- 16 files changed, 19 insertions(+), 25 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c5790bc..40b8c4b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,15 +14,19 @@ repos: stages: [commit] - id: detect-private-key stages: [commit] - - - repo: https://github.com/crate-ci/committed - rev: v1.0.20 - hooks: - - id: committed - stages: [commit-msg] + - id: check-added-large-files + args: ["--maxkb=150000"] + - id: end-of-file-fixer + - id: trailing-whitespace - repo: https://github.com/doublify/pre-commit-rust rev: v1.0 hooks: - id: fmt - id: cargo-check + + - repo: https://github.com/crate-ci/typos + rev: typos-dict-v0.11.2 + hooks: + - id: typos + exclude: "extc|svg|psl|CHANGELOG" diff --git a/README.md b/README.md index c246976..bc50b4c 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ variants are implemented to exploit AVX2 instructions on x86-64 cpus type is oppurtunistically defined to one of these types if the right instruction set is detected. Typically it's necessary to compile with the environment variable `RUSTFLAGS="-Ctarget-cpu=native"` set for this to work. The fallback -implemntation (`BasicCOITree`) supports any platform rust compiles to and +implementation (`BasicCOITree`) supports any platform rust compiles to and remains highly efficient. # Trying Out diff --git a/bench/CITree b/bench/CITree index 27171b1..e19ce28 100755 --- a/bench/CITree +++ b/bench/CITree @@ -1,4 +1,3 @@ #!/bin/sh ~/src/cgranges/test/bedcov-itree-nocov $1 $2 - diff --git a/bench/CITree-cov b/bench/CITree-cov index 56587ee..69bd6dd 100755 --- a/bench/CITree-cov +++ b/bench/CITree-cov @@ -1,4 +1,3 @@ #!/bin/sh ~/src/cgranges/test/bedcov-itree $1 $2 - diff --git a/bench/bedtools b/bench/bedtools index 8f525ab..d48b2ca 100755 --- a/bench/bedtools +++ b/bench/bedtools @@ -1,4 +1,3 @@ #!/bin/sh bedtools coverage -counts -a $1 -b $2 - diff --git a/bench/bedtools-sorted b/bench/bedtools-sorted index f8abfc4..d3e31b4 100755 --- a/bench/bedtools-sorted +++ b/bench/bedtools-sorted @@ -1,4 +1,3 @@ #!/bin/sh bedtools coverage -counts -sorted -a $1 -b $2 - diff --git a/bench/cgranges b/bench/cgranges index 3200914..4134f76 100755 --- a/bench/cgranges +++ b/bench/cgranges @@ -1,4 +1,3 @@ #!/bin/sh ~/src/cgranges/test/bedcov-cr -c $1 $2 - diff --git a/bench/cgranges-cov b/bench/cgranges-cov index 36d8a66..d78d2f0 100755 --- a/bench/cgranges-cov +++ b/bench/cgranges-cov @@ -1,4 +1,3 @@ #!/bin/sh ~/src/cgranges/test/bedcov-cr $1 $2 - diff --git a/bench/coitrees b/bench/coitrees index b2a4ee6..d20870c 100755 --- a/bench/coitrees +++ b/bench/coitrees @@ -1,4 +1,3 @@ #!/bin/sh ../target/release/examples/bed-intersect $1 $2 - diff --git a/bench/coitrees-cov b/bench/coitrees-cov index 640661f..36dfd53 100755 --- a/bench/coitrees-cov +++ b/bench/coitrees-cov @@ -1,4 +1,3 @@ #!/bin/sh ../target/release/examples/bed-intersect --coverage $1 $2 - diff --git a/bench/coitrees-sorted b/bench/coitrees-sorted index 01937ab..77e2628 100755 --- a/bench/coitrees-sorted +++ b/bench/coitrees-sorted @@ -1,4 +1,3 @@ #!/bin/sh ../target/release/examples/bed-intersect --sorted $1 $2 - diff --git a/bench/coitrees-tvt b/bench/coitrees-tvt index 24105f4..c736e27 100755 --- a/bench/coitrees-tvt +++ b/bench/coitrees-tvt @@ -1,4 +1,3 @@ #!/bin/sh ../target/release/examples/bed-intersect --tree-vs-tree $1 $2 - diff --git a/src/avx.rs b/src/avx.rs index 2a2ec52..e69abe8 100644 --- a/src/avx.rs +++ b/src/avx.rs @@ -484,7 +484,7 @@ where self.nodes.is_empty() } - // /// Find intervals in the set overlaping the query `[first, last]` and call `visit` on every overlapping node + // /// Find intervals in the set overlapping the query `[first, last]` and call `visit` on every overlapping node fn query(&'a self, first: i32, last: i32, mut visit: F) where F: FnMut(&Interval<&'a T>), @@ -901,7 +901,7 @@ where return; } - // not overlaping or preceding + // not overlapping or preceding if first < self.prev_first || first > self.prev_last { // no overlap with previous query. have to resort to regular query strategy self.overlapping_intervals.clear(); diff --git a/src/lib.rs b/src/lib.rs index 14d3c59..867254d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,7 @@ //! structs which store integer, end-inclusive intervals along with associated //! metadata. The tree can be queried directly for coverage or overlaps, or //! through the intermediary `SortedQuerenty` which keeps track of some state -//! to accelerate overlaping queries. +//! to accelerate overlapping queries. mod interval; pub use interval::*; diff --git a/src/neon.rs b/src/neon.rs index 00a5b30..683d7d0 100644 --- a/src/neon.rs +++ b/src/neon.rs @@ -381,7 +381,7 @@ where self.nodes.is_empty() } - // /// Find intervals in the set overlaping the query `[first, last]` and call `visit` on every overlapping node + // /// Find intervals in the set overlapping the query `[first, last]` and call `visit` on every overlapping node fn query(&'a self, first: i32, last: i32, mut visit: F) where F: FnMut(&Interval<&'a T>), @@ -798,7 +798,7 @@ where return; } - // not overlaping or preceding + // not overlapping or preceding if first < self.prev_first || first > self.prev_last { // no overlap with previous query. have to resort to regular query strategy self.overlapping_intervals.clear(); diff --git a/src/nosimd.rs b/src/nosimd.rs index 4fe1f35..9186a77 100644 --- a/src/nosimd.rs +++ b/src/nosimd.rs @@ -11,7 +11,7 @@ //! structs which store integer, end-inclusive intervals along with associated //! metadata. The tree can be queried directly for coverage or overlaps, or //! through the intermediary `SortedQuerent` which keeps track of some state -//! to accelerate overlaping queries. +//! to accelerate overlapping queries. use super::interval::{GenericInterval, IntWithMax, Interval, IntervalTree, SortedQuerent}; use std::cmp::Ordering; @@ -190,7 +190,7 @@ where self.nodes.is_empty() } - /// Find intervals in the set overlaping the query `[first, last]` and call `visit` on every overlapping node + /// Find intervals in the set overlapping the query `[first, last]` and call `visit` on every overlapping node fn query(&'a self, first: i32, last: i32, mut visit: F) where F: FnMut(&IntervalNode), @@ -547,7 +547,7 @@ where return; } - // not overlaping or preceding + // not overlapping or preceding if first < self.prev_first || first > self.prev_last { // no overlap with previous query. have to resort to regular query strategy self.overlapping_intervals.clear(); From 03fa98f4669cddbe76fd4533ae0fb2575c95926c Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 26 Oct 2023 00:38:04 -0500 Subject: [PATCH 09/12] chore: Remove unnecessary files and code --- py-coitrees/Cargo.toml | 12 ------------ py-coitrees/LICENSE | 1 - py-coitrees/README.md | 0 py-coitrees/pyproject.toml | 31 ------------------------------- py-coitrees/src/lib.rs | 14 -------------- 5 files changed, 58 deletions(-) delete mode 100644 py-coitrees/Cargo.toml delete mode 100644 py-coitrees/LICENSE delete mode 100644 py-coitrees/README.md delete mode 100644 py-coitrees/pyproject.toml delete mode 100644 py-coitrees/src/lib.rs diff --git a/py-coitrees/Cargo.toml b/py-coitrees/Cargo.toml deleted file mode 100644 index c134654..0000000 --- a/py-coitrees/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "py-coitrees" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -name = "coitrees" -crate-type = ["cdylib"] - -[dependencies] -pyo3 = { version = "0.20.0", features = ["extension-module"] } diff --git a/py-coitrees/LICENSE b/py-coitrees/LICENSE deleted file mode 100644 index 4f88f81..0000000 --- a/py-coitrees/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE diff --git a/py-coitrees/README.md b/py-coitrees/README.md deleted file mode 100644 index e69de29..0000000 diff --git a/py-coitrees/pyproject.toml b/py-coitrees/pyproject.toml deleted file mode 100644 index 3353e92..0000000 --- a/py-coitrees/pyproject.toml +++ /dev/null @@ -1,31 +0,0 @@ -[build-system] -requires = ["maturin>=0.14,<0.15"] -build-backend = "maturin" - -[project] -name = "coitrees" -description = "Blazingly fast interval tree library" -authors = [ - { name = "Yangyang Li", email = "yangyang.li@northwestern.edu" }, - { name = "Daniel C. Jones", email = "djones3@fredhutch.org" }, -] -readme = "README.md" -requires-python = ">=3.8" -license = { file = "LICENSE" } -keywords = ["interval tree", "find overlap", "rust", "blazingly fast"] -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Rust", - "Topic :: Scientific/Engineering", -] diff --git a/py-coitrees/src/lib.rs b/py-coitrees/src/lib.rs deleted file mode 100644 index ade05bc..0000000 --- a/py-coitrees/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -use pyo3::prelude::*; - -/// Formats the sum of two numbers as string. -#[pyfunction] -fn sum_as_string(a: usize, b: usize) -> PyResult { - Ok((a + b).to_string()) -} - -/// A Python module implemented in Rust. -#[pymodule] -fn coitrees(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; - Ok(()) -} From b0da5a02e6237278e4fc27db3c4398f11213b906 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 26 Oct 2023 00:42:25 -0500 Subject: [PATCH 10/12] chore: remove release-python.yml workflow --- .github/workflows/release-python.yml | 74 ---------------------------- 1 file changed, 74 deletions(-) delete mode 100644 .github/workflows/release-python.yml diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml deleted file mode 100644 index 9a3eda3..0000000 --- a/.github/workflows/release-python.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: CI - -# ref https://github.com/pola-rs/polars/blob/main/.github/workflows/release-python.yml -on: - push: - branches: - - release - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - linux: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: PyO3/maturin-action@v1 - with: - manylinux: auto - command: build - args: --release --sdist -o dist --find-interpreter - - name: Upload wheels - uses: actions/upload-artifact@v3 - with: - name: wheels - path: dist - - windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v3 - - uses: PyO3/maturin-action@v1 - with: - command: build - args: --release -o dist --find-interpreter - - name: Upload wheels - uses: actions/upload-artifact@v3 - with: - name: wheels - path: dist - - macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v3 - - uses: PyO3/maturin-action@v1 - with: - command: build - args: --release -o dist --universal2 --find-interpreter - - name: Upload wheels - uses: actions/upload-artifact@v3 - with: - name: wheels - path: dist - - release: - name: Release - runs-on: ubuntu-latest - if: "startsWith(github.ref, 'refs/tags/')" - needs: [macos, windows, linux] - steps: - - uses: actions/download-artifact@v3 - with: - name: wheels - - name: Publish to PyPI - uses: PyO3/maturin-action@v1 - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - with: - command: upload - args: --skip-existing * From 5c5b2792d212c9525a7b6fadb1d66d38ce360576 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 26 Oct 2023 00:53:01 -0500 Subject: [PATCH 11/12] chore: Update MSRV version to 1.70.0 --- .github/workflows/test.yml | 6 ++++-- README.md | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9fa6cea..0644462 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: CI +name: CI-rust on: push: @@ -11,6 +11,8 @@ on: - .github/workflows/test.yml pull_request: + branches: + - main paths: - src/** - examples/** @@ -109,7 +111,7 @@ jobs: needs: Formatting runs-on: ubuntu-latest env: - MSRV_VERSION: 1.62.0 + MSRV_VERSION: 1.70.0 steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/README.md b/README.md index bc50b4c..8126a37 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ remains highly efficient. # Trying Out -The Minimum Supported Rust Version (MSRV) is 1.62.0. +The Minimum Supported Rust Version (MSRV) is 1.70.0. This is primary a library for use in other programs, but for benchmarking purposes it includes a program for intersecting BED files. From d4921e04f8efcb09e061ef74bc170cd10e671c86 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Sat, 27 Jan 2024 14:36:09 -0600 Subject: [PATCH 12/12] fix(issue-#16): add offset when convert coitee to iterator --- src/avx.rs | 8 ++++---- src/neon.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/avx.rs b/src/avx.rs index 7cc12a8..62e23ac 100644 --- a/src/avx.rs +++ b/src/avx.rs @@ -612,8 +612,8 @@ where let node = &self.nodes[self.i]; if self.j < 8 { let ret = Some(Interval { - first: node.first(self.j), - last: node.last(self.j), + first: node.first(self.j) + 1, + last: node.last(self.j) - 1, metadata: &node.metadata[self.j], }); self.count += 1; @@ -652,8 +652,8 @@ where let node = &self.nodes[self.i]; self.count += 1; Some(Interval { - first: node.first(self.j), - last: node.last(self.j), + first: node.first(self.j) + 1, + last: node.last(self.j) - 1, metadata: &node.metadata[self.j], }) } diff --git a/src/neon.rs b/src/neon.rs index cf15143..b6edae0 100644 --- a/src/neon.rs +++ b/src/neon.rs @@ -509,8 +509,8 @@ where let node = &self.nodes[self.i]; if self.j < LANE_SIZE { let ret = Some(Interval { - first: node.first(self.j), - last: node.last(self.j), + first: node.first(self.j) + 1, + last: node.last(self.j) - 1, metadata: &node.metadata[self.j], }); self.count += 1; @@ -549,8 +549,8 @@ where let node = &self.nodes[self.i]; self.count += 1; Some(Interval { - first: node.first(self.j), - last: node.last(self.j), + first: node.first(self.j) + 1, + last: node.last(self.j) - 1, metadata: &node.metadata[self.j], }) }