diff --git a/.gitignore b/.gitignore index e651855..a387970 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ target mwa_full_embedded_element_pattern.h5 /include/mwa_hyperbeam.h +/comparisons/ /examples/fee /examples/fee_parallel /examples/fee_parallel_omp diff --git a/README.md b/README.md index d777582..1b0b912 100644 --- a/README.md +++ b/README.md @@ -222,37 +222,39 @@ maturin develop --release -b pyo3 --features=python,hdf5-static --strip ## Comparing with other FEE beam codes -Below is a table comparing other implementations of the FEE beam code. All -benchmarks were done with unique azimuth and zenith angle directions, and all -on the same system. The CPU is a Ryzen 9 3900X, which has 12 cores and SMT (24 -threads). The CUDA benchmarks uses an NVIDIA GeForce RTX 2070. All benchmarks -were done in serial, unless indicated by "parallel". Python times were taken -by running `time.time()` before and after the calculations. Memory usage is -measured by running `time -v` on the command (not the `time` associated with -your shell; this is usually at `/usr/bin/time`). - -| Code | Number of directions | Duration | Max. memory usage | -|:-----------------|---------------------:|---------:|------------------:| -| [mwa_pb](https://github.com/MWATelescope/mwa_pb) | 500 | 98.8 ms | 134.6 MiB | -| | 100000 | 13.4 s | 5.29 GiB | -| | 1000000 | 139.8 s | 51.6 GiB | -| mwa-reduce (C++) | 500 | 115.2 ms | 48.9 MiB | -| | 10000 | 2.417 s | 6.02 GiB | -| mwa_hyperbeam | 500 | 10.0 ms | 9.75 MiB | -| | 100000 | 1.82 s | 11.3 MiB | -| | 1000000 | 18.1 s | 25.0 MiB | -| mwa_hyperbeam (parallel) | 1000000 | 1.55 s | 88.8 MiB | -| mwa_hyperbeam (via python) | 500 | 20.5 ms | 44.2 MiB | -| | 100000 | 3.70 s | 45.4 MiB | -| | 1000000 | 37.2 s | 59.0 MiB | -| mwa_hyperbeam (via python, parallel) | 1000000 | 2.49 s | 246.6 MiB | -| mwa_hyperbeam (CUDA, single precision) | 1000000 | 450 ms | 253.8 MiB | -| | 1e8 | 3.08 s | 14.26 GiB | - -Not sure what's up with the C++ code. 
Maybe I'm calling `CalcJonesArray` wrong, -but it uses a huge amount of memory. In any case, `hyperbeam` seems to be -roughly 10x faster. If you know how to compare with `Everybeam`, please let me -know. +A high-level summary is below. Further details and info on how these results +were obtained can be found [here](./comparisons). + +| Package | Config | Number of directions | Duration | Max. memory usage | +|:--------|:-------|---------------------:|---------:|------------------:| +| [mwa_pb](https://github.com/MWATelescope/mwa_pb) | serial | 1 | 14.85 ms | 153 MiB | +| | serial | 1,000 | 130.1 ms | 201 MiB | +| | serial | 300,000 | 37.94 s | 14.4 GiB | +| [pyuvdata](https://github.com/RadioAstronomySoftwareGroup/pyuvdata) | serial | 32,760 | 7.446 s | 653 MiB | +| | serial | 130,320 | 11.85 s | 1.92 GiB | +| [EveryBeam](https://git.astron.nl/RD/EveryBeam) | serial | 1 | 114 µs | 61.7 MiB | +| | serial | 1,000 | 103.9 ms | 61.9 MiB | +| | serial | 300,000 | 31.16 s | 71.1 MiB | +| mwa_hyperbeam | serial | 1 | 32.54 µs | 11.1 MiB | +| | serial | 1,000 | 29.02 ms | 13.3 MiB | +| | parallel | 1,000 | 4.598 ms | 13.5 MiB | +| | serial | 300,000 | 8.610 s | 33.9 MiB | +| | parallel | 300,000 | 596.1 ms | 34.6 MiB | +| | CUDA | 300,000 | 63.70 ms | 134 MiB | +| | CUDA | 999,999 | 164.4 ms | 195 MiB | + +| | mwa_pb | pyuvdata | EveryBeam | mwa_hyperbeam | +|-------------------------------------------|:------------------:|:------------------:|:---------:|:------------------:| +| Can be run in parallel? | :x: | :x: | :x: | :white_check_mark: | +| Parallactic-angle correction? | :x: | :x: | :x: | :white_check_mark: | +| GPU (CUDA/HIP) support? | :x: | :x: | :x: | :white_check_mark: | +| Supports MWA analytic beam? | :white_check_mark: | :x: | :x: | :white_check_mark: | +| Supports per-dipole gains? | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | +| Python interface? 
| :white_check_mark: | :white_check_mark: | :x:* | :white_check_mark: | +| Can be called from other languages via C? | :x: | :x: | :x: | :white_check_mark: | +| Supports MWA CRAM tile? | :x: | :x: | :x: | :white_check_mark: | + +*: `EveryBeam` has a Python interface, but it does not support the MWA beam. ## Troubleshooting diff --git a/comparisons/1090008640_2s_40kHz.trunc.ms.tar.gz b/comparisons/1090008640_2s_40kHz.trunc.ms.tar.gz new file mode 100644 index 0000000..973ccec Binary files /dev/null and b/comparisons/1090008640_2s_40kHz.trunc.ms.tar.gz differ diff --git a/comparisons/README.md b/comparisons/README.md new file mode 100644 index 0000000..7aaf154 --- /dev/null +++ b/comparisons/README.md @@ -0,0 +1,281 @@ +# Comparisons between a few implementations of the MWA FEE beam code + +## Performance summary + +| Package | Config | Number of directions | Duration | Max. memory usage | +|:--------|:-------|---------------------:|---------:|------------------:| +| [mwa_pb](https://github.com/MWATelescope/mwa_pb) | serial | 1 | 14.85 ms | 153 MiB | +| | serial | 1,000 | 130.1 ms | 201 MiB | +| | serial | 300,000 | 37.94 s | 14.4 GiB | +| [pyuvdata](https://github.com/RadioAstronomySoftwareGroup/pyuvdata) | serial | 32,760 | 7.446 s | 653 MiB | +| | serial | 130,320 | 11.85 s | 1.92 GiB | +| [EveryBeam](https://git.astron.nl/RD/EveryBeam) | serial | 1 | 114 µs | 61.7 MiB | +| | serial | 1,000 | 103.9 ms | 61.9 MiB | +| | serial | 300,000 | 31.16 s | 71.1 MiB | +| mwa_hyperbeam | serial | 1 | 32.54 µs | 11.1 MiB | +| | serial | 1,000 | 29.02 ms | 13.3 MiB | +| | parallel | 1,000 | 4.598 ms | 13.5 MiB | +| | serial | 300,000 | 8.610 s | 33.9 MiB | +| | parallel | 300,000 | 596.1 ms | 34.6 MiB | +| | serial | 999,999 | 28.66 s | 87.3 MiB | +| | parallel | 999,999 | 1.986 s | 90.3 MiB | +| | CUDA | 300,000 | 63.70 ms | 134 MiB | +| | CUDA | 999,999 | 164.4 ms | 195 MiB | +| | serial, Python | 1 | 36.95 µs | 42.5 MiB | +| | serial, Python | 1,000 | 31.57 ms | 44.9 MiB | 
+| | parallel, Python | 1,000 | 3.890 ms | 45.5 MiB | +| | serial, Python | 300,000 | 9.393 s | 93.5 MiB | +| | parallel, Python | 300,000 | 660.3 ms | 96.3 MiB | +| | serial, Python | 999,999 | 31.05 s | 211 MiB | +| | parallel, Python | 999,999 | 2.191 s | 212 MiB | +| | CUDA, Python | 999,999 | 1.342 s | 305 MiB | + +All of the durations refer to "hot cache" times. See the full print out of the +benchmarks run and the system details at the bottom of this page. You can verify +these numbers by running `run.sh`. + +## Compared packages and their features + +Please file an issue if this information is incorrect. + +| | mwa_pb | pyuvdata | EveryBeam | mwa_hyperbeam | +|-------------------------------------------|:------------------:|:------------------:|:---------:|:------------------:| +| Can be run in parallel? | :x: | :x: | :x: | :white_check_mark: | +| Parallactic-angle correction? | :x: | :x: | :x: | :white_check_mark: | +| GPU (CUDA/HIP) support? | :x: | :x: | :x: | :white_check_mark: | +| Supports MWA analytic beam? | :white_check_mark: | :x: | :x: | :white_check_mark: | +| Supports per-dipole gains? | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: | +| Python interface? | :white_check_mark: | :white_check_mark: | :x:* | :white_check_mark: | +| Can be called from other languages via C? | :x: | :x: | :x: | :white_check_mark: | +| Supports MWA CRAM tile? | :x: | :x: | :x: | :white_check_mark: | + +*: `EveryBeam` has a Python interface, but it does not support the MWA beam. + +## Compared packages and methodology + +To my knowledge, `EveryBeam` only takes RADec coordinates to do its simulations, +whereas others use azimuth and elevation. Also unlike others, `EveryBeam` +needs a measurement set for its work. To keep things as fair as possible, I've +provided a stripped-down measurement set and given it to `EveryBeam` with RADec +(0, -27). This corresponds to a specific AzEl which I've then used in other +packages. 
+ +### mwa_pb + +Installed with `pip install mwa_pb`. + +The latest pip-installable version needs a hack to work correctly with the +latest `numpy`. Change all instances of `numpy.complex` and `numpy.complex64` to +just `complex`. Also this line + +```python +if not interp and "mwa_hyperbeam" in sys.modules: +``` + +needs to be changed to + +```python +if False: +``` + +to actually use the pure-Python `mwa_pb`, otherwise `hyperbeam` is used +internally :) + +Finally, the results here are a little misleading. All other examples use the +same azimuth and elevation for their simulations, and therefore the resulting +Jones matrices are all the same. This is fine, as none of the other code checks +the input to de-duplicate (it's the caller's responsibility). However, `mwa_pb` +does notice this duplication. To actually get it to do some work, it is called +with different azimuths and elevations to other packages, which is a little +unfair. + +In addition, `mwa_pb` has a nice interpolation feature, which reduces +the number of FEE simulations to be made by gridding coarsely and using "nearby" +simulations instead. Using interpolation makes the code quite fast, but would +also be a very unfair comparison, as other packages don't have interpolation and +using it compromises the accuracy of the results. + +### EveryBeam + +Version 0.5.3, alongside `casacore` 3.5.0. I had to manually install the MWA +header files, as the default `CMake` build doesn't seem to. I also used the +Python interface via `pip` (version 0.5.1) and from source (version 0.5.3); +neither supports MWA beam responses. + +`EveryBeam` does not appear to have a way to do FEE calculations in parallel. +Attempting to compile with `OpenMP` and annotating a pragma on a for loop either +caused segfaults or showed HDF5 errors. Thus, `EveryBeam` is only using a single +thread in this comparison. If there is a way to make the code run in parallel, +please share it! 
+ +It also seems that the beam responses are normalised regardless of the beam +normalisation setting. + +### pyuvdata + +I used version 2.4.1 installed with `pip`. + +`pyuvdata` includes a module `uvbeam`, which has MWA beam code. I'm not sure how +to use this code best in a fair comparison; it seems designed to generate beam +responses over a grid with adjustable resolution rather than allowing arbitrary +directions. I've opted to adjust the resolution and report the timings and +memory usage. + +### mwa_hyperbeam + +Also referred to as `hyperbeam`. Installed from source, and the Python interface +was also from source or via `pip`. + +#### CUDA + +`hyperbeam` can use CUDA-capable (or HIP-capable) GPUs to run the simulations +faster. Here, we are using the `gpu-single` feature; this means the simulations +are calculated with 32-bit floats, which benefits my desktop-grade GPU. Omitting +this feature will make the calculations use 64-bit floats, which run much +slower on my GPU. However, these floats will be crunched much faster on a +"datacenter"-grade GPU, such as those hosted by Pawsey. + +## Omitted + +### "Marcin's C++ code" + +This code can be seen on +[this commit of `hyperbeam`](https://github.com/MWATelescope/mwa_hyperbeam/commit/72e0914). +From memory, this code isn't ready to be used as a library, so it's not easy +to do comparisons with. However, my (CHJ) testing from years ago suggested that +this code is 10x slower and uses much more memory than `hyperbeam`, while giving +extremely similar if not the same results. + +### RTS FEE + +The Real-Time System (RTS) has an implementation of the FEE code, but only for +CUDA. The RTS is not open sourced, so it is difficult to fairly compare this +code. However, Jack Line's testing showed that its results were consistent with +`hyperbeam` but approximately 3-4x slower. + +## Benchmark print out + +Obtained by running `run.sh`. 
+ +``` +*** System information *** +uname -a: + Linux sirius 6.5.2-arch1-1 #1 SMP PREEMPT_DYNAMIC Wed, 06 Sep 2023 21:01:01 +0000 x86_64 GNU/Linux +CPU: + AMD Ryzen 9 3900X 12-Core Processor (12 cores, 24 threads) +GPU: + NVIDIA GeForce RTX 2070 +Total memory: + 128735 MiB +glibc: + GNU C Library (GNU libc) stable release version 2.38. +Compilers: + GCC: g++ (GCC) 13.2.1 20230801 + Rust: rustc 1.75.0 (82e1608df 2023-12-21) + nvcc: Cuda compilation tools, release 12.2, V12.2.91 +Python: + Python 3.11.5 +*** + +*** mwa_pb Python results *** +time taken to produce 1 simulation (cold cache): 0.04600405693054199s +time taken to produce 1 simulation (hot cache): 0.01485133171081543s +time taken to produce 1000 simulations (hot cache): 0.13009285926818848s +time taken to produce 300000 simulations (hot cache): 37.93662881851196s +Max memory use (kBytes): 15080124 +*** + +*** pyuvdata Python results *** +time taken to produce 32760 simulations: 7.445570707321167s +time taken to produce 130320 simulations: 11.852437734603882s +Max memory use (kBytes): 2459428 +*** + +*** hyperbeam Python results with 1 CPU core *** +time taken to produce 1 simulation (cold cache): 0.001977205276489258s +time taken to produce 1 simulation (hot cache): 3.695487976074219e-05s +time taken to produce 1000 simulations (hot cache): 0.031566619873046875s +time taken to produce 300000 simulations (hot cache): 9.393456220626831s +time taken to produce 999999 simulations (hot cache): 31.051132917404175s +First and last MWA beam responses: +[ 0.95149467+0.23737095j -0.16821772-0.04190302j -0.1687059 -0.04202984j + -0.95181881-0.23673898j] +[ 0.95149467+0.23737095j -0.16821772-0.04190302j -0.1687059 -0.04202984j + -0.95181881-0.23673898j] +Max memory use (kBytes): 235028 + +*** hyperbeam Python results with all CPU cores *** +time taken to produce 1 simulation (cold cache): 0.001982450485229492s +time taken to produce 1 simulation (hot cache): 3.6716461181640625e-05s +time taken to produce 1000 
simulations (hot cache): 0.0038902759552001953s +time taken to produce 300000 simulations (hot cache): 0.6602745056152344s +time taken to produce 999999 simulations (hot cache): 2.190699577331543s +First and last MWA beam responses: +[ 0.95149467+0.23737095j -0.16821772-0.04190302j -0.1687059 -0.04202984j + -0.95181881-0.23673898j] +[ 0.95149467+0.23737095j -0.16821772-0.04190302j -0.1687059 -0.04202984j + -0.95181881-0.23673898j] + +*** hyperbeam Python results with CUDA *** +time taken to produce 999999 simulations (hot cache): 1.3418140411376953s +Max memory use (kBytes): 381324 +*** + +*** Compiling EveryBeam C++ example *** +make: Nothing to be done for 'all'. + +*** EveryBeam C++ results with 1 CPU core *** +time taken to produce 1 simulation (cold cache): 0.099906s +time taken to produce 1 simulation (hot cache): 0.000114s +time taken to produce 1000 simulations: 0.103940s +time taken to produce 300000 simulations: 31.161970s + +First and last MWA beam responses: +[+0.951391+0.237339i, -0.168200-0.041897i + -0.168688-0.042025i, -0.951721-0.236713i] +[+0.951391+0.237339i, -0.168200-0.041897i + -0.168688-0.042025i, -0.951721-0.236713i] +Max memory use (kBytes): 72784 +*** + +*** Compiling hyperbeam Rust code *** + +*** hyperbeam Rust results with 1 CPU core *** +time taken to produce 1 simulation (cold cache): 1.936581ms +time taken to produce 1 simulation (hot cache): 32.542µs +time taken to produce 1000 simulations (hot cache): 29.02333ms +time taken to produce 300000 simulations (hot cache): 8.609547797s +time taken to produce 999999 simulations (hot cache): 28.656200548s +First and last MWA beam responses: +[+0.951495+0.237371i, -0.168218-0.041903i + -0.168706-0.042030i, -0.951819-0.236739i] +[+0.951495+0.237371i, -0.168218-0.041903i + -0.168706-0.042030i, -0.951819-0.236739i] +Max memory use (kBytes): 91416 + +*** hyperbeam Rust results with all CPU cores *** +time taken to produce 1 simulation (cold cache): 1.884332ms +time taken to produce 1 simulation 
(hot cache): 30.046µs +time taken to produce 1000 simulations (hot cache): 4.598173ms +time taken to produce 300000 simulations (hot cache): 596.130603ms +time taken to produce 999999 simulations (hot cache): 1.985971281s +First and last MWA beam responses: +[+0.951495+0.237371i, -0.168218-0.041903i + -0.168706-0.042030i, -0.951819-0.236739i] +[+0.951495+0.237371i, -0.168218-0.041903i + -0.168706-0.042030i, -0.951819-0.236739i] +Max memory use (kBytes): 92028 + +*** hyperbeam Rust results with CUDA *** +time taken to produce 300000 simulations (cold cache): 1.069158203s +time taken to produce 300000 simulations (hot cache): 63.699647ms +time taken to produce 999999 simulations (hot cache): 164.427598ms +First and last MWA beam responses: +[+0.951469+0.237365i, -0.168213-0.041902i + -0.168701-0.042029i, -0.951793-0.236733i] +[+0.951469+0.237365i, -0.168213-0.041902i + -0.168701-0.042029i, -0.951793-0.236733i] +Max memory use (kBytes): 205064 +*** +``` + diff --git a/comparisons/everybeam/Makefile b/comparisons/everybeam/Makefile new file mode 100644 index 0000000..b587335 --- /dev/null +++ b/comparisons/everybeam/Makefile @@ -0,0 +1,8 @@ +CXX = g++ + +all: everybeam_example + +everybeam_example: everybeam_example.cpp + $(CXX) -O3 -march=native \ + -leverybeam -lcasa_ms -lcasa_measures -lcasa_casa \ + everybeam_example.cpp -o everybeam_example diff --git a/comparisons/everybeam/everybeam_example.cpp b/comparisons/everybeam/everybeam_example.cpp new file mode 100644 index 0000000..8c5aa8c --- /dev/null +++ b/comparisons/everybeam/everybeam_example.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +const size_t NUM_POINTS_TO_SIMULATE = 300'000; +const double RA_RAD = 0.0 * M_PI / 180.0; +const double DEC_RAD = -27.0 * M_PI / 180.0; +const double FREQ_HZ = 150e6; + +int main(int argc, char *argv[]) { + if (argc == 1) { + printf("Expected an argument (path to a measurement set for 
EveryBeam to use)\n"); + return 1; + } + const char *ms_path = argv[1]; + + casacore::MeasurementSet ms(ms_path); + casacore::MEpoch::ScalarColumn time_column(ms, ms.columnName(casacore::MSMainEnums::TIME)); + casacore::MEpoch first_time = time_column(0); + casacore::Quantity first_utc_time = first_time.get(casacore::Unit("s")); + + everybeam::Options options; + // options.coeff_path = "/usr/local/mwa_full_embedded_element_pattern.h5"; + options.coeff_path = std::getenv("MWA_BEAM_FILE"); + options.beam_normalisation_mode = everybeam::BeamNormalisationMode::kFull; + options.beam_mode = everybeam::BeamMode::kFull; + options.frequency_interpolation = false; + everybeam::telescope::MWA beam = everybeam::telescope::MWA(ms, options); + + std::complex *jones = + (std::complex *)malloc(NUM_POINTS_TO_SIMULATE * 4 * sizeof(std::complex)); + + std::unique_ptr pr = beam.GetPointResponse(first_utc_time.getBaseValue()); + + auto start = std::chrono::high_resolution_clock::now(); + pr->Response(everybeam::BeamMode::kFull, jones, RA_RAD, DEC_RAD, FREQ_HZ, 0, 0); + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + printf("time taken to produce 1 simulation (cold cache): %fs\n", (double)duration.count() / 1e6); + + start = std::chrono::high_resolution_clock::now(); + pr->Response(everybeam::BeamMode::kFull, jones, RA_RAD, DEC_RAD, FREQ_HZ, 0, 0); + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + printf("time taken to produce 1 simulation (hot cache): %fs\n", (double)duration.count() / 1e6); + + start = std::chrono::high_resolution_clock::now(); + // Attempting to use OpenMP will either cause segfaults or expose the lack + // of thread safety in HDF5 + // #pragma omp parallel for + for (size_t i_jones = 0; i_jones < 1000; ++i_jones) { + std::complex *e = jones + 4 * i_jones; + pr->Response(everybeam::BeamMode::kFull, e, RA_RAD, DEC_RAD, FREQ_HZ, 0, 0); + } + 
stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + printf("time taken to produce %ld simulations: %fs\n", 1000, (double)duration.count() / 1e6); + + start = std::chrono::high_resolution_clock::now(); + // Attempting to use OpenMP will either cause segfaults or expose the lack + // of thread safety in HDF5 + // #pragma omp parallel for + for (size_t i_jones = 0; i_jones < NUM_POINTS_TO_SIMULATE; ++i_jones) { + std::complex *e = jones + 4 * i_jones; + pr->Response(everybeam::BeamMode::kFull, e, RA_RAD, DEC_RAD, FREQ_HZ, 0, 0); + } + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + printf("time taken to produce %ld simulations: %fs\n", NUM_POINTS_TO_SIMULATE, (double)duration.count() / 1e6); + + printf("\nFirst and last MWA beam responses:\n"); + printf("[%+f%+fi, %+f%+fi\n", jones[0].real(), jones[0].imag(), jones[1].real(), jones[1].imag()); + printf(" %+f%+fi, %+f%+fi]\n", jones[2].real(), jones[2].imag(), jones[3].real(), jones[3].imag()); + printf("[%+f%+fi, %+f%+fi\n", jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 0].real(), + jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 0].imag(), jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 1].real(), + jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 1].imag()); + printf(" %+f%+fi, %+f%+fi]\n", jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 2].real(), + jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 2].imag(), jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 3].real(), + jones[(NUM_POINTS_TO_SIMULATE - 1) * 4 + 3].imag()); + + return 0; +} diff --git a/comparisons/everybeam_example.py b/comparisons/everybeam_example.py new file mode 100755 index 0000000..2bbee74 --- /dev/null +++ b/comparisons/everybeam_example.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 + +import os +import sys + +import everybeam as eb + + +ms_path = sys.argv[1] +# This throws an error on v0.5.3 of EveryBeam (the latest at the time of +# writing), because the MWA isn't supported. 
+telescope = eb.load_telescope(ms_path, os.environ.get("MWA_BEAM_FILE")) diff --git a/comparisons/hyperbeam/.cargo/config.toml b/comparisons/hyperbeam/.cargo/config.toml new file mode 100644 index 0000000..c729c54 --- /dev/null +++ b/comparisons/hyperbeam/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = "-C target-cpu=native" diff --git a/comparisons/hyperbeam/Cargo.lock b/comparisons/hyperbeam/Cargo.lock new file mode 100644 index 0000000..7a247ee --- /dev/null +++ b/comparisons/hyperbeam/Cargo.lock @@ -0,0 +1,1159 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "built" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b9c056b9ed43aee5e064b683aa1ec783e19c6acec7559e3ae931b7490472fbe" +dependencies = [ + "cargo-lock", +] + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "cargo-lock" +version = "8.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031718ddb8f78aa5def78a09e90defe30151d1f6c672f937af4dd916429ed996" +dependencies = [ + "semver", + "serde", + "toml", + "url", +] + +[[package]] +name = "cbindgen" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faeaa693e5a727975a79211b8f35c0cb09b031fdb6eaa4a788bc6713d01488ca" +dependencies = [ + "heck", + "indexmap", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 1.0.109", + "tempfile", + "toml", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "cuda-config" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee74643f7430213a1a78320f88649de309b20b80818325575e393f848f79f5d" +dependencies = [ + "glob", +] + +[[package]] +name = "cuda-runtime-sys" +version = "0.3.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d070b301187fee3c611e75a425cf12247b7c75c09729dbdef95cb9cb64e8c39" +dependencies = [ + "cuda-config", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "erfa" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63880def87bd7d612b89f9046f5db0961949417f88d831d24596eae555915e5" +dependencies = [ + "thiserror", +] + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hdf5" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdcd9b131fd67bb827b386d0dc63d3e74196a14616ef800acf87ca5fef741a10" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "hdf5-derive", + "hdf5-sys", + "hdf5-types", + "lazy_static", + "libc", + "ndarray", + "parking_lot 0.11.2", + "paste", +] + +[[package]] +name = "hdf5-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a77ac6a41e6880594d506118c0b8bc665ec959fe4636e0c84809756d224820" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "hdf5-sys" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4842d5980dc311a7c8933c7b45534fdae84df5ae7939a0ae8e449a56d4beb3d2" +dependencies = [ + "libc", + "libloading", + "pkg-config", + "regex", + "serde", + "serde_derive", + "winreg", +] + +[[package]] +name = "hdf5-types" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b47268c0dfb499b1ffe5638b6e7694e7a87fe49fb92eca998a4346e5483e428f" +dependencies = [ + "ascii", + "cfg-if", + "hdf5-sys", + "libc", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hifitime" +version = "3.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c587aef1280b84f15bfd84eefff9ee55d1a2826e67f089ed263a8c3a029c273" +dependencies = [ + "js-sys", + "lexical-core", + "num-traits", + "serde", + "serde_derive", + "wasm-bindgen", + "web-sys", 
+] + +[[package]] +name = "hip-runtime-sys" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901a5d54cfff799dd9e6f6e1d53883bb50afdd92edce7680b1ca299d1805f65a" +dependencies = [ + "libc", +] + +[[package]] +name = "hip-sys" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f399629f98b6249efc10039e949baa70e2ff2bf84d3c30e7a0ca49f179a04287" +dependencies = [ + "hip-runtime-sys", +] + +[[package]] +name = "hyperbeam" +version = "0.1.0" +dependencies = [ + "mwa_hyperbeam", + "ndarray", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] 
+name = "js-sys" +version = "0.3.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = 
"libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "marlu" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33477b1391477af72a79bf06ed326f7625c21d9e4cd49ddc99d5d8c0d6ba3186" +dependencies = [ + "built", + "cfg-if", + "erfa", + "hifitime", + "itertools", + "lazy_static", + "log", + "ndarray", + "num-complex", + "num-traits", + "rayon", + "tar", + "thiserror", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "mwa_hyperbeam" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "361ad095f48d8aeba20b52c03ca3462cce4e7c48ccb80a8c12f10e950fb2f9d8" +dependencies = [ + "cbindgen", + "cc", + "cfg-if", + "cuda-runtime-sys", + "hdf5", + "hip-sys", + "marlu", + "ndarray", + "num-complex", + 
"panic-message", + "parking_lot 0.12.1", + "rayon", + "thiserror", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", + "rayon", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "panic-message" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384e52fd8fbd4cbe3c317e8216260c21a0f9134de108cea8a4dd4e7e152c472d" + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.9", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall 0.2.16", + "smallvec", + "winapi", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pkg-config" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustix" +version = "0.38.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" +dependencies = [ + "bitflags 2.4.2", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "serde_json" +version = "1.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "smallvec" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b187f0231d56fe41bfb12034819dd2bf336422a5866de41bc3fec4b2e3883e8" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = 
"1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall 0.4.1", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + 
+[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.48", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +dependencies = [ + "quote", + 
"wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" + +[[package]] +name = "web-sys" +version = "0.3.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 
+dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = 
"windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "serde", + "winapi", +] + +[[package]] +name = "xattr" +version = "1.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] diff --git a/comparisons/hyperbeam/Cargo.toml b/comparisons/hyperbeam/Cargo.toml new file mode 100644 index 0000000..d5290dd --- /dev/null +++ b/comparisons/hyperbeam/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "hyperbeam" +version = "0.1.0" +edition = "2021" + +[features] +default = [] +cuda = ["mwa_hyperbeam/cuda"] +hip = ["mwa_hyperbeam/hip"] +gpu-single = ["mwa_hyperbeam/gpu-single"] + +[[bin]] +name = "hyperbeam-cuda" +required-features = ["cuda"] + +[dependencies] +mwa_hyperbeam = "0.7.0" +ndarray = "0.15.6" diff --git a/comparisons/hyperbeam/src/bin/hyperbeam-cuda.rs b/comparisons/hyperbeam/src/bin/hyperbeam-cuda.rs new file mode 100644 index 0000000..2e92f89 --- /dev/null +++ b/comparisons/hyperbeam/src/bin/hyperbeam-cuda.rs @@ -0,0 +1,84 @@ +use mwa_hyperbeam::{ + fee::{FEEBeam, FEEBeamGpu}, + AzEl, +}; +use ndarray::Array2; + +const NUM_POINTS_TO_SIMULATE: usize = 300_000; +const NUM_POINTS_TO_SIMULATE_BIG: usize = 999_999; +const FREQ_HZ: f64 = 150e6; +const EVERYBEAM_AZ_RAD: f64 = 1.745998843813605; +const EVERYBEAM_EL_RAD: f64 = 1.548676626223685; +const MS_DELAYS: &[u32; 16] = &[0; 16]; +const DIPOLE_GAINS: &[f64; 16] = &[1.0; 16]; +const _EVERYBEAM_MWA_LATITUDE_RAD: f64 = -0.466018978039551; + +fn main() { + let beam = FEEBeam::new_from_env().unwrap(); + let gpu_beam: FEEBeamGpu; + let delays_array = Array2::from_shape_vec((1, MS_DELAYS.len()), Vec::from(MS_DELAYS)).unwrap(); + let gains_array = + Array2::from_shape_vec((1, DIPOLE_GAINS.len()), Vec::from(DIPOLE_GAINS)).unwrap(); + let azel = AzEl::from_radians(EVERYBEAM_AZ_RAD, EVERYBEAM_EL_RAD); + let azels = vec![azel; NUM_POINTS_TO_SIMULATE]; + + { + // In order to more fairly compare the cold cache time here with others, + // initialise the GPU beam after the timer starts. 
+ let start = std::time::Instant::now(); + gpu_beam = unsafe { + beam.gpu_prepare( + &[FREQ_HZ as u32], + delays_array.view(), + gains_array.view(), + true, + ) + } + .unwrap(); + let _jones = gpu_beam.calc_jones(&azels, None, false).unwrap(); + println!( + "time taken to produce {} simulations (cold cache): {:?}", + azels.len(), + start.elapsed() + ); + } + { + let start = std::time::Instant::now(); + let _jones = gpu_beam.calc_jones(&azels, None, false).unwrap(); + println!( + "time taken to produce {} simulations (hot cache): {:?}", + azels.len(), + start.elapsed() + ); + } + { + let azels_big = vec![azel; NUM_POINTS_TO_SIMULATE_BIG]; + let start = std::time::Instant::now(); + let jones = gpu_beam.calc_jones(&azels_big, None, false).unwrap(); + println!( + "time taken to produce {} simulations (hot cache): {:?}", + azels_big.len(), + start.elapsed() + ); + + println!("First and last MWA beam responses:"); + let first = jones.first().unwrap(); + let last = jones.last().unwrap(); + println!( + "[{:+.6}{:+.6}i, {:+.6}{:+.6}i", + first[0].re, first[0].im, first[1].re, first[1].im + ); + println!( + " {:+.6}{:+.6}i, {:+.6}{:+.6}i]", + first[2].re, first[2].im, first[3].re, first[3].im + ); + println!( + "[{:+.6}{:+.6}i, {:+.6}{:+.6}i", + last[0].re, last[0].im, last[1].re, last[1].im + ); + println!( + " {:+.6}{:+.6}i, {:+.6}{:+.6}i]", + last[2].re, last[2].im, last[3].re, last[3].im + ); + } +} diff --git a/comparisons/hyperbeam/src/bin/hyperbeam.rs b/comparisons/hyperbeam/src/bin/hyperbeam.rs new file mode 100644 index 0000000..18c5654 --- /dev/null +++ b/comparisons/hyperbeam/src/bin/hyperbeam.rs @@ -0,0 +1,102 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + +use mwa_hyperbeam::{fee::FEEBeam, AzEl}; + +const FREQ_HZ: f64 = 150e6; +const EVERYBEAM_AZ_RAD: f64 = 1.745998843813605; +const EVERYBEAM_EL_RAD: f64 = 1.548676626223685; +const MS_DELAYS: &[u32; 16] = &[0; 16]; +const DIPOLE_GAINS: &[f64; 16] = &[1.0; 16]; +const _EVERYBEAM_MWA_LATITUDE_RAD: f64 = 
-0.466018978039551; + +static CACHE_HOT: AtomicBool = AtomicBool::new(false); + +fn bench(n: usize, beam: &FEEBeam, azel: AzEl, print_first_and_last: bool) { + let azels = vec![azel; n]; + let start = std::time::Instant::now(); + + let jones = if n == 1 { + beam.calc_jones( + azel, + FREQ_HZ as u32, + MS_DELAYS, + DIPOLE_GAINS, + true, + None, + false, + ) + .unwrap(); + None + } else { + let jones = beam + .calc_jones_array( + &azels, + FREQ_HZ as u32, + MS_DELAYS, + DIPOLE_GAINS, + true, + None, + false, + ) + .unwrap(); + Some(jones) + }; + let duration = start.elapsed(); + + let cache_state = if CACHE_HOT.load(Ordering::Relaxed) { + "hot" + } else { + CACHE_HOT.store(true, Ordering::Relaxed); + "cold" + }; + let plural = if n == 1 { "" } else { "s" }; + println!( + "time taken to produce {n} simulation{plural} ({cache_state} cache): {:?}", + duration + ); + + if let Some(jones) = jones { + if print_first_and_last { + println!("First and last MWA beam responses:"); + let first = jones.first().unwrap(); + let last = jones.last().unwrap(); + println!( + "[{:+.6}{:+.6}i, {:+.6}{:+.6}i", + first[0].re, first[0].im, first[1].re, first[1].im + ); + println!( + " {:+.6}{:+.6}i, {:+.6}{:+.6}i]", + first[2].re, first[2].im, first[3].re, first[3].im + ); + println!( + "[{:+.6}{:+.6}i, {:+.6}{:+.6}i", + last[0].re, last[0].im, last[1].re, last[1].im + ); + println!( + " {:+.6}{:+.6}i, {:+.6}{:+.6}i]", + last[2].re, last[2].im, last[3].re, last[3].im + ); + } + } +} + +fn main() { + let beam = FEEBeam::new_from_env().unwrap(); + let azel = AzEl::from_radians(EVERYBEAM_AZ_RAD, EVERYBEAM_EL_RAD); + + // Check for a CLI argument. If it's there, we'll do only one benchmark + // with the indicated amount of simulations. This is mostly useful to see + // memory usage as a one-off. + if let Some(arg) = std::env::args().nth(1) { + // Verify it's a number. 
+ let n = arg.parse().expect("is a number"); + bench(n, &beam, azel, true); + std::process::exit(0); + } + + bench(1, &beam, azel, false); + bench(1, &beam, azel, false); + bench(1000, &beam, azel, false); + bench(300_000, &beam, azel, false); + bench(999_999, &beam, azel, true); +} diff --git a/comparisons/hyperbeam_example.py b/comparisons/hyperbeam_example.py new file mode 100755 index 0000000..02ff356 --- /dev/null +++ b/comparisons/hyperbeam_example.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +import sys +import time + +import numpy as np +import mwa_hyperbeam + + +N = 1000 +FREQ_HZ = 150e6 +DELAYS = [0] * 16 +AMPS = [1.0] * 16 + + +def get_pointings(n): + # az = np.linspace(0, 0.9 * np.pi, n) + # za = np.linspace(0.1, 0.9 * np.pi / 2, n) + az = np.ones(n) * 1.745998843813605 + za = np.ones(n) * (np.pi / 2 - 1.548676626223685) + return az, za + + +az, za = get_pointings(N) +beam = mwa_hyperbeam.FEEBeam() + +start_time = time.time() +jones = beam.calc_jones( + az[0], + za[0], + FREQ_HZ, + delays=DELAYS, + amps=AMPS, + norm_to_zenith=True, + latitude_rad=None, + iau_order=False, +) +duration = time.time() - start_time +print(f"time taken to produce 1 simulation (cold cache): {duration}s") + +start_time = time.time() +jones = beam.calc_jones( + az[0], + za[0], + FREQ_HZ, + delays=DELAYS, + amps=AMPS, + norm_to_zenith=True, + latitude_rad=None, + iau_order=False, +) +duration = time.time() - start_time +print(f"time taken to produce 1 simulation (hot cache): {duration}s") + +start_time = time.time() +jones = beam.calc_jones_array( + az, + za, + FREQ_HZ, + delays=DELAYS, + amps=AMPS, + norm_to_zenith=True, + latitude_rad=None, + iau_order=False, +) +duration = time.time() - start_time +print(f"time taken to produce {len(az)} simulations (hot cache): {duration}s") + +az, za = get_pointings(300000) +start_time = time.time() +jones = beam.calc_jones_array( + az, + za, + FREQ_HZ, + delays=DELAYS, + amps=AMPS, + norm_to_zenith=True, + latitude_rad=None, + 
def get_pointings(n):
    """Build the ``n`` directions used by the mwa_pb benchmark.

    Returns a tuple ``(az, za)`` of 1-D NumPy arrays with ``n`` evenly spaced
    samples each: ``az`` runs from 0 to 0.9*pi and ``za`` from 0.1 to
    0.9*pi/2.

    NOTE(review): presumably azimuth and zenith angle in radians -- confirm
    against the mwa_pb API.
    """
    # Alternative kept from the original: one fixed direction repeated n times.
    # az = np.ones(n) * 1.745998843813605
    # za = np.ones(n) * (np.pi / 2 - 1.548676626223685)
    az_stop = 0.9 * np.pi
    za_start, za_stop = 0.1, 0.9 * np.pi / 2
    return np.linspace(0, az_stop, n), np.linspace(za_start, za_stop, n)
#!/usr/bin/env python3

# Written against pyuvdata 2.4.1
#
# Times how long pyuvdata takes to load the MWA FEE beam at two resolutions.
# The path to the beam file is read from the MWA_BEAM_FILE environment
# variable.

import os
import time

import numpy as np
from pyuvdata.uvbeam import UVBeam


FREQ_HZ = 150e6
# Two rows of 16 values each.
# NOTE(review): presumably one row per polarisation, one column per dipole --
# confirm against the pyuvdata MWA beam documentation.
DELAYS = np.zeros((2, 16), dtype=int)
AMPS = np.ones((2, 16))


mwa_beam_file = os.environ.get("MWA_BEAM_FILE")
if not mwa_beam_file:
    # Without this check, None is handed to UVBeam.from_file and the failure
    # surfaces somewhere inside pyuvdata with no hint about the real cause.
    raise SystemExit(
        "MWA_BEAM_FILE must be set to the path of the MWA FEE HDF5 beam file"
    )

for res in [1, 2]:
    start_time = time.time()
    beam = UVBeam.from_file(
        mwa_beam_file,
        delays=DELAYS,
        amplitudes=AMPS,
        frequency=[FREQ_HZ],
        pixels_per_deg=res,
    )
    duration = time.time() - start_time
    # The reported count is the product of the last two axes of the beam data.
    # NOTE(review): presumably the two sky-coordinate axes -- confirm.
    num_simulations = beam.data_array.shape[-1] * beam.data_array.shape[-2]
    print(f"time taken to produce {num_simulations} simulations: {duration}s")
# Please file an issue if this comparison is very misleading or
# incorrect.
#
# Requirements:
# - /usr/bin/time (GNU time; -v and -o are used)
# - Python (with a venv module inside it)
# - everybeam (https://git.astron.nl/RD/EveryBeam)
# - a Rust toolchain (see rustup)
#
# Optional:
# - CUDA (set USE_CUDA=0 in the environment to skip the CUDA runs)

set -eu

# https://stackoverflow.com/questions/4774054/reliable-way-for-a-bash-script-to-get-the-full-path-to-itself
SCRIPTPATH="$(cd -- "$(dirname -- "$0")" >/dev/null 2>&1 && pwd -P)"

# The venv, the example scripts, the measurement set tarball and the
# everybeam/ and hyperbeam/ directories are all addressed relative to this
# script, so work from its directory regardless of where it was invoked from.
cd -- "${SCRIPTPATH}"

# Set up a Python env
if [[ ! -r venv ]]; then
    python3 -m venv venv
    . ./venv/bin/activate
    pip install mwa_hyperbeam everybeam mwa_pb pyuvdata
else
    . ./venv/bin/activate
fi

# Adjust to suit your system
# Automatically provided by mwa_pb. Only the first match is used, so several
# copies inside the venv can't produce a multi-line path.
MWA_BEAM_FILE="$(find "${SCRIPTPATH}/venv" -type f -name mwa_full_embedded_element_pattern.h5 -print -quit)"
# MWA_BEAM_FILE="/usr/local/mwa_full_embedded_element_pattern.h5" # A hard-coded path
export MWA_BEAM_FILE
export MS="${SCRIPTPATH}/1090008640_2s_40kHz.trunc.ms"
# 1 enables the CUDA benchmarks; can be overridden from the environment.
USE_CUDA="${USE_CUDA:-1}"

# Require a regular file: an empty search result must not pass the check just
# because some directory happens to be readable.
if [[ ! -f "${MWA_BEAM_FILE}" || ! -r "${MWA_BEAM_FILE}" ]]; then
    echo "The MWA HDF5 beam file doesn't exist; adjust the variable" >&2
    exit 1
fi
if [[ ! -r "${MS}" ]]; then
    # Extract the MS out of the tarball
    if [[ ! -r "${MS}.tar.gz" ]]; then
        echo "The measurement set tarball needed for EveryBeam doesn't exist; adjust the variable" >&2
        exit 1
    fi
    # Unpack next to the script, which is where ${MS} is expected to appear.
    tar -C "${SCRIPTPATH}" -xf "${MS}.tar.gz"
fi

#####

#######################################
# Run a command under /usr/bin/time -v and report its peak memory use.
# Arguments: the command to run, followed by its arguments
# Outputs:   the command's own stdout/stderr, passed through unchanged,
#            followed by a "Max memory use (kBytes)" line on stdout
# Returns:   the exit status of the command
#######################################
run_and_measure_memory () {
    local report status=0 memory
    report="$(mktemp)"

    # -o sends time's report to a file, so the command's output isn't mixed
    # with it and the command's exit status isn't lost.
    /usr/bin/time -v -o "${report}" "$@" || status=$?

    memory="$(grep "Maximum resident set size" "${report}" | cut -d: -f2 | cut -d' ' -f2-)"
    rm -f -- "${report}"
    echo "Max memory use (kBytes): ${memory}"

    if [[ "${status}" -ne 0 ]]; then
        echo "Command failed with exit status ${status}: $*" >&2
    fi
    return "${status}"
}

#######################################
# Print the kernel, CPU, GPU (if USE_CUDA is 1), memory, glibc, compiler and
# Python details of the machine running the benchmarks.
# Globals:   USE_CUDA (read)
# Outputs:   the report, on stdout
#######################################
print_sys_info () {
    local name cores threads
    local glibc=/usr/lib/libc.so.6

    echo "*** System information ***"
    echo "uname -a:"
    echo "    $(uname -a)"

    echo "CPU:"
    name="$(grep -m1 "model name" /proc/cpuinfo | cut -d: -f2 | cut -d' ' -f2-)"
    cores="$(grep -m1 "cpu cores" /proc/cpuinfo | cut -d: -f2 | cut -d' ' -f2-)"
    threads="$(grep -m1 "siblings" /proc/cpuinfo | cut -d: -f2 | cut -d' ' -f2-)"
    echo "    ${name} (${cores} cores, ${threads} threads)"

    if [[ "${USE_CUDA}" == 1 ]]; then
        echo "GPU:"
        echo "    $(nvidia-smi --query-gpu=gpu_name --format=csv,noheader)"
    fi

    echo "Total memory:"
    # Second column of the "Mem:" row is the total, in MiB because of -m.
    echo "    $(free -m | awk '/^Mem:/ { print $2; exit }') MiB"

    # Executing the glibc shared object prints its version banner; skipped
    # where libc doesn't live at this path.
    if [[ -r "${glibc}" ]]; then
        echo "glibc:"
        echo "    $("${glibc}" | head -n1)"
    fi

    echo "Compilers:"
    echo "    GCC: $(g++ --version | head -n1)"
    echo "    Rust: $(rustc --version)"
    if [[ "${USE_CUDA}" == 1 ]]; then
        echo "    nvcc: $(nvcc --version | grep release)"
    fi

    echo "Python:"
    echo "    $(python --version)"

    echo "***"
    echo ""
}

# Benchmark mwa_pb via its Python example script.
run_mwa_pb_python () {
    echo "*** mwa_pb Python results ***"
    run_and_measure_memory ./mwa_pb_example.py
    echo "***"
    echo ""
}

# Benchmark EveryBeam via its Python example script.
run_everybeam_python () {
    echo "*** EveryBeam Python results ***"
    run_and_measure_memory ./everybeam_example.py
    echo "***"
    echo ""
}

# Benchmark pyuvdata via its Python example script.
run_pyuvdata_python () {
    echo "*** pyuvdata Python results ***"
    run_and_measure_memory ./pyuvdata_example.py
    echo "***"
    echo ""
}

# Benchmark the hyperbeam Python bindings: once with RAYON_NUM_THREADS=1, then
# with the variable unset (adding the "cuda" argument when USE_CUDA is 1).
run_hyperbeam_python () {
    echo "*** hyperbeam Python results with 1 CPU core ***"
    export RAYON_NUM_THREADS=1
    run_and_measure_memory ./hyperbeam_example.py
    unset RAYON_NUM_THREADS

    echo ""
    echo "*** hyperbeam Python results with all CPU cores ***"
    if [[ "${USE_CUDA}" == 1 ]]; then
        run_and_measure_memory ./hyperbeam_example.py cuda
    else
        run_and_measure_memory ./hyperbeam_example.py
    fi

    echo "***"
    echo ""
}

# Build the EveryBeam C++ example with make and benchmark it against ${MS}.
# Must be called from the everybeam/ directory.
run_everybeam_cpp () {
    echo "*** Compiling EveryBeam C++ example ***"
    make
    echo ""
    echo "*** EveryBeam C++ results with 1 CPU core ***"
    run_and_measure_memory ./everybeam_example "${MS}"
    echo "***"
    echo ""
}

# Build the hyperbeam Rust benchmarks (with the cuda and gpu-single features
# when USE_CUDA is 1) and run them with RAYON_NUM_THREADS=1, with the variable
# unset, and finally the CUDA binary when enabled. Must be called from the
# hyperbeam/ directory.
run_hyperbeam_rust () {
    echo "*** Compiling hyperbeam Rust code ***"
    if [[ "${USE_CUDA}" == 1 ]]; then
        cargo build --release --features=cuda,gpu-single
    else
        cargo build --release
    fi
    echo ""

    echo "*** hyperbeam Rust results with 1 CPU core ***"
    export RAYON_NUM_THREADS=1
    run_and_measure_memory ./target/release/hyperbeam
    unset RAYON_NUM_THREADS
    echo ""

    echo "*** hyperbeam Rust results with all CPU cores ***"
    run_and_measure_memory ./target/release/hyperbeam

    if [[ "${USE_CUDA}" == 1 ]]; then
        echo ""
        echo "*** hyperbeam Rust results with CUDA ***"
        run_and_measure_memory ./target/release/hyperbeam-cuda
    fi

    echo "***"
    echo ""
}

print_sys_info
run_mwa_pb_python
# run_everybeam_python # MWA not supported
run_pyuvdata_python
run_hyperbeam_python
cd everybeam
run_everybeam_cpp
cd ../hyperbeam
run_hyperbeam_rust