diff --git a/.gitignore b/.gitignore index a68b971cdb5e..ba8dc4a2f44e 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ # Explicit files to ignore (only matches one). #==============================================================================# # Various tag programs +tags /tags /TAGS /GPATH @@ -73,3 +74,13 @@ pythonenv* # automodapi puts generated documentation files here. /lldb/docs/python_api/ mlir_opt_helper.txt +mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/Output/* +mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/Output/* +mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/Output/* +mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Output/* +# csv files +*.csv +/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/LClanglogs +/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HClanglogs +/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ServerExeLogs +/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ServerExeLogs diff --git a/LICENSE.TXT b/LICENSE.TXT index fa6ac5400070..3901fa02e64f 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,3 +1,219 @@ +============================================================================== +Dual License +============================================================================== + +The DSP-MLIR project is dual-licensed under: + +1. The DSP-MLIR Apache License, Version 2.0 +2. The standard LLVM Project License: Apache License v2.0 with LLVM Exceptions +============================================================================== +The DSP-MLIR Apache License, Version 2.0: + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + ============================================================================== The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== diff --git a/README.md b/README.md index a9b29ecbc1a3..bba0b2efcf8c 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,47 @@ -# The LLVM Compiler Infrastructure +# DSP-MLIR Compiler -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/llvm/llvm-project/badge)](https://securityscorecards.dev/viewer/?uri=github.com/llvm/llvm-project) -[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8273/badge)](https://www.bestpractices.dev/projects/8273) -[![libc++](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml/badge.svg?branch=main&event=schedule)](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml?query=event%3Aschedule) +This repository contains the source code for **DSP-MLIR**, a compiler tailored for Digital Signal Processing (DSP) applications. It provides highly optimized tools and environments for building, optimizing, and running DSP operations like Fast Fourier Transforms (FFT), Finite Impulse Response (FIR) filters, and more. -Welcome to the LLVM project! +The project is built on top of the **LLVM** infrastructure and leverages the **MLIR** (Multi-Level Intermediate Representation) framework for implementing DSP-specific operations and transformations. -This repository contains the source code for LLVM, a toolkit for the -construction of highly optimized compilers, optimizers, and run-time -environments. -The LLVM project has multiple components. The core of the project is -itself called "LLVM". This contains all of the tools, libraries, and header -files needed to process intermediate representations and convert them into -object files. Tools include an assembler, disassembler, bitcode analyzer, and -bitcode optimizer. 
-C-like languages use the [Clang](https://clang.llvm.org/) frontend. This -component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode --- and from there into object files, using LLVM. -Other components include: -the [libc++ C++ standard library](https://libcxx.llvm.org), -the [LLD linker](https://lld.llvm.org), and more. +## Build Instructions -## Getting the Source Code and Building LLVM +To build the DSP-MLIR compiler, follow these steps: -Consult the -[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) -page for information on building and running LLVM. +### Step 1: Clone this repository and cd into the DSP-MLIR folder. -For information on how to contribute to the LLVM project, please take a look at -the [Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. -## Getting in touch +### Step 2: Create the build directory and cd into it using the following commands: -Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord -chat](https://discord.gg/xS7Z362), -[LLVM Office Hours](https://llvm.org/docs/GettingInvolved.html#office-hours) or -[Regular sync-ups](https://llvm.org/docs/GettingInvolved.html#online-sync-ups). +```bash +mkdir build +cd build + +``` +### Step 3: Configure the build with CMake by running the following command: +```bash +cmake -G Ninja ../llvm \ + -DLLVM_ENABLE_PROJECTS=mlir \ + -DLLVM_BUILD_EXAMPLES=ON \ + -DLLVM_TARGETS_TO_BUILD="Native" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON +``` + +### Step 4: After configuring the build, compile the project by running: +```bash +ninja +``` + +## Running an Example + +After the build completes, you can run an example to test the DSP operations. 
From the build directory: + +```bash +ninja && ./bin/dsp1 ../mlir/test/Examples/DspExample/dsp_gain_op.py -emit=mlir-affine +ninja && ./bin/dsp1 ../mlir/test/Examples/DspExample/dsp_gain_op.py -emit=jit +``` -The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for -participants to all modes of communication within the project. diff --git a/matmul_test/dsp_matmul.py b/matmul_test/dsp_matmul.py new file mode 100644 index 000000000000..0c866fd2dc4c --- /dev/null +++ b/matmul_test/dsp_matmul.py @@ -0,0 +1,12 @@ +def main() { + var x = [[1.0, 2.0], [4.0, 5.0]]; + var y = [[1.0, 2.0], [4.0, 5.0]]; + var z = matmul(x, y); + print(z); + + + var x2 = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]; + var y2 = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]; + var z2 = matmul(x2, y2); + print(z2); +} diff --git a/mlir/.gitignore b/mlir/.gitignore new file mode 100644 index 000000000000..d61db156e85f --- /dev/null +++ b/mlir/.gitignore @@ -0,0 +1,2 @@ +/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/logs +/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab_Functions diff --git a/mlir/examples/dsp/CMakeLists.txt b/mlir/examples/dsp/CMakeLists.txt index 9d2fc3fb3b85..10092b7c3658 100644 --- a/mlir/examples/dsp/CMakeLists.txt +++ b/mlir/examples/dsp/CMakeLists.txt @@ -1,11 +1,11 @@ -add_custom_target(Dsp) -set_target_properties(Dsp PROPERTIES FOLDER Examples) +# add_custom_target(Dsp) +# set_target_properties(Dsp PROPERTIES FOLDER Examples) -macro(add_dsp_chapter name) - add_dependencies(Dsp ${name}) - add_llvm_example(${name} ${ARGN}) -endmacro(add_dsp_chapter name) +# macro(add_dsp_chapter name) +# add_dependencies(Dsp ${name}) +# add_llvm_example(${name} ${ARGN}) +# endmacro(add_dsp_chapter name) -add_subdirectory(SimpleBlocks) +# add_subdirectory(SimpleBlocks) diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c
new file mode 100644
index 000000000000..e2361143850d
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c
@@ -0,0 +1,111 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define INPUT_LENGTH 101
+#define PI M_PI
+#define FS 8000
+#define FC1 500
+#define FC2 600
+#define FC3 1000
+#define FC4 1200
+
+/* Allocates `length` doubles; on failure reports `errorMessage` via perror()
+ * and exits, so callers never receive NULL. */
+static double *allocDoubles(int length, const char *errorMessage) {
+  double *buffer = malloc(length * sizeof(double));
+  if (!buffer) {
+    perror(errorMessage);
+    exit(EXIT_FAILURE);
+  }
+  return buffer;
+}
+
+/* Returns a newly allocated Hamming window of `length` samples,
+ * w[i] = 0.54 - 0.46 * cos(2 * pi * i / (length - 1)); the caller frees it.
+ * NOTE(review): length == 1 evaluates 0/0 (NaN); main passes INPUT_LENGTH. */
+double *hamming(int length) {
+  double *window = allocDoubles(length, "Memory allocation failed in hamming");
+  for (int i = 0; i < length; i++) {
+    window[i] = 0.54 - 0.46 * cos(2 * PI * i / (length - 1));
+  }
+  return window;
+}
+
+/* Returns newly allocated taps of a high-pass FIR filter with cutoff `wc`
+ * (radians per sample), centred on tap (length - 1) / 2; the caller frees
+ * them. The centre tap is special-cased because the general term is 0/0. */
+double *highPassFIRFilter(double wc, int length) {
+  double *filter =
+      allocDoubles(length, "Memory allocation failed in highPassFIRFilter");
+  int mid = (length - 1) / 2;
+  for (int n = 0; n < length; n++) {
+    if (n == mid) {
+      filter[n] = 1 - (wc / PI);
+    } else {
+      filter[n] = -sin(wc * (n - mid)) / (PI * (n - mid));
+    }
+  }
+  return filter;
+}
+
+/* Stores array1[i] * array2[i] in output[i] for i in [0, length). */
+void elementWiseMultiplication(double *output, const double *array1, const double *array2, int length) {
+  for (int i = 0; i < length; i++) {
+    output[i] = array1[i] * array2[i];
+  }
+}
+
+/* Returns array[index]; no bounds checking is performed. */
+double getElemAtIndx(const double *array, int index) {
+  return array[index];
+}
+
+/* Builds four Hamming-windowed high-pass filters (cutoffs FC1..FC4 at sample
+ * rate FS) and prints one tap from each of the first three. */
+int main() {
+  double wc1 = 2 * PI * FC1 / FS;
+  double wc2 = 2 * PI * FC2 / FS;
+  double wc3 = 2 * PI * FC3 / FS;
+  double wc4 = 2 * PI * FC4 / FS;
+
+  double *hamming_window = hamming(INPUT_LENGTH);
+
+  /* Output buffers go through allocDoubles so that an allocation failure at
+   * large INPUT_LENGTH exits cleanly instead of writing through NULL. */
+  double *hpf1 = highPassFIRFilter(wc1, INPUT_LENGTH);
+  double *hpf_w1 = allocDoubles(INPUT_LENGTH, "Memory allocation failed in main");
+  elementWiseMultiplication(hpf_w1, hpf1, hamming_window, INPUT_LENGTH);
+
+  double *hpf2 = highPassFIRFilter(wc2, INPUT_LENGTH);
+  double *hpf_w2 = allocDoubles(INPUT_LENGTH, "Memory allocation failed in main");
+  elementWiseMultiplication(hpf_w2, hpf2, hamming_window, INPUT_LENGTH);
+
+  double *hpf3 = highPassFIRFilter(wc3, INPUT_LENGTH);
+  double *hpf_w3 = allocDoubles(INPUT_LENGTH, "Memory allocation failed in main");
+  elementWiseMultiplication(hpf_w3, hpf3, hamming_window, INPUT_LENGTH);
+
+  double *hpf4 = highPassFIRFilter(wc4, INPUT_LENGTH);
+  double *hpf_w4 = allocDoubles(INPUT_LENGTH, "Memory allocation failed in main");
+  elementWiseMultiplication(hpf_w4, hpf4, hamming_window, INPUT_LENGTH);
+
+  double final1 = getElemAtIndx(hpf_w1, 6);
+  double final2 = getElemAtIndx(hpf_w2, 7);
+  double final3 = getElemAtIndx(hpf_w3, 8);
+
+  printf("%f\n", final1);
+  printf("%f\n", final2);
+  printf("%f\n", final3);
+
+  free(hamming_window);
+  free(hpf1);
+  free(hpf2);
+  free(hpf3);
+  free(hpf4);
+  free(hpf_w1);
+  free(hpf_w2);
+  free(hpf_w3);
+  free(hpf_w4);
+
+  return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py
new file mode 100644
index 000000000000..9cd5fc3e02a7
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py
@@ -0,0 +1,223 @@
+import os
+import subprocess
+import time
+import sys
+import math
+
+# Benchmarks one C application from this directory at several input sizes.
+# Usage : python ResultScript.py APP.c [OUTPUT_SUBDIR]   (3.11 validated)
+# Output: tab-separated average execution times, one row per input size and
+#         one column per compiler case.
+#
+# For every input size the script rewrites the "#define INPUT_LENGTH" line of
+# the C file, compiles it with each compiler in `cases`, and times the binary
+# NoOfIterations times. The C file's original text is restored on exit.
+
+if len(sys.argv) < 2:
+    sys.exit("usage: python ResultScript.py APP.c [OUTPUT_SUBDIR]")
+
+# Path to the input file
+input_file_name = sys.argv[1]
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/"
+input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name
+
+print(f"Running Application {input_file_path}")
+# Construct full output path. The sub-directory argument is optional, so check
+# the argument count instead of indexing sys.argv[2] (IndexError when absent).
+if len(sys.argv) > 2 and sys.argv[2]:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2])
+else:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+# Create the Output folder if it doesn't exist yet
+os.makedirs(OutputPath, exist_ok=True)
+
+# Now OutputPath is ready for use
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+
+# ************ Don't change unless required
+# Ladder of input sizes shared by the per-application ranges below
+# (label printed in the results table -> INPUT_LENGTH value).
+SIZE_LADDER = {
+    "10": 10,
+    "100": 100,
+    "1K": 1000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+    "10M": 10000000,
+    "20M": 20000000,
+    "30M": 30000000,
+    "40M": 40000000,
+    "50M": 50000000,
+    "100M": 100000000,
+}
+
+# Sizes used for applications that have no entry in APP_SIZE_RANGES.
+DEFAULT_SIZES = {
+    "10": 10,
+    "100": 100,
+    "500": 500,
+    "1K": 1000,
+    "2K": 2000,
+    "5K": 5000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+}
+
+# First and last SIZE_LADDER label (inclusive) benchmarked per application.
+APP_SIZE_RANGES = {
+    "noiseCancellation.c": ("10", "100M"),
+    "echoCancellation.c": ("10", "100M"),
+    "periodogram.c": ("10", "20K"),
+    "lowPassFiltering.c": ("10", "100M"),
+    "hearingAid.c": ("10", "100M"),
+    "FIRFilterDesign.c": ("100", "100M"),
+    "spectralAnalysis.c": ("10", "30K"),
+    "audioEqualization.c": ("10", "100M"),
+    "audioCompression.c": ("10", "40K"),
+    "vibrationAnalysis.c": ("10", "50K"),
+    "underWaterCommunication.c": ("100", "100M"),
+    "voiceActivityDetection.c": ("10", "100M"),
+    "signalSmoothing.c": ("100", "100M"),
+    "targetDetection.c": ("10", "100M"),
+    "biomedicalSignalProcessing.c": ("10", "100M"),
+    "digitalModulation.c": ("100", "100M"),
+    "spaceCommunication.c": ("100", "100M"),
+    "radarSignalProcessing.c": ("10", "100M"),
+    "dtmfDetection.c": ("100", "50K"),
+    "speakerIdentification.c": ("100", "1M"),
+}
+
+
+def select_input_values(app_name):
+    """Return the ordered {label: INPUT_LENGTH} table for `app_name`."""
+    if app_name not in APP_SIZE_RANGES:
+        return dict(DEFAULT_SIZES)
+    first, last = APP_SIZE_RANGES[app_name]
+    labels = list(SIZE_LADDER)
+    selected = labels[labels.index(first) : labels.index(last) + 1]
+    return {label: SIZE_LADDER[label] for label in selected}
+
+
+inputValues = select_input_values(sys.argv[1])
+
+NoOfIterations = 3
+
+# Define the cases: one entry per compiler and the executable it produces
+cases = [
+    {
+        "gcc": True,
+        "clang": False,
+        "exe": "fileGCCOptExe",
+    },
+    {
+        "clang": True,
+        "gcc": False,
+        "exe": "fileClangOptExe",
+    },
+]
+
+
+def compile_case(case, exe_path):
+    """Compile the input file for `case`; return True when the build succeeds."""
+    if case["clang"]:
+        compiler = os.path.join(BasePathForLLVM, "build", "bin", "clang-19")
+        command = [compiler, "-O3", input_file_path, "-o", exe_path, "-lm"]
+    else:
+        command = ["gcc", "-O3", "-o", exe_path, input_file_path, "-lm"]
+    try:
+        result = subprocess.run(command, capture_output=True, text=True)
+    except OSError as exc:
+        # The compiler itself could not be started (missing or not executable).
+        print(exc, file=sys.stderr)
+        return False
+    if result.returncode != 0:
+        print(result.stderr, file=sys.stderr)
+    return result.returncode == 0
+
+
+def drop_page_cache():
+    """Ask the kernel to drop its caches before a timed run (best effort)."""
+    try:
+        subprocess.run(
+            ["sudo", "sh", "-c", "sync; echo 3 > /proc/sys/vm/drop_caches"],
+            check=True,
+        )
+    except (subprocess.CalledProcessError, OSError) as exc:
+        # A failed flush must not abort the benchmark; report it and carry on.
+        print(exc, file=sys.stderr)
+
+
+def time_executable(exe_path):
+    """Run `exe_path` pinned to CPU 0 and return the elapsed seconds."""
+    start_time = time.perf_counter()
+    try:
+        subprocess.run(
+            ["taskset", "-c", "0", exe_path],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            check=True,
+        )
+    except subprocess.CalledProcessError as exc:
+        print(
+            f"Process failed because did not return a successful return code. "
+            f"Returned {exc.returncode}\n{exc}"
+        )
+    except OSError as exc:
+        # taskset or the executable could not be started at all.
+        print(exc, file=sys.stderr)
+    return time.perf_counter() - start_time
+
+
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+
+print("", end="\t")
+
+for case in cases:
+    print(f"{case['exe']}", end="\t")
+
+try:
+    for key, value in inputValues.items():
+        print("\n{}".format(key), end="\t")
+        if sys.argv[1] == "speakerIdentification.c":
+            input_length = math.floor(value / 8.192)
+        else:
+            input_length = value
+        # Update the INPUT_LENGTH definition in the file
+        with open(input_file_path, "w") as file:
+            for line in lines:
+                if line.strip().startswith("#define INPUT_LENGTH"):
+                    file.write(f"#define INPUT_LENGTH {input_length}\n")
+                else:
+                    file.write(line)
+
+        for case in cases:
+            exe_path = os.path.join(OutputPath, case["exe"])
+            if not compile_case(case, exe_path):
+                # Never time a stale or missing binary; flag the cell instead.
+                print("compile-failed", end="\t")
+                continue
+
+            sum_exe_time = 0
+            for _ in range(NoOfIterations):
+                drop_page_cache()
+                sum_exe_time += time_executable(exe_path)
+            avg_exe_time = sum_exe_time / NoOfIterations
+            print("{}".format(avg_exe_time), end="\t")
+finally:
+    # Put the original source text back, even if a size failed midway.
+    with open(input_file_path, "w") as file:
+        file.writelines(lines)
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py
new file mode 100644
index 000000000000..d9a30f95a8b5
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py
@@ -0,0 +1,56 @@
+import subprocess
+import os
+import sys
+
+# Runs ResultScript.py once per benchmark application, echoing each run's
+# output to the console and saving it to ServerExeLogs/APP.log.
+
+# Ensure the log directory exists
+log_dir = "ServerExeLogs"
+os.makedirs(log_dir, exist_ok=True)
+
+# Corrected list of application names (without non-application entries)
+app_names = [
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection",
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication"
+]
+
+# Loop through each application and execute the script
+for app_name in app_names:
+    app_script = f"{app_name}.c"
+    log_file = os.path.join(log_dir, f"{app_name}.log")
+    with open(log_file, "w") as log:
+        # sys.executable keeps the child on the interpreter running this script.
+        command = [sys.executable, "ResultScript.py", app_script, app_name]
+        print(f"Running command: {' '.join(command)}")
+        # stderr is folded into stdout: draining two pipes one after the other
+        # can deadlock once the child fills the pipe that is not being read.
+        process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        )
+        for line in process.stdout:
+            print(line, end="")
+            log.write(line)
+        if process.wait() != 0:
+            message = f"\n{app_script} exited with code {process.returncode}\n"
+            print(message, end="")
+            log.write(message)
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioCompression.c
// Build an arithmetic sequence: output[i] = start + i * increment for
// i in [0, noOfSamples).
//
// Returns a heap buffer that the caller must release with free(). A request
// for zero (or fewer) samples returns a valid pointer with no usable
// elements: at least one element is always requested from malloc, because
// malloc(0) is allowed to return NULL and would otherwise be misreported as
// an out-of-memory condition. Terminates the process if allocation fails.
double *getRangeOfVector(double start, int noOfSamples, double increment) {
  size_t count = (noOfSamples > 0) ? (size_t)noOfSamples : 0;

  double *output = (double *)malloc((count ? count : 1) * sizeof(double));
  if (!output) {
    perror("Memory allocation failed in getRangeOfVector");
    exit(EXIT_FAILURE);
  }

  for (size_t i = 0; i < count; i++) {
    output[i] = start + (double)i * increment;
  }

  return output;
}
input[i] : 0; + } +} + +void quantization(double *output, const double *input, int nlevels, double max, + double min, int length) { + double stepSize = (max - min) / nlevels; + + for (int i = 0; i < length; ++i) { + double level = (input[i] - min) / stepSize; + int roundedLevel = (unsigned int)(level); + output[i] = roundedLevel * stepSize + min; + } +} + +void runLenEncoding(double *output, const double *input, int length) { + int k = 0; // Index for storing encoded values + int count = 1; // Initialize count + + output[k] = input[0]; // First value + int half_len = length / 2; // Output array's second half starts here + + for (int i = 1; i < half_len; i++) { + if (input[i] == input[i - 1]) { + count++; // Increase count if value is the same as previous + } else { + output[k + half_len] = count; // Store count at second half + k++; // Move to next unique value position + output[k] = input[i]; // Store new value + count = 1; // Reset count + } + } + output[k + half_len] = count; // Store count of last element +} + +double getElemAtIndx(const double *rle, int indx) { return rle[indx]; } + +int main() { + double *input = getRangeOfVector(0, INPUT_LENGTH, 1); + + double complex *fft = malloc(INPUT_LENGTH * sizeof(double complex)); + if (!fft) { + perror("Memory allocation failed"); + free(input); + return EXIT_FAILURE; + } + + dft(fft, input, INPUT_LENGTH); + + double *GetThresholdReal = malloc(INPUT_LENGTH * sizeof(double)); + double *GetThresholdImg = malloc(INPUT_LENGTH * sizeof(double)); + if (!GetThresholdReal || !GetThresholdImg) { + perror("Memory allocation failed"); + free(input); + free(fft); + free(GetThresholdReal); + free(GetThresholdImg); + return EXIT_FAILURE; + } + + for (int i = 0; i < INPUT_LENGTH; i++) { + GetThresholdReal[i] = creal(fft[i]); + GetThresholdImg[i] = cimag(fft[i]); + } + + threshold(GetThresholdReal, GetThresholdReal, THRESHOLD_VAL, INPUT_LENGTH); + threshold(GetThresholdImg, GetThresholdImg, THRESHOLD_VAL, INPUT_LENGTH); + + double 
*QuantOutReal = malloc(INPUT_LENGTH * sizeof(double)); + double *QuantOutImg = malloc(INPUT_LENGTH * sizeof(double)); + if (!QuantOutReal || !QuantOutImg) { + perror("Memory allocation failed"); + free(input); + free(fft); + free(GetThresholdReal); + free(GetThresholdImg); + free(QuantOutReal); + free(QuantOutImg); + return EXIT_FAILURE; + } + + quantization(QuantOutReal, GetThresholdReal, NLEVELS, MAX, MIN, INPUT_LENGTH); + quantization(QuantOutImg, GetThresholdImg, NLEVELS, MAX, MIN, INPUT_LENGTH); + + double *rLEOutReal = (double *)malloc(2 * INPUT_LENGTH * sizeof(double)); + double *rLEOutImg = (double *)malloc(2 * INPUT_LENGTH * sizeof(double)); + + runLenEncoding(rLEOutReal, QuantOutReal, INPUT_LENGTH); + runLenEncoding(rLEOutImg, QuantOutImg, INPUT_LENGTH); + + double final1 = getElemAtIndx(rLEOutReal, 0); + double final2 = getElemAtIndx(rLEOutImg, 1); + printf("%f\t", final1); + printf("%f", final2); + + free(input); + free(fft); + free(GetThresholdReal); + free(GetThresholdImg); + free(QuantOutReal); + free(QuantOutImg); + free(rLEOutReal); + free(rLEOutImg); + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c new file mode 100644 index 000000000000..df13d82a8bd5 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c @@ -0,0 +1,229 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 10 +#define FILTER_LENGTH 101 +#define OUTPUT_LENGTH (INPUT_LENGTH + FILTER_LENGTH - 1) + +// Function prototypes +double* getRangeOfVector(double start, int length, double increment); +double* lowPassFIRFilter(double wc, int length); +double* highPassFIRFilter(double wc, int length); +double* hamming(int length); +void elementWiseMultiplication(double* output, const double* array1, const double* array2, int 
length); +void FIRFilterResponse(double* output, const double* input, const double* filter, int inputLength); +void gain(double* output, const double* input, double gainFactor, int length); +void add(double* output, const double* input1, const double* input2, int length); +void sub(double* output, const double* input1, const double* input2, int length); +void printArray(const double* array, int length); +double getElementAtIndex(const double* array, int index); + +// Generate a range of values +double* getRangeOfVector(double start, int length, double increment) { + double* vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } + return vector; +} + +// Generate a Hamming window +double* hamming(int length) { + double* window = malloc(length * sizeof(double)); + if (!window) { + perror("Memory allocation failed in hamming"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + window[i] = 0.54 - 0.46 * cos(2 * PI * i / (length - 1)); + } + return window; +} + +// Generate an ideal low-pass FIR filter +double* lowPassFIRFilter(double wc, int length) { + double* filter = malloc(length * sizeof(double)); + if (!filter) { + perror("Memory allocation failed in lowPassFIRFilter"); + exit(EXIT_FAILURE); + } + + int mid = (length - 1) / 2; + for (int n = 0; n < length; n++) { + if (n == mid) { + filter[n] = wc / PI; + } else { + filter[n] = sin(wc * (n - mid)) / (PI * (n - mid)); + } + } + return filter; +} + +// Generate an ideal high-pass FIR filter +double* highPassFIRFilter(double wc, int length) { + double* lpf = lowPassFIRFilter(wc, length); + double* hpf = malloc(length * sizeof(double)); + if (!hpf) { + perror("Memory allocation failed in highPassFIRFilter"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + hpf[i] = -lpf[i]; + } + int mid = (length - 1) / 2; + hpf[mid] += 
1.0; + free(lpf); + return hpf; +} + +// Perform element-wise multiplication +void elementWiseMultiplication(double* output, const double* array1, const double* array2, int length) { + for (int i = 0; i < length; i++) { + output[i] = array1[i] * array2[i]; + } +} + +void FIRFilterResponse(double* output, const double* input, const double* filter, int inputLength) { + int conv_length = inputLength + FILTER_LENGTH - 1; + + // Initialize output to zero + for (int n = 0; n < conv_length; n++) { + output[n] = 0; + } + + // Perform convolution + for (int n = 0; n < conv_length; n++) { + for (int k = 0; k < FILTER_LENGTH; k++) { + if (n - k >= 0 && n - k < inputLength) { + output[n] += input[n - k] * filter[k]; + } + } + } +} + +// Apply gain to a signal +void gain(double* output, const double* input, double gainFactor, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * gainFactor; + } +} + +// Perform element-wise addition +void add(double* output, const double* input1, const double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +// Perform element-wise subtraction +void sub(double* output, const double* input1, const double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] - input2[i]; + } +} + +// Print an array +void printArray(const double* array, int length) { + for (int i = 0; i < length; i++) { + printf("%f ", array[i]); + } + printf("\n"); +} + +// Get element at index +double getElementAtIndex(const double* array, int index) { + return array[index]; +} + +int main() { + // Step 1: Generate Input Signal + double* input = getRangeOfVector(0, INPUT_LENGTH, 1); + + // Step 2: Define constants + double pi = PI; + double Fs = 8000; + double gainForBass = 2; + double gainForMid = 1.5; + double gainForTreble = 0.8; + + // Step 3: Low-pass filter + double fc = 300; + double wc = 2 * pi * fc / Fs; + double* lpf = lowPassFIRFilter(wc, FILTER_LENGTH); + 
double* hamming_window = hamming(FILTER_LENGTH); + double* lpf_w = malloc(FILTER_LENGTH * sizeof(double)); + elementWiseMultiplication(lpf_w, lpf, hamming_window, FILTER_LENGTH); + + double* FIRfilterResponseForLpf = malloc(OUTPUT_LENGTH * sizeof(double)); + FIRFilterResponse(FIRfilterResponseForLpf, input, lpf_w, INPUT_LENGTH); + + double* gainWithLpf = malloc(OUTPUT_LENGTH * sizeof(double)); + gain(gainWithLpf, FIRfilterResponseForLpf, gainForBass, OUTPUT_LENGTH); + + // Step 4: High-pass filter + double fc2 = 1500; + double wc2 = 2 * pi * fc2 / Fs; + double* hpf = highPassFIRFilter(wc2, FILTER_LENGTH); + double* hpf_w = malloc(FILTER_LENGTH * sizeof(double)); + elementWiseMultiplication(hpf_w, hpf, hamming_window, FILTER_LENGTH); + + double* FIRfilterResponseForHpf = malloc(OUTPUT_LENGTH * sizeof(double)); + FIRFilterResponse(FIRfilterResponseForHpf, input, hpf_w, INPUT_LENGTH); + + double* gainWithHpf = malloc(OUTPUT_LENGTH * sizeof(double)); + gain(gainWithHpf, FIRfilterResponseForHpf, gainForTreble, OUTPUT_LENGTH); + + // Step 5: Band-pass filter + double* lpf2 = lowPassFIRFilter(wc2, FILTER_LENGTH); + double* lpf2_w = malloc(FILTER_LENGTH * sizeof(double)); + elementWiseMultiplication(lpf2_w, lpf2, hamming_window, FILTER_LENGTH); + + double* bpf_w = malloc(FILTER_LENGTH * sizeof(double)); + sub(bpf_w, lpf2_w, lpf_w, FILTER_LENGTH); + + double* FIRfilterResponseForBpf = malloc(OUTPUT_LENGTH * sizeof(double)); + FIRFilterResponse(FIRfilterResponseForBpf, input, bpf_w, INPUT_LENGTH); + + + + // Apply gain to Band-pass filter response + double* gainWithBpf = malloc(OUTPUT_LENGTH * sizeof(double)); + gain(gainWithBpf, FIRfilterResponseForBpf, gainForTreble, OUTPUT_LENGTH); + + // Compute final audio by summing all filter responses + double* final_audio = malloc(OUTPUT_LENGTH * sizeof(double)); + add(final_audio, gainWithLpf, gainWithHpf, OUTPUT_LENGTH); + add(final_audio, final_audio, gainWithBpf, OUTPUT_LENGTH); + + + // Extract and print the element at index 3 
+ double final1 = getElementAtIndex(final_audio, 3); + printf("%f\n", final1); + + // Free allocated memory + free(input); + free(lpf); + free(hamming_window); + free(lpf_w); + free(FIRfilterResponseForLpf); + free(gainWithLpf); + free(hpf); + free(hpf_w); + free(FIRfilterResponseForHpf); + free(gainWithHpf); + free(lpf2); + free(lpf2_w); + free(bpf_w); + free(FIRfilterResponseForBpf); + free(gainWithBpf); + free(final_audio); + + return 0; +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c new file mode 100644 index 000000000000..7606d2796e61 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c @@ -0,0 +1,250 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define FS 8000 +#define INPUT_LENGTH 2000 +#define FILTER_SIZE 101 +#define MAX_PEAKS 950 + +// Function prototypes +void getRangeOfVector(double* vector, double start, int length, double increment); +void gain(double* output, double* input, double multiplier, int length); +void sine(double* output, double* input, int length); +void add(double* output, double* input1, double* input2, int length); +void sub(double* output, double* input1, double* input2, int length); +void lowPassFIRFilter(double* lpf, double wc, int N); +void hamming(double* window, int length); +void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length); +double max_signal(double* signal, int length); +void find_peaks(double* peaks, double* input, int length, double height, int distance); +void diff(double* output, double* input, int length); +double mean(double* input, int length); + +int main() { + double fc1 = 1000, fc2 = 7500; + int N = FILTER_SIZE, distance = 950; + + double* input = (double*)malloc(INPUT_LENGTH * sizeof(double)); + 
getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125); + + double f_sig = 500; + double getMultiplier = 2 * PI * f_sig; + double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double)); + gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + sine(clean_sig, getSinDuration, INPUT_LENGTH); + + double f_noise = 3000; + double* getNoiseSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double)); + gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH); + + double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double)); + sine(noise, getNoiseSinDuration, INPUT_LENGTH); + + double* noise1 = (double*)malloc(INPUT_LENGTH * sizeof(double)); + gain(noise1, noise, 0.5, INPUT_LENGTH); + + double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + add(noisy_sig, clean_sig, noise1, INPUT_LENGTH); + + // FIR Bandpass Filter + double wc1 = 2 * PI * fc1 / FS; + double wc2 = 2 * PI * fc2 / FS; + + double* lpf1 = (double*)malloc(N * sizeof(double)); + double* lpf2 = (double*)malloc(N * sizeof(double)); + lowPassFIRFilter(lpf1, wc1, N); + lowPassFIRFilter(lpf2, wc2, N); + + double hamming_window[FILTER_SIZE]; + hamming(hamming_window, FILTER_SIZE); + + double* lpf1_w = (double*)malloc(N * sizeof(double)); + double* lpf2_w = (double*)malloc(N * sizeof(double)); + + for (int i = 0; i < N; i++) { + lpf1_w[i] = lpf1[i] * hamming_window[i]; + lpf2_w[i] = lpf2[i] * hamming_window[i]; + } + + double* bpf_w = (double*)malloc(N * sizeof(double)); + sub(bpf_w, lpf2_w, lpf1_w, N); + + int conv_length = INPUT_LENGTH + N - 1; + double* FIRfilterResponseForBpf = (double*)malloc(conv_length * sizeof(double)); + FIRFilterResponse(FIRfilterResponseForBpf, noisy_sig, bpf_w, INPUT_LENGTH, N); + + double max_val = max_signal(FIRfilterResponseForBpf, conv_length); + double height = 0.3 * max_val; + + double* r_peaks = (double*)malloc(MAX_PEAKS * sizeof(double)); + find_peaks(r_peaks, 
FIRfilterResponseForBpf, conv_length, height, distance); + // Get peak count from last index of r_peaks array (converted to double) + double len_r_peaks = (double)r_peaks[MAX_PEAKS - 1]; // Number of detected peaks as double + double last_peaks_index = len_r_peaks - 1.0; // Last peak index as double + + double* diff_val = (double*)malloc((int)len_r_peaks * sizeof(double)); + diff(diff_val, r_peaks, (int)len_r_peaks); + + +// Compute peaks_count - 1 +double peaks_count_minus_one = len_r_peaks - 1.0; + +// Compute mean of peak differences +double diff_mean = mean(diff_val, (int)peaks_count_minus_one); + +// Compute heart rate +double avg_hr = (60.0 * FS) / diff_mean; + +printf("%f", avg_hr); + + + // Free memory + free(diff_val); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + free(lpf1); + free(lpf2); + free(lpf1_w); + free(lpf2_w); + free(bpf_w); + free(FIRfilterResponseForBpf); + free(r_peaks); + + return 0; +} + +// Function implementations +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +void gain(double* output, double* input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +void sine(double* output, double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +void add(double* output, double* input1, double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +void sub(double* output, double* input1, double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] - input2[i]; + } +} + +void hamming(double* window, int length) { + for (int i = 0; i < length; i++) { + window[i] = 0.54 - 0.46 * cos(2 * PI * i / (length - 1)); + } +} + +void lowPassFIRFilter(double* 
// Full linear convolution of `input` with the FIR taps in `filter`:
//   output[n] = sum over k of input[n - k] * filter[k]
// for n in [0, input_length + filter_length - 1).
//
// `output` must have room for input_length + filter_length - 1 elements and
// must not overlap `input` or `filter`.
//
// Only taps with 0 <= n - k < input_length contribute to output[n]. That
// range is computed once per output sample and used as the loop limits, so
// the inner loop no longer tests every tap. Taps are still accumulated in
// ascending k, which keeps the results identical to the tap-by-tap test.
void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length) {
    int conv_length = input_length + filter_length - 1;

    for (int n = 0; n < conv_length; n++) {
        // Smallest k with n - k < input_length, largest k with n - k >= 0.
        int first_tap = (n >= input_length) ? n - input_length + 1 : 0;
        int last_tap = (n < filter_length) ? n : filter_length - 1;

        double acc = 0;
        for (int k = first_tap; k <= last_tap; k++) {
            acc += input[n - k] * filter[k];
        }
        output[n] = acc;
    }
}
{ + sum += input[i]; + } + return sum / length; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c new file mode 100644 index 000000000000..a464f139cac2 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 100 + +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +void gain(double* output, const double* input, double gainFactor, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * gainFactor; + } +} + +void thresholdUp(double* output, const double* input, double threshold, double low_value) { + for (int i = 0; i < INPUT_LENGTH; i++) { + output[i] = (input[i] >= threshold) ? 
1.0 : low_value; + } +} + +void qam_modulate_real(double* symbols_real, double* binary_sig) { + for (int i = 0; i < INPUT_LENGTH; i += 2) { + double bit1 = binary_sig[i]; + double bit2 = binary_sig[i + 1]; + + if (bit1 == 0.0 && bit2 == 0.0) + symbols_real[i / 2] = -1.0; + else if (bit1 == 0.0 && bit2 == 1.0) + symbols_real[i / 2] = -1.0; + else if (bit1 == 1.0 && bit2 == 0.0) + symbols_real[i / 2] = 1.0; + else if (bit1 == 1.0 && bit2 == 1.0) + symbols_real[i / 2] = 1.0; + } +} + +void qam_modulate_imag(double* symbols_imag, double* binary_sig) { + for (int i = 0; i < INPUT_LENGTH; i += 2) { + double bit1 = binary_sig[i]; + double bit2 = binary_sig[i + 1]; + + if (bit1 == 0.0 && bit2 == 0.0) + symbols_imag[i / 2] = -1.0; + else if (bit1 == 0.0 && bit2 == 1.0) + symbols_imag[i / 2] = 1.0; + else if (bit1 == 1.0 && bit2 == 0.0) + symbols_imag[i / 2] = -1.0; + else if (bit1 == 1.0 && bit2 == 1.0) + symbols_imag[i / 2] = 1.0; + } +} + +void qam_demodulate(double* decoded_data, double* symbols_real, double* symbols_imag) { + for (int i = 0; i < INPUT_LENGTH / 2; i++) { + double real = symbols_real[i]; + double imag = symbols_imag[i]; + + if (real == -1.0 && imag == -1.0) { + decoded_data[2 * i] = 0.0; + decoded_data[2 * i + 1] = 0.0; + } else if (real == -1.0 && imag == 1.0) { + decoded_data[2 * i] = 0.0; + decoded_data[2 * i + 1] = 1.0; + } else if (real == 1.0 && imag == -1.0) { + decoded_data[2 * i] = 1.0; + decoded_data[2 * i + 1] = 0.0; + } else if (real == 1.0 && imag == 1.0) { + decoded_data[2 * i] = 1.0; + decoded_data[2 * i + 1] = 1.0; + } + } +} + +int main() { + srand(time(NULL)); // Seed random number generator + + // Step 1: Generate Input Signal + double* input = (double*)malloc(sizeof(double) * INPUT_LENGTH); + getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125); + + // Step 2: Generate clean signal + double f_sig = 500; + double getMultiplier = 2 * PI * f_sig; + double* getSinDuration = (double*)malloc(sizeof(double) * INPUT_LENGTH); + 
gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double* clean_sig = (double*)malloc(sizeof(double) * INPUT_LENGTH); + for (int i = 0; i < INPUT_LENGTH; i++) { + clean_sig[i] = sin(getSinDuration[i]); + } + + // Step 3: Apply thresholdUp to get binary signal (stored in double array) + double* binary_sig = (double*)malloc(sizeof(double) * INPUT_LENGTH); + thresholdUp(binary_sig, clean_sig, 0.4, 0.0); + + // Step 4: Perform QAM modulation + double* modulate_symbol_real = (double*)malloc(sizeof(double) * (INPUT_LENGTH / 2)); + double* modulate_symbol_imag = (double*)malloc(sizeof(double) * (INPUT_LENGTH / 2)); + + qam_modulate_real(modulate_symbol_real, binary_sig); + qam_modulate_imag(modulate_symbol_imag, binary_sig); + + + // Step 5: Perform QAM Demodulation + double* decode_data = (double*)malloc(sizeof(double) * INPUT_LENGTH); + qam_demodulate(decode_data, modulate_symbol_real, modulate_symbol_imag); + + + printf("%f ", decode_data[2]); + + + // Free allocated memory + free(input); + free(getSinDuration); + free(clean_sig); + free(binary_sig); + free(modulate_symbol_real); + free(modulate_symbol_imag); + free(decode_data); + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c new file mode 100644 index 000000000000..a700990ffe56 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c @@ -0,0 +1,148 @@ +#include +#include +#include +#include + +#define M_PI 3.14159265358979323846 +#define INPUT_LENGTH 1000 + +void dftReal(double* real, double* input, int length) { + for (int k = 0; k < length; k++) { + real[k] = 0; + for (int n = 0; n < length; n++) { + double angle = 2 * M_PI * k * n / length; + real[k] += input[n] * cos(angle); + } + } +} + +void dftImag(double* imag, double* input, int length) { + for (int k = 0; k < length; k++) { + imag[k] = 0; + for 
(int n = 0; n < length; n++) { + double angle = 2 * M_PI * k * n / length; + imag[k] -= input[n] * sin(angle); + } + } +} + +void generateDtmf(double* dtmf_tone, int digit, double duration, int fs) { + double freqPairs[10][2] = { + {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477}, + {770, 1209}, {770, 1336}, {770, 1477}, {852, 1209}, + {852, 1336}, {852, 1477} + }; + + double f1 = freqPairs[digit][0]; + double f2 = freqPairs[digit][1]; + int N = fs * duration; + + for (int i = 0; i < N; i++) { + double t = (double)i / fs; + dtmf_tone[i] = 10* sin(2 * M_PI * f1 * t) + sin(2 * M_PI * f2 * t); + } +} + +void findDominantPeaks(double* frequencies, double* magnitudes, int fft_size, double* peaks) { + double max1 = 0.0, max2 = 0.0; + double freq1 = 0.0, freq2 = 0.0; + + for (int i = 0; i < fft_size; i++) { + double currentFreq = frequencies[i]; + double currentMag = magnitudes[i]; + + // Check if frequency is positive + if (currentFreq >= 0.0) { + // Compare current magnitude with max1 + if (currentMag > max1) { + // Update max2 and freq2 with previous max1 and freq1 + max2 = max1; + freq2 = freq1; + // Update max1 and freq1 with current values + max1 = currentMag; + freq1 = currentFreq; + } else if (currentMag > max2) { + // Update max2 and freq2 with current values + max2 = currentMag; + freq2 = currentFreq; + } + } + // No update for negative frequencies + } + + // Compare freq1 and freq2 to determine the order + if (freq1 < freq2) { + peaks[0] = freq1; + peaks[1] = freq2; + } else { + peaks[0] = freq2; + peaks[1] = freq1; + } +} + +// Function to recover the DTMF digit from frequency peaks +int recoverDTMFDigit(double* peaks, const double freqPairs[10][2], int peak_count) { + for (int i = 0; i < 10; i++) { + double f1 = freqPairs[i][0]; + double f2 = freqPairs[i][1]; + + if ((fabs(peaks[0] - f1) < 10 && fabs(peaks[1] - f2) < 10) || + (fabs(peaks[0] - f2) < 10 && fabs(peaks[1] - f1) < 10)) { + return i; // Digit found + } + } + return -1; // No match found +} + + 
+int main() { + int digit = 8; + int fs = 8192; + double duration = (double)INPUT_LENGTH / fs; + int N = fs * duration; + + double* dtmf_tone = (double*)malloc(N * sizeof(double)); + generateDtmf(dtmf_tone, digit, duration, fs); + + double* fft_real = (double*)malloc(N * sizeof(double)); + double* fft_imag = (double*)malloc(N * sizeof(double)); + + dftReal(fft_real, dtmf_tone, N); + dftImag(fft_imag, dtmf_tone, N); + + double* magnitudes = (double*)malloc(N * sizeof(double)); + for (int i = 0; i < N; i++) { + magnitudes[i] = sqrt(fft_real[i] * fft_real[i] + fft_imag[i] * fft_imag[i]); + } + + double* frequencies = (double*)malloc(N * sizeof(double)); + for (int i = 0; i < N; i++) { + magnitudes[i] = sqrt(fft_real[i] * fft_real[i] + fft_imag[i] * fft_imag[i]); + if (i <= N / 2) { + frequencies[i] = (double)i * fs / N; + } else { + frequencies[i] = ((double)i - N) * fs / N; + } + } + + double peaks[2]; + findDominantPeaks(frequencies, magnitudes, N, peaks); + printf("%f %f\t", peaks[0], peaks[1]); + + double freqPairs[10][2] = { + {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477}, + {770, 1209}, {770, 1336}, {770, 1477}, {852, 1209}, + {852, 1336}, {852, 1477} + }; + + double recovered_digit = recoverDTMFDigit(peaks, freqPairs, 10); + printf("%f", recovered_digit); + + free(dtmf_tone); + free(fft_real); + free(fft_imag); + free(magnitudes); + free(frequencies); + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c new file mode 100644 index 000000000000..5836e7d81b4c --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c @@ -0,0 +1,133 @@ +#include +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 1000 + +// Function to generate a range of values +void getRangeOfVector(double* vector, double start, int 
// LMS adaptive filter.
//
// For each sample n the filter output is
//   y = sum over i of w[i] * noisy_sig[n - i]   (taps with n - i >= 0 only)
// the error is e = clean_sig[n] - y, and every active weight is updated by
//   w[i] += mu * e * noisy_sig[n - i].
// output[n] receives y, i.e. the estimate made before the weights are
// updated with sample n. All weights start at zero.
//
// The weight vector is sized from `filterSize`. It used to be a fixed
// 32-element array, which was overrun for any filterSize above 32.
// Terminates the process if the weights cannot be allocated.
//
// output, noisy_sig and clean_sig must each hold `length` elements.
void lmsFilterResponse(double* output, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) {
    size_t taps = (filterSize > 0) ? (size_t)filterSize : 0;
    // calloc zero-fills, which gives the all-zero starting weights. At least
    // one element is requested so a successful call never returns NULL.
    double* w = (double*)calloc(taps ? taps : 1, sizeof(double));
    if (!w) {
        perror("Memory allocation failed in lmsFilterResponse");
        exit(EXIT_FAILURE);
    }

    for (int n = 0; n < length; n++) {
        // Number of taps that have a sample available (n - i >= 0).
        int active = (n + 1 < filterSize) ? n + 1 : filterSize;

        double y = 0;
        for (int i = 0; i < active; i++) {
            y += w[i] * noisy_sig[n - i];
        }

        double e = clean_sig[n] - y;
        for (int i = 0; i < active; i++) {
            w[i] += mu * e * noisy_sig[n - i];
        }

        output[n] = y;
    }

    free(w);
}
main() { + int fs = 8000; + double step = 1.0 / fs; + + // Allocate memory for vectors + double* input = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double* y = (double*)malloc(INPUT_LENGTH * sizeof(double)); + double *normalized_sol = (double *)malloc(INPUT_LENGTH * sizeof(double)); + + // Generate input range + getRangeOfVector(input, 0.0, INPUT_LENGTH, step); + + // Generate clean signal + double f_sig = 500; + gain(getSinDuration, input, 2 * PI * f_sig, INPUT_LENGTH); + + sine(clean_sig, getSinDuration, INPUT_LENGTH); + + // Generate noise signal with a delay of 2 samples + delay(clean_sig, noise, 2, INPUT_LENGTH); + + // Create noisy signal by adding noise to clean signal + add(noisy_sig, clean_sig, noise, INPUT_LENGTH); + + // Apply LMS filter + double mu = 0.01; + int filterSize = 32; + + lmsFilterResponse(y, noisy_sig, clean_sig, mu, filterSize, INPUT_LENGTH); + normalize(normalized_sol, y, INPUT_LENGTH); + + + printf("%f", normalized_sol[5]); + + + // Free allocated memory + free(input); + free(getSinDuration); + free(clean_sig); + free(noise); + free(noisy_sig); + free(y); + free(normalized_sol); + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py new file mode 100644 index 000000000000..8e22146721d0 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py @@ -0,0 +1,149 @@ +import os +import subprocess +import pandas as pd + +# The script does the following +# Input : filename.c +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal 
at the path of the script -- +# Run: python ScriptForCases.c #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "lowPassFIRFilterDesign.c", "noisecancelling.c" , "echocancelling.c", "hearingAid.c", "audioEqualizer.c", "vibrationAnalysis.c", "underWaterCommunication.c", "voiceActivityDetection.c", "signalSmoothing", "targetDetection", "biomedicalSignalProcessing", "periodogram2Conv", "spaceCommunication", "dtmfDetection" +input_files = ["audioCompression.c", "biomedicalSignalProcessing.c", "dtmfDetection.c", "lowPassFIRFilterDesign.c", "noisecancelling.c", \ +"radarSignalProcessing.c", "signalSmoothing.c", "speakerIdentification.c", "targetDetection.c", "vibrationAnalysis.c", "audioEqualizer.c", \ +"digitalModulation.c", "echocancelling.c", "hearingAid.c", "lowPassFull.c", "periodogram2Conv1.c", "spaceCommunication.c", "spectralAnalysis.c", \ +"underWaterCommunication.c", "voiceActivityDetection.c"] +data = [] + +for input_file_path in input_files: + BasePathForLLVM = "/home/local/ASURITE/megan/ForLLVM/" + OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/" + # OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/" + print(f"Running Application {input_file_path}") + # Construct full output path + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + # Check if the Output folder exists, create it if it doesn't + if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + # Now OutputPath is ready for use + print("InputPath:{}".format(BasePathForLLVM)) + print(f"OutputPath: {OutputPath}") + # exit() + + # ************ Don't change unless u required + # Define the values dictionary + inputValues = { + "10": 10, + 
"100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + # "1B": 1000000000 + } + NoOfIterations = 3 + + + # Define the cases + cases = [ + { + "gcc": True, + "clang": False, + "exe": "fileGCCOptExe", + }, + { + "clang": True, + "gcc": False, + "exe": "fileClangOptExe", + }, + ] + + try: + with open(input_file_path, "r") as file: + lines = file.readlines() + except: + continue + + print("", end="\t") + + for case in cases: + print(f"{case['exe']}", end="\t") + + size_test = {"100M": 100000000} + for key, value in size_test.items(): + # Update the specific line in the file + # print("Updating for {}".format(value)) + print("\n{}".format(key), end="\t") + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("#define INPUT_LENGTH"): + updated_line = f"#define INPUT_LENGTH {value}\n" + file.write(updated_line) + else: + file.write(line) + + for case in cases: + + test_size = 0 + gcc_flag = ["O3", "Os"] + clang_flag = ["O3", "Oz"] + if case["gcc"]: + command = f"gcc -{gcc_flag[test_size]} -o {OutputPath}/{case['exe']} {input_file_path} -lm", # -Os + if case["clang"]: + command = f"clang-17 -{clang_flag[test_size]} {input_file_path} -o {OutputPath}/{case['exe']} -lm", # -Oz + + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + command2 = f"size ./Output/{case['exe']}" + + # Execute the command + try: + result = subprocess.run( + command2, + shell=True, + capture_output=True, text=True + ) + + output_parts = result.stdout.splitlines() + if len(output_parts) > 1: + size_data = output_parts[1].split() + + data.append({ + "filename": input_file_path, + # "input size" : key, + "opt": case['exe'], + # "text": size_data[0], + # "data": size_data[1], + # "bss": size_data[2], + # "dec": size_data[3], + # "hex": 
size_data[4], + "total": sum(map(int, size_data[:4])) + }) + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. " + f"Returned {exc.returncode}\n{exc}" + ) + + df = pd.DataFrame(data) + + df.to_csv("codesize.csv", index=False) \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c new file mode 100644 index 000000000000..21657587aec4 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c @@ -0,0 +1,104 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 10 +#define FILTER_LENGTH 32 + +// Function to generate a range of values +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +// Function to apply gain (multiplier) to a signal +void gain(double* output, double* input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +// Function to compute the sine of each element in the input array +void sine(double* output, double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +// Function to add two signals element-wise +void add(double* output, double* input1, double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +// Corrected LMS filter response function +void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) { + double w[FILTER_LENGTH] = {0}; // Initialize weights to zero + for (int n = 0; n < length; n++) { + y[n] = 0; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + y[n] += w[i] * noisy_sig[n - i]; + } + } + double e = 
clean_sig[n] - y[n]; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + w[i] += mu * e * noisy_sig[n - i]; + } + } + + } +} + +int main() { + int fs = 8000; + double step = 1.0 / fs; + + // Allocate memory for vectors + double input[INPUT_LENGTH]; + double getSinDuration[INPUT_LENGTH]; + double clean_sig[INPUT_LENGTH]; + double getNoiseSinDuration[INPUT_LENGTH]; + double noise[INPUT_LENGTH]; + double noise1[INPUT_LENGTH]; + double noisy_sig[INPUT_LENGTH]; + double y[INPUT_LENGTH]; + double sol[INPUT_LENGTH]; + + // Generate input range + getRangeOfVector(input, 0.0, INPUT_LENGTH, step); + + // Generate clean signal + double f_sig = 500; + gain(getSinDuration, input, 2 * PI * f_sig, INPUT_LENGTH); + sine(clean_sig, getSinDuration, INPUT_LENGTH); + + // Generate noise signal with frequency of 3000 Hz + double f_noise = 3000; + gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH); + sine(noise, getNoiseSinDuration, INPUT_LENGTH); + + + gain(noise1, noise, 0.5, INPUT_LENGTH); + + // Create noisy signal by adding noise to clean signal + add(noisy_sig, clean_sig, noise1, INPUT_LENGTH); + + + double mu = 0.01; + lmsFilterResponse(y, noisy_sig, clean_sig, mu, FILTER_LENGTH, INPUT_LENGTH); + + + double G1 = 123; + gain(sol, y, G1, INPUT_LENGTH); + + + printf("%f\n", sol[3]); + + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c new file mode 100644 index 000000000000..3903a26caf13 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c @@ -0,0 +1,71 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define FS 8000 +#define N 101 +#define INPUT_LENGTH 100 +#define FILTER_LENGTH 200 + +void generate_signal(double *signal, double freq, int length) { + for (int i = 0; i < length; i++) { + signal[i] = sin(2 * PI 
* freq * i / FS); + } +} + +void generate_lowpass_filter(double *filter, double cutoff_freq) { + double wc = 2 * PI * cutoff_freq / FS; + for (int i = 0; i < N; i++) { + int n = i - (N / 2); + if (n == 0) { + filter[i] = wc / PI; + } else { + filter[i] = sin(wc * n) / (PI * n); + } + // Apply Hamming window + filter[i] *= (0.54 - 0.46 * cos(2 * PI * i / (N - 1))); + } +} + +void apply_fir_filter(double *input, double *output, double *filter) { + for (int i = 0; i < FILTER_LENGTH; i++) { + double sum = 0.0; + for (int j = 0; j < N; j++) { + if (i - j >= 0 && i - j < INPUT_LENGTH) { + sum += input[i - j] * filter[j]; + } + } + output[i] = sum; + } +} + +int main() { + double clean_signal[INPUT_LENGTH]; + double noise_signal[INPUT_LENGTH]; + double noisy_signal[INPUT_LENGTH]; + double fir_filter[N]; + double filtered_signal[FILTER_LENGTH] = {0}; + + // Generate clean signal with frequency 500Hz + generate_signal(clean_signal, 500, INPUT_LENGTH); + + // Generate noise signal with frequency 3000Hz and scale it + generate_signal(noise_signal, 3000, INPUT_LENGTH); + for (int i = 0; i < INPUT_LENGTH; i++) { + noise_signal[i] *= 0.5; + noisy_signal[i] = clean_signal[i] + noise_signal[i]; + } + + // Design low-pass filter with cutoff frequency 1000Hz + generate_lowpass_filter(fir_filter, 1000); + + // Apply FIR filter + apply_fir_filter(noisy_signal, filtered_signal, fir_filter); + + + printf("%f\n", filtered_signal[6]); + + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c new file mode 100644 index 000000000000..d9fdc28e1324 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include + +#define INPUT_LENGTH 1000 + +void getRangeOfVector(double *vector, double start, int length, + double increment) { + for (int i 
= 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +void gain(double *output, double *input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +void sine(double *output, double *input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +void add(double *output, double *input1, double *input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +void lmsFilterResponse(double *output, double *noisy_sig, double *clean_sig, + double mu, int filterSize, int length) { + double w[32] = {0}; + for (int n = 0; n < length; n++) { + double y = 0; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + y += w[i] * noisy_sig[n - i]; + } + } + double e = clean_sig[n] - y; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + w[i] += mu * e * noisy_sig[n - i]; + } + } + output[n] = y; + } +} + +void normalize(double *output, double *input, int length) { + double min_val = DBL_MAX; + double max_val = -DBL_MAX; + + // Find min and max values + for (int i = 0; i < length; i++) { + if (input[i] < min_val) + min_val = input[i]; + if (input[i] > max_val) + max_val = input[i]; + } + + // Normalize the array + double range = max_val - min_val; + for (int i = 0; i < length; i++) { + output[i] = (input[i] - min_val) / range; + } +} + +int main() { + // Allocate memory dynamically + double *t = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *getSinDuration = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *clean_sig = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *getNoiseSinDuration = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *noise = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *noise1 = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *noisy_sig = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *y = (double 
*)malloc(INPUT_LENGTH * sizeof(double)); + double *sol = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *normalized_sol = (double *)malloc(INPUT_LENGTH * sizeof(double)); + + // Check if memory allocation was successful + if (!t || !getSinDuration || !clean_sig || !getNoiseSinDuration || !noise || + !noise1 || !noisy_sig || !y || !sol || !normalized_sol) { + perror("Memory allocation failed"); + free(t); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + free(y); + free(sol); + free(normalized_sol); + exit(EXIT_FAILURE); + } + + // Signal processing steps + getRangeOfVector(t, 0, INPUT_LENGTH, 0.000125); + + double f_sig = 500; + double pi = 3.14159265359; + gain(getSinDuration, t, 2 * pi * f_sig, INPUT_LENGTH); + + sine(clean_sig, getSinDuration, INPUT_LENGTH); + + double f_noise = 3000; + gain(getNoiseSinDuration, t, 2 * pi * f_noise, INPUT_LENGTH); + + sine(noise, getNoiseSinDuration, INPUT_LENGTH); + + gain(noise1, noise, 0.5, INPUT_LENGTH); + + add(noisy_sig, clean_sig, noise1, INPUT_LENGTH); + + // LMS filter response + lmsFilterResponse(y, noisy_sig, clean_sig, 0.01, 32, INPUT_LENGTH); + + gain(sol, y, 10, INPUT_LENGTH); + normalize(normalized_sol, sol, INPUT_LENGTH); + + printf("%f\n", normalized_sol[5]); + + // Free allocated memory at the end + free(t); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + free(y); + free(sol); + free(normalized_sol); + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c similarity index 52% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c index 
f1b201ccf8d2..6303ddae7b7b 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c @@ -1,9 +1,14 @@ #include #include -void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) { - for (int i = 0; i < NoOfElements; i++) { - input[i] = start + i * Increment; +// Define INPUT_LENGTH globally +#define INPUT_LENGTH 500 +#define PI 3.14159265358 + + +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; } } @@ -14,10 +19,11 @@ void reverseInput(double* output, double* input, int length) { } void FIRFilterResponse(double* output, double* input, double* filter, int length) { - for (int n = 0; n < length; n++) { + int conv_length = 2 * length - 1; + for (int n = 0; n < conv_length; n++) { output[n] = 0; for (int k = 0; k < length; k++) { - if (n - k >= 0) { + if (n - k >= 0 && n - k < length) { output[n] += input[n - k] * filter[k]; } } @@ -28,7 +34,7 @@ void dftReal(double* real, double* input, int length) { for (int k = 0; k < length; k++) { real[k] = 0; for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; + double angle = 2.0 * PI * k * n / length; real[k] += input[n] * cos(angle); } } @@ -38,7 +44,7 @@ void dftImag(double* imag, double* input, int length) { for (int k = 0; k < length; k++) { imag[k] = 0; for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; + double angle = 2.0 * PI * k * n / length; imag[k] -= input[n] * sin(angle); } } @@ -51,27 +57,28 @@ void squareMagnitude(double* output, double* real, double* imag, int length) { } int main() { - int length = 10; - double input[10]; - getRangeOfVector(input, 0, length, 1); + double input[INPUT_LENGTH]; + getRangeOfVector(input, 0.0, INPUT_LENGTH, 1.0); - double reverse_input[10]; - 
reverseInput(reverse_input, input, length); + double reverse_input[INPUT_LENGTH]; + reverseInput(reverse_input, input, INPUT_LENGTH); - double conv1d[10]; - FIRFilterResponse(conv1d, input, reverse_input, length); + int conv_length = 2 * INPUT_LENGTH - 1; + double conv1d[conv_length]; + FIRFilterResponse(conv1d, input, reverse_input, INPUT_LENGTH); - double fft_real[10]; - double fft_img[10]; - dftReal(fft_real, conv1d, length); - dftImag(fft_img, conv1d, length); + double fft_real[conv_length]; + double fft_img[conv_length]; + dftReal(fft_real, conv1d, conv_length); + dftImag(fft_img, conv1d, conv_length); - double sq[10]; - squareMagnitude(sq, fft_real, fft_img, length); + double sq[conv_length]; + squareMagnitude(sq, fft_real, fft_img, conv_length); + + + printf("%f\n", sq[2]); + - for (int i = 0; i < length; i++) { - printf("%f\n", sq[i]); - } return 0; } diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c new file mode 100644 index 000000000000..8b322a2084f8 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c @@ -0,0 +1,309 @@ +#include +#include +#include +#include + +#define PI 3.1415926 +#define INPUT_LENGTH 10 + +// Function prototypes +double* getrangeofvector(double first, int64_t N, double step); +double* beamForm(int antennas, double frequency, double* time, double* weights, int timeDim); +double* abs_array(double* arr, int size); +double* power_profile(double* arr, int size); +double* lowPassFIRFilter(double wc, int N); +double* highPassFIRFilter(double wc, int N); +double* hamming(int N); +double* multiply_arrays(const double* arr1, const double* arr2, int size); +double* subtract_arrays(const double* arr1, const double* arr2, int size); +double* FirFilterResponse(const double *input, int inputLen, const double *filter, int filterLen); + +int 
main() { + // Parameters + int antennas = 4; + double input_fc = 5; + int N = 101; + int input_length = INPUT_LENGTH; + double fc1 = 1000; + double fc2 = 7500; + double Fs = 8000; + + double* input = getrangeofvector(0, input_length, 0.000125); + double* weights = getrangeofvector(-90, 180, 1); + double* signal = beamForm(antennas, input_fc, input, weights, input_length); + double* b1 = abs_array(signal, input_length); + double* power = power_profile(b1, input_length); + double wc1 = 2 * PI * fc1 / Fs; + double* filter1 = lowPassFIRFilter(wc1, N); + double* filter_hamming_1 = multiply_arrays(filter1, hamming(N), N); + double wc2 = 2 * PI * fc2 / Fs; + double* filter2 = highPassFIRFilter(wc2, N); + double* filter_hamming_2 = multiply_arrays(filter2, hamming(N), N); + double* bpf = subtract_arrays(filter_hamming_2, filter_hamming_1, N); + double* firFilterResponse = FirFilterResponse(power, input_length, bpf, N); + double final = firFilterResponse[10]; + printf("%f", final); + + // for (int i = 0; i < (input_length + N - 1); ++i) { + // printf("%f\t", firFilterResponse[i]); + // } + + // Free allocated memory + free(input); + free(weights); + free(signal); + free(b1); + free(power); + free(filter1); + free(filter2); + free(filter_hamming_1); + free(filter_hamming_2); + free(bpf); + free(firFilterResponse); + return 0; +} + +double* getrangeofvector(double first, int64_t N, double step) { + double* result = (double*)malloc(N * sizeof(double)); + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + // Initialize the first element + result[0] = first; + + // Calculate the rest of the elements + for (int64_t i = 1; i < N; ++i) { + result[i] = result[i-1] + step; + } + + return result; +} + +double* beamForm(int antennas, double frequency, double* time, double* weights, int timeDim) { + // Allocate space for output + double* output = (double*)malloc(timeDim * sizeof(double)); + if (output == NULL) { + fprintf(stderr, "Memory allocation 
failed for output\n"); + exit(1); + } + + // Allocate space for internal generated signals + double** signal = (double**)malloc(antennas * sizeof(double*)); + if (signal == NULL) { + fprintf(stderr, "Memory allocation failed for signal\n"); + free(output); + exit(1); + } + + for (int i = 0; i < antennas; i++) { + signal[i] = (double*)malloc(timeDim * sizeof(double)); + if (signal[i] == NULL) { + fprintf(stderr, "Memory allocation failed for signal[%d]\n", i); + for (int j = 0; j < i; j++) { + free(signal[j]); + } + free(signal); + free(output); + exit(1); + } + } + + // Generate input signals + double phase_var = 2 * PI * frequency; + for (int i = 0; i < antennas; i++) { + double iter_args = (i * PI) / 4.0; + for (int j = 0; j < timeDim; j++) { + double sin_body = time[j] * phase_var + iter_args; + signal[i][j] = sin(sin_body); + } + } + + // Beam forming + for (int i = 0; i < timeDim; i++) { + double sum = 0.0; + for (int j = 0; j < antennas; j++) { + sum += signal[j][i] * weights[j]; + } + output[i] = sum; + } + + // Free allocated memory for signal + for (int i = 0; i < antennas; i++) { + free(signal[i]); + } + + free(signal); + + return output; +} + +// Function to calculate absolute values of an array +double* abs_array(double* arr, int size) { + double* result = (double*)malloc(size * sizeof(double)); + + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + for (int i = 0; i < size; i++) { + result[i] = fabs(arr[i]); + } + + return result; +} + +// Function to calculate power profile (element-wise square) +double* power_profile(double* arr, int size) { + double* result = (double*)malloc(size * sizeof(double)); + + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + for (int i = 0; i < size; i++) { + result[i] = arr[i] * arr[i]; + } + + return result; +} + +double* lowPassFIRFilter(double wc, int N) { + double* output = (double*)malloc(N * sizeof(double)); + if (output == NULL) { + 
fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + int midIndex = (N - 1) / 2; + double wcByPi = wc / PI; + + // Handle middle point + output[midIndex] = wcByPi; + + // First loop: 0 <= i <= (N-1)/2 - 1 + for (int i = 0; i < midIndex; i++) { + double iMinusMid = i - midIndex; + double sinArg = wc * iMinusMid; + double sinValue = sin(sinArg); + output[i] = sinValue / (PI * iMinusMid); + } + + // Second loop: (N-1)/2 + 1 <= i < N + for (int i = midIndex + 1; i < N; i++) { + double iMinusMid = i - midIndex; + double sinArg = wc * iMinusMid; + double sinValue = sin(sinArg); + output[i] = sinValue / (PI * iMinusMid); + } + + return output; +} + + +double* highPassFIRFilter(double wc, int N) { + double* output = (double*)malloc(N * sizeof(double)); + if (output == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + int midIndex = (N - 1) / 2; + double wcByPi = wc / PI; + + // Handle middle point + output[midIndex] = 1.0 - wcByPi; + + // First loop: 0 <= i <= (N-1)/2 - 1 + for (int i = 0; i < midIndex; i++) { + double iMinusMid = i - midIndex; + double sinArg = wc * iMinusMid; + double sinValue = sin(sinArg); + output[i] = -1.0 * sinValue / (PI * iMinusMid); + } + + // Second loop: (N-1)/2 + 1 <= i < N + for (int i = midIndex + 1; i < N; i++) { + double iMinusMid = i - midIndex; + double sinArg = wc * iMinusMid; + double sinValue = sin(sinArg); + output[i] = -1.0 * sinValue / (PI * iMinusMid); + } + + return output; +} + +double* hamming(int N) { + double* window = (double*)malloc(N * sizeof(double)); + if (window == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + const double a0 = 0.54; + const double a1 = 0.46; + const double twoPi = 2.0 * PI; + + for (int k = 0; k < N; k++) { + double angle = twoPi * k / (N - 1); + window[k] = a0 - a1 * cos(angle); + } + + return window; +} + +double* multiply_arrays(const double* arr1, const double* arr2, int size) { + double* result = (double*)malloc(size * 
sizeof(double)); + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + for (int i = 0; i < size; i++) { + result[i] = arr1[i] * arr2[i]; + } + + return result; +} + +double* subtract_arrays(const double* arr1, const double* arr2, int size) { + double* result = (double*)malloc(size * sizeof(double)); + if (result == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + exit(1); + } + + for (int i = 0; i < size; i++) { + result[i] = arr1[i] - arr2[i]; + } + + return result; +} + +double* FirFilterResponse(const double *input, int inputLen, const double *filter, int filterLen) { + int outputLen = inputLen + filterLen - 1; + double *output = (double*)malloc(outputLen * sizeof(double)); + if (output == NULL) { + fprintf(stderr, "Memory allocation failed\n"); + return NULL; + } + + // Initialize output array to zero + for (int i = 0; i < outputLen; i++) { + output[i] = 0.0; + } + + // Perform full convolution + for (int i = 0; i < outputLen; i++) { + for (int k = 0; k < filterLen; k++) { + if (i - k >= 0 && i - k < inputLen) { + output[i] += filter[k] * input[i - k]; + } + } + } + + return output; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c new file mode 100644 index 000000000000..9a68cc038b7e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c @@ -0,0 +1,202 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 10 +#define SAMPLE_RATE 8000 +#define TIME_INCREMENT 0.000125 +#define WINDOW_SIZE 3 + +// Function declarations +void getRangeOfVector(double* vector, double start, int length, double increment); +void gain(double* output, double* input, double multiplier, int length); +void sine(double* output, double* input, int length); +void sliding_median_filter(double* input, 
double* output, int length); +void sliding_avg_filter(double* input, double* output, int length); +double min_of_three(double a, double b, double c); +double max_of_three(double a, double b, double c); + +int main() { + double fs = SAMPLE_RATE; + double* input = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (input == NULL) { + fprintf(stderr, "Memory allocation failed for input\n"); + return 1; + } + + double f_sig = 500; + double getMultiplier = 2 * PI * f_sig; + + getRangeOfVector(input, 0, INPUT_LENGTH, TIME_INCREMENT); + + double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (getSinDuration == NULL) { + fprintf(stderr, "Memory allocation failed for getSinDuration\n"); + free(input); + return 1; + } + gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (clean_sig == NULL) { + fprintf(stderr, "Memory allocation failed for clean_sig\n"); + free(input); + free(getSinDuration); + return 1; + } + sine(clean_sig, getSinDuration, INPUT_LENGTH); + + double f_noise = 3000; + double* getNoiseSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (getNoiseSinDuration == NULL) { + fprintf(stderr, "Memory allocation failed for getNoiseSinDuration\n"); + free(input); + free(getSinDuration); + free(clean_sig); + return 1; + } + gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH); + + double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (noise == NULL) { + fprintf(stderr, "Memory allocation failed for noise\n"); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + return 1; + } + sine(noise, getNoiseSinDuration, INPUT_LENGTH); + + double* noise1 = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (noise1 == NULL) { + fprintf(stderr, "Memory allocation failed for noise1\n"); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + return 1; 
+ } + gain(noise1, noise, 0.5, INPUT_LENGTH); + + double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double)); + if (noisy_sig == NULL) { + fprintf(stderr, "Memory allocation failed for noisy_sig\n"); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + return 1; + } + for (int i = 0; i < INPUT_LENGTH; i++) { + noisy_sig[i] = clean_sig[i] + noise1[i]; + } + + double* median = (double*)malloc((INPUT_LENGTH - WINDOW_SIZE + 1) * sizeof(double)); + if (median == NULL) { + fprintf(stderr, "Memory allocation failed for median\n"); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + return 1; + } + sliding_median_filter(noisy_sig, median, INPUT_LENGTH); + + double* average = (double*)malloc((INPUT_LENGTH - WINDOW_SIZE + 1) * sizeof(double)); + if (average == NULL) { + fprintf(stderr, "Memory allocation failed for average\n"); + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + free(median); + return 1; + } + sliding_avg_filter(median, average, INPUT_LENGTH - WINDOW_SIZE + 1); + + printf("%f\n", average[3]); + + + // Free allocated memory + free(input); + free(getSinDuration); + free(clean_sig); + free(getNoiseSinDuration); + free(noise); + free(noise1); + free(noisy_sig); + free(median); + free(average); + + return 0; +} + +// Function to generate a range of values +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +// Function to apply gain (multiplier) to a signal +void gain(double* output, double* input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +// Function to compute the sine of each element in the input array +void sine(double* output, 
double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +// Function to find the minimum of three values +double min_of_three(double a, double b, double c) { + double min = a; + if (b < min) min = b; + if (c < min) min = c; + return min; +} + +// Function to find the maximum of three values +double max_of_three(double a, double b, double c) { + double max = a; + if (b > max) max = b; + if (c > max) max = c; + return max; +} + +// Function to apply sliding window average filter with kernel size of 3 +void sliding_avg_filter(double* input, double* output, int length) { + int new_length = length - WINDOW_SIZE + 1; + for (int i = 0; i < new_length; i++) { + output[i] = (input[i] + input[i + 1] + input[i + 2]) / 3.0; + } +} + +// Function to apply sliding window median filter with kernel size of 3 +void sliding_median_filter(double* input, double* output, int length) { + int new_length = length - WINDOW_SIZE + 1; + for (int i = 0; i < new_length; i++) { + double a = input[i]; + double b = input[i + 1]; + double c = input[i + 2]; + // Median formula: median = a + b + c - max(a, b, c) - min(a, b, c) + double max_val = max_of_three(a, b, c); + double min_val = min_of_three(a, b, c); + output[i] = a + b + c - max_val - min_val; + } +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c new file mode 100644 index 000000000000..bbf6f1d34b1d --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c @@ -0,0 +1,195 @@ +#include +#include +#include +#include + +#define INPUT_LENGTH 40000 + +double *getRangeOfVector(double start, int length, double increment) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + 
for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + // printf("%.6f ", vector[i]); + } + return vector; + } + +double *gain(const double *input, int length, double increment) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = input[i] * increment; + // printf("%.6f ", vector[i]); + } + return vector; + } + +double *Sin(const double *input, int length) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = sin(input[i]); + // printf("%.6f ", vector[i]); + } + return vector; + } + +double *thresholdUp(const double *input, int length, double threshold, int returnOrignal) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + + if (returnOrignal == 0) { + for (int i = 0; i < length; i++) { + if (input[i] >= threshold) { + vector[i] = 1; + } + else { + vector[i] = 0; + } + // printf("%.6f ", vector[i]); + } + } + else { + for (int i = 0; i < length; i++) { + if (input[i] >= threshold) { + vector[i] = input[i]; + } + else { + vector[i] = 0; + } + // printf("%.6f ", vector[i]); + } + } + return vector; + } + +double *space_modulate(const double *input, int length) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = (input[i] == 1) ? 
1 : -1; + // printf("%.6f ", vector[i]); + } + return vector; +} + +double *add_noise(const double *input, int length) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + double noise = sin(input[i]); + vector[i] = input[i] + noise; + // printf("%.6f ", vector[i]); + } + return vector; +} + +double *space_demodulate(const double *input, int length) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = (input[i] > 0) ? 1 : 0; + // printf("%.6f ", vector[i]); + } + return vector; +} + +double *error_correction(const double *data, int length) { + double *corrected = malloc(length * sizeof(double)); + if (!corrected) { + perror("Memory allocation failed for corrected"); + exit(EXIT_FAILURE); + } + + int corrected_index = 0; + for (int i = 0; i < length; i += 8) { + int count = 0; + for (int j = 0; j < 8 && (i + j) < length; j++) { // Ensure within bounds + if (data[i + j] == 1) + count++; + } + + if (count % 2 == 0) { + // Copy the original 8-bit chunk if parity is even + for (int j = 0; j < 8 && (i + j) < length; j++) { + corrected[corrected_index + j] = data[i + j]; + // printf("%.6f ", corrected[i]); + } + } else { + // If parity is odd, correct the first bit by setting it to 0 + corrected[corrected_index] = 0; + for (int j = 1; j < 8 && (i + j) < length; j++) { + corrected[corrected_index + j] = data[i + j]; + // printf("%.6f ", corrected[i]); + } + } + corrected_index += 8; + } + return corrected; +} + +void print_array(const double *arr, int length) { + for (int i = 0; i < length; i++) { + printf("%.6f", arr[i]); // Print each element with 6 decimal places + if (i < length - 1) { + printf(", "); // Add comma between elements except the last one + } + } +} + +int main() { + 
double *input = getRangeOfVector(0, INPUT_LENGTH, 0.000125); + if (!input) { + perror("Memory allocation failed for input"); + return EXIT_FAILURE; + } + + double pi = 3.14159265359; + double f_sig = 500; + double getMultiplier = 2 * pi * f_sig; + double threshold = 0.4; + int returnOrignal = 0; + + double *getSinDuration = gain(input, INPUT_LENGTH, getMultiplier); + double *clean_sig = Sin(getSinDuration, INPUT_LENGTH); + double *binary_sig = thresholdUp(clean_sig, INPUT_LENGTH, threshold, returnOrignal); + double *a = space_modulate(binary_sig, INPUT_LENGTH); + double *noisy_signal = add_noise(a, INPUT_LENGTH); + double *b = space_demodulate(noisy_signal, INPUT_LENGTH); + double *e = error_correction(b, INPUT_LENGTH); + printf("%f", e[8]); + + // Free allocated memory + free(input); + free(getSinDuration); + free(clean_sig); + free(binary_sig); + free(a); + free(noisy_signal); + free(b); + free(e); + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c new file mode 100644 index 000000000000..1fb7ff58fc5c --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c @@ -0,0 +1,95 @@ +#include +#include +#include + +#define SAMPLE_RATE 1000 +#define INPUT_LENGTH 12207 +#define DURATION ((double)INPUT_LENGTH / SAMPLE_RATE) +#define CORRELATION_LENGTH (2 * INPUT_LENGTH - 1) + +void generateVoiceSignature(double *signal, double freq1, double freq2) { + for (int i = 0; i < INPUT_LENGTH; i++) { + double t = i / (double)SAMPLE_RATE; + signal[i] = sin(2 * M_PI * freq1 * t) + sin(2 * M_PI * freq2 * t); + } +} + +void correlate(const double *signal1, const double *signal2, double *result) { + for (int lag = 0; lag < CORRELATION_LENGTH; lag++) { + result[lag] = 0; + for (int i = 0; i < INPUT_LENGTH; i++) { + int j = lag - INPUT_LENGTH + 
1 + i; + if (j >= 0 && j < INPUT_LENGTH) { + result[lag] += signal1[i] * signal2[j]; + } + } + } +} + +double max(const double *arr, int length) { + double max_value = arr[0]; + for (int i = 1; i < length; i++) { + if (arr[i] > max_value) { + max_value = arr[i]; + } + } + return max_value; +} + +int argmax(const double *arr, int length) { + int max_index = 0; + for (int i = 1; i < length; i++) { + if (arr[i] > arr[max_index]) { + max_index = i; + } + } + return max_index; +} + +int main() { + double *person1 = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *person2 = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *person3 = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *unknown_signal = (double *)malloc(INPUT_LENGTH * sizeof(double)); + double *correlation1 = (double *)malloc(CORRELATION_LENGTH * sizeof(double)); + double *correlation2 = (double *)malloc(CORRELATION_LENGTH * sizeof(double)); + double *correlation3 = (double *)malloc(CORRELATION_LENGTH * sizeof(double)); + + generateVoiceSignature(person1, 100, 200); + generateVoiceSignature(person2, 150, 250); + generateVoiceSignature(person3, 120, 180); + generateVoiceSignature(unknown_signal, 150, 250); + + correlate(person1, unknown_signal, correlation1); + correlate(person2, unknown_signal, correlation2); + correlate(person3, unknown_signal, correlation3); + + double total_maxes[3]; + total_maxes[0] = max(correlation1, CORRELATION_LENGTH); + total_maxes[1] = max(correlation2, CORRELATION_LENGTH); + total_maxes[2] = max(correlation3, CORRELATION_LENGTH); + + double temp2 = total_maxes[0]; + double temp3 = total_maxes[1]; + double temp4 = total_maxes[2]; + + int max_index = argmax(total_maxes, 3); + double max_value = total_maxes[max_index]; + + printf("%d\t", max_index); + printf("%f\t", temp2); + printf("%f\t", max_value); + printf("%f\t", temp3); + printf("%f %f %f\t", total_maxes[0], total_maxes[1], total_maxes[2]); + printf("%f\t", temp4); + + free(person1); + free(person2); 
+ free(person3); + free(unknown_signal); + free(correlation1); + free(correlation2); + free(correlation3); + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c new file mode 100644 index 000000000000..1371f761ceb4 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include + +#define INPUT_LENGTH 400 +#define M_PI 3.14159265358979323846 + +double* getRange(double start, int noOfSamples, double increment) { + double* output = malloc(noOfSamples * sizeof(double)); + if (!output) { + perror("Memory allocation failed in getRange"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < noOfSamples; i++) { + output[i] = start + i * increment; + } + + return output; +} + +void dft(double complex* output, const double* input, int length) { + for (int k = 0; k < length; k++) { + output[k] = 0; + for (int n = 0; n < length; n++) { + double angle = 2 * M_PI * k * n / length; + output[k] += input[n] * cexp(-I * angle); + } + } +} + +void square(double* output, const double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * input[i]; + } +} + +double sum(const double* input, int length) { + double total = 0; + for (int i = 0; i < length; i++) { + total += input[i]; + } + return total; +} + +int main() { + double* input = getRange(0, INPUT_LENGTH, 1); + + double complex* fft = malloc(INPUT_LENGTH * sizeof(double complex)); + if (!fft) { + perror("Memory allocation failed"); + free(input); + return EXIT_FAILURE; + } + + dft(fft, input, INPUT_LENGTH); + + double* sq_abs = malloc(INPUT_LENGTH * sizeof(double)); + if (!sq_abs) { + perror("Memory allocation failed"); + free(input); + free(fft); + return EXIT_FAILURE; + } + + for (int i = 0; i < INPUT_LENGTH; i++) { + sq_abs[i] = creal(fft[i]) * 
creal(fft[i]) + cimag(fft[i]) * cimag(fft[i]); + } + + double sum_result = sum(sq_abs, INPUT_LENGTH); + double res = sum_result / INPUT_LENGTH; + + printf("%f\n", res); + + free(input); + free(fft); + free(sq_abs); + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c new file mode 100644 index 000000000000..0235c5c5933d --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c @@ -0,0 +1,151 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define FS 1000 +#define INPUT_LENGTH 1000 +#define FILTER_SIZE 20 +#define MAX_PEAKS 50 + +// Function prototypes +void getRangeOfVector(double* vector, double start, int length, double increment); +void gain(double* output, const double* input, double multiplier, int length); +void sine(double* output, const double* input, int length); +void delay(double* output, const double* input, int delaySamples, int length); +void add(double* output, const double* input1, const double* input2, int length); +void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length); +void find_peaks(double* peaks, double* input, int length, double height, int distance); +double getElemAtIndx(double* input, int index); + +// Function implementations +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +void gain(double* output, const double* input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +void sine(double* output, const double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +void delay(double* output, const double* input, int 
delaySamples, int length) { + for (int i = 0; i < length; i++) { + output[i] = (i < delaySamples) ? 0.0 : input[i - delaySamples]; + } +} + +void add(double* output, const double* input1, const double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) { /* LMS adaptive filter: writes the filter output for each sample into y while adapting the weights towards clean_sig with step size mu. */ + double w[FILTER_SIZE] = {0}; if (filterSize > FILTER_SIZE) filterSize = FILTER_SIZE; // Weights start at zero; clamp the tap count so w[] is never indexed out of bounds + for (int n = 0; n < length; n++) { + y[n] = 0; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + y[n] += w[i] * noisy_sig[n - i]; + } + } + double e = clean_sig[n] - y[n]; + for (int i = 0; i < filterSize; i++) { + if (n - i >= 0) { + w[i] += mu * e * noisy_sig[n - i]; + } + } + } +} + +void find_peaks(double* peaks, double* input, int length, double height, int distance) { /* peaks must hold MAX_PEAKS entries: peak indices first, -1 in unused slots, and the peak count in the last slot. */ + int peakCount = 0; + + // Initialize peaks array with -1 (default no peaks) + for (int i = 0; i < MAX_PEAKS; i++) { + peaks[i] = -1; + } + + for (int i = 1; i < length - 1; i++) { + if (input[i] > input[i - 1] && input[i] > input[i + 1] && input[i] >= height) { + // If it's the first peak, store it + if (peakCount == 0) { + peaks[peakCount++] = i; + } else { + // Ensure distance between peaks + if (i - peaks[peakCount - 1] >= distance) { + peaks[peakCount++] = i; + } + } + + // Stop if max peaks reached + if (peakCount >= MAX_PEAKS - 1) { + break; + } + } + } + + // Store peak count at the last index + peaks[MAX_PEAKS - 1] = peakCount; +} + +double getElemAtIndx(double* input, int index) { + return input[index]; +} + +int main() { + double pi = PI; + double input[INPUT_LENGTH]; + getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125); + + double getMultiplier = 2 * pi * 10; + double getSinDuration[INPUT_LENGTH]; + gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double sig1[INPUT_LENGTH]; + sine(sig1, getSinDuration, INPUT_LENGTH); + + double getMultiplier2 = 2 * pi * 20; + double 
getSinDuration2[INPUT_LENGTH]; + gain(getSinDuration2, input, getMultiplier2, INPUT_LENGTH); + + double sinsig2[INPUT_LENGTH]; + sine(sinsig2, getSinDuration2, INPUT_LENGTH); + + double sig2[INPUT_LENGTH]; + gain(sig2, sinsig2, 0.5, INPUT_LENGTH); + + double signal[INPUT_LENGTH]; + add(signal, sig1, sig2, INPUT_LENGTH); + + double noise[INPUT_LENGTH]; + delay(noise, signal, 5, INPUT_LENGTH); + + double noisy_sig[INPUT_LENGTH]; + add(noisy_sig, signal, noise, INPUT_LENGTH); + + double mu = 0.01; + double y[INPUT_LENGTH]; + lmsFilterResponse(y, noisy_sig, signal, mu, FILTER_SIZE, INPUT_LENGTH); + + double peaks[MAX_PEAKS]; + find_peaks(peaks, y, INPUT_LENGTH, 1.0, 50); + + double final1 = getElemAtIndx(peaks, 1); + double final2 = getElemAtIndx(peaks, 0); + + printf("%f\t", final1); + printf("%f", final2); + + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c new file mode 100644 index 000000000000..8aff517fa10e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c @@ -0,0 +1,128 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define FS 1000 +#define INPUT_LENGTH 50 +#define FILTER_ORDER 5 + +// Function prototypes +void getRangeOfVector(double* vector, double start, int length, double increment); +void gain(double* output, const double* input, double multiplier, int length); +void sine(double* output, const double* input, int length); +void delay(double* output, const double* input, int delaySamples, int length); +void add(double* output, const double* input1, const double* input2, int length); +double lowPassFIRFilter(double wc, int length); +void hamming(double* window, int length); +void FIRFilterResponse(double* output, double* input, double filter, int input_length); +void thresholdUp(double* output, const double* 
input, double threshold, double defaultValue, int length); +double getElemAtIndx(double* input, int index); + +// Function implementations +void getRangeOfVector(double* vector, double start, int length, double increment) { + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } +} + +void gain(double* output, const double* input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +void sine(double* output, const double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +void delay(double* output, const double* input, int delaySamples, int length) { + for (int i = 0; i < length; i++) { + output[i] = (i < delaySamples) ? 0.0 : input[i - delaySamples]; + } +} + +void add(double* output, const double* input1, const double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +double sinc(double x) { + return (fabs(x) < 1e-8) ? 1.0 : sin(x) / x; // Handle division by zero +} + +double lowPassFIRFilter(double wc, int length) { + if (length == 1) { + return wc / PI; + } + return 0.0; +} + +void hamming(double* window, int length) { /* Fills window with Hamming coefficients; a one-point window is defined as 1.0 to avoid the 0/0 the formula would otherwise produce. */ + for (int i = 0; i < length; i++) { + window[i] = (length > 1) ? 0.54 - 0.46 * cos(2 * PI * i / (length - 1)) : 1.0; + } +} + +void FIRFilterResponse(double* output, double* input, double filter, int input_length) { + for (int i = 0; i < input_length; i++) { + output[i] = input[i] * filter; // Element-wise multiplication with single value + } +} + +void thresholdUp(double* output, const double* input, double threshold, double defaultValue, int length) { + for (int i = 0; i < length; i++) { + output[i] = (input[i] >= threshold) ? 
1 : defaultValue; + } +} + +double getElemAtIndx(double* input, int index) { + return input[index]; +} + +int main() { + double pi = PI; + double input[INPUT_LENGTH]; + getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125); + + double getMultiplier = 2 * pi * 5; + double getSinDuration[INPUT_LENGTH]; + gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double signal[INPUT_LENGTH]; + sine(signal, getSinDuration, INPUT_LENGTH); + + double noise[INPUT_LENGTH]; + delay(noise, signal, 5, INPUT_LENGTH); + + double noisy_sig[INPUT_LENGTH]; + add(noisy_sig, signal, noise, INPUT_LENGTH); + + // Low-pass filter design + double wc = 2 * pi * 1000 / 500; + int N = 5; + + double lpf = lowPassFIRFilter(wc, 1); + + double hamming_window[N]; + hamming(hamming_window, N); + + + double lpf_w = lpf * hamming_window[0]; + + double FIRfilterResponseArray[INPUT_LENGTH]; + FIRFilterResponse(FIRfilterResponseArray, noisy_sig, lpf_w, INPUT_LENGTH); + + double threshold = 0.05; + double GetThresholdReal[INPUT_LENGTH]; + thresholdUp(GetThresholdReal, FIRfilterResponseArray, threshold, 0, INPUT_LENGTH); + + double final1 = getElemAtIndx(GetThresholdReal, 3); + + printf("%f", final1); + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c new file mode 100644 index 000000000000..a30a3c4f5a13 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c @@ -0,0 +1,179 @@ +#include +#include +#include +#include + +#define PI 3.14159265359 +#define INPUT_LENGTH 10 + +// Function prototypes +double *getRangeOfVector(double start, int length, double increment); +void gain(double *output, const double *input, double multiplier, int length); +void sine(double *output, const double *input, int length); +void add(double *output, const double *input1, const double *input2, + int length); +void 
delay(double *output, const double *input, int delaySamples, int length); +void dft(double complex *output, const double *input, int length); +void square(double *output, const double *input, int length); +double sum(const double *input, int length); +void threshold(double *output, const double *input, double thresholdValue, + int length); +void sqrt_array(double *output, const double *input, int length); + +// Function implementations +double *getRangeOfVector(double start, int length, double increment) { + double *vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } + return vector; +} + +void gain(double *output, const double *input, double multiplier, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * multiplier; + } +} + +void sine(double *output, const double *input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +void add(double *output, const double *input1, const double *input2, + int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +void delay(double *output, const double *input, int delaySamples, int length) { + for (int i = 0; i < length; i++) { + if (i < delaySamples) { + output[i] = 0; + } else { + output[i] = input[i - delaySamples]; + } + } +} + +void dft(double complex *output, const double *input, int length) { + for (int k = 0; k < length; k++) { + output[k] = 0; + for (int n = 0; n < length; n++) { + double angle = 2 * PI * k * n / length; + output[k] += input[n] * cexp(-I * angle); + } + } +} + +void square(double *output, const double *input, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * input[i]; + } +} + +double sum(const double *input, int length) { + double total = 0; + for (int i = 0; i < length; i++) { + total += input[i]; + } + 
return total; +} + +void threshold(double *output, const double *input, double thresholdValue, + int length) { + for (int i = 0; i < length; i++) { + if (input[i] >= thresholdValue) { + output[i] = input[i]; + } else { + output[i] = 0; + } + } +} +void sqrt_array(double *output, const double *input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sqrt(input[i]); + } +} + +int main() { + int fs = 1000; + double *input = getRangeOfVector(0, INPUT_LENGTH, 0.000125); + + double getMultiplier = 2 * PI * 50; + double *getSinDuration = malloc(INPUT_LENGTH * sizeof(double)); + gain(getSinDuration, input, getMultiplier, INPUT_LENGTH); + + double *sig1 = malloc(INPUT_LENGTH * sizeof(double)); + sine(sig1, getSinDuration, INPUT_LENGTH); + + double getMultiplier2 = 2 * PI * 120; + double *getSinDuration2 = malloc(INPUT_LENGTH * sizeof(double)); + gain(getSinDuration2, input, getMultiplier2, INPUT_LENGTH); + + double *sinsig2 = malloc(INPUT_LENGTH * sizeof(double)); + sine(sinsig2, getSinDuration2, INPUT_LENGTH); + + double *sig2 = malloc(INPUT_LENGTH * sizeof(double)); + gain(sig2, sinsig2, 0.5, INPUT_LENGTH); + + double *signal = malloc(INPUT_LENGTH * sizeof(double)); + add(signal, sig1, sig2, INPUT_LENGTH); + + double *noise = malloc(INPUT_LENGTH * sizeof(double)); + delay(noise, signal, 5, INPUT_LENGTH); + + double *noisy_sig = malloc(INPUT_LENGTH * sizeof(double)); + add(noisy_sig, signal, noise, INPUT_LENGTH); + + double threshold_value = 2; + + double complex *dft_output = malloc(INPUT_LENGTH * sizeof(double complex)); + dft(dft_output, noisy_sig, INPUT_LENGTH); + + double *fft_real = malloc(INPUT_LENGTH * sizeof(double)); + double *fft_img = malloc(INPUT_LENGTH * sizeof(double)); + for (int i = 0; i < INPUT_LENGTH; i++) { + fft_real[i] = creal(dft_output[i]); + fft_img[i] = cimag(dft_output[i]); + } + + double *sq_abs = malloc(INPUT_LENGTH * sizeof(double)); + double *temp_real = malloc(INPUT_LENGTH * sizeof(double)); + double *temp_img = 
malloc(INPUT_LENGTH * sizeof(double)); + square(temp_real, fft_real, INPUT_LENGTH); + square(temp_img, fft_img, INPUT_LENGTH); + add(sq_abs, temp_real, temp_img, INPUT_LENGTH); + double *magnitude = malloc(INPUT_LENGTH * sizeof(double)); + sqrt_array(magnitude, sq_abs, INPUT_LENGTH); + double *GetThresholdReal = malloc(INPUT_LENGTH * sizeof(double)); + threshold(GetThresholdReal, magnitude, threshold_value, INPUT_LENGTH); + + printf("%f ", GetThresholdReal[0]); + + // Free allocated memory + free(input); + free(getSinDuration); + free(sig1); + free(getSinDuration2); + free(sinsig2); + free(sig2); + free(signal); + free(noise); + free(noisy_sig); + free(dft_output); + free(fft_real); + free(fft_img); + free(sq_abs); + free(temp_real); + free(temp_img); + free(magnitude); free(GetThresholdReal); + + return 0; +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c new file mode 100644 index 000000000000..423c6f9586bc --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c @@ -0,0 +1,134 @@ +#include +#include +#include + +#define PI 3.14159265359 +#define SAMPLE_RATE 1000 +#define INPUT_LENGTH 100 +#define THRESHOLD 0.01 + +// Function prototypes +double* getRangeOfVector(double start, int length, double increment); +void gain(double* output, const double* input, double gainFactor, int length); +void sine(double* output, const double* input, int length); +void delay(double* output, const double* input, int length, int delay_steps); +void add(double* output, const double* input1, const double* input2, int length); +void threshold(double* output, const double* input, double threshold); +double zeroCrossCount(const double* input, int length); +void printArray(const double* array, int length); + +// Generate a range of values +double* 
getRangeOfVector(double start, int length, double increment) { + double* vector = malloc(length * sizeof(double)); + if (!vector) { + perror("Memory allocation failed in getRangeOfVector"); + exit(EXIT_FAILURE); + } + for (int i = 0; i < length; i++) { + vector[i] = start + i * increment; + } + return vector; +} + +// Apply gain to a signal +void gain(double* output, const double* input, double gainFactor, int length) { + for (int i = 0; i < length; i++) { + output[i] = input[i] * gainFactor; + } +} + +// Compute sine wave signal +void sine(double* output, const double* input, int length) { + for (int i = 0; i < length; i++) { + output[i] = sin(input[i]); + } +} + +// Apply delay to a signal +void delay(double* output, const double* input, int length, int delay_steps) { + for (int i = 0; i < length; i++) { + if (i >= delay_steps) { + output[i] = input[i - delay_steps]; + } else { + output[i] = 0.0; + } + } +} + +// Perform element-wise addition +void add(double* output, const double* input1, const double* input2, int length) { + for (int i = 0; i < length; i++) { + output[i] = input1[i] + input2[i]; + } +} + +// Apply thresholding +void threshold(double* output, const double* input, double threshold) { + for (int i = 0; i < INPUT_LENGTH; i++) { + if (input[i] >= -threshold && input[i] <= threshold) { + output[i] = 0.0; + } else { + output[i] = input[i]; + } + } +} + +// Count zero crossings and return as double +double zeroCrossCount(const double* input, int length) { + double count = 0.0; + for (int i = 1; i < length; i++) { + if ((input[i - 1] > 0 && input[i] < 0) || (input[i - 1] < 0 && input[i] > 0)) { + count += 1.0; // Increment as double + } + } + return count; +} + +// Print an array +void printArray(const double* array, int length) { + for (int i = 0; i < length; i++) { + printf("%f ", array[i]); + } + printf("\n"); +} + +int main() { + // Step 1: Generate Input Vector + double* input = getRangeOfVector(0, INPUT_LENGTH, 0.0125); + + // Step 2: Apply Gain + 
double* getSinDuration = malloc(INPUT_LENGTH * sizeof(double)); + gain(getSinDuration, input, 2 * PI * 5, INPUT_LENGTH); + + // Step 3: Compute Sine Wave Signal + double* signal = malloc(INPUT_LENGTH * sizeof(double)); + sine(signal, getSinDuration, INPUT_LENGTH); + + // Step 4: Generate Delayed Noise Signal + double* noise = malloc(INPUT_LENGTH * sizeof(double)); + delay(noise, signal, INPUT_LENGTH, 5); + + // Step 5: Compute Noisy Signal + double* noisy_sig = malloc(INPUT_LENGTH * sizeof(double)); + add(noisy_sig, signal, noise, INPUT_LENGTH); + + // Step 6: Apply Thresholding + double* GetThresholdReal = malloc(INPUT_LENGTH * sizeof(double)); + threshold(GetThresholdReal, noisy_sig, THRESHOLD); + + // Step 7: Compute Zero-Crossing Rate (as double) + double zcr = zeroCrossCount(GetThresholdReal, INPUT_LENGTH); + + // Print final result + printf("%f\n", zcr); + + // Free allocated memory + free(input); + free(getSinDuration); + free(signal); + free(noise); + free(noisy_sig); + free(GetThresholdReal); + + return 0; +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py new file mode 100644 index 000000000000..283069c90136 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py @@ -0,0 +1,145 @@ +import os +import pandas as pd +current_dir = os.path.dirname(os.path.abspath(__file__)) +folderC = os.path.join(current_dir, 'CCode') +folderDSL = os.path.join(current_dir, 'DSP-DSL') # Renamed this folder +folderMatlab = os.path.join(current_dir, 'Matlab') + +os.makedirs('Output', exist_ok=True) + +output_fileC = os.path.join(current_dir, 'Output', 'NoOfLinesInC.txt') +output_fileDSL = os.path.join(current_dir, 'Output', 'NoOfLinesInPython.txt') +output_fileMatlab = os.path.join(current_dir, 'Output', 'NoOfLinesInMatlab.txt') + +def count_non_empty_linesInC(file_path): + with open(file_path, 'r') as file: 
+ lines = file.readlines() + non_empty_code_lines = 0 + in_multiline_comment = False + for line in lines: + stripped_line = line.strip() + if in_multiline_comment: + if '*/' in stripped_line: + in_multiline_comment = False + stripped_line = stripped_line.split('*/', 1)[1] + else: + continue + if stripped_line.startswith('//'): + continue + if '/*' in stripped_line: + if '*/' in stripped_line: + stripped_line = stripped_line.split('/*', 1)[0] + stripped_line.split('*/', 1)[1] + else: + in_multiline_comment = True + stripped_line = stripped_line.split('/*', 1)[0] + if stripped_line: + non_empty_code_lines += 1 + return non_empty_code_lines + +def count_valid_code_lines_in_dsl(file_path): + valid_code_lines = 0 + with open(file_path, 'r') as file: + for line in file: + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith('#'): + valid_code_lines += 1 + return valid_code_lines + +def count_valid_code_lines_in_matlab(file_path): + valid_code_lines = 0 + with open(file_path, 'r') as file: + in_multiline_comment = False + for line in file: + stripped_line = line.strip() + if in_multiline_comment: + if stripped_line.endswith('%}'): + in_multiline_comment = False + continue + if stripped_line.startswith('%{'): + in_multiline_comment = True + continue + if stripped_line and not stripped_line.startswith('%'): + valid_code_lines += 1 + return valid_code_lines + +def count_lines_across_languages(): + line_counts = {} + if os.path.exists(folderC): + for filename in sorted(os.listdir(folderC)): + file_path = os.path.join(folderC, filename) + if os.path.isfile(file_path) and filename.endswith('.c'): + count = count_non_empty_linesInC(file_path) + line_counts[filename] = {'lines_in_c': count, 'lines_in_dsl': 0, 'lines_in_matlab': 0} + + # Count Python files + if os.path.exists(folderDSL): + for filename in sorted(os.listdir(folderDSL)): + file_path = os.path.join(folderDSL, filename) + if os.path.isfile(file_path) and filename.endswith('.py'): + count = 
count_valid_code_lines_in_dsl(file_path) + if filename in line_counts: + line_counts[filename]['lines_in_dsl'] = count + else: + line_counts[filename] = {'lines_in_c': 0, 'lines_in_dsl': count, 'lines_in_matlab': 0} + + # Count MATLAB files + if os.path.exists(folderMatlab): + for filename in sorted(os.listdir(folderMatlab)): + file_path = os.path.join(folderMatlab, filename) + if os.path.isfile(file_path) and filename.endswith('.m'): + count = count_valid_code_lines_in_matlab(file_path) + if filename in line_counts: + line_counts[filename]['lines_in_matlab'] = count + else: + line_counts[filename] = {'lines_in_c': 0, 'lines_in_dsl': 0, 'lines_in_matlab': count} + + return line_counts + +def create_consolidated_table(): + line_counts = count_lines_across_languages() + + # Create a DataFrame + df = pd.DataFrame.from_dict(line_counts, orient='index') + + # Reset index to make filename a column + df.reset_index(inplace=True) + df.rename(columns={'index': 'filename'}, inplace=True) + + # Reorder columns + df = df[['filename', 'lines_in_dsl', 'lines_in_c', 'lines_in_matlab']] + + # Fill NaN values with 0 + df.fillna(0, inplace=True) + + # Convert line count columns to integers + for col in ['lines_in_dsl', 'lines_in_c', 'lines_in_matlab']: + df[col] = df[col].astype(int) + + return df + +def list_files_and_write_line_counts(folder, output_path, count_function, extension): + files = sorted(os.listdir(folder)) + with open(output_path, 'w') as output: + for filename in files: + file_path = os.path.join(folder, filename) + if os.path.isfile(file_path) and filename.endswith(extension): + line_count = count_function(file_path) + output.write(f"{filename}: \t{line_count}\n") + +if __name__ == "__main__": + # Create the consolidated table + consolidated_table = create_consolidated_table() + + # Save the consolidated table to a CSV file + output_file = os.path.join('Output', 'consolidated_lines_of_code.csv') + consolidated_table.to_csv(output_file, index=False) + + # Display the 
table + print(consolidated_table) + + # Output file paths + print(f"\nConsolidated table saved to: {output_file}") + + list_files_and_write_line_counts(folderC, output_fileC, count_non_empty_linesInC, '.c') + list_files_and_write_line_counts(folderDSL, output_fileDSL, count_valid_code_lines_in_dsl, '.py') + list_files_and_write_line_counts(folderMatlab, output_fileMatlab, count_valid_code_lines_in_matlab, '.m') \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py similarity index 71% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py index 3be61f2b89b7..40081eeefdf2 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py @@ -8,7 +8,7 @@ def main() { # var a10 = getRangeOfVector(0, 400, 0.000125); # var orig = sin(a10); - var N = 20000001 ; + var N_input = 101; # for cut-off freq var pi = 3.14159265359; @@ -17,6 +17,7 @@ def main() { var fc3 = 1000; var fc4 = 1200; var Fs = 8000; + var wc1 = 2 * pi * fc1 / Fs; #wc should vary from 0 to pi var wc2 = 2 * pi * fc2 / Fs; var wc3 = 2 * pi * fc3 / Fs; @@ -26,17 +27,17 @@ def main() { # var lpf = lowPassFIRFilter(wc, N); #ideal low -pass filter # var lpf_w = lpf * hamming(N); # var lpf_w2 = FIRFilterHammingOptimized(wc, N); - var hpf = highPassFIRFilter(wc1, N); #ideal high-pass filter - var hpf_w = hpf * hamming(N); + var hpf = highPassFIRFilter(wc1, N_input); #ideal high-pass filter + var hpf_w = hpf * hamming(N_input); - var hpf2 = highPassFIRFilter(wc2, N); #ideal high-pass filter - var hpf_w2 = hpf2 * hamming(N); + var hpf2 = 
highPassFIRFilter(wc2, N_input); #ideal high-pass filter + var hpf_w2 = hpf2 * hamming(N_input); - var hpf3 = highPassFIRFilter(wc3, N); #ideal high-pass filter - var hpf_w3 = hpf3 * hamming(N); + var hpf3 = highPassFIRFilter(wc3, N_input); #ideal high-pass filter + var hpf_w3 = hpf3 * hamming(N_input); - var hpf4 = highPassFIRFilter(wc4, N); #ideal high-pass filter - var hpf_w4 = hpf4 * hamming(N); + var hpf4 = highPassFIRFilter(wc4, N_input); #ideal high-pass filter + var hpf_w4 = hpf4 * hamming(N_input); # var hpf_w2 = highPassFIRHammingOptimized(wc, N); # print(lpf_w2); var final1 = getElemAtIndx(hpf_w , [6]); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py new file mode 100644 index 000000000000..3f26f9ce3a79 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py @@ -0,0 +1,566 @@ +import os +import subprocess +import time +import sys + + +# The script does the following +# Input : filename.py +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases.py #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", 
"speakerIdentification" +# input_file_name = "speakerIdentification.py" +input_file_name = sys.argv[1] + + + +BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" +OutputScriptPath = ( + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/" +) +input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name + +print(f"Running Application {input_file_path}") +# Construct full output path + +if sys.argv[2]: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2]) + +else: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + + +# Check if the Output folder exists, create it if it doesn't +if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + +# Now OutputPath is ready for use +print("InputPath:{}".format(BasePathForLLVM)) +print(f"OutputPath: {OutputPath}") + +# ************ Don't change unless u required +# Define the values dictionary +inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + + + #"10M": 10000000, + #"20M": 20000000, + #"30M": 30000000, + #"40M": 40000000, + #"50M": 50000000, + #"100M": 100000000, + # "1B": 1000000000 +} + +if sys.argv[1] == "noiseCancellation.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + } + +elif sys.argv[1] == "echoCancellation.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + } + +elif sys.argv[1] == "periodogram.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + } + +elif sys.argv[1] == "lowPassFiltering.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + 
"20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "hearingAid.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + +elif sys.argv[1] == "FIRFilterDesign.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "spectralAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "audioEqualization.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "audioCompression.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "vibrationAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "underWaterCommunication.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + +elif sys.argv[1] == "voiceActivityDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "signalSmoothing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + + +elif sys.argv[1] == 
"targetDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "biomedicalSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "digitalModulation.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + + +elif sys.argv[1] == "spaceCommunication.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + +elif sys.argv[1] == "radarSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "speakerIdentification.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "dtmfDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } +NoOfIterations = 1 + +# -------------------------------------------------- +commands_base = [ + # "./dsp1 lowPassFull.py -emit=mlir-affine", + # f"./dsp1 {input_file_path} -emit=llvm", + f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm-hexagonv68", + # "clang-17 -O0 file.ll -o fileexe -lm", +] + +# clang = 
f"{BasePathForLLVM}/build/bin/clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link" + +clang = f"/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link" + +# Define the cases +cases = [ + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + + #HWISOO: Temporal for debugging + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + + { + "affineOpt": True, + "canonOpt": False, + "suffix": "fileAffineOpt.ll", + "exe": "fileAffineOptExe", + }, + # { + # "affineOpt": False, + # "canonOpt": True, + # "suffix": "fileOnlyCanonOpt.ll", + # "exe": "fileOnlyCanonOptExe", + # }, + { + "affineOpt": True, + "canonOpt": True, + "suffix": "fileAffineCanonOpt.ll", + "exe": "fileAffineCanonOptExe", + }, +] + + + +print(input_file_path + " with hexagon clang") + +# Read the input file +with open(input_file_path, "r") as file: + lines = file.readlines() + +print("", end="\t") +for case in cases: + print(f"{case['exe']}", end="\t") + +for key, value in inputValues.items(): + value2 = 1 / value + dur = value / 8192 + print(f"\n{key}", end="\t") + + with open(input_file_path, "r") as file: + lines = file.readlines() + + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("var input = getRangeOfVector("): + if input_file_name in ["audioCompression.py", "audioEqualization.py", "periodogram.py", "spectralAnalysis.py"]: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 1);\n" + ) + if input_file_name in ["voiceActivityDetection.py"]: + updated_line = ( + f"\tvar input 
= getRangeOfVector(0, {value}, 0.125);\n" + ) + else: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" + ) + + file.write(updated_line) + elif line.strip().startswith("var duration ="): + updated_line = f"\tvar duration = {dur};\n" + file.write(updated_line) + elif line.strip().startswith("var N_input ="): + updated_line = f"\tvar N_input = {value+1};\n" + file.write(updated_line) + elif line.strip().startswith("var frequencies = fftfreq"): + updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n" + file.write(updated_line) + else: + file.write(line) + + + + # Iterate through the cases and run the commands + for case in cases: + command_llvm = commands_base[0] + if case["affineOpt"]: + command_llvm += " -affineOpt" + if case["canonOpt"]: + command_llvm += " -canonOpt" + # command_llvm += f" 2> {case['suffix']}" #OutputPath + + + + + ll_file_path = f"{OutputPath}/{case['suffix']}" + command_llvm += f" 2> {OutputPath}/{case['suffix']}" # OutputPath + + out_file_path = ll_file_path.replace(".ll", ".out") + clang_command = clang.replace("LL_FILE_PATH", ll_file_path) + clang_command = clang_command.replace("OUT_FILE_PATH", out_file_path) + + + commands = [ + "rm "+ll_file_path, + "rm "+out_file_path, + command_llvm, + clang_command, + + ] + + # Iterate over each value and perform the necessary operations + for command in commands: + # Run the commands for the current case + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + + + sum_exe_time = 0 + for i in range(0, NoOfIterations): + #NOTE: for simulation environment, we don't need to take care of cachine impact on host + + # The command to be executed + # command2 = "./fileexe" + # Limit execution to a single core + # command2 = "taskset -c 0 ./fileexe" + # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath} + # command2 = f"taskset -c 0 ./Output/{case['exe']}" + + command2 = 
"/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-sim --mv68 OUT_FILE_PATH".replace("OUT_FILE_PATH", out_file_path) + + + + # Record the start time + start_time = time.time() + + + pcycle = "failed" + # Execute the command + try: + result = subprocess.run( + command2, + shell=True, + #stdout=subprocess.DEVNULL, + #stdout="test_stdout", + #stderr="test_stderr", + #stderr=subprocess.DEVNULL, + capture_output = True, + text = True, + check=True, + ) + # subprocess.run(command2, shell=True) + + pcycle = result.stderr.split("Pcycles=")[1].replace("\n","").replace(" ","").replace("\t","") + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. " + f"Returned {exc.returncode}\n{exc}" + ) + + + + + # Record the end time + end_time = time.time() + + # Calculate the elapsed time + execution_time = end_time - start_time + sum_exe_time = sum_exe_time + execution_time + # print("{}".format(execution_time), end="\t") + avg_exe_time = sum_exe_time / NoOfIterations + # print(pcycle + "/" + "{}".format(round(avg_exe_time, 4)), end="\t") + print(pcycle, end="\t") + + + # print(f"The command took {execution_time} seconds to execute.") diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py new file mode 100644 index 000000000000..0de93a9a3ac3 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py @@ -0,0 +1,566 @@ +import os +import subprocess +import time +import sys + + +# The script does the following +# Input : filename.py +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases.py #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in 
file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification" +# input_file_name = "speakerIdentification.py" +input_file_name = sys.argv[1] + + + +BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" +OutputScriptPath = ( + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/" +) +input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name + +print(f"Running Application {input_file_path}") +# Construct full output path + +if sys.argv[2]: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2]) + +else: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + + +# Check if the Output folder exists, create it if it doesn't +if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + +# Now OutputPath is ready for use +print("InputPath:{}".format(BasePathForLLVM)) +print(f"OutputPath: {OutputPath}") + +# ************ Don't change unless u required +# Define the values dictionary +inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + + + #"10M": 10000000, + #"20M": 20000000, + #"30M": 30000000, + #"40M": 40000000, + #"50M": 50000000, + #"100M": 100000000, + # "1B": 1000000000 +} + +if sys.argv[1] == "noiseCancellation.py": + inputValues = { + "10": 10, + "100": 
100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + } + +elif sys.argv[1] == "echoCancellation.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + } + +elif sys.argv[1] == "periodogram.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + } + +elif sys.argv[1] == "lowPassFiltering.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "hearingAid.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + +elif sys.argv[1] == "FIRFilterDesign.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "spectralAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "audioEqualization.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "audioCompression.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "vibrationAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } + + +elif sys.argv[1] == "underWaterCommunication.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + 
"50K": 50000, + "100K": 100000, + "1M": 1000000, + } + +elif sys.argv[1] == "voiceActivityDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "signalSmoothing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + + +elif sys.argv[1] == "targetDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "biomedicalSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "digitalModulation.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + + +elif sys.argv[1] == "spaceCommunication.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + + +elif sys.argv[1] == "radarSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + } + + +elif sys.argv[1] == "speakerIdentification.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 
1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + + +elif sys.argv[1] == "dtmfDetection.py": + inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + } +NoOfIterations = 1 + +# -------------------------------------------------- +commands_base = [ + # "./dsp1 lowPassFull.py -emit=mlir-affine", + # f"./dsp1 {input_file_path} -emit=llvm", + f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm-hexagonv68", + # "clang-17 -O0 file.ll -o fileexe -lm", +] + +clang = f"{BasePathForLLVM}/build/bin/clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link" + +# clang = f"/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link" + +# Define the cases +cases = [ + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + + #HWISOO: Temporal for debugging + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + + { + "affineOpt": True, + "canonOpt": False, + "suffix": "fileAffineOpt.ll", + "exe": "fileAffineOptExe", + }, + # { + # "affineOpt": False, + # "canonOpt": True, + # "suffix": "fileOnlyCanonOpt.ll", + # "exe": "fileOnlyCanonOptExe", + # }, + { + "affineOpt": True, + "canonOpt": True, + "suffix": "fileAffineCanonOpt.ll", + "exe": "fileAffineCanonOptExe", + }, +] + + + +print(input_file_path) + +# Read the input file +with open(input_file_path, "r") as file: + lines = file.readlines() + +print("", end="\t") +for case in cases: + print(f"{case['exe']}", end="\t") + +for key, value in inputValues.items(): 
+ value2 = 1 / value + dur = value / 8192 + print(f"\n{key}", end="\t") + + with open(input_file_path, "r") as file: + lines = file.readlines() + + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("var input = getRangeOfVector("): + if input_file_name in ["audioCompression.py", "audioEqualization.py", "periodogram.py", "spectralAnalysis.py"]: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 1);\n" + ) + if input_file_name in ["voiceActivityDetection.py"]: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.125);\n" + ) + else: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" + ) + + file.write(updated_line) + elif line.strip().startswith("var duration ="): + updated_line = f"\tvar duration = {dur};\n" + file.write(updated_line) + elif line.strip().startswith("var N_input ="): + updated_line = f"\tvar N_input = {value+1};\n" + file.write(updated_line) + elif line.strip().startswith("var frequencies = fftfreq"): + updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n" + file.write(updated_line) + else: + file.write(line) + + + + # Iterate through the cases and run the commands + for case in cases: + command_llvm = commands_base[0] + if case["affineOpt"]: + command_llvm += " -affineOpt" + if case["canonOpt"]: + command_llvm += " -canonOpt" + # command_llvm += f" 2> {case['suffix']}" #OutputPath + + + + + ll_file_path = f"{OutputPath}/{case['suffix']}" + command_llvm += f" 2> {OutputPath}/{case['suffix']}" # OutputPath + + out_file_path = ll_file_path.replace(".ll", ".out") + clang_command = clang.replace("LL_FILE_PATH", ll_file_path) + clang_command = clang_command.replace("OUT_FILE_PATH", out_file_path) + + + commands = [ + "rm "+ll_file_path, + "rm "+out_file_path, + command_llvm, + clang_command, + + ] + + # Iterate over each value and perform the necessary operations + for command in commands: + # Run the commands for the current case + result = 
subprocess.run(command, shell=True, capture_output=True, text=True) + + + + sum_exe_time = 0 + for i in range(0, NoOfIterations): + #NOTE: for simulation environment, we don't need to take care of cachine impact on host + + # The command to be executed + # command2 = "./fileexe" + # Limit execution to a single core + # command2 = "taskset -c 0 ./fileexe" + # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath} + # command2 = f"taskset -c 0 ./Output/{case['exe']}" + + command2 = "/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-sim --mv68 OUT_FILE_PATH".replace("OUT_FILE_PATH", out_file_path) + + + + # Record the start time + start_time = time.time() + + + pcycle = "failed" + # Execute the command + try: + result = subprocess.run( + command2, + shell=True, + #stdout=subprocess.DEVNULL, + #stdout="test_stdout", + #stderr="test_stderr", + #stderr=subprocess.DEVNULL, + capture_output = True, + text = True, + check=True, + ) + # subprocess.run(command2, shell=True) + + pcycle = result.stderr.split("Pcycles=")[1].replace("\n","").replace(" ","").replace("\t","") + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. 
" + f"Returned {exc.returncode}\n{exc}" + ) + + + + + # Record the end time + end_time = time.time() + + # Calculate the elapsed time + execution_time = end_time - start_time + sum_exe_time = sum_exe_time + execution_time + # print("{}".format(execution_time), end="\t") + avg_exe_time = sum_exe_time / NoOfIterations + # print(pcycle + "/" + "{}".format(round(avg_exe_time, 4)), end="\t") + print(pcycle, end="\t") + + + # print(f"The command took {execution_time} seconds to execute.") diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py new file mode 100644 index 000000000000..2a5cdeb9e708 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py @@ -0,0 +1,574 @@ +import os +import subprocess +import time +import sys + + +# The script does the following +# Input : filename.py +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases.py #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification" +# input_file_name = "speakerIdentification.py" +input_file_name = sys.argv[1] + + +BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" 
+OutputScriptPath = ( + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/" +) +input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name + +print(f"Running Application {input_file_path}") +# Construct full output path + +if sys.argv[2]: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2]) + +else: + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + +# Check if the Output folder exists, create it if it doesn't +if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + +# Now OutputPath is ready for use +print("InputPath:{}".format(BasePathForLLVM)) +print(f"OutputPath: {OutputPath}") + +# ************ Don't change unless u required +# Define the values dictionary + +inputValues = { + "10": 10, + "100": 100, + "500": 500, + "1K": 1000, + "2K": 2000, + "5K": 5000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + # "10M": 10000000, + # "20M": 20000000, + # "30M": 30000000, + # "40M": 40000000, + # "50M": 50000000, + # "100M": 100000000, + # "1B": 1000000000 +} + +if sys.argv[1] == "noiseCancellation.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "echoCancellation.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "periodogram.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + } + +elif sys.argv[1] == "lowPassFiltering.py": + 
inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "hearingAid.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "FIRFilterDesign.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "spectralAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + } + +elif sys.argv[1] == "audioEqualization.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "audioCompression.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + } + +elif sys.argv[1] == "vibrationAnalysis.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + +elif sys.argv[1] == "underWaterCommunication.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 
100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "voiceActivityDetection.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "signalSmoothing.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "targetDetection.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "biomedicalSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "digitalModulation.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "spaceCommunication.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 
50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "radarSignalProcessing.py": + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + } + +elif sys.argv[1] == "dtmfDetection.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + } + +elif sys.argv[1] == "speakerIdentification.py": + inputValues = { + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + } + +NoOfIterations = 3 + +# -------------------------------------------------- +commands_base = [ + # "./dsp1 lowPassFull.py -emit=mlir-affine", + # f"./dsp1 {input_file_path} -emit=llvm", + f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm", + # "clang-17 -O0 file.ll -o fileexe -lm", +] + +# Define the cases +cases = [ + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + { + "affineOpt": True, + "canonOpt": False, + "suffix": "fileAffineOpt.ll", + "exe": "fileAffineOptExe", + }, + { + "affineOpt": True, + "canonOpt": True, + "suffix": "fileAffineCanonOpt.ll", + "exe": "fileAffineCanonOptExe", + }, +] + +# Read the input file +with open(input_file_path, "r") as file: + lines = file.readlines() + +print("", end="\t") +for case in cases: + print(f"{case['exe']}", end="\t") + +for key, value in inputValues.items(): + value2 = 1 / value + dur = value / 8192 + print(f"\n{key}", end="\t") + + with open(input_file_path, "r") as file: + lines = file.readlines() + + with open(input_file_path, "w") as file: + for line 
in lines: + if line.strip().startswith("var input = getRangeOfVector("): + if input_file_name in [ + "audioCompression.py", + "audioEqualization.py", + "periodogram.py", + "spectralAnalysis.py", + ]: + updated_line = f"\tvar input = getRangeOfVector(0, {value}, 1);\n" + elif input_file_name in ["voiceActivityDetection.py"]: # elif, so the step-1 line chosen above is not overwritten by the else below + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.125);\n" + ) + else: + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" + ) + + file.write(updated_line) + elif line.strip().startswith("var duration ="): + updated_line = f"\tvar duration = {dur};\n" + file.write(updated_line) + elif line.strip().startswith("var N_input ="): + updated_line = f"\tvar N_input = {value+1};\n" + file.write(updated_line) + elif line.strip().startswith("var frequencies = fftfreq"): + updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n" + file.write(updated_line) + else: + file.write(line) + + # Iterate through the cases and run the commands + for case in cases: + command_llvm = commands_base[0] + if case["affineOpt"]: + command_llvm += " -affineOpt" + if case["canonOpt"]: + command_llvm += " -canonOpt" + # command_llvm += f" 2> {case['suffix']}" #OutputPath + command_llvm += f" 2> {OutputPath}/{case['suffix']}" # OutputPath + + commands = [ + command_llvm, + # f"clang-17 -O0 {case['suffix']} -o fileexe -lm", + f"{BasePathForLLVM}/build/bin/clang-19 -O3 {OutputPath}/{case['suffix']} -o {OutputPath}/{case['exe']} -lm", + ] + # print(case,end="\n") + # print("\n") + + # Iterate over each value and perform the necessary operations + for command in commands: + # Run the commands for the current case + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + sum_exe_time = 0 + for i in range(0, NoOfIterations): + # for command in commands: + # # print("running command {}".format(command)) + # # os.system(command) + # result = subprocess.run(command, shell=True, capture_output=True, 
text=True) + + # Clear the cache to minimize caching effects + # subprocess.run("sync; echo 3 > /proc/sys/vm/drop_caches", shell=True) + try: + process = subprocess.run( + "sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'", + shell=True, + check=True, + ) + # process.wait() + except subprocess.CalledProcessError as exc: + print(exc) + # subprocess.run() has already waited for the child; there is no live process to terminate here + # The command to be executed + # command2 = "./fileexe" + # Limit execution to a single core + # command2 = "taskset -c 0 ./fileexe" + command2 = f"taskset -c 0 ./Output/{sys.argv[2]}/{case['exe']}" #{OutputPath} + # command2 = f"taskset -c 0 {OutputPath}/{case['exe']}" + + # Record the start time + start_time = time.time() + + # Execute the command + try: + subprocess.run( + command2, + shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True, + ) + # subprocess.run(command2, shell=True) + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. 
" + f"Returned {exc.returncode}\n{exc}" + ) + + # Record the end time + end_time = time.time() + + # Calculate the elapsed time + execution_time = end_time - start_time + sum_exe_time = sum_exe_time + execution_time + # print("{}".format(execution_time), end="\t") + avg_exe_time = sum_exe_time / NoOfIterations + print("{}".format(avg_exe_time), end="\t") + # print(f"The command took {execution_time} seconds to execute.") diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py new file mode 100644 index 000000000000..f58f706418ce --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py @@ -0,0 +1,71 @@ +import subprocess +import os + +# Ensure the log directory exists +log_dir = "LClanglogs" +os.makedirs(log_dir, exist_ok=True) + +# Corrected list of application names (without non-application entries) +app_names = [ + "spectralAnalysis", + "audioCompression", + "audioEqualization", + "biomedicalSignalProcessing", + "digitalModulation", + "dtmfDetection", + "echoCancellation", + "FIRFilterDesign", + "hearingAid", + "lowPassFiltering", + "noiseCancellation", + "periodogram", + "vibrationAnalysis", + "radarSignalProcessing", + "signalSmoothing", + "spaceCommunication", + "speakerIdentification", + "targetDetection", + "underWaterCommunication", + "voiceActivityDetection" +] + +# Loop through each application and execute the HexagonResultScript.py script +for app_name in app_names: + app_script = f"{app_name}.py" + log_file = os.path.join(log_dir, f"{app_name}.log") + with open(log_file, "w") as log: + command = ["python", "HexagonResultScript.py", app_script, app_name] + print(f"Running command: {' '.join(command)}") + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + for line in process.stdout: + print(line, end="") + log.write(line) + for 
line in process.stderr: + print(line, end="") + log.write(line) + process.wait() + + +log_dir2 = "HClanglogs" +os.makedirs(log_dir2, exist_ok=True) + + + +for app_name in app_names: + app_script = f"{app_name}.py" + log_file2 = os.path.join(log_dir2, f"{app_name}.log") + with open(log_file2, "w") as log: + command = ["python", "HexagonClangResultScript.py", app_script, app_name] + print(f"Running command: {' '.join(command)}") + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + for line in process.stdout: + print(line, end="") + log.write(line) + for line in process.stderr: + print(line, end="") + log.write(line) + process.wait() diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py new file mode 100644 index 000000000000..82f82c941ae9 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py @@ -0,0 +1,49 @@ +import subprocess +import os + +# Ensure the log directory exists +log_dir = "ServerExeLogs" +os.makedirs(log_dir, exist_ok=True) + +# Corrected list of application names (without non-application entries) +app_names = [ + "spectralAnalysis", + "audioCompression", + "audioEqualization", + "biomedicalSignalProcessing", + "digitalModulation", + "dtmfDetection", + "echoCancellation", + "FIRFilterDesign", + "hearingAid", + "lowPassFiltering", + "noiseCancellation", + "periodogram", + "vibrationAnalysis", + "radarSignalProcessing", + "signalSmoothing", + "spaceCommunication", + "speakerIdentification", + "targetDetection", + "underWaterCommunication", + "voiceActivityDetection" +] + +# Loop through each application and execute the script +for app_name in app_names: + app_script = f"{app_name}.py" + log_file = os.path.join(log_dir, f"{app_name}.log") + with open(log_file, "w") as log: + command = ["python", "ResultScript.py", app_script, 
app_name] + print(f"Running command: {' '.join(command)}") + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + for line in process.stdout: + print(line, end="") + log.write(line) + for line in process.stderr: + print(line, end="") + log.write(line) + process.wait() + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py similarity index 84% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py index 0b190a877c9a..2702463cc262 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py @@ -9,7 +9,7 @@ def main() { # var a10 = [ 3.2, 1.5, 0.8, 2.9, 4.5,10 , 0,5,5.5, 1.1]; # var a10 = getRangeOfVector(3.2, 10, 1); - var input = getRangeOfVector(0, 20000, 10); + var input = getRangeOfVector(0, 10, 1); var nlevels = 16; #powerOf2 var min = 0; var max = 8; @@ -25,20 +25,20 @@ def main() { #Threshold var GetThresholdReal = threshold(fft10real , threshold); var GetThresholdImg = threshold(fft10img , threshold); - # print(GetThresholdReal); # print(GetThresholdImg); #Quant var QuantOutReal = quantization(GetThresholdReal , nlevels, max, min); var QuantOutImg = quantization(GetThresholdImg , nlevels, max, min); - - print(QuantOutReal); - print(QuantOutImg); + # print(QuantOutImg); #RLE var rLEOutReal = runLenEncoding(QuantOutReal); var rLEOutImg = runLenEncoding(QuantOutImg); - # print(rLEOutReal); # print(rLEOutImg); + var final1 = getElemAtIndx(rLEOutReal , [0]); + var final2 = getElemAtIndx(rLEOutImg , [1]); + print(final1); + print(final2); } diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py similarity index 92% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py index 0dcea71954f9..3d2312572b09 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py @@ -5,7 +5,7 @@ def main() { # var input = [1,2,3,4,5]; - var input = getRangeOfVector(0, 5000000, 1); + var input = getRangeOfVector(0, 10, 1); var pi = 3.14159265359; var fc = 300; var Fs = 8000; @@ -32,15 +32,12 @@ def main() { var lpf2 = lowPassFIRFilter(wc2, N); var lpf2_w = lpf2 * hamming(N); # var bpf = lpf2 - lpf; - var bpf_w = lpf2_w - lpf_w; + var bpf_w = sub(lpf2_w,lpf_w); var FIRfilterResponseForBpf = FIRFilterResponse(input, bpf_w); var gainWithBpf = gain(FIRfilterResponseForBpf , gainForTreble); - - var final_audio = gainWithLpf + gainWithHpf + gainWithBpf ; var final1 = getElemAtIndx(final_audio , [3]); print(final1); - # print(final_audio); } diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py new file mode 100644 index 000000000000..b6e9106927e7 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py @@ -0,0 +1,56 @@ +def main() { + var fc1 = 1000; + var fc2 = 7500; + var Fs = 8000; + var N = 101; + var distance = 950; + var input = getRangeOfVector(0, 1000, 0.000125); + # print(c); + var pi = 3.14159265359; + var f_sig = 500; + var getMultiplier = 2 * pi * f_sig; + # 
print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + + var f_noise = 3000; + var getNoiseSinDuration = gain(input, 2 * pi * f_noise); + var noise = sin(getNoiseSinDuration); + var noise1 = gain(noise, 0.5); + + var noisy_sig = clean_sig + noise1; + # Step 1: FIR Bandpass Filter + var wc1 = 2 * pi * fc1 / Fs; #wc should vary from 0 to pi + var lpf1 = lowPassFIRFilter(wc1, N); #ideal low -pass filter + var lpf1_w = lpf1 * hamming(N); + + var wc2 = 2 * pi * fc2 / Fs; + var lpf2 = lowPassFIRFilter(wc2, N); + var lpf2_w = lpf2 * hamming(N); + + # var bpf = lpf2 - lpf; + var bpf_w = sub(lpf2_w,lpf1_w); + var FIRfilterResponseForBpf = FIRFilterResponse(noisy_sig, bpf_w); + + # Step 2: Artifact Removal (R-peak detection) + var max_signal = max(FIRfilterResponseForBpf); + + var height = 0.3 * max_signal; + + var r_peaks = find_peaks(FIRfilterResponseForBpf, height, distance); + + var len_r_peaks = len(r_peaks); + var last_peaks_index = sub(len_r_peaks, [1]); + var peaks_count = getSingleElemAtIndx(r_peaks, last_peaks_index); + + var diff_val = diff(r_peaks, peaks_count); + var peaks_count_minus_one = sub(peaks_count, 1); + var diff_mean = mean(diff_val, peaks_count_minus_one); + + var avg_hr = (60 * Fs) / diff_mean; + + print(avg_hr); + +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py new file mode 100644 index 000000000000..831b75b995e8 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py @@ -0,0 +1,37 @@ +def main() { + var a = getRangeOfVector(0, 100, 1); + var b = getRangeOfVector(0, 100, 2); + # var a = [1,2,3,4]; + # var b = [2,3,4,5]; + + # print(a); + + var ra = padding(a, 0, 99); + var rb = padding(b, 0, 99); + + # print(ra); + + var x1 = fft1dreal(ra); + var y1 = 
fft1dimg(ra); + var x2 = fft1dreal(rb); + var y2 = fft1dimg(rb); + + # # print(x1); + # # print(y1); + # # print(x2); + # # print(y2); + + var tempreal = x1 * x2; + var negreal = y1 * y2; + var imag = x1 * y2 + x2 * y1; # the order matters! + var real = sub(tempreal, negreal); + + # print(real); + # print(imag); + + var result = ifft1d(real, imag); + print(result); + + # var t = FIRFilterResponse(b, a); + # print(t); +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py new file mode 100644 index 000000000000..f36dc87df15d --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py @@ -0,0 +1,23 @@ +def main() { + # var input = [1,0,1,1,0,1,0,0]; + var input = getRangeOfVector(0, 100, 0.000125); + # print(c); + var pi = 3.14159265359; + var f_sig = 500; + var getMultiplier = 2 * pi * f_sig; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + var binary_sig = thresholdUp(clean_sig, 0.4,0); + # print(binary_sig); + var modulate_symbol_real = qam_modulate_real(binary_sig); + # print(modulate_symbol_real); + var modulate_symbol_imagine = qam_modulate_imagine(binary_sig); + # print(modulate_symbol_imagine); + var decode_data = qam_demodulate(modulate_symbol_real, modulate_symbol_imagine); + # print(decode_data); + var final1 = getElemAtIndx(decode_data , 2); + print(final1); +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py new file mode 100644 index 000000000000..fa5364d6baee --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py @@ -0,0 +1,33 @@ +def main() 
{ + var digit = 8; + var duration = 0.5; + var fs = 8192; + var d = 1/fs; + var N = fs * duration; + var dtmf_tone = generateDtmf(digit, duration, fs); + var fft_real = fft1dreal(dtmf_tone); + var fft_imag = fft1dimg(dtmf_tone); + var squared_fft_real = square(fft_real); + var squared_fft_imag = square(fft_imag); + var sum = squared_fft_real + squared_fft_imag; + # print(sum); + var magnitudes = sqrt(sum); + # print(magnitudes); + var frequencies = fftfreq(4096, 0.000122); + # # print(frequencies); + var peaks = findDominantPeaks(frequencies, magnitudes); + print(peaks); + var freqPairs = [ + [941, 1336], + [697, 1209], + [697, 1336], + [697, 1477], + [770, 1209], + [770, 1336], + [770, 1477], + [852, 1209], + [852, 1336], + [852, 1477]]; + var recovered_digit = recoverDtmfDigit(peaks, freqPairs); + print(recovered_digit); + } \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py new file mode 100644 index 000000000000..91a8f56941ef --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py @@ -0,0 +1,28 @@ +def main() { + var fs = 8000; + # var step = 1/8000; + # print(step); + var input = getRangeOfVector(0, 100, 0.000125); + var f_sig = 500; + var pi = 3.14159265359; + var getMultiplier = 2 * pi * f_sig; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + + #define a noise signal with freq = 3000 + var noise = delay(clean_sig, 2); + # var noise1 = gain(noise, 0.5); + + var noisy_sig = clean_sig + noise; + # print(noisy_sig); + # print(clean_sig); + var mu = 0.01; + var filterSize = 32; + var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); + var z = normalize(y); + var final = getElemAtIndx(z , [5]); + print(final); + +} diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py new file mode 100644 index 000000000000..a443dde2aa6f --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py @@ -0,0 +1,159 @@ +import os +import subprocess +import time +import pandas as pd + +# Path to the input file +input_files = [ + "spectralAnalysis", + "audioCompression", + "audioEqualization", + "biomedicalSignalProcessing", + "digitalModulation", + "dtmfDetection", + "echoCancellation", + "FIRFilterDesign", + "hearingAid", + "lowPassFiltering", + "noiseCancellation", + "periodogram", + "vibrationAnalysis", + "radarSignalProcessing", + "signalSmoothing", + "spaceCommunication", + "speakerIdentification", + "targetDetection", + "underWaterCommunication", + "voiceActivityDetection" +] +data = [] + +for input_file_path in input_files: + input_file_path +=".py" + BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" + OutputScriptPath = ( + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/" + ) + + try: + with open(input_file_path, "r") as file: + lines = file.readlines() + except FileNotFoundError: + print(f"File {input_file_path} not found") + continue + + # Construct full output path + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + # Check if the Output folder exists, create it if it doesn't + if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + print(f"Running Application: {input_file_path}") + print("InputPath: {}".format(BasePathForLLVM)) + print(f"OutputPath: {OutputPath}") + + # Input values dictionary + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 
50000000, + "100M": 100000000, + } + + NoOfIterations = 3 + + # Base command + commands_base = [ + f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm", + ] + + # Define the cases + cases = [ + { + "affineOpt": True, + "canonOpt": False, + "suffix": "fileAffineOpt.ll", + "exe": "fileAffineOptExe", + }, + { + "affineOpt": True, + "canonOpt": True, + "suffix": "fileAffineCanonOpt.ll", + "exe": "fileAffineCanonOptExe", + }, + ] + + # Print header + print("", end="\t") + for case in cases: + print(f"{case['suffix']}", end="\t") + + # Iterate over input sizes + for key, value in inputValues.items(): + print(f"\n{key}", end="\t") + + # Update the input file with the current value + with open(input_file_path, "r") as file: + lines = file.readlines() + + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("var input = getRangeOfVector("): + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" + ) + file.write(updated_line) + else: + file.write(line) + + # Measure compiler time for each case + for case in cases: + command_llvm = commands_base[0] + if case["affineOpt"]: + command_llvm += " -affineOpt" + if case["canonOpt"]: + command_llvm += " -canonOpt" + command_llvm += f" 2> {OutputPath}/{case['suffix']}" # OutputPath + + sum_compile_time = 0 + for _ in range(NoOfIterations): + # Record the start time + start_time = time.time() + + # Execute the compilation command + result = subprocess.run(command_llvm, shell=True, capture_output=True, text=True) + + # Record the end time + end_time = time.time() + + # Calculate the elapsed time for compilation + compile_time = end_time - start_time + sum_compile_time += compile_time + + + + # Calculate average compile time + avg_compile_time = sum_compile_time / NoOfIterations + print(f"{avg_compile_time:.6f}", end="\t") + data.append({ + "filename": input_file_path, + "input size" : key, + # "opt": case['exe'], + f"{case['exe']}": avg_compile_time, + }) + df = 
pd.DataFrame(data) + + df.to_csv("compile_time.csv", index=False) \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py new file mode 100644 index 000000000000..f40e208fb7b2 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py @@ -0,0 +1,204 @@ +import os +import subprocess +import pandas as pd + +# The script does the following +# Input : filename.py +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases.py #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification" +input_files = [ + "spectralAnalysis", + "audioCompression", + "audioEqualization", + "biomedicalSignalProcessing", + "digitalModulation", + "dtmfDetection", + "echoCancellation", + "FIRFilterDesign", + "hearingAid", + "lowPassFiltering", + "noiseCancellation", + "periodogram", + "vibrationAnalysis", + "radarSignalProcessing", + "signalSmoothing", + "spaceCommunication", + "speakerIdentification", + "targetDetection", + "underWaterCommunication", + "voiceActivityDetection" +] +data = [] + +for input_file_path in input_files: + BasePathForLLVM = 
"/home/local/ASURITE/apkhedka/ForLLVM/" + OutputScriptPath = ( + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/" + ) + # OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/" + print(f"Running Application {input_file_path}") + # Construct full output path + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + # Check if the Output folder exists, create it if it doesn't + if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + # Now OutputPath is ready for use + print("InputPath:{}".format(BasePathForLLVM)) + print(f"OutputPath: {OutputPath}") + # exit() + + # ************ Don't change unless u required + # Define the values dictionary + inputValues = { + "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + # "1B": 1000000000 + } + NoOfIterations = 3 + + # -------------------------------------------------- + commands_base = [ + # "./dsp1 lowPassFull.py -emit=mlir-affine", + # f"./dsp1 {input_file_path} -emit=llvm", + f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path}.py -emit=llvm", + # "clang-17 -O0 file.ll -o fileexe -lm", + ] + + # Define the cases + cases = [ + # { + # "affineOpt": False, + # "canonOpt": False, + # "suffix": "fileNoOpt.ll", + # "exe": "fileNoOptExe", + # }, + { + "affineOpt": True, + "canonOpt": False, + "suffix": "fileAffineOpt.ll", + "exe": "fileAffineOptExe", + }, + { + "affineOpt": True, + "canonOpt": True, + "suffix": "fileAffineCanonOpt.ll", + "exe": "fileAffineCanonOptExe", + }, + ] + + # Read the input file + with open(input_file_path+".py", "r") as file: + lines = file.readlines() + + print("", end="\t") + for case in cases: + print(f"{case['exe']}", end="\t") + + size_test = {"100M": 100000000} + for key, 
value in size_test.items(): + value2 = 1 / value + dur = value / 8192 + print(f"\n{key}", end="\t") + + with open(input_file_path+".py", "r") as file: + lines = file.readlines() + + with open(input_file_path+".py", "w") as file: + for line in lines: + if line.strip().startswith("var input = getRangeOfVector("): + updated_line = ( + f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" + ) + file.write(updated_line) + elif line.strip().startswith("var duration ="): + updated_line = f"\tvar duration = {dur};\n" + file.write(updated_line) + elif line.strip().startswith("var frequencies = fftfreq"): + updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n" + file.write(updated_line) + else: + file.write(line) + + # Iterate through the cases and run the commands + for case in cases: + command_llvm = commands_base[0] + if case["affineOpt"]: + command_llvm += " -affineOpt" + if case["canonOpt"]: + command_llvm += " -canonOpt" + # command_llvm += f" 2> {case['suffix']}" #OutputPath + command_llvm += f" 2> {OutputPath}/{case['suffix']}" # OutputPath + + commands = [ + command_llvm, + # f"clang-17 -O0 {case['suffix']} -o fileexe -lm", + f"{BasePathForLLVM}/build/bin/clang-19 -O3 {OutputPath}/{case['suffix']} -o {OutputPath}/{case['exe']} -lm", + ] + # print(case,end="\n") + # print("\n") + + # Iterate over each value and perform the necessary operations + for command in commands: + # Run the commands for the current case + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + command2 = f"size ./Output/{case['exe']}" + + # Execute the command + try: + result = subprocess.run( + command2, + shell=True, + capture_output=True, text=True + ) + + output_parts = result.stdout.splitlines() + if len(output_parts) > 1: + size_data = output_parts[1].split() + + data.append({ + "filename": input_file_path, + # "input size" : key, + "opt": case['exe'], + # "text": size_data[0], + # "data": size_data[1], + # "bss": size_data[2], + # "dec": 
size_data[3], + # "hex": size_data[4], + "total": sum(map(int, size_data[:3])) # text + data + bss; column 3 ("dec") is already their sum + }) + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. " + f"Returned {exc.returncode}\n{exc}" + ) + + df = pd.DataFrame(data) + + df.to_csv("codesize.csv", index=False) \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py similarity index 79% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py index ed37be3a42d8..d81e05eb2b3a 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py @@ -2,7 +2,7 @@ def main() { var fs = 8000; # var step = 1/8000; # print(step); - var input = getRangeOfVector(0, 20000000, 0.000125); + var input = getRangeOfVector(0, 100, 0.000125); var f_sig = 500; var pi = 3.14159265359; var getMultiplier = 2 * pi * f_sig; @@ -23,8 +23,12 @@ def main() { var mu = 0.01; var filterSize = 32; var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); - var final1 = getElemAtIndx(y , [6]); - print(final1); + var G1 = 123; + var sol = gain(y,G1); + # print(y); + var final = getElemAtIndx(sol, [3]); + print(final); + # print(sol); } diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py similarity index 97% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py index 
c0b8d7851621..e08a55791347 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py @@ -12,7 +12,7 @@ def main() { # var step = 1/8000; # print(step); var duration = 0.05 ; # 50 milli-secs - var input = getRangeOfVector(0, 30000000, 0.000125); + var input = getRangeOfVector(0, 100, 0.000125); # print(c); # var c = getRangeOfVector(0,10, 0.000125); var f_sig = 500; diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py new file mode 100644 index 000000000000..5a296d6347b3 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py @@ -0,0 +1,30 @@ +def main() { +var fs = 8000; + # var step = 1/8000; + # print(step); + var input = getRangeOfVector(0, 1000, 0.000125); + var f_sig = 500; + var pi = 3.14159265359; + var getMultiplier = 2 * pi * f_sig; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + + #define a noise signal with freq = 3000 + var f_noise = 3000; + var getNoiseSinDuration = gain(input, 2 * pi * f_noise); + var noise = sin(getNoiseSinDuration); + var noise1 = gain(noise, 0.5); + + var noisy_sig = clean_sig + noise1; + # print(noisy_sig); + var mu = 0.01; + var filterSize = 32; + var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); + # var a = norm_LMSFilterResponse_opt(noisy_sig, clean_sig, mu, filterSize); + var z = normalize(y); + var final = getElemAtIndx(z , [5]); + print(final); +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py similarity index 62% rename from 
mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py index 9ee480e6c033..af9e30275355 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py @@ -14,7 +14,7 @@ def main() { #size 10 # var a10 = [ 10,20,30,40,50,60,70,80,90,100]; - var input = getRangeOfVector(0, 10, 1); + var input = getRangeOfVector(0, 500, 1); # var input = [1,2,3,4]; # print(a10); @@ -23,30 +23,14 @@ def main() { # y[n] = sum(h(k) . x(n-k)) k=0 to N-1 & 0<= n < N var conv1d = FIRFilterResponse(input, reverse_input); # var fft_real = fft1DRealSymm(conv1d); #fft1DRealSymm - var fft_real = fft1dreal(conv1d); - var fft_img = fft1dimg(conv1d); - var sq = fft_real * fft_real + fft_img * fft_img; - print(sq); - # var final1 = getElemAtIndx(fft_real , [6]); - # var final2 = getElemAtIndx(fft_real , [7]); - # print(final1); - # print(final2); # print(conv1d); + var fft_real = fft1dreal(conv1d); # print(fft_real); + var fft_img = fft1dimg(conv1d); # print(fft_img); - #Pad the input , reverse_input for the size of conv o/p - #Calculate - # var padLen = 9 ; #10 + 10 - 1 - 10 - # var input_padded = padding(input , 0, padLen ); - - - # var fft10real = fft1dreal(input); - # var fft10img = fft1dimg(input); - - # #try input * -input - # var neg_input = gain(input , -1); - # var sq = fft10real * fft10real + fft10img * fft10img; + var sq = fft_real * fft_real + fft_img * fft_img; # print(sq); - + var final1 = getElemAtIndx(sq , [2]); + print(final1); } diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py new file mode 100644 index 000000000000..9a68946cc7e9 --- /dev/null +++ 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py @@ -0,0 +1,32 @@ +def main() { + + var antennas = 4; + var input_fc = 5; + var N = 101; + var input = getRangeOfVector(0, 10, 0.000125); + var weights = getRangeOfVector(-90, 180, 1); + + var signal = beam_form(antennas, input_fc, input, weights); + var b1 = abs(signal); + var power_profile = b1 * b1; + var power_angle_max_idx = argmax(power_profile, 0); + var power_angle_max_ele = argmax(power_profile,0); + var pi = 3.1415926; + var fc1 = 1000; + var fc2 = 7500; + var Fs = 8000; + + var wc1 = 2*pi*fc1 / Fs; + var filter1 = lowPassFIRFilter(wc1, N); + var filter_hamming_1 = filter1 * hamming(N); + var wc2 = 2*pi*fc2 / Fs; + var filter2 = highPassFIRFilter(wc2, N); + var filter_hamming_2 = filter2 * hamming(N); + + var bpf = sub(filter_hamming_2, filter_hamming_1); + var firFilterResponse = FIRFilterResponse(power_profile, bpf); + var length = len(firFilterResponse); + var final = getElemAtIndx(firFilterResponse , 10); + print(final); +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py new file mode 100644 index 000000000000..3ac2474a345f --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py @@ -0,0 +1,19 @@ +def main() { + var fs = 8000; + var input = getRangeOfVector(0, 10, 0.000125); + var f_sig = 500; + var pi = 3.14159265359; + var getMultiplier = 2 * pi * f_sig; + var getSinDuration = gain(input, getMultiplier); + var clean_sig = sin(getSinDuration ); + var f_noise = 3000; + var getNoiseSinDuration = gain(input, 2 * pi * f_noise); + var noise = sin(getNoiseSinDuration); + var noise1 = gain(noise, 0.5); + + var noisy_sig = clean_sig + noise1; + var median = medianFilter(noisy_sig); + var average = slidingWindowAvg(median); + var final1 = getElemAtIndx(average , 3); 
+ print(final1); +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py new file mode 100644 index 000000000000..c21a388fa630 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py @@ -0,0 +1,20 @@ +def main() { + # var input = "HELLO FROM SPACE"; + var input = getRangeOfVector(0, 10, 0.000125); + # print(c); + var pi = 3.14159265359; + var f_sig = 500; + var getMultiplier = 2 * pi * f_sig; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + var binary_sig = thresholdUp(clean_sig, 0.4,0); + var a = space_modulate(binary_sig); + var noise = sin(a); + var noisy_signal = a+noise; + var b = space_demodulate(noisy_signal); + var e = space_err_correction(b); + var final = getElemAtIndx(e, [8]); + print(final); +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py new file mode 100644 index 000000000000..6501e5f474fb --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py @@ -0,0 +1,33 @@ +def main() { + var sample_rate = 1000; + var duration = 12.20703125; + var person1 = generateVoiceSignature(100, 200, duration, sample_rate); #Alice + var person2 = generateVoiceSignature(150, 250, duration, sample_rate); #Bob + var person3 = generateVoiceSignature(120, 180, duration, sample_rate); #Charlie + + # var unknown_signal = generateVoiceSignature(100, 200, duration, sample_rate); + var unknown_signal = generateVoiceSignature(150, 250, duration, sample_rate); + # var unknown_signal = generateVoiceSignature(120, 180, duration, sample_rate); + + 
var max1 = max(correlate(person1, unknown_signal)); + var max2 = max(correlate(person2, unknown_signal)); + var max3 = max(correlate(person3, unknown_signal)); + + var total_maxes = [0, 0, 0]; + + var temp2 = setSingleElemAtIndx(total_maxes, 0, max1); #work + var temp3 = setSingleElemAtIndx(total_maxes, 1, max2); #work + var temp4 = setSingleElemAtIndx(total_maxes, 2, max3); #work + + var max_index = argmax(total_maxes,0); + + var max_value = getSingleElemAtIndx(total_maxes, max_index); + + print(max_index); + print(temp2); + print(max_value); + print(temp3); + print(total_maxes); + print(temp4); +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py new file mode 100644 index 000000000000..9d32dec6fd4a --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py @@ -0,0 +1,13 @@ +def main() { + + var input = getRangeOfVector(0, 400, 1); + var fft_real = fft1dreal(input); + var fft_img = fft1dimg(input); + var sq_abs = square(fft_real) + square(fft_img) ; + var sum1 = sum(sq_abs); + # print(sum1); + var len1 = len(input); + var res = sum1 / len1; + print(res); +} + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py new file mode 100644 index 000000000000..4ce8c4a32c6a --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py @@ -0,0 +1,27 @@ +def main() { + var fs = 1000; + # var step = 1/fs; + # print(step); + var input = getRangeOfVector(0, 1000, 0.000125); + var pi = 3.14159265359; + var getMultiplier = 2 * pi * 10; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + var sig1 = sin(getSinDuration ); + var getMultiplier2 = 2 * pi * 20; + var 
getSinDuration2 = gain(input, getMultiplier2); + var sinsig2 = sin(getSinDuration2); + var sig2 = gain(sinsig2, 0.5); + var signal = sig1 + sig2; + var noise = delay(signal, 5); + var noisy_sig = signal + noise; + + var mu = 0.01; + var filterSize = 20; + var y = lmsFilterResponse(noisy_sig, signal, mu, filterSize); + var peaks = find_peaks(y, 1, 50); + var final1 = getElemAtIndx(peaks , [1]); + var final2 = getElemAtIndx(peaks , [0]); + print(final1); + print(final2); +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py new file mode 100644 index 000000000000..ac3a7f2131cd --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py @@ -0,0 +1,34 @@ +def main() { + var fs = 1000; + # var step = 1/fs; + # print(step); + var input = getRangeOfVector(0, 50, 0.000125); + var pi = 3.14159265359; + var getMultiplier = 2 * pi * 5; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + var signal = sin(getSinDuration ); + + var noise = delay(signal, 5); + var noisy_sig = signal + noise; + + + #design a low-pass filter : filterOrder = 5(odd) , cut-off freq=10 + # get wc = 2 * pi * cutoff_freq / fs + # get the filter response using filter(b,a, noisy_sig) + var fc = 1000; + # var Fs = 8000; + var wc = 2 * pi * 1000 / 500; #wc should vary from 0 to pi + var N = 5; + # var hid = sinc(wc, N); + var lpf = lowPassFIRFilter(wc, 1); #ideal low -pass filter + var lpf_w = lpf * hamming(N); + var FIRfilterResponse = FIRFilterResponse(noisy_sig, lpf_w); + + var threshold = 0.05; + var GetThresholdReal = thresholdUp(FIRfilterResponse, threshold, 0); + # print(GetThresholdReal); + var final1 = getElemAtIndx(GetThresholdReal , [3]); + print(final1); + +} \ No newline at end of file diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py new file mode 100644 index 000000000000..945c7cb40fc6 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py @@ -0,0 +1,27 @@ +def main() { + var fs = 1000; + # var step = 1/fs; + # print(step); + var input = getRangeOfVector(0, 10, 0.000125); + var pi = 3.14159265359; + var getMultiplier = 2 * pi * 50; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + var sig1 = sin(getSinDuration ); + var getMultiplier2 = 2 * pi * 120; + var getSinDuration2 = gain(input, getMultiplier2); + var sinsig2 = sin(getSinDuration2); + var sig2 = gain(sinsig2, 0.5); + var signal = sig1 + sig2; + var noise = delay(signal, 5); + var noisy_sig = signal + noise; + var threshold = 2; + + var fft_real = fft1dreal(noisy_sig); + var fft_img = fft1dimg(noisy_sig); + var sq_abs = square(fft_real) + square(fft_img); + var magnitudes = sqrt(sq_abs); + var GetThresholdReal = thresholdUp( magnitudes , threshold,1); + var final1 = getElemAtIndx(GetThresholdReal , 0); + print(final1); +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py new file mode 100644 index 000000000000..1cf37d8a34ad --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py @@ -0,0 +1,18 @@ +def main() { + var fs = 1000; + # var step = 1/fs; + # print(step); + var input = getRangeOfVector(0, 100, 0.0125); + var pi = 3.14159265359; + var getMultiplier = 2 * pi * 5; + # print(getMultiplier); + var getSinDuration = gain(input, getMultiplier); + var signal = sin(getSinDuration ); + + var noise = delay(signal, 5); + var 
noisy_sig = signal + noise; + var threshold = 0.01; + var GetThresholdReal = threshold( noisy_sig , threshold); + var zcr = zeroCrossCount(GetThresholdReal); + print(zcr); +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m new file mode 100644 index 000000000000..0b364f0d3e89 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m @@ -0,0 +1,48 @@ +% Constants +INPUT_LENGTH = 100000000; +FS = 8000; +FC1 = 500; +FC2 = 600; +FC3 = 1000; + +% Calculate normalized frequencies +wc1 = 2 * pi * FC1 / FS; +wc2 = 2 * pi * FC2 / FS; +wc3 = 2 * pi * FC3 / FS; + +% Create Hamming window +hamming_window = hamming(INPUT_LENGTH); + +% Create high-pass filters +hpf1 = highPassFIRFilter(wc1, INPUT_LENGTH); +hpf2 = highPassFIRFilter(wc2, INPUT_LENGTH); +hpf3 = highPassFIRFilter(wc3, INPUT_LENGTH); + +% Element-wise multiplication with Hamming window +hpf_w1 = hpf1 .* hamming_window'; +hpf_w2 = hpf2 .* hamming_window'; +hpf_w3 = hpf3 .* hamming_window'; + +% Get specific elements +final1 = hpf_w1(7); +final2 = hpf_w2(8); +final3 = hpf_w3(9); + +% Display results +fprintf('%f\n', final1); +fprintf('%f\n', final2); +fprintf('%f\n', final3); + +% High-pass FIR filter function +function h = highPassFIRFilter(wc, filterLength) + n = 0:(filterLength-1); + mid = (filterLength-1) / 2; + h = zeros(1, filterLength); + + % Use logical indexing to avoid issues with non-integer indices + midIndex = (n ~= mid); + h(midIndex) = -sin(wc * (n(midIndex) - mid)) ./ (pi * (n(midIndex) - mid)); + + % Handle the middle point separately + h(floor(mid)+1) = 1 - (wc / pi); +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py 
new file mode 100644 index 000000000000..8da6b86da1fa --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py @@ -0,0 +1,145 @@ +import os +import subprocess +import time +import re +# The script does the following +# Input : filename.py +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases.py #3.11 validated + +# Pseudo-code: +# Iterate for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "noiseCancelling.m" , "echoCancelling.m", "periodogram.m", "lowPassFull.m", "hearingAid.m", "lowPassFIRFilterDesign", "energyOfSignal", "audioEqualizer", "audioCompression","vibrationAnalysis", "underWaterCommunication", "voiceActivityDetection", "signalSmoothing", "targetDetection", "biomedicalSignalProcessing", "digitalModulation", "spaceCommunication", "radarSignalProcessing" +input_file = "speakerIdentification" +input_file_path = input_file + ".m" +BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" +OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/" +mcc_path ="/home/local/ASURITE/apkhedka/Matlab_Installation/bin/mcc" +mrt_path ="/home/local/ASURITE/apkhedka/Matlab_Runtime/R2024b/" +# OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/" +print(f"Running Application {input_file_path}") +# Construct full output path +OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + +# Check if the Output folder exists, create it if it doesn't +if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + +# Now OutputPath is ready for use +print("InputPath:{}".format(BasePathForLLVM)) +print(f"OutputPath: {OutputPath}") +# exit() + +# 
************ Don't change unless u required +# Define the values dictionary +inputValues = { + # "10": 10, + "100": 100, + "1K": 1000, + "10K": 10000, + "20K": 20000, + "30K": 30000, + "40K": 40000, + "50K": 50000, + "100K": 100000, + "1M": 1000000, + "10M": 10000000, + "20M": 20000000, + "30M": 30000000, + "40M": 40000000, + "50M": 50000000, + "100M": 100000000, + # "1B": 1000000000 +} +NoOfIterations = 3 + +def delete_folder_contents(folder_path): + for filename in os.listdir(folder_path): + file_path = os.path.join(folder_path, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f'Failed to delete {file_path}. Reason: {e}') + + +with open(input_file_path, "r") as file: + lines = file.readlines() + +print("", end="\t") + + +for key, value in inputValues.items(): + # Update the specific line in the file + # print("Updating for {}".format(value)) + print("\n{}".format(key), end="\t") + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("INPUT_LENGTH = "): + updated_line = f"INPUT_LENGTH = {value};\n" + file.write(updated_line) + else: + file.write(line) + + command = f"{mcc_path} -m {input_file_path} -d 'Output/' -o {input_file}{key}" + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + # Modify the generated shell script + script_path = f"./Output/run_{input_file}{key}.sh" + # Modify the generated shell script + script_path = f"./Output/run_{input_file}{key}.sh" + with open(script_path, 'r') as file: + script_content = file.readlines() + + # Find the line with the eval command and modify it + for i, line in enumerate(script_content): + if line.strip().startswith('eval'): + script_content[i] = f""" start_time=$(date +%s.%N) + {line.strip()} + end_time=$(date +%s.%N) + execution_time=$(echo "$end_time - $start_time" | bc) + echo "Execution time: 
$execution_time" +""" + break + + # Write the modified content back to the script + with open(script_path, 'w') as file: + file.writelines(script_content) + + + sum_exe_time = 0 + for i in range(0, NoOfIterations): + try: + subprocess.run("sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'", shell=True, check=True) + except subprocess.CalledProcessError as exc: + print(exc) + + command2 = f"taskset -c 0 ./Output/run_{input_file}{key}.sh {mrt_path}" + + try: + result = subprocess.run(command2, shell=True, capture_output=True, text=True, check=True) + output = result.stdout + + # Extract execution time from the output + match = re.search(r"Execution time: (\d+\.\d+)", output) + if match: + execution_time = float(match.group(1)) + sum_exe_time += execution_time + else: + print(f"Execution time not found in output: {output}") + except subprocess.CalledProcessError as exc: + print(f"Process failed. Returned {exc.returncode}\n{exc}") + + avg_exe_time = sum_exe_time / NoOfIterations + print(f"{avg_exe_time}", end="\t") + # delete_folder_contents("./Output") + + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m new file mode 100644 index 000000000000..c388ce8f13fa --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m @@ -0,0 +1,71 @@ +% Constants +INPUT_LENGTH = 100000000; +NLEVELS = 16; +MIN = 0.0; +MAX = 8.0; +THRESHOLD_VAL = 4.0; + +% Function to get range of vector +function output = getRangeOfVector(start, noOfSamples, increment) + output = start + (0:noOfSamples-1) * increment; +end + +% DFT function +function output = dft(input) + N = length(input); + n = 0:N-1; + k = n'; + M = exp(-1j * 2 * pi * k * n / N); + output = M * input(:); +end + +% Threshold function +function output = threshold(input, thresh) + output = input .* (abs(input) >= thresh); +end + +% Quantization 
function +function output = quantization(input, nlevels, max, min) + step = (max - min) / nlevels; + output = round((input - min) / step) * step + min; +end + +% Run Length Encoding function +function [rle, rleLength] = runLenEncoding(input) + diffs = diff([input(:); NaN]); + runs = find(diffs ~= 0); + lengths = diff([0; runs]); + values = input(runs); + rle = [values, lengths]; + rle = rle'; + rle = rle(:); + rleLength = length(rle); +end + +% Get element at index function +function elem = getElemAtIndx(rle, indx) + elem = rle(indx); +end + +% Main script +input = getRangeOfVector(0, INPUT_LENGTH, 1); + +fft_result = dft(input); + +GetThresholdReal = real(fft_result); +GetThresholdImg = imag(fft_result); + +GetThresholdReal = threshold(GetThresholdReal, THRESHOLD_VAL); +GetThresholdImg = threshold(GetThresholdImg, THRESHOLD_VAL); + +QuantOutReal = quantization(GetThresholdReal, NLEVELS, MAX, MIN); +QuantOutImg = quantization(GetThresholdImg, NLEVELS, MAX, MIN); + +[rLEOutReal, rleLengthReal] = runLenEncoding(QuantOutReal); +[rLEOutImg, rleLengthImg] = runLenEncoding(QuantOutImg); + +final1 = getElemAtIndx(rLEOutReal, 2); +final2 = getElemAtIndx(rLEOutImg, 1); + +fprintf('%f\n', final1); +fprintf('%f\n', final2); \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m new file mode 100644 index 000000000000..8b3f85c42e18 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m @@ -0,0 +1,57 @@ +% Constants +INPUT_LENGTH = 100000000; +pi = 3.14159265359; +fc = 300; +Fs = 8000; +gainForBass = 2; +gainForMid = 1.5; +gainForTreble = 0.8; +wc = 2 * pi * fc / Fs; +N = 101; + +% Input signal +input = 0:(INPUT_LENGTH-1); + +% Low-pass filter +lpf = lowPassFIRFilter(wc, N); +hamming_window = hamming(N)'; +lpf_w = lpf .* hamming_window; 
+FIRfilterResponseForLpf = conv(input, lpf_w, 'same'); +gainWithLpf = FIRfilterResponseForLpf * gainForBass; + +% High-pass filter +fc2 = 1500; +wc2 = 2 * pi * fc2 / Fs; +hpf = highPassFIRFilter(wc2, N); +hpf_w = hpf .* hamming_window; +FIRfilterResponseForHpf = conv(input, hpf_w, 'same'); +gainWithHpf = FIRfilterResponseForHpf * gainForTreble; + +% Band-pass filter +lpf2 = lowPassFIRFilter(wc2, N); +lpf2_w = lpf2 .* hamming_window; +bpf_w = lpf2_w - lpf_w; +FIRfilterResponseForBpf = conv(input, bpf_w, 'same'); +gainWithBpf = FIRfilterResponseForBpf * gainForMid; + +% Final audio +final_audio = gainWithLpf + gainWithHpf + gainWithBpf; + +% Print results +fprintf('Element at index 4: %f\n', final_audio(4)); +disp(final_audio); + +% Helper functions +function h = lowPassFIRFilter(wc, length) + n = 0:(length-1); + mid = (length - 1) / 2; + h = zeros(1, length); + h(n ~= mid) = sin(wc * (n(n ~= mid) - mid)) ./ (pi * (n(n ~= mid) - mid)); + h(mid+1) = wc / pi; +end + +function h = highPassFIRFilter(wc, length) + lpf = lowPassFIRFilter(wc, length); + h = -lpf; + h((length+1)/2) = h((length+1)/2) + 1; +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m new file mode 100644 index 000000000000..9212913001f2 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m @@ -0,0 +1,60 @@ +% Constants +INPUT_LENGTH = 20000000; +MAX_PEAKS = 1000; +N = 101; + +% Signal parameters +fc1 = 1000; +fc2 = 7500; +Fs = 8000; +distance = 950; +f_sig = 500; +f_noise = 3000; + +% Generate input signal +t = (0:0.000125:(INPUT_LENGTH-1)*0.000125)'; + +% Generate clean signal +clean_sig = sin(2*pi*f_sig*t); + +% Generate noise +noise = 0.5 * sin(2*pi*f_noise*t); + +% Create noisy signal +noisy_sig = clean_sig + noise; + +% Step 1: FIR Bandpass 
Filter +wc1 = 2 * pi * fc1 / Fs; +wc2 = 2 * pi * fc2 / Fs; + +% Design lowpass filters +n = 0:N-1; +mid = (N-1)/2; +lpf1 = (wc1/pi) * sinc(wc1*(n-mid)/pi); +lpf2 = (wc2/pi) * sinc(wc2*(n-mid)/pi); + +% Apply Hamming window +hamming_window = hamming(N)'; +lpf1_w = lpf1 .* hamming_window; +lpf2_w = lpf2 .* hamming_window; + +% Create bandpass filter +bpf_w = lpf2_w - lpf1_w; + +% Apply bandpass filter +FIRfilterResponseForBpf = filter(bpf_w, 1, noisy_sig); + +% Step 2: Artifact Removal (R-peak detection) +max_val = max(FIRfilterResponseForBpf); +height = 0.3 * max_val; + +% Find peaks +[~, r_peaks] = findpeaks(FIRfilterResponseForBpf, 'MinPeakHeight', height, 'MinPeakDistance', distance); + +% Calculate heart rate +diff_val = diff(r_peaks); +diff_mean = mean(diff_val); + +avg_hr = (60 * Fs) / diff_mean; + +fprintf('%f\n', avg_hr); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m new file mode 100644 index 000000000000..656fee46c2f3 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m @@ -0,0 +1,58 @@ +% Define constants +INPUT_LENGTH = 100000000; + +% Generate random input data +data = randi([0 1], 1, INPUT_LENGTH); + +% QAM Modulation +function symbols = qam_modulate(data) + symbols = zeros(1, length(data)/2); + for i = 1:2:length(data) + bit1 = data(i); + bit2 = data(i+1); + + if bit1 == 0 && bit2 == 0 + symbols((i+1)/2) = -1 - 1i; + elseif bit1 == 0 && bit2 == 1 + symbols((i+1)/2) = -1 + 1i; + elseif bit1 == 1 && bit2 == 0 + symbols((i+1)/2) = 1 - 1i; + elseif bit1 == 1 && bit2 == 1 + symbols((i+1)/2) = 1 + 1i; + end + end +end + +% QAM Demodulation +function bits = qam_demodulate(symbols) + bits = zeros(1, length(symbols)*2); + for i = 1:length(symbols) + symbol = symbols(i); + + if symbol == -1 - 1i + bits(2*i-1) = 0; + bits(2*i) = 0; + elseif symbol == -1 + 1i + 
bits(2*i-1) = 0; + bits(2*i) = 1; + elseif symbol == 1 - 1i + bits(2*i-1) = 1; + bits(2*i) = 0; + elseif symbol == 1 + 1i + bits(2*i-1) = 1; + bits(2*i) = 1; + end + end +end + +% Main script +rng('shuffle'); % Seed random number generator + +% Perform QAM modulation +symbols = qam_modulate(data); + +% Perform QAM demodulation +bits = qam_demodulate(symbols); + +% Print the 6th bit (equivalent to bits[5] in C, as MATLAB uses 1-based indexing) +disp(bits(6)); \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m new file mode 100644 index 000000000000..20f4c3517247 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m @@ -0,0 +1,119 @@ +% DTMF Detection in MATLAB using DFT + +% Constants +SAMPLING_FREQUENCY = 8192; % Sampling frequency +DURATION = 0.5; % Duration of the DTMF signal +N_SAMPLES = SAMPLING_FREQUENCY * DURATION; % Number of samples for the DTMF signal + +% DTMF frequencies +freqPairs = [ + 941, 1336; % 0 + 697, 1209; % 1 + 697, 1336; % 2 + 697, 1477; % 3 + 770, 1209; % 4 + 770, 1336; % 5 + 770, 1477; % 6 + 852, 1209; % 7 + 852, 1336; % 8 + 852, 1477 % 9 +]; + +% Main script +digit = 0; % DTMF digit to be generated +fs = SAMPLING_FREQUENCY; +duration = DURATION; + +% Generate the DTMF tone +dtmf_tone = generateDtmf(digit, fs, duration, freqPairs); + +% Perform DFT +[real_out, imag_out] = dft(dtmf_tone); + +% Calculate magnitudes and frequencies +N = length(dtmf_tone); +magnitudes = sqrt(real_out.^2 + imag_out.^2); +frequencies = (0:N-1)' * fs / N; +frequencies(frequencies > fs/2) = frequencies(frequencies > fs/2) - fs; + +% Find dominant frequency peaks +peaks = findDominantPeaks(frequencies, magnitudes); + +% Recover the DTMF digit +recovered_digit = recoverDtmfDigit(peaks, freqPairs); + +% Display results +if recovered_digit >= 0 + 
fprintf('Recovered DTMF digit: %d\n', recovered_digit); +else + fprintf('No DTMF digit detected.\n'); +end + +% Function definitions +function dtmf_tone = generateDtmf(digit, fs, duration, freqPairs) + f1 = freqPairs(digit + 1, 1); + f2 = freqPairs(digit + 1, 2); + t = (0:1/fs:duration-1/fs)'; + dtmf_tone = 10 * (sin(2 * pi * f1 * t) + sin(2 * pi * f2 * t)); +end + +function [real_out, imag_out] = dft(signal) + N = length(signal); + real_out = zeros(N, 1); + imag_out = zeros(N, 1); + for k = 0:N-1 + for n = 0:N-1 + angle = 2 * pi * k * n / N; + real_out(k+1) = real_out(k+1) + signal(n+1) * cos(angle); + imag_out(k+1) = imag_out(k+1) - signal(n+1) * sin(angle); + end + end +end + +function peaks = findDominantPeaks(frequencies, magnitudes) + max1 = 0; max2 = 0; + freq1 = 0; freq2 = 0; + + for i = 1:length(frequencies) + currentFreq = frequencies(i); + currentMag = magnitudes(i); + + % Check if frequency is positive + if currentFreq >= 0 + % Compare current magnitude with max1 + if currentMag > max1 + % Update max2 and freq2 with previous max1 and freq1 + max2 = max1; + freq2 = freq1; + % Update max1 and freq1 with current values + max1 = currentMag; + freq1 = currentFreq; + elseif currentMag > max2 + % Update max2 and freq2 with current values + max2 = currentMag; + freq2 = currentFreq; + end + end + end + + % Compare freq1 and freq2 to determine the order + if freq1 < freq2 + peaks = [freq1, freq2]; + else + peaks = [freq2, freq1]; + end +end + +function digit = recoverDtmfDigit(peaks, freqPairs) + for i = 1:size(freqPairs, 1) + f1 = freqPairs(i, 1); + f2 = freqPairs(i, 2); + + if (abs(peaks(1) - f1) < 10 && abs(peaks(2) - f2) < 10) || ... 
+ (abs(peaks(1) - f2) < 10 && abs(peaks(2) - f1) < 10) + digit = i - 1; % Digit found (subtract 1 because MATLAB is 1-indexed) + return; + end + end + digit = -1; % No match found +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m new file mode 100644 index 000000000000..2fae6b58462e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m @@ -0,0 +1,36 @@ +% Constants +INPUT_LENGTH = 100000000; +PI = pi; % MATLAB has pi built-in +fs = 8000; +step = 1 / fs; + +% Generate input range +input = (0:step:(INPUT_LENGTH-1)*step)'; + +% Generate clean signal +f_sig = 500; +clean_sig = sin(2 * PI * f_sig * input); + +% Generate noise signal with a delay of 2 samples +noise = [zeros(2, 1); clean_sig(1:end-2)]; + +% Create noisy signal by adding noise to clean signal +noisy_sig = clean_sig + noise; + +% LMS filter parameters +mu = 0.01; +filterSize = 32; + +% LMS filter implementation +w = zeros(filterSize, 1); +y = zeros(INPUT_LENGTH, 1); + +for n = filterSize:INPUT_LENGTH + x = noisy_sig(n:-1:n-filterSize+1); + y(n) = w' * x; + e = clean_sig(n) - y(n); + w = w + mu * e * x; +end + +% Print result +fprintf('%f\n', y); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py new file mode 100644 index 000000000000..3c5fce6ef316 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py @@ -0,0 +1,112 @@ +import os +import subprocess +import pandas as pd +# The script does the following +# Input : filename +# Output : TimeOfExecution for different IP sizes : +# Steps to run: +# Open a terminal at the path of the script -- +# Run: python ScriptForCases #3.11 validated + +# Pseudo-code: +# Iterate 
for all the input-size & update the input value in file +# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize) +# Run the respective commands on the file + +# Path to the input file +# Apps = "noiseCancelling.m" , "echoCancelling.m", "periodogram.m", "lowPassFull.m", "hearingAid.m", "lowPassFIRFilterDesign", "energyOfSignal", "audioEqualizer", "audioCompression","vibrationAnalysis", "underWaterCommunication", "voiceActivityDetection", "signalSmoothing", "targetDetection", "biomedicalSignalProcessing" +input_files = ["audioCompression", "biomedicalSignalProcessing", "dtmfDetection", "lowPassFIRFilterDesign", "noisecancelling", \ +"radarSignalProcessing", "signalSmoothing", "speakerIdentification", "targetDetection", "vibrationAnalysis", "audioEqualizer", \ +"digitalModulation", "echocancelling", "hearingAid", "lowPassFull", "periodogram2Conv1", "spaceCommunication", "spectralAnalysis", \ +"underWaterCommunication", "voiceActivityDetection"] +data = [] + +for input_file in input_files: + input_file_path = input_file + ".m" + BasePathForLLVM = "/home/local/ASURITE/megan/ForLLVM/" + OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/" + mcc_path ="/home/local/ASURITE/apkhedka/Matlab_Installation/bin/mcc" + mrt_path ="/home/local/ASURITE/apkhedka/Matlab_Runtime/R2024b/" + # OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/" + print(f"Running Application {input_file_path}") + # Construct full output path + OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") + + # Check if the Output folder exists, create it if it doesn't + if not os.path.exists(OutputPath): + os.makedirs(OutputPath) + + + def delete_folder_contents(folder_path): + for filename in os.listdir(folder_path): + file_path = os.path.join(folder_path, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + 
os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f'Failed to delete {file_path}. Reason: {e}') + + try: + with open(input_file_path, "r") as file: + lines = file.readlines() + except: + continue + + print("", end="\t") + + size_test = {"100M": 100000000} + for key, value in size_test.items(): + # Update the specific line in the file + # print("Updating for {}".format(value)) + print("\n{}".format(key), end="\t") + with open(input_file_path, "w") as file: + for line in lines: + if line.strip().startswith("INPUT_LENGTH = "): + updated_line = f"INPUT_LENGTH = {value};\n" + file.write(updated_line) + else: + file.write(line) + + command = f"{mcc_path} -m {input_file_path} -d 'Output/' -o {input_file}{key}" + print(command) + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + command2 = f"size ./Output/{input_file}{key}" + + # Execute the command + try: + result = subprocess.run( + command2, + shell=True, + capture_output=True, text=True + ) + + output_parts = result.stdout.splitlines() + if len(output_parts) > 1: + size_data = output_parts[1].split() + + data.append({ + "filename": input_file_path, + # "input size" : key, + # "text": size_data[0], + # "data": size_data[1], + # "bss": size_data[2], + # "dec": size_data[3], + # "hex": size_data[4], + "total": sum(map(int, size_data[:4])) + }) + except subprocess.CalledProcessError as exc: + print( + f"Process failed because did not return a successful return code. 
" + f"Returned {exc.returncode}\n{exc}" + ) + + df = pd.DataFrame(data) + + df.to_csv("codesize.csv", index=False) + + delete_folder_contents("./Output") + + diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m new file mode 100644 index 000000000000..47208238ddc7 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m @@ -0,0 +1,45 @@ +% Define constants +INPUT_LENGTH = 100000000; +fs = 8000; +step = 1 / fs; + +% Generate input range +input = (0:step:(INPUT_LENGTH-1)*step)'; + +% Generate clean signal +f_sig = 500; +clean_sig = sin(2 * pi * f_sig * input); + +% Generate noise signal with frequency of 3000 Hz +f_noise = 3000; +noise = 0.5 * sin(2 * pi * f_noise * input); + +% Create noisy signal by adding noise to clean signal +noisy_sig = clean_sig + noise; + +% LMS filter response function +function y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize) + w = zeros(filterSize, 1); + y = zeros(size(noisy_sig)); + + for n = 1:length(noisy_sig) + x = noisy_sig(max(1, n-filterSize+1):n); + x = [zeros(filterSize - length(x), 1); x]; + y(n) = w' * x; + e = clean_sig(n) - y(n); + w = w + mu * e * x; + y(n) = e; + end +end + +% Apply LMS filter +mu = 0.01; +filterSize = 32; +y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); + +% Apply final gain factor G1 to the LMS filter output +G1 = 1002300; +sol = G1 * y; + +% Display +disp(sol); \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m new file mode 100644 index 000000000000..4b5348e6641f --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m @@ -0,0 +1,40 @@ +% Define constants +PI = pi; +INPUT_LENGTH = 100000000; 
+fs = 8000; + +% Generate input vector +input = (0:0.000125:(INPUT_LENGTH-1)*0.000125)'; + +% Signal processing steps +f_sig = 500; +getSinDuration = 2 * PI * f_sig * input; +clean_sig = sin(getSinDuration); + +f_noise = 3000; +getNoiseSinDuration = 2 * PI * f_noise * input; +noise = sin(getNoiseSinDuration); + +scaled_noise = 0.5 * noise; +noisy_sig = clean_sig + scaled_noise; + +% Filter design +fc = 1000; +wc = 2 * PI * fc / fs; +N = 101; + +% Low-pass FIR filter +n = -(N-1)/2:(N-1)/2; +lpf = (wc / PI) * sinc(wc * n / PI); + +% Hamming window +hamming = 0.54 - 0.46 * cos(2 * PI * (0:N-1) / (N-1)); + +% Apply window to filter +lpf_w = lpf .* hamming; + +% Apply FIR filter +FIRfilterResponse = filter(lpf_w, 1, noisy_sig); + +% Display results +disp(FIRfilterResponse(2)); \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m new file mode 100644 index 000000000000..be40a7369d23 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m @@ -0,0 +1,32 @@ +% Constants +INPUT_LENGTH = 100000000; + +% Main script +t = linspace(0, INPUT_LENGTH * 0.000125, INPUT_LENGTH); + +f_sig = 500; +clean_sig = sin(2 * pi * f_sig * t); + +f_noise = 3000; +noise = 0.5 * sin(2 * pi * f_noise * t); + +noisy_sig = clean_sig + noise; + +% LMS filter response +mu = 0.01; +filterSize = 32; + +% Preallocate arrays +w = zeros(1, filterSize); +y = zeros(1, INPUT_LENGTH); + +% Implement LMS filter +for n = filterSize:INPUT_LENGTH + x = noisy_sig(n:-1:n-filterSize+1); + y(n) = w * x'; + e = clean_sig(n) - y(n); + w = w + mu * e * x; +end + +sol = 10 * y; +fprintf('%f\n', sol); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m new file mode 100644 
index 000000000000..8484900de377 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m @@ -0,0 +1,20 @@ +% Define the input length +INPUT_LENGTH = 10; + +% Generate input range +input = 0:1:(INPUT_LENGTH-1); + +% Reverse input +reverse_input = flip(input); + +% FIR Filter Response (Convolution) +conv1d = conv(input, reverse_input, 'same'); + +% Compute DFT using FFT +fft_result = fft(conv1d); + +% Compute squared magnitude +sq = abs(fft_result).^2; + +% Display results +fprintf('%f\n', sq); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m new file mode 100644 index 000000000000..3874976f3833 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m @@ -0,0 +1,79 @@ +% Constants +PI = pi; +INPUT_LENGTH = 10000; + +% Input vector, antenna weights, and filter parameters +input = linspace(0, (INPUT_LENGTH - 1) * 0.000125, INPUT_LENGTH); % Using linspace instead of getrangeofvector +weights = linspace(-90, 180, 4); % Example antenna weights +antennas = 4; +input_fc = 5; +N = 101; +fc1 = 1000; +fc2 = 7500; +Fs = 8000; + +% Generate beamformed signal +signal = beamForm(antennas, input_fc, input, weights); + +% Compute absolute values and power profile +b1 = abs(signal); +power = b1 .^ 2; % element-wise square instead of power_profile + +% Low-pass and high-pass FIR filters with Hamming window +wc1 = 2 * PI * fc1 / Fs; +filter1 = lowPassFIRFilter(wc1, N); +filter_hamming_1 = filter1 .* hamming(N, 'symmetric')'; % Using 'symmetric' Hamming window + +wc2 = 2 * PI * fc2 / Fs; +filter2 = highPassFIRFilter(wc2, N); +filter_hamming_2 = filter2 .* hamming(N, 'symmetric')'; % Using 'symmetric' Hamming window + +% Band-pass filter by subtracting the filters +bpf = filter_hamming_2 - filter_hamming_1; + +% Apply FIR filter to the power profile (use full 
convolution) +firFilterResponse = conv(power, bpf, 'full'); % Use 'full' to match C code + +% Output the filter response at index 2 (adjust if necessary) +final = firFilterResponse(2); % Adjust to match desired index in C code +fprintf('final: %f\n', final); + +% Functions + +function output = beamForm(antennas, frequency, time, weights) + phase_var = 2 * pi * frequency; + signal = zeros(antennas, length(time)); + + for i = 1:antennas + iter_args = (i - 1) * pi / 4.0; + signal(i, :) = sin(time * phase_var + iter_args); + end + + output = sum(signal .* weights', 1); % Beamforming by weighted summation +end + +function output = lowPassFIRFilter(wc, N) + midIndex = (N - 1) / 2; + output = zeros(1, N); + + for i = 1:N + if i == midIndex + 1 + output(i) = wc / pi; + else + output(i) = sin(wc * (i - midIndex - 1)) / (pi * (i - midIndex - 1)); + end + end +end + +function output = highPassFIRFilter(wc, N) + midIndex = (N - 1) / 2; + output = zeros(1, N); + + for i = 1:N + if i == midIndex + 1 + output(i) = 1 - wc / pi; + else + output(i) = -sin(wc * (i - midIndex - 1)) / (pi * (i - midIndex - 1)); + end + end +end diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m new file mode 100644 index 000000000000..0753b6e78ef9 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m @@ -0,0 +1,30 @@ +% Define constants +INPUT_LENGTH = 1000; +SAMPLE_RATE = 8000; +step = 0.000125; +WINDOW_SIZE = 3; + +% Generate input range +input = (0:step:(INPUT_LENGTH-1)*step)'; + +% Signal parameters +f_sig = 500; +f_noise = 3000; + +% Generate clean signal +clean_sig = sin(2*pi*f_sig*input); + +% Generate noise +noise = 0.5 * sin(2*pi*f_noise*input); + +% Create noisy signal +noisy_sig = clean_sig + noise; + +% Apply median filter +median_filtered = medfilt1(noisy_sig, WINDOW_SIZE); + +% Apply moving 
average filter +avg_filtered = movmean(median_filtered, WINDOW_SIZE); + +% Print the 4th element of the final result +disp(avg_filtered(4)); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m new file mode 100644 index 000000000000..6dbc04445c1e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m @@ -0,0 +1,95 @@ + + +function main() + % Define constants + INPUT_LENGTH = 100000000; + + % Generate input vector + input = getRangeOfVector(0, INPUT_LENGTH, 1); + + % Threshold + binary_sig = thresholdUp(input, INPUT_LENGTH, 50); + + % Modulate + modulated_signal = space_modulate(binary_sig, INPUT_LENGTH); + + % Transmit and receive (add noise) + received_signal = transmit_and_receive(modulated_signal, INPUT_LENGTH, 1.0); + + % Demodulate + demodulated_data = demodulate(received_signal, INPUT_LENGTH); + + % Error correction + corrected_data = error_correction(demodulated_data); + + % Decode data + decoded_data = decode_data(corrected_data); + + % Display the 9th corrected character, not a whole byte (equivalent to printing corrected_data[8] in C) + fprintf('%c\n', corrected_data(9)); +end + +% Function to generate a vector with a given range and increment +function vector = getRangeOfVector(start, length, increment) + vector = start:increment:(start + (length - 1) * increment); +end + +% Thresholding function (creates a binary string from a vector) +function output = thresholdUp(input, length, threshold) + output = char(zeros(1, length)); % Preallocate output + output(input > threshold) = '1'; + output(input <= threshold) = '0'; +end + +% Space modulation: convert binary string to modulated signal +function output = space_modulate(input, length) + output = zeros(1, length); + output(input == '1') = 1; + output(input == '0') = -1; +end + +% Transmit and receive (add noise based on sine of the 
signal) +function received_signal = transmit_and_receive(signal, length, noise_level) + received_signal = signal + sin(signal); % Add noise (sine-based in this case) +end + +% Demodulate: convert received signal back into binary data +function demodulated_data = demodulate(signal, length) + demodulated_data = char(zeros(1, length)); + demodulated_data(signal > 0) = '1'; + demodulated_data(signal <= 0) = '0'; +end + +% Error correction function +function corrected = error_correction(data) + length = numel(data); + corrected = char(zeros(1, length)); % Preallocate corrected array + corrected_index = 1; + + for i = 1:8:length + segment = data(i:i+7); + count = sum(segment == '1'); + + if mod(count, 2) == 0 + corrected(corrected_index:corrected_index+7) = segment; + else + corrected(corrected_index) = '0'; + corrected(corrected_index+1:corrected_index+7) = segment(2:8); + end + + corrected_index = corrected_index + 8; + end +end + +% Decode binary data to ASCII characters +function decoded = decode_data(binary) + length = numel(binary); + decoded = char(zeros(1, length / 8)); % Preallocate decoded data array + decoded_index = 1; + + for i = 1:8:length + byte = binary(i:i+7); + decoded(decoded_index) = char(bin2dec(byte)); + decoded_index = decoded_index + 1; + end +end diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m new file mode 100644 index 000000000000..bf7e9a18991b --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m @@ -0,0 +1,46 @@ +% Function to generate voice signature (sinusoidal wave with two frequencies) +function signal = generateVoiceSignature(freq1, freq2, duration, sample_rate) + t = linspace(0, duration, sample_rate * duration); + signal = sin(2 * pi * freq1 * t) + cos(2 * pi * freq2 * t); +end + +% Function to compute the dot product 
(correlation) between two signals +function result = correlate(signal1, signal2) + result = sum(signal1 .* signal2); +end + +% Main function +function main() + % Sample rate and duration + sample_rate = 1000; + duration = 1; + + % Generate voice signatures for Alice, Bob, Charlie + person1 = generateVoiceSignature(100, 200, duration, sample_rate); % Alice + person2 = generateVoiceSignature(150, 250, duration, sample_rate); % Bob + person3 = generateVoiceSignature(120, 180, duration, sample_rate); % Charlie + + % Generate an unknown signal (Bob's signature in this case) + unknown_signal = generateVoiceSignature(150, 250, duration, sample_rate); % Change this to test + + % Correlate unknown signal with each person's signature + max1 = correlate(person1, unknown_signal); + max2 = correlate(person2, unknown_signal); + max3 = correlate(person3, unknown_signal); + + % Store correlation results + total_maxes = [max1, max2, max3]; + + % Find the index of the maximum correlation result + [max_value, max_index] = max(total_maxes); + + % Output results + fprintf('Max Index: %d\n', max_index); + fprintf('Max Value: %f\n', max_value); + fprintf('Correlation with Alice: %f\n', max1); + fprintf('Correlation with Bob: %f\n', max2); + fprintf('Correlation with Charlie: %f\n', max3); +end + +% Call the main function +main(); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m new file mode 100644 index 000000000000..787269371ac3 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m @@ -0,0 +1,22 @@ +% Constants +INPUT_LENGTH = 100000000; + +% Generate the input range +input = getRange(0, INPUT_LENGTH, 1); + +% Compute the DFT (using built-in FFT) +fft_result = fft(input); + +% Square of absolute values +sq_abs = abs(fft_result).^2; + +% Average of the squared magnitudes +res = mean(sq_abs); + +% Display result 
+fprintf('%f\n', res); + +% getRange function +function output = getRange(start, noOfSamples, increment) + output = start + (0:noOfSamples-1) * increment; +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m new file mode 100644 index 000000000000..fda717133060 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m @@ -0,0 +1,51 @@ +% Constants +INPUT_LENGTH = 100000000; +MAX_PEAKS = 100; + +% Generate input range +input = (0:0.000125:(INPUT_LENGTH-1)*0.000125)'; + +% Generate signals +getMultiplier = 2 * pi * 10; +getSinDuration = input * getMultiplier; +sig1 = sin(getSinDuration); + +getMultiplier2 = 2 * pi * 20; +getSinDuration2 = input * getMultiplier2; +sinsig2 = sin(getSinDuration2); +sig2 = 0.5 * sinsig2; + +% Combine signals +signal = sig1 + sig2; + +% Add delayed noise +noise = [zeros(5, 1); signal(1:end-5)]; +noisy_sig = signal + noise; + +% LMS Filter +mu = 0.01; +filterSize = 20; +y = lmsFilterResponse(noisy_sig, signal, mu, filterSize); + +% Find peaks +[peaks, ~] = findpeaks(signal, 'MinPeakHeight', 1, 'MinPeakDistance', 50); + +% Display results +fprintf('%d %d\n', peaks(2), peaks(3)); + + +% LMS Filter Response Function +function output = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize) + length = numel(noisy_sig); + w = zeros(filterSize, 1); + output = zeros(length, 1); + + for n = 1:length + x = noisy_sig(max(1, n-filterSize+1):n); + x = [zeros(filterSize - numel(x), 1); x]; + y = w' * x; + e = clean_sig(n) - y; + w = w + mu * e * x; + output(n) = e; + end +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m new file mode 100644 index 
000000000000..bd4e3f04fe17 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m @@ -0,0 +1,79 @@ +% Constants +INPUT_LENGTH = 100000000; +FILTER_ORDER = 5; + +% Sampling frequency +fs = 1000; + +% Generate input vector +input = getRangeOfVector(0, INPUT_LENGTH, 1); + +% Gain calculation +getMultiplier = 2 * pi * 5; +getSinDuration = gain(input, getMultiplier); + +% Sine wave generation +signal = sine(getSinDuration); + +% Adding delay (noise) +noise = delay(signal, 5); + +% Adding signal and noise +noisy_sig = add(signal, noise); + +% Low-pass filter parameters +fc = 1000; +wc = 2 * pi * fc / 500; % wc should vary from 0 to pi + +% Low-pass FIR filter design +lpf = lowPassFIRFilter(wc, FILTER_ORDER); +hamming_window = hamming(FILTER_ORDER); + +% Apply Hamming window to the filter +lpf_w = lpf .* hamming_window; + +% FIR filter response +FIRfilterResponse = FIRFilterResponse(noisy_sig, lpf_w); + +% Thresholding operation +threshold = 0.5; +GetThresholdReal = thresholdUp(FIRfilterResponse, threshold, 0); + +% Display the result +disp(GetThresholdReal(3)); + +% Function implementations + +function vector = getRangeOfVector(start, length, increment) + vector = (start : increment : start + (length-1)*increment)'; +end + +function output = gain(input, multiplier) + output = input * multiplier; +end + +function output = sine(input) + output = sin(input); +end + +function output = delay(input, delaySamples) + output = [zeros(delaySamples, 1); input(1:end-delaySamples)]; +end + +function output = add(input1, input2) + output = input1 + input2; +end + +function filter = lowPassFIRFilter(wc, length) + n = (-(length-1)/2:(length-1)/2)'; + filter = wc/pi * sinc(wc/pi * n); +end + +function output = FIRFilterResponse(input, filter) + output = conv(input, filter, 'same'); +end + +function output = thresholdUp(input, threshold, defaultValue) + output = max(input, threshold); + output(output == threshold) = 
defaultValue; +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m new file mode 100644 index 000000000000..934c4e9501a2 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m @@ -0,0 +1,39 @@ +% Constants +INPUT_LENGTH = 10000000; +fs = 1000; + +% Generate input signal +input = 0:(INPUT_LENGTH-1); + +% Generate first sinusoidal signal +getMultiplier = 2 * pi * 50; +getSinDuration = input * getMultiplier; +sig1 = sin(getSinDuration); + +% Generate second sinusoidal signal +getMultiplier2 = 2 * pi * 120; +getSinDuration2 = input * getMultiplier2; +sig2 = 0.5 * sin(getSinDuration2); + +% Combine signals +signal = sig1 + sig2; + +% Add delayed noise +noise = [zeros(1, 5), signal(1:end-5)]; +noisy_sig = signal + noise; + +% Perform DFT +dft_output = fft(noisy_sig); + +% Calculate squared magnitude +sq_abs = abs(dft_output).^2; + +% Calculate mean +res = mean(sq_abs); + +% Apply threshold +threshold_value = 0.2; +GetThresholdReal = sq_abs .* (sq_abs >= threshold_value); + +% Display results +disp(GetThresholdReal); diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m new file mode 100644 index 000000000000..a3cc47b620c8 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m @@ -0,0 +1,57 @@ +% Constants +INPUT_LENGTH = 100000000; + +% Main script +fs = 1000; +input = getRangeOfVector(0, INPUT_LENGTH, 1); + +getMultiplier = 2 * pi * 5; +getSinDuration = gain(input, getMultiplier); + +signal = sine(getSinDuration); + +noise = delay(signal, 5); + +noisy_sig = add(signal, noise); + +threshold_value = 0.8; +GetThresholdReal = 
threshold(noisy_sig, threshold_value); + +zcr = zeroCrossCount(GetThresholdReal); + +% Display results +disp(GetThresholdReal(4)); + +% Print zero-crossing count +fprintf('Zero-crossing count: %d\n', zcr); + +% Function implementations +function vector = getRangeOfVector(start, length, increment) + vector = (start : increment : start + (length-1)*increment)'; +end + +function output = gain(input, multiplier) + output = input * multiplier; +end + +function output = sine(input) + output = sin(input); +end + +function output = delay(input, delaySamples) + output = [zeros(delaySamples, 1); input(1:end-delaySamples)]; +end + +function output = add(input1, input2) + output = input1 + input2; +end + +function output = threshold(input, thresholdValue) + output = input; + output(abs(input) < thresholdValue) = 0; +end + +function count = zeroCrossCount(input) + signs = sign(input); + count = sum(abs(diff(signs)) == 2); +end \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py new file mode 100644 index 000000000000..c271fe777e77 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py @@ -0,0 +1,43 @@ +def main() { + # GENERATE SIGNAL FOR '5' + var fs = 8000; + var duration = 0.5; + var f1 = 770; + var f2 = 1336; +# # var step = 1/fs; +# # print(step); +# # total instances = fs * duration +# var total_instances = fs * duration; +# var t = getRangeOfVector(0,4000,0.000125); +# var pi = 3.14159265359; +# var getMultiplier = 2 * pi * f1; +# var getSinDuration = gain(t, getMultiplier); +# var sig1 = sin(getSinDuration); + + +# var getMultiplier2 = 2 * pi * f2; +# var getSinDuration2 = gain(t, getMultiplier2); +# var sig2 = sin(getSinDuration2); +# var signal = sig1 + sig2; +# var finalsig = gain(signal, 0.5); + + + +# var noise = delay(signal, 5); +# var noisy_sig = signal + noise; +# var threshold = 4; + +# var 
fft_real = fft1dreal(noisy_sig); +# var fft_img = fft1dimg(noisy_sig); + +# var magnitude = square(fft_real) + square(fft_img); +# print(magnitude); +# # res = gain(sum , 1/N) +# var len1 = len(t); +# # var res = sum1 / len1; +# # print(sq_abs); +# var GetThresholdReal = threshold( magnitude , threshold); +var dtmf_sig = generateDtmf(5,duration,fs); +print(dtmf_sig); + +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py similarity index 90% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py index a7962eb4c8c0..6fe49536d05d 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py @@ -2,9 +2,9 @@ import os fileNamePath = "mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td" -BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/" +# BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/" # OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/" - +BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/" fileName = BasePathForLLVM + fileNamePath print(fileName) # Create 'Output' folder if it doesn't exist diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/HammingWindow.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/HammingWindow.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/HammingWindow.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/HammingWindow.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/LMSNoiseFilter.py 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/LMSNoiseFilter.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/LMSNoiseFilter.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/LMSNoiseFilter.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/FIRFilterHammingOpt.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/FIRFilterHammingOpt.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/FIRFilterHammingOpt.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/FIRFilterHammingOpt.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForMlirAffine.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForMlirAffine.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForMlirAffine.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForMlirAffine.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForSingleRun.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForSingleRun.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForSingleRun.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForSingleRun.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/audioCompression.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/audioCompression.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/audioCompression.py rename to 
mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/audioCompression.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/back2backDelay.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/back2backDelay.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/back2backDelay.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/back2backDelay.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py similarity index 99% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py index 974d81365d2c..673f4dc80941 100644 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py @@ -33,8 +33,5 @@ def main() { print(res); # var final1 = getElemAtIndx(fft_real , [6]); # print(final1); - - - } diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/firFilter10.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/firFilter10.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/firFilter10.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/firFilter10.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFIRFilterDesign1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFIRFilterDesign1.py similarity index 100% rename from 
mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFIRFilterDesign1.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFIRFilterDesign1.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFull1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFull1.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFull1.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFull1.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/noisecancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/noisecancelling.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/noisecancelling.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/noisecancelling.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/periodogram2Conv.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/periodogram2Conv.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Quantization.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/Quantization.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Quantization.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/Quantization.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid copy.py 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid copy.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid copy.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid copy.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/audioEqualizer.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/audioEqualizer.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/audioEqualizer.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/audioEqualizer.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/bandPassfilter.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/bandPassfilter.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/bandPassfilter.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/bandPassfilter.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/filterDesign.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/filterDesign.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/filterDesign.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/filterDesign.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/hearingAid.py 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/hearingAid.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/hearingAid.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/hearingAid.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/highPassfilter.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/highPassfilter.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/highPassfilter.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/highPassfilter.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lmsNoiseCancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lmsNoiseCancelling.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lmsNoiseCancelling.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lmsNoiseCancelling.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lowPassFilterApp.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lowPassFilterApp.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lowPassFilterApp.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lowPassFilterApp.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp.py diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp2.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp2.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp2.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp2.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/ScriptSteps.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/ScriptSteps.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/ScriptSteps.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/ScriptSteps.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/generate_dense_inputs.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/generate_dense_inputs.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/generate_dense_inputs.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/generate_dense_inputs.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/matlab_result.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/matlab_result.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/matlab_result.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/matlab_result.py diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/working_slidingwind.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/working_slidingwind.py similarity index 100% rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/working_slidingwind.py rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/working_slidingwind.py diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c deleted file mode 100644 index 24bcd1f4030b..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c +++ /dev/null @@ -1,107 +0,0 @@ -#include -#include -#include - -void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) { - for (int i = 0; i < NoOfElements; i++) { - input[i] = start + i * Increment; - } -} - -void dftReal(double* real, double* input, int length) { - for (int k = 0; k < length; k++) { - real[k] = 0; - for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; - real[k] += input[n] * cos(angle); - } - } -} - -void dftImag(double* imag, double* input, int length) { - for (int k = 0; k < length; k++) { - imag[k] = 0; - for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; - imag[k] -= input[n] * sin(angle); - } - } -} - -void threshold(double* output, double* input, double thresh, int length) { - for (int i = 0; i < length; i++) { - if (input[i] >= thresh || input[i] <= -thresh) { - output[i] = input[i]; - } else { - output[i] = 0; - } - } -} - -void quantization(double* output, double* input, int nlevels, double max, double min, int length) { - double step = (max - min) / nlevels; - for (int i = 0; i < length; i++) { - output[i] = round((input[i] - min) / step) * step + min; - } -} - -int* runLenEncoding(double* input, int length, int* rleLength) { - int* rle = (int*)malloc(length * sizeof(int)); - int index = 0; - for (int i = 1; i < length; i++) { - if (input[i] != input[i - 1]) { - rle[index++] = input[i - 1]; - rle[index++] = 1; - } else { - rle[index - 1]++; - } - } - *rleLength = index; - return rle; -} - -double getElemAtIndx(int* rle, int indx) { - return rle[indx]; -} - -int main() { - int input_length = 50000; - double 
input[50000]; - getRangeOfVector(input, 0, input_length, 1); - - int nlevels = 16; - double min = 0; - double max = 8; - - double threshold_val = 4; - - double fft10real[50000]; - double fft10img[50000]; - - dftReal(fft10real, fft10img, input, input_length); - dftImag(fft10real, fft10img, input, input_length); - - double GetThresholdReal[50000]; - double GetThresholdImg[50000]; - threshold(GetThresholdReal, fft10real, threshold_val, input_length); - threshold(GetThresholdImg, fft10img, threshold_val, input_length); - - double QuantOutReal[50000]; - double QuantOutImg[50000]; - quantization(QuantOutReal, GetThresholdReal, nlevels, max, min, input_length); - quantization(QuantOutImg, GetThresholdImg, nlevels, max, min, input_length); - - int rleLengthReal, rleLengthImg; - int* rLEOutReal = runLenEncoding(QuantOutReal, input_length, &rleLengthReal); - int* rLEOutImg = runLenEncoding(QuantOutImg, input_length, &rleLengthImg); - - double final1 = getElemAtIndx(rLEOutReal, 6); - double final2 = getElemAtIndx(rLEOutImg, 7); - printf("%f\n", final1); - printf("%f\n", final2); - - free(rLEOutReal); - free(rLEOutImg); - - return 0; -} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c deleted file mode 100644 index f95be8171a11..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include - -void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) { - for (int i = 0; i < NoOfElements; i++) { - input[i] = start + i * Increment; - } -} - -void dftReal(double* real, double* input, int length) { - for (int k = 0; k < length; k++) { - real[k] = 0; - for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; - real[k] += input[n] * cos(angle); - } - } -} - -void dftImag(double* imag, double* 
input, int length) { - for (int k = 0; k < length; k++) { - imag[k] = 0; - for (int n = 0; n < length; n++) { - double angle = 2 * M_PI * k * n / length; - imag[k] -= input[n] * sin(angle); - } - } -} - -void square(double* output, double* input, int length) { - for (int i = 0; i < length; i++) { - output[i] = input[i] * input[i]; - } -} - -double sum(double* input, int length) { - double total = 0; - for (int i = 0; i < length; i++) { - total += input[i]; - } - return total; -} - -int len(double* input) { - return sizeof(input) / sizeof(input[0]); -} - -int main() { - int input_length = 10; - double input[10]; - getRangeOfVector(input, 0, input_length, 1); - - double fft_real[10]; - double fft_img[10]; - dftReal(fft_real, input, input_length); - dftImag(fft_img, input, input_length); - - double sq_real[10]; - double sq_img[10]; - square(sq_real, fft_real, input_length); - square(sq_img, fft_img, input_length); - - double sq_abs[10]; - for (int i = 0; i < input_length; i++) { - sq_abs[i] = sq_real[i] + sq_img[i]; - } - - double sum1 = sum(sq_abs, input_length); - int len1 = input_length; - double res = sum1 / len1; - - printf("%f\n", res); - - return 0; -} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c deleted file mode 100644 index 6b8d6b49b9d4..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include -#include - -void hamming(double* window, int N) { - for (int i = 0; i < N; i++) { - window[i] = 0.54 - 0.46 * cos(2 * M_PI * i / (N - 1)); - } -} - -void lowPassFIRFilter(double* filter, double wc, int N) { - int mid = (N - 1) / 2; - for (int n = 0; n < N; n++) { - if (n == mid) { - filter[n] = wc / M_PI; - } else { - filter[n] = sin(wc * (n - mid)) / (M_PI * (n - mid)); - } - } -} - -void 
elementWiseMultiplication(double* output, double* array1, double* array2, int N) { - for (int i = 0; i < N; i++) { - output[i] = array1[i] * array2[i]; - } -} - -double getElemAtIndx(double* array, int index) { - return array[index]; -} - -int main() { - int N = 51; - double pi = 3.14159265359; - double fc1 = 500; - double Fs = 8000; - double wc1 = 2 * pi * fc1 / Fs; - - double lpf[51]; - lowPassFIRFilter(lpf, wc1, N); - - double hamming_window[51]; - hamming(hamming_window, N); - - double lpf_w[51]; - elementWiseMultiplication(lpf_w, lpf, hamming_window, N); - - double final1 = getElemAtIndx(lpf_w, 6); - - printf("%f\n", final1); - - return 0; -} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c deleted file mode 100644 index f00a7f8c1bcf..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include - -#define PI 3.14159265359 - -void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) { - for (int i = 0; i < NoOfElements; i++) { - input[i] = start + i * Increment; - } -} - -void gain(double* output, double* input, double multiplier, int length) { - for (int i = 0; i < length; i++) { - output[i] = input[i] * multiplier; - } -} - -void elementWiseAdd(double* output, double* input1, double* input2, int length) { - for (int i = 0; i < length; i++) { - output[i] = input1[i] + input2[i]; - } -} - -void elementWiseMultiply(double* output, double* input1, double* input2, int length) { - for (int i = 0; i < length; i++) { - output[i] = input1[i] * input2[i]; - } -} - -void lowPassFIRFilter(double* lpf, double wc, int N) { - int mid = (N - 1) / 2; - for (int n = 0; n < N; n++) { - if (n == mid) { - lpf[n] = wc / PI; - } else { - lpf[n] = (wc / PI) * sin(wc * (n - mid)) / (wc * (n - mid)); - } - } -} - -void 
hammingWindow(double* hamming, int N) { - for (int n = 0; n < N; n++) { - hamming[n] = 0.54 - 0.46 * cos(2 * PI * n / (N - 1)); - } -} - -void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length) { - int i, j; - for (i = 0; i < input_length; i++) { - output[i] = 0; - for (j = 0; j < filter_length; j++) { - if (i - j >= 0) { - output[i] += input[i - j] * filter[j]; - } - } - } -} - -int main() { - int fs = 8000; - int input_length = 30; - double input[30]; - getRangeOfVector(input, 0, input_length, 0.000125); - - double f_sig = 500; - double getMultiplier = 2 * PI * f_sig; - - double getSinDuration[30]; - gain(getSinDuration, input, getMultiplier, input_length); - - double clean_sig[30]; - for (int i = 0; i < input_length; i++) { - clean_sig[i] = sin(getSinDuration[i]); - } - - double f_noise = 3000; - double getNoiseSinDuration[30]; - gain(getNoiseSinDuration, input, 2 * PI * f_noise, input_length); - - double noise[30]; - for (int i = 0; i < input_length; i++) { - noise[i] = sin(getNoiseSinDuration[i]); - } - - double noisy_sig[30]; - double scaled_noise[30]; - gain(scaled_noise, noise, 0.5, input_length); - elementWiseAdd(noisy_sig, clean_sig, scaled_noise, input_length); - - double fc = 1000; - double wc = 2 * PI * fc / fs; - int N = 101; - double lpf[101]; - lowPassFIRFilter(lpf, wc, N); - - double hamming[101]; - hammingWindow(hamming, N); - - double lpf_w[101]; - elementWiseMultiply(lpf_w, lpf, hamming, N); - - double FIRfilterResponse[30]; - FIRFilterResponse(FIRfilterResponse, noisy_sig, lpf_w, input_length, N); - - for (int i = 0; i < input_length; i++) { - printf("%f\n", FIRfilterResponse[i]); - } - - return 0; -} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c deleted file mode 100644 index 235d64705c05..000000000000 --- 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include - -void getRangeOfVector(double* vector, double start, int length, double increment) { - for (int i = 0; i < length; i++) { - vector[i] = start + i * increment; - } -} - -void gain(double* output, double* input, double multiplier, int length) { - for (int i = 0; i < length; i++) { - output[i] = input[i] * multiplier; - } -} - -void sine(double* output, double* input, int length) { - for (int i = 0; i < length; i++) { - output[i] = sin(input[i]); - } -} - -void add(double* output, double* input1, double* input2, int length) { - for (int i = 0; i < length; i++) { - output[i] = input1[i] + input2[i]; - } -} - -void lmsFilterResponse(double* output, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) { - double w[32] = {0}; - for (int n = 0; n < length; n++) { - double y = 0; - for (int i = 0; i < filterSize; i++) { - if (n - i >= 0) { - y += w[i] * noisy_sig[n - i]; - } - } - double e = clean_sig[n] - y; - for (int i = 0; i < filterSize; i++) { - if (n - i >= 0) { - w[i] += mu * e * noisy_sig[n - i]; - } - } - output[n] = y; - } -} - -int main() { - int length = 100; - double fs = 8000; - double t[100]; - getRangeOfVector(t, 0, length, 0.000125); - - double f_sig = 500; - double pi = 3.14159265359; - double getMultiplier = 2 * pi * f_sig; - - double getSinDuration[100]; - gain(getSinDuration, t, getMultiplier, length); - - double clean_sig[100]; - sine(clean_sig, getSinDuration, length); - - double f_noise = 3000; - double getNoiseMultiplier = 2 * pi * f_noise; - - double getNoiseSinDuration[100]; - gain(getNoiseSinDuration, t, getNoiseMultiplier, length); - - double noise[100]; - sine(noise, getNoiseSinDuration, length); - - double noise1[100]; - gain(noise1, noise, 0.5, length); - - double noisy_sig[100]; - add(noisy_sig, clean_sig, noise1, length); - - double mu = 0.01; - int filterSize = 32; - 
double y[100]; - lmsFilterResponse(y, noisy_sig, clean_sig, mu, filterSize, length); - - double sol[100]; - gain(sol, y, 10, length); - - for (int i = 0; i < length; i++) { - printf("%f\n", sol[i]); - } - - return 0; -} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py deleted file mode 100644 index 565e5d680e96..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py +++ /dev/null @@ -1,60 +0,0 @@ -import os - -# folder1 = "./CCode" # Replace with your folder path -# Get the current Python file's directory -current_dir = os.path.dirname(os.path.abspath(__file__)) - -# Specify the folder path relative to the current directory -folderC = os.path.join(current_dir, 'CCode') -folderPy = os.path.join(current_dir, 'PyDSL') - -os.makedirs('Output', exist_ok=True) - -# Specify the output file path -output_fileC = os.path.join(current_dir, 'Output', 'NoOfLinesInC.txt') -output_filePy = os.path.join(current_dir, 'Output', 'NoOfLinesInPy.txt') - -def count_non_empty_linesInC(file_path): - with open(file_path, 'r') as file: - lines = file.readlines() - non_empty_lines = [line for line in lines if line.strip()] - return len(non_empty_lines) - -def count_valid_code_linesInPyFile(file_path): - valid_code_lines = 0 - - with open(file_path, 'r') as file: - for line in file: - stripped_line = line.strip() - # Check if the line is not empty and does not start with a comment - if stripped_line and not stripped_line.startswith('#'): - valid_code_lines += 1 - - return valid_code_lines - - -def list_files_and_write_line_counts(folder, output_path): - # List files in the folder and sort them by filename - files = sorted(os.listdir(folder)) - with open(output_path, 'w') as output: - for filename in files: - file_path = os.path.join(folder, filename) - if os.path.isfile(file_path) and filename.endswith('.c'): # 
Check if it's a text file - line_count = count_non_empty_linesInC(file_path) - output.write(f"{filename}: \t{line_count} \n") - -def list_files_and_write_line_countsPy(folder, output_path): - # List files in the folder and sort them by filename - files = sorted(os.listdir(folder)) - with open(output_path, 'w') as output: - for filename in files: - file_path = os.path.join(folder, filename) - if os.path.isfile(file_path) and filename.endswith('.py'): # Check if it's a text file - line_count = count_valid_code_linesInPyFile(file_path) - output.write(f"{filename}: \t{line_count}\n") - - -# Call the function -list_files_and_write_line_counts(folderC, output_fileC) -list_files_and_write_line_countsPy(folderPy, output_filePy) - diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt deleted file mode 100644 index c033b33ae0b5..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt +++ /dev/null @@ -1,6 +0,0 @@ -audioCompression.c: 89 -energyOfSignal.c: 62 -lowPassFIRFilterDesign.c: 42 -lowPassFull1.c: 89 -noisecancelling.c: 73 -periodogram2Conv.c: 64 diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt deleted file mode 100644 index 2e215af6f463..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt +++ /dev/null @@ -1,6 +0,0 @@ -audioCompression.py: 19 -energyOfSignal.py: 10 -lowPassFIRFilterDesign1.py: 12 -lowPassFull1.py: 20 -noisecancelling.py: 19 -periodogram2Conv.py: 11 diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt deleted file 
mode 100644 index 7cbda332fad6..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt +++ /dev/null @@ -1 +0,0 @@ -constant, add, cast, func, generic_call, mul, div, print, reshape, return, transpose, delay, gain, sub, zeroCrossCount, FIRFilterResponse, slidingWindowAvg, downsampling, upsampling, lowPassFilter, highPassFilter, fft1d, ifft1d, hamming, dct, filter, sum, sin, cos, square, fft1dreal, fft1dimg, sinc, getElemAtIndx, setElemAtIndx, lowPassFIRFilter, lmsFilter, highPassFIRFilter, getRangeOfVector, FIRFilterHammingOptimized, highPassFIRHammingOptimizedOp, threshold, quantization, lmsFilterResponse, runLenEncoding, FIRFilterResSymmOptimized, len, reverseInput, padding, FIRFilterYSymmOptimized, fft1DRealSymm, fft1DimgConjSymm, \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py deleted file mode 100644 index f19baf5fa17d..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py +++ /dev/null @@ -1,51 +0,0 @@ - -def main() { - - #Steps: - #calculate x[l] , x[-l] - #calculate conv1d of x[l] , x[-l] ie, conv1 = conv(x[l] , x[-l]) - #calculate fft : res = fft(conv1) - #then periodogram = |abs(fft)|^2 = real^2 + img^2 - - #Another way: - #pad x[l] & x[-l] with zeroes - #calculate fft of x[l] & x[-l] ie, fft_x , fft_reverse_x - #multiply them to get final real ans : fft_x * fft_reverse_x - - #size 10 - # var a10 = [ 10,20,30,40,50,60,70,80,90,100]; - var input = getRangeOfVector(0, 10, 1); - # var input = [1,2,3,4]; - # print(a10); - - #Get x[-l] ie, reverseInput & - var reverse_input = reverseInput(input); - var conv1d = FIRFilterResponse(input, reverse_input); - # var fft_real = fft1DRealSymm(conv1d); #fft1DRealSymm - var fft_real = fft1dreal(conv1d); - var fft_img = 
fft1dimg(conv1d); - # var sq = fft_real * fft_real + fft_img * fft_img; - # print(sq); - var final1 = getElemAtIndx(fft_real , [6]); - var final2 = getElemAtIndx(fft_real , [7]); - print(final1); - print(final2); - # print(conv1d); - # print(fft_real); - # print(fft_img); - #Pad the input , reverse_input for the size of conv o/p - #Calculate - # var padLen = 9 ; #10 + 10 - 1 - 10 - # var input_padded = padding(input , 0, padLen ); - - - # var fft10real = fft1dreal(input); - # var fft10img = fft1dimg(input); - - # #try input * -input - # var neg_input = gain(input , -1); - # var sq = fft10real * fft10real + fft10img * fft10img; - # print(sq); - -} - diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py deleted file mode 100644 index 1ae321552603..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py +++ /dev/null @@ -1,13 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from scipy.signal import lfilter, freqz - -def TestNumpyFuncs(): - t1 = np.arange(0,10,2); - print(t1) - -TestNumpyFuncs() - -# if __name__=="main": -# if __name__=="PythonCodeRough": -# TestNumpyFuncs() \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py deleted file mode 100644 index 3382e035fcb4..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py +++ /dev/null @@ -1,139 +0,0 @@ -# Let's count the number of tokens in the provided answer. -answer = """ -Here is a list of 30 DSP algorithms or blocks commonly implemented in software rather than hardware, along with their input and output: - -1. Fast Fourier Transform (FFT) - - Input: Time-domain signal - - Output: Frequency-domain representation - -2. 
Inverse Fast Fourier Transform (IFFT) - - Input: Frequency-domain signal - - Output: Time-domain representation - -3. Finite Impulse Response (FIR) Filter - - Input: Input signal - - Output: Filtered signal - -4. Infinite Impulse Response (IIR) Filter - - Input: Input signal - - Output: Filtered signal - -5. Discrete Fourier Transform (DFT) - - Input: Time-domain signal - - Output: Frequency-domain representation - -6. Convolution - - Input: Two signals - - Output: Convolved signal - -7. Cross-Correlation - - Input: Two signals - - Output: Cross-correlation sequence - -8. Autocorrelation - - Input: Single signal - - Output: Autocorrelation sequence - -9. Spectrogram - - Input: Time-domain signal - - Output: Time-frequency representation - -10. Wavelet Transform - - Input: Time-domain signal - - Output: Time-scale representation - -11. Inverse Wavelet Transform - - Input: Time-scale representation - - Output: Time-domain signal - -12. Hilbert Transform - - Input: Real signal - - Output: Analytic signal - -13. Short-Time Fourier Transform (STFT) - - Input: Time-domain signal - - Output: Time-frequency representation - -14. Linear Predictive Coding (LPC) - - Input: Speech signal - - Output: Linear prediction coefficients - -15. Cepstral Analysis - - Input: Speech signal - - Output: Cepstral coefficients - -16. Adaptive Filtering - - Input: Input signal, desired signal - - Output: Filtered signal - -17. Phase Vocoder - - Input: Audio signal - - Output: Time-stretched or pitch-shifted audio - -18. Echo Cancellation - - Input: Input signal, echo signal - - Output: Echo-canceled signal - -19. Noise Reduction - - Input: Noisy signal - - Output: Cleaned signal - -20. Compression - - Input: Audio signal - - Output: Compressed signal - -21. Decompression - - Input: Compressed signal - - Output: Audio signal - -22. Pitch Detection - - Input: Audio signal - - Output: Pitch frequency - -23. Modulation - - Input: Baseband signal - - Output: Modulated signal - -24. 
Demodulation - - Input: Modulated signal - - Output: Baseband signal - -25. Channel Equalization - - Input: Received signal - - Output: Equalized signal - -26. Digital Down Conversion (DDC) - - Input: High-frequency signal - - Output: Baseband signal - -27. Digital Up Conversion (DUC) - - Input: Baseband signal - - Output: High-frequency signal - -28. Amplitude Modulation (AM) - - Input: Carrier signal, modulating signal - - Output: Amplitude modulated signal - -29. Frequency Modulation (FM) - - Input: Carrier signal, modulating signal - - Output: Frequency modulated signal - -30. Quantization - - Input: Continuous signal - - Output: Discrete signal - -These blocks and algorithms represent fundamental components of DSP that are frequently implemented in software to leverage the flexibility and processing power of general-purpose processors. -""" - -# Counting the number of tokens using the tiktoken library -import tiktoken - -# Initialize tokenizer -tokenizer = tiktoken.get_encoding("gpt-3.5-turbo") - -# Tokenize the text -tokens = tokenizer.encode(answer) - -# Get the number of tokens -num_tokens = len(tokens) -num_tokens diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore deleted file mode 100644 index 1ee5b456d2cd..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore +++ /dev/null @@ -1 +0,0 @@ -periodogram/* \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore deleted file mode 100644 index 698bdfbad524..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -Output/* -dsp1 -dsp1_Debug \ No newline at end of file diff --git 
a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py deleted file mode 100644 index 2e6cd6fd2d18..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py +++ /dev/null @@ -1,40 +0,0 @@ - -def main() { - - #Steps: - #calculate x[l] - #calculate fft : fft1 = fft(conv1) - #then sq_abs = |abs(fft)|^2 = real^2 + img^2 - # sum = sum(sq_abs) - # res = gain(sum , 1/N) - - #Optimized res: - #sq1 = input * input - #sum1 = sum(sq1) - - - #size 10 - # var a10 = [ 10,20,30,40,50,60,70,80,90,100]; - var input = getRangeOfVector(0, 100, 1); - #calculate x[l] - #calculate fft : fft1 = fft(conv1) - var fft_real = fft1dreal(input); - var fft_img = fft1dimg(input); - - #then sq_abs = |abs(fft)|^2 = real^2 + img^2 - # var sq_abs = fft_real * fft_real + fft_img * fft_img ; - var sq_abs = square(fft_real) + square(fft_img) ; - # sum = sum(sq_abs) - var sum1 = sum(sq_abs); - # res = gain(sum , 1/N) - var len1 = len(input); - var res = sum1 / len1; - - print(res); - # var final1 = getElemAtIndx(fft_real , [6]); - # print(final1); - - - -} - diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py deleted file mode 100644 index c21d58de1049..000000000000 --- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import subprocess -import time - -# The script does the following -# Input : filename.py -# Output : TimeOfExecution for different IP sizes : -# Steps to run: - # Open a terminal at the path of the script -- - # Run: python ScriptForCases.py #3.11 validated - -# Pseudo-code: - # Iterate for all the input-size & update the input value in file - # Update logic -- change the 2nd 
parameter of line: var c = getRangeOfVector(init , Count, StepSize) - # Run the respective commands on the file - -# Path to the input file -# Apps = "noisecancelling.py" , "lowPassFull.py" , " audioCompression.py" , - # "back2backDelay.py" , "lowPassFIRFilterDesign.py" , -input_file_path = "noisecancelling.py" -BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/" -OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/" -# OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/" - -# Construct full output path -OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output") - -# Check if the Output folder exists, create it if it doesn't -if not os.path.exists(OutputPath): - os.makedirs(OutputPath) - -# Now OutputPath is ready for use -print("InputPath:{}".format(BasePathForLLVM)) -print(f"OutputPath: {OutputPath}") -# exit() - -# ************ Don't change unless u required -# Define the values dictionary -inputValues = { - # "10": 10, - # "100": 100, - # "1K": 1000, - # "10K": 10000, - # "20K": 20000, - # "30K": 30000, - # "40K": 40000, - # "50K": 50000, - # "100K": 100000, - "1M": 1000000, - "10M": 10000000, - "20M": 20000000, - "30M": 30000000, - "40M": 40000000, - "50M": 50000000, - "100M": 100000000, - # "1B": 1000000000 -} -NoOfIterations = 3 - -# -------------------------------------------------- -commands_base = [ - # "./dsp1 lowPassFull.py -emit=mlir-affine", - f"./dsp1 {input_file_path} -emit=llvm", - # f"{BasePathForLLVM}build/bin/dsp1 {input_file_path} -emit=llvm", - "clang-17 -O0 file.ll -o fileexe -lm", -] - -# Define the cases -cases = [ - {"affineOpt": False, "canonOpt": False, "suffix": "fileNoOpt.ll" , "exe" : "fileNoOptExe"}, - {"affineOpt": True, "canonOpt": False, "suffix": "fileAffineOpt.ll" , "exe" : "fileAffineOptExe"}, - {"affineOpt": True, "canonOpt": True, "suffix": "fileAffineCanonOpt.ll", "exe" : "fileAffineCanonOptExe"}, -] - 
-# Read the input file -with open(input_file_path, "r") as file: - lines = file.readlines() - -print("",end="\t") -for case in cases: - print(f"{case['exe']}",end="\t") - -# print("\n") -for key, value in inputValues.items(): - # Update the specific line in the file - # print("Updating for {}".format(value)) - # print("\n") - print("\n{}".format(key), end="\t") - with open(input_file_path, "w") as file: - for line in lines: - if line.strip().startswith("var input = getRangeOfVector("): - # if line.strip().startswith("var N = "): - # Replace the second parameter with the current value - updated_line = f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n" - # updated_line = f"\tvar input = getRangeOfVector(0, {value}, 1);\n" - # updated_line = f" var N = {value + 1} ;\n" - file.write(updated_line) - else: - file.write(line) - # print(lines) - - # Iterate through the cases and run the commands - for case in cases: - command_llvm = commands_base[0] - if case["affineOpt"]: - command_llvm += " -affineOpt" - if case["canonOpt"]: - command_llvm += " -canonOpt" - # command_llvm += f" 2> {case['suffix']}" #OutputPath - command_llvm += f" 2> {OutputPath}/{case['suffix']}" #OutputPath - - commands = [ - command_llvm, - # f"clang-17 -O0 {case['suffix']} -o fileexe -lm", - f"clang-17 -O0 {OutputPath}/{case['suffix']} -o {OutputPath}/{case['exe']} -lm", - ] - # print(case,end="\n") - # print("\n") - - # Iterate over each value and perform the necessary operations - for command in commands: - # Run the commands for the current case - result = subprocess.run(command, shell=True, capture_output=True, text=True) - - sum_exe_time = 0 - for i in range(0,NoOfIterations): - # for command in commands: - # # print("running command {}".format(command)) - # # os.system(command) - # result = subprocess.run(command, shell=True, capture_output=True, text=True) - - # Clear the cache to minimize caching effects - # subprocess.run("sync; echo 3 > /proc/sys/vm/drop_caches", shell=True) - try: 
- process = subprocess.run("sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'", shell=True, check=True) - # process.wait() - except subprocess.CalledProcessError: - process.terminate() - # The command to be executed - # command2 = "./fileexe" - # Limit execution to a single core - # command2 = "taskset -c 0 ./fileexe" - # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath} - command2 = f"taskset -c 0 ./Output/{case['exe']}" - - # Record the start time - start_time = time.time() - - # Execute the command - try: - subprocess.run(command2, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True) - # subprocess.run(command2, shell=True) - except subprocess.CalledProcessError as exc: - print(f"Process failed because did not return a successful return code. " - f"Returned {exc.returncode}\n{exc}") - - - # Record the end time - end_time = time.time() - - # Calculate the elapsed time - execution_time = end_time - start_time - sum_exe_time = sum_exe_time + execution_time - # print("{}".format(execution_time), end="\t") - avg_exe_time = sum_exe_time / NoOfIterations - print("{}".format(avg_exe_time), end="\t") - # print(f"The command took {execution_time} seconds to execute.") diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1 b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1 deleted file mode 100755 index 4c6877a9a9c0..000000000000 Binary files a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1 and /dev/null differ diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug deleted file mode 100755 index 2835db055a2f..000000000000 Binary files a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug and /dev/null differ diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py 
b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py new file mode 100644 index 000000000000..4290babc8e72 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py @@ -0,0 +1,8 @@ +def main() { + + var signal = [0.4, 0.3, 0.6, 1.8, 0.9, 0.5, 0.2, 0.7, 1.2, 0.8, 2.0, 1.9, 1.8, 1.7, 1.8, 1.7]; + var peaks = find_peaks(signal, 0.5, 1); + + print(peaks); + +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py new file mode 100644 index 000000000000..6de430689119 --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py @@ -0,0 +1,10 @@ +def main() { + # var input_data = [1,1,1,0,1,1,1,0]; + # print(input_data); + # var modulated_symbols = qam_modulate(input_data); + # print(modulated_symbols); + var real_part = [1, 1, 1, 1]; + var img_part = [1, -1, 1, -1]; + var decoded_data = qam_demodulate(real_part, img_part); + print(decoded_data); +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py new file mode 100644 index 000000000000..87ec2cfbab9e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py @@ -0,0 +1,12 @@ +def main() { + var input_data = [0,1,1,0,1,1,1,0]; + # print(input_data); + var real = qam_modulate_real(input_data); + var imagine = qam_modulate_imagine(input_data); + print(real); + print(imagine); + # var real_part = [1, 1, 1, 1]; + # var img_part = [1, -1, 1, -1]; + # var decoded_data = qam_demodulate(real_part, img_part); + # print(decoded_data); +} diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py new file mode 100644 index 000000000000..ae1d280d624b --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py @@ -0,0 +1,35 
@@ +def main() { + + + var fs = 8000; + # var step = 1/8000; + # print(step); + var t = getRangeOfVector(0,100, 0.000125); + var f_sig = 500; + var pi = 3.14159265359; + var getMultiplier = 2 * pi * f_sig; + # print(getMultiplier); + var getSinDuration = gain(t, getMultiplier); + # print(getSinDuration); + var clean_sig = sin(getSinDuration ); + + #define a noise signal with freq = 3000 + var f_noise = 3000; + var getNoiseSinDuration = gain(t, 2 * pi * f_noise); + var noise = sin(getNoiseSinDuration); + var noise1 = gain(noise, 0.5); + + var noisy_sig = clean_sig + noise1; + # print(noisy_sig); + # print(clean_sig); + var mu = 0.01; + var filterSize = 32; + var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); + print(y); + + #var signal = [0.4, 0.3, 0.6, 1.8, 0.9, 0.5, 0.2, 0.7, 1.2, 0.8, 2.0, 1.9, 1.8, 1.7, 1.8, 1.7]; + var peaks = find_peaks(y, 1.0, 20); + + print(peaks); + +} diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h b/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h index c13b287bdd2f..d6b6f2c0e50e 100644 --- a/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h @@ -43,6 +43,7 @@ class ExprAST { Expr_BinOp, Expr_Call, Expr_Print, + Expr_String, }; ExprAST(ExprASTKind kind, Location location) @@ -107,6 +108,20 @@ class VariableExprAST : public ExprAST { static bool classof(const ExprAST *c) { return c->getKind() == Expr_Var; } }; +/// Expression class for string val. +class StringExprAST : public ExprAST { + std::string string_val; + +public: + StringExprAST(Location loc, llvm::StringRef string_val) + : ExprAST(Expr_String, std::move(loc)), string_val(string_val) {} + + llvm::StringRef getStringVal() { return string_val; } + + /// LLVM style RTTI + static bool classof(const ExprAST *c) { return c->getKind() == Expr_String; } +}; + /// Expression class for defining a variable. 
class VarDeclExprAST : public ExprAST { std::string name; diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h b/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h index d6bc5443fb30..3d5827638470 100644 --- a/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h @@ -36,6 +36,7 @@ enum Token : int { tok_bracket_close = '}', tok_sbracket_open = '[', tok_sbracket_close = ']', + tok_comma = ',', tok_eof = -1, @@ -43,6 +44,7 @@ enum Token : int { tok_return = -2, tok_var = -3, tok_def = -4, + tok_string_val = -7, // primary tok_identifier = -5, @@ -83,6 +85,11 @@ class Lexer { return identifierStr; } + llvm::StringRef getString() { + assert(curTok == tok_string_val); + return stringVal; + } + /// Return the current number (prereq: getCurToken() == tok_number) double getValue() { assert(curTok == tok_number); @@ -145,11 +152,13 @@ class Lexer { return tok_def; if (identifierStr == "var") return tok_var; + if(identifierStr == ",") + return tok_comma; return tok_identifier; } // Number: [0-9.]+ - if (isdigit(lastChar) || lastChar == '.') { + if (lastChar == '-' || isdigit(lastChar) || lastChar == '.') { std::string numStr; do { numStr += lastChar; @@ -170,6 +179,17 @@ class Lexer { return getTok(); } + // String val: "..." + if(lastChar == '"') { + stringVal = ""; + while (isalnum((lastChar = Token(getNextChar()))) || lastChar == '_' || lastChar== ' ') { + if(lastChar == '"') break; + stringVal += (char)lastChar; + } + lastChar = Token(getNextChar()); + return tok_string_val; + } + // Check for end of file. Don't eat the EOF. if (lastChar == EOF) return tok_eof; @@ -188,6 +208,9 @@ class Lexer { /// If the current Token is an identifier, this string contains the value. std::string identifierStr; + + // If current Token is a string val + std::string stringVal; /// If the current Token is a number, this contains the value. 
double numVal = 0; diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td b/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td index 6d714f9832b6..32b8fff48410 100644 --- a/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td @@ -92,6 +92,26 @@ def ConstantOp : Dsp_Op<"constant", [Pure]> { let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// ModuloOp +//===----------------------------------------------------------------------===// + +def ModuloOp : Dsp_Op<"modulo", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "element-wise modulo operation"; + let description = [{ + The "modulo" operation performs element-wise modulo op between two tensors. + The shapes of the tensor operands are expected to match. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; +} + //===----------------------------------------------------------------------===// // AddOp //===----------------------------------------------------------------------===// @@ -284,8 +304,61 @@ def DivOp : Dsp_Op<"div", let builders = [ OpBuilder<(ins "Value":$lhs, "Value":$rhs)> ]; + + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// BitwiseAndOp +//===----------------------------------------------------------------------===// + +def BitwiseAndOp : Dsp_Op<"bitwiseand", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "bit-wise and operation"; + let description = [{ + The "bitwiseand" operation performs bit-wise and between two + tensors. The shapes of the tensor operands are expected to match. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); + let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. 
+ let hasCustomAssemblyFormat = 1; + + // Allow building a BitwiseAndOp with from the two input operands. + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; } +//===----------------------------------------------------------------------===// +// PowerOp +//===----------------------------------------------------------------------===// + +def PowOp : Dsp_Op<"pow", + [Pure, DeclareOpInterfaceMethods]>{ + let summary = "element-wise power operation for tensor"; + let description = [{ + The "pow" operation performs element-wise power for base tensor. + The accepted operand is restrict to a scaler constant. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); + let results = (outs F64Tensor); + + // has custom parser and printer for method + // FIXME: pow op should have custom assembly format + // let hasCustomAssemblyFormat = 1; + + // Allow building a PowOp from two operands. + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; + let hasVerifier = 1; + } + + //===----------------------------------------------------------------------===// // PrintOp //===----------------------------------------------------------------------===// @@ -443,6 +516,7 @@ def DelayOp : Dsp_Op<"delay" , let hasCanonicalizer = 1; let hasVerifier = 1; + } @@ -516,14 +590,56 @@ def SubOp : Dsp_Op<"sub", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Performs FFT Operation on the input"; + let description = [{ + This function accepts a 1D input array of size 2^n and returns the real part of its Fourier transform, + producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2. + Providing an array of any other size will result in a segmentation fault. 
+ }]; + + let arguments = (ins F64Tensor:$lhs); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs)> + ]; + + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// FFTImagOp +//===----------------------------------------------------------------------===// + +def FFTImagOp : Dsp_Op<"fftImag", [Pure, DeclareOpInterfaceMethods]> { + let summary = "Performs FFT Operation on the input"; + let description = [{ + This function accepts a 1D input array of size 2^n and returns the imaginary part of its Fourier transform, + producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2. + Providing an array of any other size will result in a segmentation fault. + }]; + + let arguments = (ins F64Tensor:$lhs); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs)> + ]; +} + //===----------------------------------------------------------------------===// // zeroCrossCountOp //===----------------------------------------------------------------------===// def zeroCrossCountOp : Dsp_Op<"zeroCrossCount" , [Pure , DeclareOpInterfaceMethods]> { - let summary = "shifting tensor by given number"; + let summary = "count the crosses through zero"; let description = [{ - The "zeroCrossCountOp" operation detects no of zero crosses in a given array -- + The "zeroCrossCount" operation detects no of zero crosses in a given array -- ex: [-1 , -2 , 3, 0 , 0, -2] has 2 zero-crosses }]; @@ -586,6 +702,21 @@ def FIRFilterResponseOp : Dsp_Op<"FIRFilterResponse" , let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// MedianFilterOp +//===----------------------------------------------------------------------===// + +def MedianFilterOp : Dsp_Op<"medianFilter", + [Pure, DeclareOpInterfaceMethods]> { + + let arguments = (ins F64Tensor:$input); + let results = 
(outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; +} + //===----------------------------------------------------------------------===// // SlidingWindowAvg //===----------------------------------------------------------------------===// @@ -611,6 +742,7 @@ def SlidingWindowAvgOp : Dsp_Op<"slidingWindowAvg", // Indicate that additional verification for this operation is necessary. let hasVerifier = 1; + let hasCanonicalizer = 1; } @@ -829,17 +961,8 @@ def IFFT1DOp : Dsp_Op<"ifft1d", let arguments = (ins F64Tensor:$real , F64Tensor:$img); // let results = (outs F64Tensor:$real, F64Tensor:$img); //(ins F64Tensor:$lhs, F64Tensor:$rhs); let results = (outs F64Tensor); - // let results = (outs F64Tensor:$real); - // let assemblyFormat = [{ - // `(` $input `:` type($input) `)` attr-dict `to` type(results) - // }]; - - // let assemblyFormat = [{ - // `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)` - // }]; - // Enable registering canonicalization patterns with this operation. - // let hasCanonicalizer = 1; + let hasCanonicalizer = 1; // Allow building a IFFT1D with from the input operand. let builders = [ @@ -1237,6 +1360,93 @@ def GetElemAtIndxOp : Dsp_Op<"getElemAtIndx", } + + + + +//===----------------------------------------------------------------------===// +// GetSingleElemAtIdxOp +//===----------------------------------------------------------------------===// + +def GetSingleElemAtIdxOp : Dsp_Op<"getSingleElemAtIndx", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "This one access ranked tensor at idx and returns signle tensor without dimension."; + + let arguments = (ins F64Tensor:$input, F64Tensor:$indx); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$indx)> + ]; + + // Indicate that additional verification for this operation is necessary. 
+ //let hasVerifier = 1; +} + + + + +//===----------------------------------------------------------------------===// +// Diff2MeanOptimizedOp +//===----------------------------------------------------------------------===// + +def Diff2MeanOptimizedOp : Dsp_Op<"diff2meanOpt", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "This one implemnets mean(diff(input)) as (input[-1] - input[0])/len(input). Note that mean uses length of diff, this operation consider input[-1] == input[length paramter of mean], not input[length paramter of mean-1]"; + + let arguments = (ins F64Tensor:$input, F64Tensor:$length); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$length)> + ]; +} + + + +//===----------------------------------------------------------------------===// +// FindPeaks2Diff2MeanOptimizedOp +//===----------------------------------------------------------------------===// + +def FindPeaks2Diff2MeanOptimizedOp : Dsp_Op<"findpeaks2diff2meanOpt", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "This one implemnets mean(diff(find_peaks(input))) as (peak[-1] - peak[0])/(len(peaks)-1)."; + + let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$signal, "Value":$height, "Value":$distance)> + ]; +} + + + + + + + +//===----------------------------------------------------------------------===// +// LMS2FindPeaksOptimizedOp +//===----------------------------------------------------------------------===// + +def LMS2FindPeaksOptimizedOp : Dsp_Op<"lms2findPeaks", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Fusing loop for LMSFilterResponseOp and FindPeaksOp"; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$height, F64Tensor:$distance); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, 
"Value":$filterLen, "Value":$height, "Value":$distance)> + ]; +} + + + + + + //===----------------------------------------------------------------------===// // SetElemAtIndxOp //===----------------------------------------------------------------------===// @@ -1909,4 +2119,856 @@ def FFT1DImgConjSymmOp : Dsp_Op<"fft1DimgConjSymm", let hasVerifier = 1; } -#endif // TOY_OPS + + +//===----------------------------------------------------------------------===// +// ShiftRightOp +//===----------------------------------------------------------------------===// + +def ShiftRightOp : Dsp_Op<"shiftRight", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Bit-wise shift right a by b"; + let description = [{ + The shift right block shifts each element of a vector by right-hand side integer. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); //Problem: how can we do logical shift with floating point tensor? + let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + // let assemblyFormat = [{ + // `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results) + // }]; + // Allow building a MulOp with from the two input operands. + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + + // let hasVerifier = 1; + } + + +//===----------------------------------------------------------------------===// +// MatmulOp +//===----------------------------------------------------------------------===// + +def MatmulOp : Dsp_Op<"matmul", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Matrix multiplication a * b"; + let description = [{ + Matrix multiplication between the left-hand side and right-hand side. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); //Problem: how can we do logical shift with floating point tensor? 
+ let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + // let assemblyFormat = [{ + // `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results) + // }]; + // Allow building a MulOp with from the two input operands. + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + + let hasVerifier = 1; + } + + + + +//===----------------------------------------------------------------------===// +// Conv2DOp +//===----------------------------------------------------------------------===// + +def Conv2DOp : Dsp_Op<"conv2d", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect convolution 2d operation"; + let description = [{ + Performs a 2D convolution on the input tensor using specified kernel. + }]; + + let arguments = (ins F64Tensor:$input, F64Tensor:$kernel, F64Tensor:$bias); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$kernel, "Value":$bias)> + ]; + + let extraClassDeclaration = [{ + static StringRef getStrideName() { return "stride"; } + static StringRef getPaddingName() { return "padding"; } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ThresholdUpOp +//===----------------------------------------------------------------------===// + +def ThresholdUpOp : Dsp_Op<"thresholdUp", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Converts all the values above threhold to 1 else 0"; + let description = [{ + Converts all the values above threhold to 1 if returnoriginal is false, returns original value if returnoriginal is true else 0 + }]; + + let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal); + + let results = (outs F64Tensor:$output); + + let 
builders = [ + OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)> + ]; + let hasVerifier = 1; + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// GenerateDTMFOp +//===----------------------------------------------------------------------===// + +def GenerateDTMFOp : Dsp_Op<"generateDtmf", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Generates signal for the digit input."; + let description = [{ + Converts the digit into a signal. + }]; + + let arguments = (ins F64Tensor:$digit, F64Tensor:$duration, F64Tensor:$fs); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$digit, "Value":$duration, "Value":$fs)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// FFTFreqOp +//===----------------------------------------------------------------------===// + +def FFTFreqOp : Dsp_Op<"fftfreq", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Based on --> np.fft.fftfreq(N, d=1/fs)"; + let description = [{ + Generates frequency bins for fft. + }]; + + let arguments = (ins F64Tensor:$length, F64Tensor:$distance); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$length, "Value":$distance)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// FindDominantPeaksOp +//===----------------------------------------------------------------------===// + +def FindDominantPeaksOp : Dsp_Op<"findDominantPeaks", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "finds two dominant peaks in the frequency array."; + let description = [{ + Designed for the DTMF Application. 
+ }]; + + let arguments = (ins F64Tensor:$frequencies, F64Tensor:$magnitudes); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$frequencies, "Value":$magnitudes)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// RecoverDTMFDigitOp +//===----------------------------------------------------------------------===// + +def RecoverDTMFDigitOp : Dsp_Op<"recoverDtmfDigit", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Recover digit from given frequency pair else return -1."; + let description = [{ + -----------------------------. + }]; + + let arguments = (ins F64Tensor:$frequencies, F64Tensor:$freqPairs); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$frequencies, "Value":$freqPairs)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// FFTCombineOp +//===----------------------------------------------------------------------===// + +def FFTCombineOp : Dsp_Op<"fftCombine", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Combines the real and imaginary parts to obtain an amplitude array."; + let description = [{ + This function takes arrays of real and imaginary parts of frequency components + and computes the amplitude of each frequency. The amplitudes are returned as an + array representing the magnitudes of the corresponding complex values. 
+ }]; + + let arguments = (ins F64Tensor:$real, F64Tensor:$imag); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$real, "Value":$imag)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// GenerateVoiceSignatureOp +//===----------------------------------------------------------------------===// + +def GenerateVoiceSignatureOp : Dsp_Op<"generateVoiceSignature", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Generate voice signature of the speaker."; + let description = [{ + This function takes two frequencies as input along with the duration and + the sampling frequency, and generate the voice signature of the speaker. + }]; + + let arguments = (ins F64Tensor:$f1, F64Tensor:$f2, F64Tensor:$duration, F64Tensor:$fs); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$f1, "Value":$f2, "Value":$duration, "Value":$fs)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// SqrtOp +//===----------------------------------------------------------------------===// + +def SqrtOp : Dsp_Op<"sqrt", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Takes the element wise square root of the tensor."; + let description = [{ + Takes the element wise square root of the tensor. 
+ }]; + + let arguments = (ins F64Tensor:$input); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// QamModulateRealOp real +//===----------------------------------------------------------------------===// + +def QamModulateRealOp : Dsp_Op<"qam_modulate_real", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect qam modulation real operation"; + let description = [{ + Performs a digital modulation on input tensor. + }]; + + let arguments = (ins F64Tensor:$signal); + + let results = (outs F64Tensor:$real); + + + let builders = [ + OpBuilder<(ins "Value":$signal)> + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// QamModulateImgOp imagine +//===----------------------------------------------------------------------===// + +def QamModulateImgOp : Dsp_Op<"qam_modulate_imagine", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect qam modulation imagine operation"; + let description = [{ + Performs a digital modulation on input tensor. + }]; + + let arguments = (ins F64Tensor:$signal); + + let results = (outs F64Tensor:$imagine); + + + let builders = [ + OpBuilder<(ins "Value":$signal)> + ]; + + let hasVerifier = 1; +} + + +//===----------------------------------------------------------------------===// +// QamDemodulateOp +//===----------------------------------------------------------------------===// + +def QamDemodulateOp : Dsp_Op<"qam_demodulate", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect qam demodulation operation"; + let description = [{ + Takes in 2 arrays, one is the real part of a signal the other is the imaginary part of a signal. + Returns the decoded binary output. 
+ }]; + + let arguments = (ins F64Tensor:$real, F64Tensor:$imagine); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$real, "Value":$imagine)> + ]; + + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// FindPeaksOp +//===----------------------------------------------------------------------===// + +def FindPeaksOp : Dsp_Op<"find_peaks", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Find peaks from the signal. Since the number of peaks vary, the output is initialized as -1."; + let description = [{ + Input: signal, height, distance + Output: indices of peaks. All of none-used values are initialized as -1, so the length can be measured by this. + + Functionality: check the below original python-level code. + + def manual_find_peaks(signal, height, distance): + peaks = [] + for i in range(1, len(signal) - 1): + # Check if the current point is higher than its neighbors + if signal[i] > signal[i-1] and signal[i] > signal[i+1]: + # Check if it meets the height criterion + if signal[i] >= height: + # Check if it's far enough from the previously detected peak + if not peaks or i - peaks[-1] >= distance: + peaks.append(i) + return np.array(peaks) + + }]; + + let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance); + let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + // let assemblyFormat = [{ + // `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results) + // }]; + // Allow building a MulOp with from the two input operands. + + let builders = [ + OpBuilder<(ins "Value":$signal, "Value":$height, "Value":$distance)> + ]; + + // Indicate that the operation has a custom parser and printer method. 
+ // let hasCustomAssemblyFormat = 1; + + // let hasVerifier = 1; + let hasCanonicalizer = 1; + + } + + +//===----------------------------------------------------------------------===// +// BeamFormOp +//===----------------------------------------------------------------------===// + +def BeamFormOp : Dsp_Op<"beam_form", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect Beam forming operation"; + let description = [{ + Performs a beam forming signal encoding on the input tensor using specified weights. + }]; + + let arguments = (ins I64Attr:$antennas, I64Attr:$freq, F64Tensor:$time, F64Tensor:$weights); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "int64_t":$antennas, "int64_t":$freq, "Value":$time, "Value":$weights)> + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// SpaceModulateOp +//===----------------------------------------------------------------------===// + +def SpaceModulateOp : Dsp_Op<"space_modulate", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect space modulation operation"; + let description = [{ + Takes in string input and convert it to binary. + }]; + + let arguments = (ins F64Tensor:$signal); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$signal)> + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// SpaceDemodulateOp +//===----------------------------------------------------------------------===// + +def SpaceDemodulateOp : Dsp_Op<"space_demodulate", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect space demodulation operation"; + let description = [{ + Takes in binary input and convert it to string. 
+ }]; + + let arguments = (ins F64Tensor:$binary); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$binary)> + ]; + + let hasVerifier = 1; + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// SpaceErrCorrectionOp +//===----------------------------------------------------------------------===// + +def SpaceErrCorrectionOp : Dsp_Op<"space_err_correction", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Dsp dialect space error correction operation"; + let description = [{ + Remove noise operation for signal transmission in space. + }]; + + let arguments = (ins F64Tensor:$signal); + + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$signal)> + ]; + + let hasVerifier = 1; +} + + +//===----------------------------------------------------------------------===// +// MaxOp +//===----------------------------------------------------------------------===// + +def MaxOp : Dsp_Op<"max", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Find maximum value in tensor"; + let description = [{ + This operation finds and returns the maximum value of the tensor. + }]; + + let arguments = (ins F64Tensor:$input); + let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + // let assemblyFormat = [{ + // `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results) + // }]; + // Allow building a MulOp with from the two input operands. + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; + + // Indicate that the operation has a custom parser and printer method. 
+ // let hasCustomAssemblyFormat = 1; + + // let hasVerifier = 1; + let hasCanonicalizer = 1; + } + + +//===----------------------------------------------------------------------===// +// MeanOp +//===----------------------------------------------------------------------===// + +def MeanOp : Dsp_Op<"mean", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Find mean value of tensor. Requires (input, length)."; + let description = [{ + This operation finds and returns the mean value of the tensor. + Note that it requires length. + It would be better if we can implement both versions + - no length argument -> automatically use the length of tensor + - with length argument -> use the provided length + }]; + + let arguments = (ins F64Tensor:$input, F64Tensor:$length); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$length)> + ]; + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + + // let hasVerifier = 1; + let hasCanonicalizer = 1; + } + + + +//===----------------------------------------------------------------------===// +// DiffOp +//===----------------------------------------------------------------------===// + +def DiffOp : Dsp_Op<"diff", [Pure , DeclareOpInterfaceMethods]> { + let summary = "np.diff (out[i] = a[i+1] - a[i]). It receives second argument as length"; + let description = [{ + This operation returns a tensor that contains diff (out[i] = a[i+1] - a[i]). + The length of the output tensor is len(input)-1, regardless of length parameter. + Note that it requires length. 
+ It would be better if we can implement both versions + - no length argument -> automatically use the length of tensor + - with length argument -> use the provided length + }]; + + let arguments = (ins F64Tensor:$input, F64Tensor:$length); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$length)> + ]; + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + + // let hasVerifier = 1; + } + +//===----------------------------------------------------------------------===// +// AbsOp +//===----------------------------------------------------------------------===// + +def AbsOp : Dsp_Op<"abs", [Pure , DeclareOpInterfaceMethods]> { + let summary = "np.abs -> calculate the absolute value element-wise"; + let description = [{ + This operation calculates the absolute value element-wise. + }]; + + let arguments = (ins F64Tensor:$input); + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; + } + + +//===----------------------------------------------------------------------===// +// ArgMaxOp +//===----------------------------------------------------------------------===// + +def ArgMaxOp : Dsp_Op<"argmax", [Pure , DeclareOpInterfaceMethods]> { + let summary = "np.argmax -> find the indices of the maximum values along a specifies axis in an array."; + let description = [{ + This operation find the indices of the maximum values along a specifies axis in an array. 
+ }]; + + let arguments = (ins F64Tensor:$input, I64Attr:$axis); + let results = (outs F64Tensor:$output); + + let builders = [ + OpBuilder<(ins "Value":$input, "int64_t":$axis)> + ]; + } +//===----------------------------------------------------------------------===// +// NormalizeOp +//===----------------------------------------------------------------------===// + +def NormalizeOp : Dsp_Op<"normalize", [Pure , DeclareOpInterfaceMethods]> { + let summary = "normalize operation."; + let description = [{ + normalization dsp operation. + }]; + + let arguments = (ins F64Tensor:$signal); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$signal)> + ]; + + let hasCanonicalizer = 1; + } + +//===----------------------------------------------------------------------===// +// NormLMSFilterResponseOptimizeOp +//===----------------------------------------------------------------------===// + + +def NormLMSFilterResponseOptimizeOp : Dsp_Op<"norm_LMSFilterResponse_opt", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "LMS filter Response + norm optimize"; + let description = [{ + norm + lmsfilter + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen); + + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen)> + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// Median2SlidingOptimizedOp +//===----------------------------------------------------------------------===// + +def Median2SlidingOptimizedOp : Dsp_Op<"median2slidingOp", + [Pure, DeclareOpInterfaceMethods]> { + + let arguments = (ins F64Tensor:$input); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; +} + + +//===----------------------------------------------------------------------===// +// FIRFilterResSymmThresholdUpOptimizedOp 
+//===----------------------------------------------------------------------===// +def FIRFilterResSymmThresholdUpOptimizedOp : Dsp_Op<"FIRFilterResSymmThresholdUpOptimizedOp" , + [Pure , DeclareOpInterfaceMethods]> { + let summary = "FIRFilterResSymmThresholdUpOptimizedOp"; + let description = [{ + The "FIRFilterResSymmThresholdUpOptimizedOp" operation is basically the convolution of input 1-D and filter vector when the filter is symmetrical ie, + h[0] = h[L-1] , h[1] = h[L-2] .. h[middle ie, (L-1)/2] is single element & filter length is odd always + ex: x[n] = [2,1,3,2,4], h[l] = [1, -1 ,1] then + y[n] = sum(h[k] . x[n-k]) k=0 to L-1 can be rewritten as + y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[(L-1)/2].x[n-(L-1)/2] , k=0 to (L-1)/2 - 1 + + Basically, we are trying to reduce the number of load/store operations by half -- so that we can reduce the operations + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$threshold, F64Tensor:$returnoriginal); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$threshold, "Value":$returnoriginal)> + ]; + +} + +//===----------------------------------------------------------------------===// +// FFTOp +//===----------------------------------------------------------------------===// + +def FFTOp : Dsp_Op<"fft", [Pure, DeclareOpInterfaceMethods]> { + let summary = "Performs FFT Operation on the input"; + let description = [{ + This function accepts a 1D input array of size 2^n and returns the real and imaginary parts of its Fourier transform, + producing two output arrays of the same size. The function is designed to work exclusively with input sizes that are powers of 2. + Providing an array of any other size will result in a segmentation fault.
+ }]; + + let arguments = (ins F64Tensor:$lhs); + let results = (outs F64Tensor:$real, F64Tensor:$imag); + + let builders = [ + OpBuilder<(ins "Value":$lhs)> + ]; + + // let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// FFTAbsOp +//===----------------------------------------------------------------------===// + +def FFTAbsOp : Dsp_Op<"FFTAbsOp", [Pure, DeclareOpInterfaceMethods]> { + let summary = "FFTAbsOp"; + let description = [{ + ------------------ + }]; + + let arguments = (ins F64Tensor:$input); + let results = (outs F64Tensor:$amplitude); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; +} + +//===----------------------------------------------------------------------===// +// DFTAbsOp +//===----------------------------------------------------------------------===// + +def DFTAbsOp : Dsp_Op<"DFTAbsOp", [Pure, DeclareOpInterfaceMethods]> { + let summary = "DFTAbsOp"; + let description = [{ + ------------------ + }]; + + let arguments = (ins F64Tensor:$input); + let results = (outs F64Tensor:$amplitude); + + let builders = [ + OpBuilder<(ins "Value":$input)> + ]; +} + +//===----------------------------------------------------------------------===// +// DFTAbsThresholdUpOp +//===----------------------------------------------------------------------===// + +def DFTAbsThresholdUpOp : Dsp_Op<"DFTAbsThresholdUpOp", [Pure, DeclareOpInterfaceMethods]> { + let summary = "DFTAbsThresholdUpOp"; + let description = [{ + ------------------ + }]; + + let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal); + let results = (outs F64Tensor:$amplitude); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)> + ]; + let hasVerifier = 1; +} + + +//===----------------------------------------------------------------------===// +// CorrelateOp +//===----------------------------------------------------------------------===// + +def 
CorrelateOp : Dsp_Op<"correlate", [Pure , DeclareOpInterfaceMethods]> { + let summary = "This implements scp.correlate. Right now we assume size of lhs == size of rhs"; + let description = [{ + This operation computes and returns the correlation of the lhs and rhs tensors. + }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); + let results = (outs F64Tensor); + + // Indicate that the operation has a custom parser and printer method. + // let hasCustomAssemblyFormat = 1; + // let assemblyFormat = [{ + // `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results) + // }]; + // Allow building a CorrelateOp from the two input operands. + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; + + // let hasVerifier = 1; + } + + +//===----------------------------------------------------------------------===// +// SetSingleElemAtIdxOp +//===----------------------------------------------------------------------===// + +def SetSingleElemAtIdxOp : Dsp_Op<"setSingleElemAtIndx", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "This one accesses ranked tensor at idx and sets single tensor with or without dimension."; + + let arguments = (ins F64Tensor:$input, F64Tensor:$indx , F64Tensor:$val); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$indx, "Value":$val)> + ]; + + // Indicate that additional verification for this operation is necessary. + //let hasVerifier = 1; +} + + +//===----------------------------------------------------------------------===// +// Correl2MaxOptimizedOp +//===----------------------------------------------------------------------===// + +def Correl2MaxOptimizedOp : Dsp_Op<"correl2max", [Pure , DeclareOpInterfaceMethods]> { + let summary = "Find the index of maximum value in tensor. outputs with floating point-converted index"; + let description = [{ + This operation fuses loops for correlate and max.
+ }]; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs)> + ]; + } + + + + +//===----------------------------------------------------------------------===// +// LMSFilterResponse2GainOp +//===----------------------------------------------------------------------===// + +def LMSFilterResponse2GainOp : Dsp_Op<"lmsFilterResponse2gain", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Fusing loop for LMSFilterResponseOp and GainOp"; + + let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$gain); + let results = (outs F64Tensor); + + let builders = [ + OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen, "Value":$gain)> + ]; +} + + + + +#endif // TOY_OPS + + + diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h b/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h index 42bd653b156c..a9d673f8f5f0 100644 --- a/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h @@ -167,6 +167,16 @@ class Parser { return v; } + /// stringexpr ::= '"' string_val '"' + std::unique_ptr parseStringExpr() { + auto loc = lexer.getLastLocation(); + + std::string string_val(lexer.getString()); + lexer.consume(tok_string_val); + + return std::make_unique(std::move(loc), string_val); + } + /// identifierexpr /// ::= identifier /// ::= identifier '(' expression ')' @@ -175,7 +185,7 @@ class Parser { auto loc = lexer.getLastLocation(); lexer.getNextToken(); // eat identifier. - + if (lexer.getCurToken() != '(') // Simple variable ref.
return std::make_unique(std::move(loc), name); @@ -216,6 +226,7 @@ class Parser { /// ::= numberexpr /// ::= parenexpr /// ::= tensorliteral + /// ::= stringexpr std::unique_ptr parsePrimary() { switch (lexer.getCurToken()) { default: @@ -230,6 +241,8 @@ class Parser { return parseParenExpr(); case '[': return parseTensorLiteralExpr(); + case tok_string_val: + return parseStringExpr(); case ';': return nullptr; case '}': @@ -334,7 +347,11 @@ class Parser { if (!type) type = std::make_unique(); lexer.consume(Token('=')); - auto expr = parseExpression(); + std::unique_ptr expr; + if(lexer.getCurToken() == tok_string_val) { + expr = parseStringExpr(); + } + else expr = parseExpression(); return std::make_unique(std::move(loc), std::move(id), std::move(*type), std::move(expr)); } @@ -465,6 +482,8 @@ class Parser { return 40; case '/': return 40; + case '^': + return 60; default: return -1; } diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir b/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir new file mode 100644 index 000000000000..47dde27be52e --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir @@ -0,0 +1,821 @@ +module { + func.func @main() { + %c2047_i64 = arith.constant 2047 : i64 + %cst = arith.constant 0.49971199035644531 : f64 + %c11 = arith.constant 11 : index + %c10 = arith.constant 10 : index + %cst_0 = arith.constant 3.000000e+00 : f64 + %c-1 = arith.constant -1 : index + %cst_1 = arith.constant 7.700000e+02 : f64 + %cst_2 = arith.constant 1.209000e+03 : f64 + %cst_3 = arith.constant 6.970000e+02 : f64 + %cst_4 = arith.constant 1.336000e+03 : f64 + %cst_5 = arith.constant 9.410000e+02 : f64 + %cst_6 = arith.constant 1.220000e-04 : f64 + %cst_7 = arith.constant 4.096000e+03 : f64 + %cst_8 = arith.constant -2.000000e+00 : f64 + %cst_9 = arith.constant 3.1415926535897931 : f64 + %c12 = arith.constant 12 : index + %cst_10 = arith.constant 0.000000e+00 : f64 + %c1_i64 = arith.constant 1 : i64 + %c0_i64 = arith.constant 0 
: i64 + %c4096 = arith.constant 4096 : index + %c1 = arith.constant 1 : index + %cst_11 = arith.constant 1.477000e+03 : f64 + %cst_12 = arith.constant 8.520000e+02 : f64 + %cst_13 = arith.constant 1.000000e+01 : f64 + %cst_14 = arith.constant 6.2831853071800001 : f64 + %cst_15 = arith.constant 8.192000e+03 : f64 + %cst_16 = arith.constant 5.000000e-01 : f64 + %cst_17 = arith.constant 9.000000e+00 : f64 + %c0 = arith.constant 0 : index + %alloc = memref.alloc() : memref + %alloc_18 = memref.alloc() : memref<1xf64> + %alloc_19 = memref.alloc() : memref<10x2xf64> + %alloc_20 = memref.alloc() : memref<2xf64> + %alloc_21 = memref.alloc() : memref<4096xf64> + %alloc_22 = memref.alloc() : memref + %alloc_23 = memref.alloc() : memref + %alloc_24 = memref.alloc() : memref<4096xf64> + %alloc_25 = memref.alloc() : memref<4096xf64> + %alloc_26 = memref.alloc() : memref<4096xf64> + %alloc_27 = memref.alloc() : memref<4096xf64> + %alloc_28 = memref.alloc() : memref + %alloc_29 = memref.alloc() : memref + %alloc_30 = memref.alloc() : memref + affine.store %cst_17, %alloc_30[] : memref + affine.store %cst_16, %alloc_29[] : memref + affine.store %cst_15, %alloc_28[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.divf %7, %cst_15 : f64 + %9 = arith.mulf %8, %cst_12 : f64 + %10 = arith.mulf %9, %cst_14 : f64 + %11 = math.sin %10 : f64 + %12 = arith.mulf %8, %cst_11 : f64 + %13 = arith.mulf %12, %cst_14 : f64 + %14 = math.sin %13 : f64 + %15 = arith.addf %11, %14 : f64 + %16 = arith.mulf %15, %cst_13 : f64 + memref.store %16, %alloc_27[%arg0] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) { + %10 = arith.index_cast %arg1 : index to i64 + %11 = arith.shli %c1_i64, %10 : i64 + %12 = arith.andi %6, %11 : i64 + %13 = arith.cmpi ne, %12, %c0_i64 : i64 
+ %14 = arith.subi %c11, %arg1 : index + %15 = arith.index_cast %14 : index to i64 + %16 = arith.shli %c1_i64, %15 : i64 + %17 = arith.select %13, %16, %c0_i64 : i64 + %18 = arith.ori %arg2, %17 : i64 + scf.yield %18 : i64 + } + %8 = arith.index_cast %7 : i64 to index + %9 = memref.load %alloc_27[%arg0] : memref<4096xf64> + memref.store %9, %alloc_26[%8] : memref<4096xf64> + memref.store %cst_10, %alloc_25[%8] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c12 step %c1 { + %6 = arith.shli %c1, %arg0 : index + %7 = arith.shli %6, %c1 : index + scf.for %arg1 = %c0 to %c4096 step %7 { + scf.for %arg2 = %c0 to %6 step %c1 { + %8 = arith.addi %arg1, %arg2 : index + %9 = arith.addi %8, %6 : index + %10 = arith.index_cast %arg2 : index to i64 + %11 = arith.sitofp %10 : i64 to f64 + %12 = arith.index_cast %7 : index to i64 + %13 = arith.sitofp %12 : i64 to f64 + %14 = arith.divf %11, %13 : f64 + %15 = arith.mulf %14, %cst_8 : f64 + %16 = arith.mulf %15, %cst_9 : f64 + %17 = math.cos %16 : f64 + %18 = math.sin %16 : f64 + %19 = memref.load %alloc_26[%9] : memref<4096xf64> + %20 = memref.load %alloc_25[%9] : memref<4096xf64> + %21 = arith.mulf %19, %17 : f64 + %22 = arith.mulf %20, %18 : f64 + %23 = arith.subf %21, %22 : f64 + %24 = arith.mulf %19, %18 : f64 + %25 = arith.mulf %20, %17 : f64 + %26 = arith.addf %24, %25 : f64 + %27 = memref.load %alloc_26[%8] : memref<4096xf64> + %28 = memref.load %alloc_25[%8] : memref<4096xf64> + %29 = arith.addf %27, %23 : f64 + %30 = arith.addf %28, %26 : f64 + %31 = arith.subf %27, %23 : f64 + %32 = arith.subf %28, %26 : f64 + %33 = arith.mulf %29, %29 : f64 + %34 = arith.mulf %30, %30 : f64 + %35 = arith.addf %33, %34 : f64 + %36 = math.sqrt %35 : f64 + %37 = arith.mulf %31, %31 : f64 + %38 = arith.mulf %32, %32 : f64 + %39 = arith.addf %37, %38 : f64 + %40 = math.sqrt %39 : f64 + memref.store %29, %alloc_26[%8] : memref<4096xf64> + memref.store %30, %alloc_25[%8] : memref<4096xf64> + memref.store %31, %alloc_26[%9] : memref<4096xf64> 
+ memref.store %32, %alloc_25[%9] : memref<4096xf64> + memref.store %36, %alloc_24[%8] : memref<4096xf64> + memref.store %40, %alloc_24[%9] : memref<4096xf64> + } + } + } + affine.store %cst_7, %alloc_23[] : memref + affine.store %cst_6, %alloc_22[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.cmpi sle, %6, %c2047_i64 : i64 + scf.if %8 { + %9 = arith.divf %7, %cst : f64 + memref.store %9, %alloc_21[%arg0] : memref<4096xf64> + } else { + %9 = arith.subf %7, %cst_7 : f64 + %10 = arith.divf %9, %cst : f64 + memref.store %10, %alloc_21[%arg0] : memref<4096xf64> + } + } + %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) { + %6 = memref.load %alloc_21[%arg0] : memref<4096xf64> + %7 = memref.load %alloc_24[%arg0] : memref<4096xf64> + %8 = arith.cmpf ogt, %6, %cst_10 : f64 + %9:4 = scf.if %8 -> (f64, f64, f64, f64) { + %10 = arith.cmpf ogt, %7, %arg1 : f64 + %11 = arith.select %10, %7, %arg1 : f64 + %12 = arith.select %10, %6, %arg3 : f64 + %13:2 = scf.if %10 -> (f64, f64) { + scf.yield %arg1, %arg3 : f64, f64 + } else { + %14 = arith.cmpf ogt, %7, %arg2 : f64 + %15 = arith.select %14, %7, %arg2 : f64 + %16 = arith.select %14, %6, %arg4 : f64 + scf.yield %15, %16 : f64, f64 + } + scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64 + } else { + scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64 + } + memref.store %0#2, %alloc_20[%c0] : memref<2xf64> + memref.store %0#3, %alloc_20[%c1] : memref<2xf64> + affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64> + affine.store %cst_4, 
%alloc_19[2, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64> + %1 = memref.load %alloc_20[%c0] : memref<2xf64> + %2 = memref.load %alloc_20[%c1] : memref<2xf64> + affine.store %c-1, %alloc[] : memref + scf.for %arg0 = %c0 to %c10 step %c1 { + %6 = memref.load %alloc[] : memref + %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64> + %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64> + %9 = arith.subf %7, %1 : f64 + %10 = arith.subf %8, %2 : f64 + %11 = math.absf %9 : f64 + %12 = math.absf %10 : f64 + %13 = arith.cmpf ole, %11, %cst_0 : f64 + %14 = arith.cmpf ole, %12, %cst_0 : f64 + %15 = arith.andi %13, %14 : i1 + %16 = arith.select %15, %arg0, %6 : index + memref.store %16, %alloc[] : memref + } + %3 = memref.load %alloc[] : memref + %4 = arith.index_cast %3 : index to i64 + %5 = arith.sitofp %4 : i64 to f64 + memref.store %5, %alloc_18[%c0] : memref<1xf64> +root@f68572e75858:/home/DSP_MLIR# /home/DSP_MLIR/build/bin/dsp1 /home/DSP_MLIR/mlir/test/Examples/DspExample/full_dtmf.py -emit=mlir-affine -affineOpt -canonOpt -opt +module { + func.func @main() { + %c2047_i64 = arith.constant 2047 : i64 + %cst = arith.constant 0.49971199035644531 : f64 + %c11 = arith.constant 11 : 
index + %c10 = arith.constant 10 : index + %cst_0 = arith.constant 3.000000e+00 : f64 + %c-1 = arith.constant -1 : index + %cst_1 = arith.constant 7.700000e+02 : f64 + %cst_2 = arith.constant 1.209000e+03 : f64 + %cst_3 = arith.constant 6.970000e+02 : f64 + %cst_4 = arith.constant 1.336000e+03 : f64 + %cst_5 = arith.constant 9.410000e+02 : f64 + %cst_6 = arith.constant 1.220000e-04 : f64 + %cst_7 = arith.constant 4.096000e+03 : f64 + %cst_8 = arith.constant -2.000000e+00 : f64 + %cst_9 = arith.constant 3.1415926535897931 : f64 + %c12 = arith.constant 12 : index + %cst_10 = arith.constant 0.000000e+00 : f64 + %c1_i64 = arith.constant 1 : i64 + %c0_i64 = arith.constant 0 : i64 + %c4096 = arith.constant 4096 : index + %c1 = arith.constant 1 : index + %cst_11 = arith.constant 1.477000e+03 : f64 + %cst_12 = arith.constant 8.520000e+02 : f64 + %cst_13 = arith.constant 1.000000e+01 : f64 + %cst_14 = arith.constant 6.2831853071800001 : f64 + %cst_15 = arith.constant 8.192000e+03 : f64 + %cst_16 = arith.constant 5.000000e-01 : f64 + %cst_17 = arith.constant 9.000000e+00 : f64 + %c0 = arith.constant 0 : index + %alloc = memref.alloc() : memref + %alloc_18 = memref.alloc() : memref<1xf64> + %alloc_19 = memref.alloc() : memref<10x2xf64> + %alloc_20 = memref.alloc() : memref<2xf64> + %alloc_21 = memref.alloc() : memref<4096xf64> + %alloc_22 = memref.alloc() : memref + %alloc_23 = memref.alloc() : memref + %alloc_24 = memref.alloc() : memref<4096xf64> + %alloc_25 = memref.alloc() : memref<4096xf64> + %alloc_26 = memref.alloc() : memref<4096xf64> + %alloc_27 = memref.alloc() : memref<4096xf64> + %alloc_28 = memref.alloc() : memref + %alloc_29 = memref.alloc() : memref + %alloc_30 = memref.alloc() : memref + affine.store %cst_17, %alloc_30[] : memref + affine.store %cst_16, %alloc_29[] : memref + affine.store %cst_15, %alloc_28[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.divf 
%7, %cst_15 : f64 + %9 = arith.mulf %8, %cst_12 : f64 + %10 = arith.mulf %9, %cst_14 : f64 + %11 = math.sin %10 : f64 + %12 = arith.mulf %8, %cst_11 : f64 + %13 = arith.mulf %12, %cst_14 : f64 + %14 = math.sin %13 : f64 + %15 = arith.addf %11, %14 : f64 + %16 = arith.mulf %15, %cst_13 : f64 + memref.store %16, %alloc_27[%arg0] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) { + %10 = arith.index_cast %arg1 : index to i64 + %11 = arith.shli %c1_i64, %10 : i64 + %12 = arith.andi %6, %11 : i64 + %13 = arith.cmpi ne, %12, %c0_i64 : i64 + %14 = arith.subi %c11, %arg1 : index + %15 = arith.index_cast %14 : index to i64 + %16 = arith.shli %c1_i64, %15 : i64 + %17 = arith.select %13, %16, %c0_i64 : i64 + %18 = arith.ori %arg2, %17 : i64 + scf.yield %18 : i64 + } + %8 = arith.index_cast %7 : i64 to index + %9 = memref.load %alloc_27[%arg0] : memref<4096xf64> + memref.store %9, %alloc_26[%8] : memref<4096xf64> + memref.store %cst_10, %alloc_25[%8] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c12 step %c1 { + %6 = arith.shli %c1, %arg0 : index + %7 = arith.shli %6, %c1 : index + scf.for %arg1 = %c0 to %c4096 step %7 { + scf.for %arg2 = %c0 to %6 step %c1 { + %8 = arith.addi %arg1, %arg2 : index + %9 = arith.addi %8, %6 : index + %10 = arith.index_cast %arg2 : index to i64 + %11 = arith.sitofp %10 : i64 to f64 + %12 = arith.index_cast %7 : index to i64 + %13 = arith.sitofp %12 : i64 to f64 + %14 = arith.divf %11, %13 : f64 + %15 = arith.mulf %14, %cst_8 : f64 + %16 = arith.mulf %15, %cst_9 : f64 + %17 = math.cos %16 : f64 + %18 = math.sin %16 : f64 + %19 = memref.load %alloc_26[%9] : memref<4096xf64> + %20 = memref.load %alloc_25[%9] : memref<4096xf64> + %21 = arith.mulf %19, %17 : f64 + %22 = arith.mulf %20, %18 : f64 + %23 = arith.subf %21, %22 : f64 + %24 = arith.mulf %19, %18 : f64 + %25 = arith.mulf %20, %17 : f64 + %26 = 
arith.addf %24, %25 : f64 + %27 = memref.load %alloc_26[%8] : memref<4096xf64> + %28 = memref.load %alloc_25[%8] : memref<4096xf64> + %29 = arith.addf %27, %23 : f64 + %30 = arith.addf %28, %26 : f64 + %31 = arith.subf %27, %23 : f64 + %32 = arith.subf %28, %26 : f64 + %33 = arith.mulf %29, %29 : f64 + %34 = arith.mulf %30, %30 : f64 + %35 = arith.addf %33, %34 : f64 + %36 = math.sqrt %35 : f64 + %37 = arith.mulf %31, %31 : f64 + %38 = arith.mulf %32, %32 : f64 + %39 = arith.addf %37, %38 : f64 + %40 = math.sqrt %39 : f64 + memref.store %29, %alloc_26[%8] : memref<4096xf64> + memref.store %30, %alloc_25[%8] : memref<4096xf64> + memref.store %31, %alloc_26[%9] : memref<4096xf64> + memref.store %32, %alloc_25[%9] : memref<4096xf64> + memref.store %36, %alloc_24[%8] : memref<4096xf64> + memref.store %40, %alloc_24[%9] : memref<4096xf64> + } + } + } + affine.store %cst_7, %alloc_23[] : memref + affine.store %cst_6, %alloc_22[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.cmpi sle, %6, %c2047_i64 : i64 + scf.if %8 { + %9 = arith.divf %7, %cst : f64 + memref.store %9, %alloc_21[%arg0] : memref<4096xf64> + } else { + %9 = arith.subf %7, %cst_7 : f64 + %10 = arith.divf %9, %cst : f64 + memref.store %10, %alloc_21[%arg0] : memref<4096xf64> + } + } + %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) { + %6 = memref.load %alloc_21[%arg0] : memref<4096xf64> + %7 = memref.load %alloc_24[%arg0] : memref<4096xf64> + %8 = arith.cmpf ogt, %6, %cst_10 : f64 + %9:4 = scf.if %8 -> (f64, f64, f64, f64) { + %10 = arith.cmpf ogt, %7, %arg1 : f64 + %11 = arith.select %10, %7, %arg1 : f64 + %12 = arith.select %10, %6, %arg3 : f64 + %13:2 = scf.if %10 -> (f64, f64) { + scf.yield %arg1, %arg3 : f64, f64 + } else { + %14 = arith.cmpf ogt, %7, %arg2 : f64 + %15 = arith.select %14, %7, %arg2 : f64 + 
%16 = arith.select %14, %6, %arg4 : f64 + scf.yield %15, %16 : f64, f64 + } + scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64 + } else { + scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64 + } + memref.store %0#2, %alloc_20[%c0] : memref<2xf64> + memref.store %0#3, %alloc_20[%c1] : memref<2xf64> + affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64> + %1 = memref.load %alloc_20[%c0] : memref<2xf64> + %2 = memref.load %alloc_20[%c1] : memref<2xf64> + affine.store %c-1, %alloc[] : memref + scf.for %arg0 = %c0 to %c10 step %c1 { + %6 = memref.load %alloc[] : memref + %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64> + %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64> + %9 = arith.subf %7, %1 : f64 + %10 = arith.subf %8, %2 : f64 + %11 = math.absf %9 : f64 + %12 = math.absf %10 : f64 + %13 = 
arith.cmpf ole, %11, %cst_0 : f64 + %14 = arith.cmpf ole, %12, %cst_0 : f64 + %15 = arith.andi %13, %14 : i1 + %16 = arith.select %15, %arg0, %6 : index + memref.store %16, %alloc[] : memref + } + %3 = memref.load %alloc[] : memref + %4 = arith.index_cast %3 : index to i64 + %5 = arith.sitofp %4 : i64 to f64 + memref.store %5, %alloc_18[%c0] : memref<1xf64> +root@f68572e75858:/home/DSP_MLIR# /home/DSP_MLIR/build/bin/dsp1 /home/DSP_MLIR/mlir/test/Examples/DspExample/full_dtmf.py -emit=mlir-affine +module { + func.func @main() { + %alloc = memref.alloc() : memref + %alloc_0 = memref.alloc() : memref<1xf64> + %alloc_1 = memref.alloc() : memref<10x2xf64> + %alloc_2 = memref.alloc() : memref<2xf64> + %alloc_3 = memref.alloc() : memref<4096xf64> + %alloc_4 = memref.alloc() : memref + %alloc_5 = memref.alloc() : memref + %alloc_6 = memref.alloc() : memref<4096xf64> + %alloc_7 = memref.alloc() : memref<4096xf64> + %alloc_8 = memref.alloc() : memref<4096xf64> + %alloc_9 = memref.alloc() : memref<4096xf64> + %alloc_10 = memref.alloc() : memref<4096xf64> + %alloc_11 = memref.alloc() : memref<4096xf64> + %alloc_12 = memref.alloc() : memref<4096xf64> + %alloc_13 = memref.alloc() : memref<4096xf64> + %alloc_14 = memref.alloc() : memref<4096xf64> + %alloc_15 = memref.alloc() : memref<4096xf64> + %alloc_16 = memref.alloc() : memref<4096xf64> + %alloc_17 = memref.alloc() : memref<4096xf64> + %alloc_18 = memref.alloc() : memref<4096xf64> + %alloc_19 = memref.alloc() : memref + %alloc_20 = memref.alloc() : memref + %alloc_21 = memref.alloc() : memref + %c0 = arith.constant 0 : index + %cst = arith.constant 9.000000e+00 : f64 + affine.store %cst, %alloc_21[] : memref + %c0_22 = arith.constant 0 : index + %cst_23 = arith.constant 5.000000e-01 : f64 + affine.store %cst_23, %alloc_20[] : memref + %c0_24 = arith.constant 0 : index + %cst_25 = arith.constant 8.192000e+03 : f64 + affine.store %cst_25, %alloc_19[] : memref + %cst_26 = arith.constant 6.2831853071800001 : f64 + %cst_27 = 
arith.constant 1.000000e+01 : f64 + %cst_28 = arith.constant 8.192000e+03 : f64 + %cst_29 = arith.constant 8.520000e+02 : f64 + %cst_30 = arith.constant 1.477000e+03 : f64 + %c1 = arith.constant 1 : index + %c4096 = arith.constant 4096 : index + %c0_31 = arith.constant 0 : index + scf.for %arg0 = %c0_31 to %c4096 step %c1 { + %19 = arith.index_cast %arg0 : index to i64 + %20 = arith.sitofp %19 : i64 to f64 + %21 = arith.divf %20, %cst_28 : f64 + %22 = arith.mulf %cst_29, %21 : f64 + %23 = arith.mulf %cst_26, %22 : f64 + %24 = math.sin %23 : f64 + %25 = arith.mulf %cst_30, %21 : f64 + %26 = arith.mulf %cst_26, %25 : f64 + %27 = math.sin %26 : f64 + %28 = arith.addf %24, %27 : f64 + %29 = arith.mulf %cst_27, %28 : f64 + memref.store %29, %alloc_18[%arg0] : memref<4096xf64> + } + %c0_32 = arith.constant 0 : index + %c4096_33 = arith.constant 4096 : index + %c1_34 = arith.constant 1 : index + %0 = arith.index_cast %c4096_33 : index to i64 + %1 = arith.sitofp %0 : i64 to f64 + %2 = math.log2 %1 : f64 + %3 = arith.fptosi %2 : f64 to i64 + %4 = arith.index_cast %3 : i64 to index + scf.for %arg0 = %c0_32 to %c4096_33 step %c1_34 { + %19 = arith.index_cast %arg0 : index to i64 + %c0_i64 = arith.constant 0 : i64 + %20 = scf.for %arg1 = %c0_32 to %4 step %c1_34 iter_args(%arg2 = %c0_i64) -> (i64) { + %23 = arith.index_cast %arg1 : index to i64 + %c1_i64 = arith.constant 1 : i64 + %24 = arith.shli %c1_i64, %23 : i64 + %25 = arith.andi %19, %24 : i64 + %c0_i64_92 = arith.constant 0 : i64 + %26 = arith.cmpi ne, %25, %c0_i64_92 : i64 + %c1_93 = arith.constant 1 : index + %27 = arith.subi %4, %arg1 : index + %28 = arith.subi %27, %c1_93 : index + %29 = arith.index_cast %28 : index to i64 + %c1_i64_94 = arith.constant 1 : i64 + %30 = arith.shli %c1_i64_94, %29 : i64 + %c0_i64_95 = arith.constant 0 : i64 + %31 = arith.select %26, %30, %c0_i64_95 : i64 + %32 = arith.ori %arg2, %31 : i64 + scf.yield %32 : i64 + } + %21 = arith.index_cast %20 : i64 to index + %22 = memref.load 
%alloc_18[%arg0] : memref<4096xf64> + %cst_91 = arith.constant 0.000000e+00 : f64 + memref.store %22, %alloc_15[%21] : memref<4096xf64> + memref.store %cst_91, %alloc_14[%21] : memref<4096xf64> + } + %c12 = arith.constant 12 : index + %cst_35 = arith.constant 3.1415926535897931 : f64 + %cst_36 = arith.constant -2.000000e+00 : f64 + scf.for %arg0 = %c0_32 to %c12 step %c1_34 { + %c1_91 = arith.constant 1 : index + %19 = arith.shli %c1_91, %arg0 : index + %c1_92 = arith.constant 1 : index + %20 = arith.shli %19, %c1_92 : index + scf.for %arg1 = %c0_32 to %c4096_33 step %20 { + scf.for %arg2 = %c0_32 to %19 step %c1_34 { + %21 = arith.addi %arg1, %arg2 : index + %22 = arith.addi %21, %19 : index + %23 = arith.index_cast %arg2 : index to i64 + %24 = arith.sitofp %23 : i64 to f64 + %25 = arith.index_cast %20 : index to i64 + %26 = arith.sitofp %25 : i64 to f64 + %27 = arith.divf %24, %26 : f64 + %28 = arith.mulf %cst_36, %27 : f64 + %29 = arith.mulf %cst_35, %28 : f64 + %30 = math.cos %29 : f64 + %31 = math.sin %29 : f64 + %32 = memref.load %alloc_15[%22] : memref<4096xf64> + %33 = memref.load %alloc_14[%22] : memref<4096xf64> + %34 = arith.mulf %32, %30 : f64 + %35 = arith.mulf %33, %31 : f64 + %36 = arith.subf %34, %35 : f64 + %37 = arith.mulf %32, %31 : f64 + %38 = arith.mulf %33, %30 : f64 + %39 = arith.addf %37, %38 : f64 + %40 = memref.load %alloc_15[%21] : memref<4096xf64> + %41 = memref.load %alloc_14[%21] : memref<4096xf64> + %42 = arith.addf %40, %36 : f64 + %43 = arith.addf %41, %39 : f64 + %44 = arith.subf %40, %36 : f64 + %45 = arith.subf %41, %39 : f64 + memref.store %42, %alloc_15[%21] : memref<4096xf64> + memref.store %43, %alloc_14[%21] : memref<4096xf64> + memref.store %44, %alloc_15[%22] : memref<4096xf64> + memref.store %45, %alloc_14[%22] : memref<4096xf64> + } + } + } + %c0_37 = arith.constant 0 : index + %c4096_38 = arith.constant 4096 : index + %c1_39 = arith.constant 1 : index + %5 = arith.index_cast %c4096_38 : index to i64 + %6 = arith.sitofp 
%5 : i64 to f64 + %7 = math.log2 %6 : f64 + %8 = arith.fptosi %7 : f64 to i64 + %9 = arith.index_cast %8 : i64 to index + scf.for %arg0 = %c0_37 to %c4096_38 step %c1_39 { + %19 = arith.index_cast %arg0 : index to i64 + %c0_i64 = arith.constant 0 : i64 + %20 = scf.for %arg1 = %c0_37 to %9 step %c1_39 iter_args(%arg2 = %c0_i64) -> (i64) { + %23 = arith.index_cast %arg1 : index to i64 + %c1_i64 = arith.constant 1 : i64 + %24 = arith.shli %c1_i64, %23 : i64 + %25 = arith.andi %19, %24 : i64 + %c0_i64_92 = arith.constant 0 : i64 + %26 = arith.cmpi ne, %25, %c0_i64_92 : i64 + %c1_93 = arith.constant 1 : index + %27 = arith.subi %9, %arg1 : index + %28 = arith.subi %27, %c1_93 : index + %29 = arith.index_cast %28 : index to i64 + %c1_i64_94 = arith.constant 1 : i64 + %30 = arith.shli %c1_i64_94, %29 : i64 + %c0_i64_95 = arith.constant 0 : i64 + %31 = arith.select %26, %30, %c0_i64_95 : i64 + %32 = arith.ori %arg2, %31 : i64 + scf.yield %32 : i64 + } + %21 = arith.index_cast %20 : i64 to index + %22 = memref.load %alloc_18[%arg0] : memref<4096xf64> + %cst_91 = arith.constant 0.000000e+00 : f64 + memref.store %22, %alloc_11[%21] : memref<4096xf64> + memref.store %cst_91, %alloc_10[%21] : memref<4096xf64> + } + %c12_40 = arith.constant 12 : index + %cst_41 = arith.constant 3.1415926535897931 : f64 + %cst_42 = arith.constant -2.000000e+00 : f64 + scf.for %arg0 = %c0_37 to %c12_40 step %c1_39 { + %c1_91 = arith.constant 1 : index + %19 = arith.shli %c1_91, %arg0 : index + %c1_92 = arith.constant 1 : index + %20 = arith.shli %19, %c1_92 : index + scf.for %arg1 = %c0_37 to %c4096_38 step %20 { + scf.for %arg2 = %c0_37 to %19 step %c1_39 { + %21 = arith.addi %arg1, %arg2 : index + %22 = arith.addi %21, %19 : index + %23 = arith.index_cast %arg2 : index to i64 + %24 = arith.sitofp %23 : i64 to f64 + %25 = arith.index_cast %20 : index to i64 + %26 = arith.sitofp %25 : i64 to f64 + %27 = arith.divf %24, %26 : f64 + %28 = arith.mulf %cst_42, %27 : f64 + %29 = arith.mulf %cst_41, %28 
: f64 + %30 = math.cos %29 : f64 + %31 = math.sin %29 : f64 + %32 = memref.load %alloc_11[%22] : memref<4096xf64> + %33 = memref.load %alloc_10[%22] : memref<4096xf64> + %34 = arith.mulf %32, %30 : f64 + %35 = arith.mulf %33, %31 : f64 + %36 = arith.subf %34, %35 : f64 + %37 = arith.mulf %32, %31 : f64 + %38 = arith.mulf %33, %30 : f64 + %39 = arith.addf %37, %38 : f64 + %40 = memref.load %alloc_11[%21] : memref<4096xf64> + %41 = memref.load %alloc_10[%21] : memref<4096xf64> + %42 = arith.addf %40, %36 : f64 + %43 = arith.addf %41, %39 : f64 + %44 = arith.subf %40, %36 : f64 + %45 = arith.subf %41, %39 : f64 + memref.store %42, %alloc_11[%21] : memref<4096xf64> + memref.store %43, %alloc_10[%21] : memref<4096xf64> + memref.store %44, %alloc_11[%22] : memref<4096xf64> + memref.store %45, %alloc_10[%22] : memref<4096xf64> + } + } + } + affine.for %arg0 = 0 to 4096 { + %19 = affine.load %alloc_15[%arg0] : memref<4096xf64> + %20 = arith.mulf %19, %19 : f64 + affine.store %20, %alloc_9[%arg0] : memref<4096xf64> + } + affine.for %arg0 = 0 to 4096 { + %19 = affine.load %alloc_10[%arg0] : memref<4096xf64> + %20 = arith.mulf %19, %19 : f64 + affine.store %20, %alloc_8[%arg0] : memref<4096xf64> + } + affine.for %arg0 = 0 to 4096 { + %19 = affine.load %alloc_9[%arg0] : memref<4096xf64> + %20 = affine.load %alloc_8[%arg0] : memref<4096xf64> + %21 = arith.addf %19, %20 : f64 + affine.store %21, %alloc_7[%arg0] : memref<4096xf64> + } + affine.for %arg0 = 0 to 4096 { + %19 = affine.load %alloc_7[%arg0] : memref<4096xf64> + %20 = math.sqrt %19 : f64 + affine.store %20, %alloc_6[%arg0] : memref<4096xf64> + } + %c0_43 = arith.constant 0 : index + %cst_44 = arith.constant 4.096000e+03 : f64 + affine.store %cst_44, %alloc_5[] : memref + %c0_45 = arith.constant 0 : index + %cst_46 = arith.constant 1.220000e-04 : f64 + affine.store %cst_46, %alloc_4[] : memref + %cst_47 = arith.constant 4.096000e+03 : f64 + %cst_48 = arith.constant 1.2199999764561653E-4 : f64 + %c0_49 = arith.constant 0 
: index + %c4096_50 = arith.constant 4096 : index + %c1_51 = arith.constant 1 : index + %10 = arith.mulf %cst_47, %cst_48 : f64 + %cst_52 = arith.constant 5.000000e-01 : f64 + %cst_53 = arith.constant 1.000000e+00 : f64 + %11 = arith.subf %cst_47, %cst_53 : f64 + %12 = arith.mulf %11, %cst_52 : f64 + scf.for %arg0 = %c0_49 to %c4096_50 step %c1_51 { + %19 = arith.index_cast %arg0 : index to i64 + %20 = arith.sitofp %19 : i64 to f64 + %21 = arith.cmpf ole, %20, %12 : f64 + %22 = scf.if %21 -> (f64) { + %23 = arith.divf %20, %10 : f64 + memref.store %23, %alloc_3[%arg0] : memref<4096xf64> + scf.yield %23 : f64 + } else { + %23 = arith.subf %20, %cst_47 : f64 + %24 = arith.divf %23, %10 : f64 + memref.store %24, %alloc_3[%arg0] : memref<4096xf64> + scf.yield %24 : f64 + } + } + %cst_54 = arith.constant 0.000000e+00 : f64 + %cst_55 = arith.constant 0.000000e+00 : f64 + %cst_56 = arith.constant 0.000000e+00 : f64 + %cst_57 = arith.constant 0.000000e+00 : f64 + %c0_58 = arith.constant 0 : index + %c4096_59 = arith.constant 4096 : index + %c1_60 = arith.constant 1 : index + %13:4 = scf.for %arg0 = %c0_58 to %c4096_59 step %c1_60 iter_args(%arg1 = %cst_54, %arg2 = %cst_55, %arg3 = %cst_56, %arg4 = %cst_57) -> (f64, f64, f64, f64) { + %19 = memref.load %alloc_3[%arg0] : memref<4096xf64> + %20 = memref.load %alloc_6[%arg0] : memref<4096xf64> + %cst_91 = arith.constant 0.000000e+00 : f64 + %21 = arith.cmpf ogt, %19, %cst_91 : f64 + %22:4 = scf.if %21 -> (f64, f64, f64, f64) { + %23 = arith.cmpf ogt, %20, %arg1 : f64 + %24:4 = scf.if %23 -> (f64, f64, f64, f64) { + scf.yield %20, %arg1, %19, %arg3 : f64, f64, f64, f64 + } else { + %25 = arith.cmpf ogt, %20, %arg2 : f64 + %26:4 = scf.if %25 -> (f64, f64, f64, f64) { + scf.yield %arg1, %20, %arg3, %19 : f64, f64, f64, f64 + } else { + scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %26#0, %26#1, %26#2, %26#3 : f64, f64, f64, f64 + } + scf.yield %24#0, %24#1, %24#2, %24#3 : f64, f64, f64, f64 + } else { 
+ scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %22#0, %22#1, %22#2, %22#3 : f64, f64, f64, f64 + } + %c0_61 = arith.constant 0 : index + memref.store %13#2, %alloc_2[%c0_61] : memref<2xf64> + %c1_62 = arith.constant 1 : index + memref.store %13#3, %alloc_2[%c1_62] : memref<2xf64> + %c0_63 = arith.constant 0 : index + %c1_64 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %c5 = arith.constant 5 : index + %c6 = arith.constant 6 : index + %c7 = arith.constant 7 : index + %c8 = arith.constant 8 : index + %c9 = arith.constant 9 : index + %cst_65 = arith.constant 9.410000e+02 : f64 + affine.store %cst_65, %alloc_1[%c0_63, %c0_63] : memref<10x2xf64> + %cst_66 = arith.constant 1.336000e+03 : f64 + affine.store %cst_66, %alloc_1[%c0_63, %c1_64] : memref<10x2xf64> + %cst_67 = arith.constant 6.970000e+02 : f64 + affine.store %cst_67, %alloc_1[%c1_64, %c0_63] : memref<10x2xf64> + %cst_68 = arith.constant 1.209000e+03 : f64 + affine.store %cst_68, %alloc_1[%c1_64, %c1_64] : memref<10x2xf64> + %cst_69 = arith.constant 6.970000e+02 : f64 + affine.store %cst_69, %alloc_1[%c2, %c0_63] : memref<10x2xf64> + %cst_70 = arith.constant 1.336000e+03 : f64 + affine.store %cst_70, %alloc_1[%c2, %c1_64] : memref<10x2xf64> + %cst_71 = arith.constant 6.970000e+02 : f64 + affine.store %cst_71, %alloc_1[%c3, %c0_63] : memref<10x2xf64> + %cst_72 = arith.constant 1.477000e+03 : f64 + affine.store %cst_72, %alloc_1[%c3, %c1_64] : memref<10x2xf64> + %cst_73 = arith.constant 7.700000e+02 : f64 + affine.store %cst_73, %alloc_1[%c4, %c0_63] : memref<10x2xf64> + %cst_74 = arith.constant 1.209000e+03 : f64 + affine.store %cst_74, %alloc_1[%c4, %c1_64] : memref<10x2xf64> + %cst_75 = arith.constant 7.700000e+02 : f64 + affine.store %cst_75, %alloc_1[%c5, %c0_63] : memref<10x2xf64> + %cst_76 = arith.constant 1.336000e+03 : f64 + affine.store %cst_76, %alloc_1[%c5, %c1_64] : memref<10x2xf64> + %cst_77 
= arith.constant 7.700000e+02 : f64 + affine.store %cst_77, %alloc_1[%c6, %c0_63] : memref<10x2xf64> + %cst_78 = arith.constant 1.477000e+03 : f64 + affine.store %cst_78, %alloc_1[%c6, %c1_64] : memref<10x2xf64> + %cst_79 = arith.constant 8.520000e+02 : f64 + affine.store %cst_79, %alloc_1[%c7, %c0_63] : memref<10x2xf64> + %cst_80 = arith.constant 1.209000e+03 : f64 + affine.store %cst_80, %alloc_1[%c7, %c1_64] : memref<10x2xf64> + %cst_81 = arith.constant 8.520000e+02 : f64 + affine.store %cst_81, %alloc_1[%c8, %c0_63] : memref<10x2xf64> + %cst_82 = arith.constant 1.336000e+03 : f64 + affine.store %cst_82, %alloc_1[%c8, %c1_64] : memref<10x2xf64> + %cst_83 = arith.constant 8.520000e+02 : f64 + affine.store %cst_83, %alloc_1[%c9, %c0_63] : memref<10x2xf64> + %cst_84 = arith.constant 1.477000e+03 : f64 + affine.store %cst_84, %alloc_1[%c9, %c1_64] : memref<10x2xf64> + %c0_85 = arith.constant 0 : index + %c1_86 = arith.constant 1 : index + %14 = memref.load %alloc_2[%c0_85] : memref<2xf64> + %15 = memref.load %alloc_2[%c1_86] : memref<2xf64> + %c-1 = arith.constant -1 : index + affine.store %c-1, %alloc[] : memref + %cst_87 = arith.constant 3.000000e+00 : f64 + %c0_88 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1_89 = arith.constant 1 : index + scf.for %arg0 = %c0_88 to %c10 step %c1_89 { + %19 = memref.load %alloc[] : memref + %20 = memref.load %alloc_1[%arg0, %c0_85] : memref<10x2xf64> + %21 = memref.load %alloc_1[%arg0, %c1_86] : memref<10x2xf64> + %22 = arith.subf %20, %14 : f64 + %23 = arith.subf %21, %15 : f64 + %24 = math.absf %22 : f64 + %25 = math.absf %23 : f64 + %26 = arith.cmpf ole, %24, %cst_87 : f64 + %27 = arith.cmpf ole, %25, %cst_87 : f64 + %28 = arith.andi %26, %27 : i1 + %29 = arith.select %28, %arg0, %19 : index + memref.store %29, %alloc[] : memref + } + %16 = memref.load %alloc[] : memref + %17 = arith.index_cast %16 : index to i64 + %18 = arith.sitofp %17 : i64 to f64 + %c0_90 = arith.constant 0 : index + memref.store %18, 
%alloc_0[%c0_90] : memref<1xf64> + dsp.print %alloc_0 : memref<1xf64> + memref.dealloc %alloc_21 : memref + memref.dealloc %alloc_20 : memref + memref.dealloc %alloc_19 : memref + memref.dealloc %alloc_18 : memref<4096xf64> + memref.dealloc %alloc_17 : memref<4096xf64> + memref.dealloc %alloc_16 : memref<4096xf64> + memref.dealloc %alloc_15 : memref<4096xf64> + memref.dealloc %alloc_14 : memref<4096xf64> + memref.dealloc %alloc_13 : memref<4096xf64> + memref.dealloc %alloc_12 : memref<4096xf64> + memref.dealloc %alloc_11 : memref<4096xf64> + memref.dealloc %alloc_10 : memref<4096xf64> + memref.dealloc %alloc_9 : memref<4096xf64> + memref.dealloc %alloc_8 : memref<4096xf64> + memref.dealloc %alloc_7 : memref<4096xf64> + memref.dealloc %alloc_6 : memref<4096xf64> + memref.dealloc %alloc_5 : memref + memref.dealloc %alloc_4 : memref + memref.dealloc %alloc_3 : memref<4096xf64> + memref.dealloc %alloc_2 : memref<2xf64> + memref.dealloc %alloc_1 : memref<10x2xf64> + memref.dealloc %alloc_0 : memref<1xf64> + memref.dealloc %alloc : memref + return + } +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir b/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir new file mode 100644 index 000000000000..9d3f9b34350a --- /dev/null +++ b/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir @@ -0,0 +1,435 @@ +module { + func.func @main() { + %c2047_i64 = arith.constant 2047 : i64 + %cst = arith.constant 0.49971199035644531 : f64 + %c11 = arith.constant 11 : index + %c10 = arith.constant 10 : index + %cst_0 = arith.constant 3.000000e+00 : f64 + %c-1 = arith.constant -1 : index + %cst_1 = arith.constant 7.700000e+02 : f64 + %cst_2 = arith.constant 1.209000e+03 : f64 + %cst_3 = arith.constant 6.970000e+02 : f64 + %cst_4 = arith.constant 1.336000e+03 : f64 + %cst_5 = arith.constant 9.410000e+02 : f64 + %cst_6 = arith.constant 1.220000e-04 : f64 + %cst_7 = arith.constant 4.096000e+03 : f64 + %cst_8 = arith.constant -2.000000e+00 : f64 + %cst_9 
= arith.constant 3.1415926535897931 : f64 + %c12 = arith.constant 12 : index + %cst_10 = arith.constant 0.000000e+00 : f64 + %c1_i64 = arith.constant 1 : i64 + %c0_i64 = arith.constant 0 : i64 + %c4096 = arith.constant 4096 : index + %c1 = arith.constant 1 : index + %cst_11 = arith.constant 1.477000e+03 : f64 + %cst_12 = arith.constant 8.520000e+02 : f64 + %cst_13 = arith.constant 1.000000e+01 : f64 + %cst_14 = arith.constant 6.2831853071800001 : f64 + %cst_15 = arith.constant 8.192000e+03 : f64 + %cst_16 = arith.constant 5.000000e-01 : f64 + %cst_17 = arith.constant 9.000000e+00 : f64 + %c0 = arith.constant 0 : index + %alloc = memref.alloc() : memref + %alloc_18 = memref.alloc() : memref<1xf64> + %alloc_19 = memref.alloc() : memref<10x2xf64> + %alloc_20 = memref.alloc() : memref<2xf64> + %alloc_21 = memref.alloc() : memref<4096xf64> + %alloc_22 = memref.alloc() : memref + %alloc_23 = memref.alloc() : memref + %alloc_24 = memref.alloc() : memref<4096xf64> + %alloc_25 = memref.alloc() : memref<4096xf64> + %alloc_26 = memref.alloc() : memref<4096xf64> + %alloc_27 = memref.alloc() : memref<4096xf64> + %alloc_28 = memref.alloc() : memref + %alloc_29 = memref.alloc() : memref + %alloc_30 = memref.alloc() : memref + affine.store %cst_17, %alloc_30[] : memref + affine.store %cst_16, %alloc_29[] : memref + affine.store %cst_15, %alloc_28[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.divf %7, %cst_15 : f64 + %9 = arith.mulf %8, %cst_12 : f64 + %10 = arith.mulf %9, %cst_14 : f64 + %11 = math.sin %10 : f64 + %12 = arith.mulf %8, %cst_11 : f64 + %13 = arith.mulf %12, %cst_14 : f64 + %14 = math.sin %13 : f64 + %15 = arith.addf %11, %14 : f64 + %16 = arith.mulf %15, %cst_13 : f64 + memref.store %16, %alloc_27[%arg0] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = scf.for %arg1 = %c0 to %c12 step %c1 
iter_args(%arg2 = %c0_i64) -> (i64) { + %10 = arith.index_cast %arg1 : index to i64 + %11 = arith.shli %c1_i64, %10 : i64 + %12 = arith.andi %6, %11 : i64 + %13 = arith.cmpi ne, %12, %c0_i64 : i64 + %14 = arith.subi %c11, %arg1 : index + %15 = arith.index_cast %14 : index to i64 + %16 = arith.shli %c1_i64, %15 : i64 + %17 = arith.select %13, %16, %c0_i64 : i64 + %18 = arith.ori %arg2, %17 : i64 + scf.yield %18 : i64 + } + %8 = arith.index_cast %7 : i64 to index + %9 = memref.load %alloc_27[%arg0] : memref<4096xf64> + memref.store %9, %alloc_26[%8] : memref<4096xf64> + memref.store %cst_10, %alloc_25[%8] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c12 step %c1 { + %6 = arith.shli %c1, %arg0 : index + %7 = arith.shli %6, %c1 : index + scf.for %arg1 = %c0 to %c4096 step %7 { + scf.for %arg2 = %c0 to %6 step %c1 { + %8 = arith.addi %arg1, %arg2 : index + %9 = arith.addi %8, %6 : index + %10 = arith.index_cast %arg2 : index to i64 + %11 = arith.sitofp %10 : i64 to f64 + %12 = arith.index_cast %7 : index to i64 + %13 = arith.sitofp %12 : i64 to f64 + %14 = arith.divf %11, %13 : f64 + %15 = arith.mulf %14, %cst_8 : f64 + %16 = arith.mulf %15, %cst_9 : f64 + %17 = math.cos %16 : f64 + %18 = math.sin %16 : f64 + %19 = memref.load %alloc_26[%9] : memref<4096xf64> + %20 = memref.load %alloc_25[%9] : memref<4096xf64> + %21 = arith.mulf %19, %17 : f64 + %22 = arith.mulf %20, %18 : f64 + %23 = arith.subf %21, %22 : f64 + %24 = arith.mulf %19, %18 : f64 + %25 = arith.mulf %20, %17 : f64 + %26 = arith.addf %24, %25 : f64 + %27 = memref.load %alloc_26[%8] : memref<4096xf64> + %28 = memref.load %alloc_25[%8] : memref<4096xf64> + %29 = arith.addf %27, %23 : f64 + %30 = arith.addf %28, %26 : f64 + %31 = arith.subf %27, %23 : f64 + %32 = arith.subf %28, %26 : f64 + %33 = arith.mulf %29, %29 : f64 + %34 = arith.mulf %30, %30 : f64 + %35 = arith.addf %33, %34 : f64 + %36 = math.sqrt %35 : f64 + %37 = arith.mulf %31, %31 : f64 + %38 = arith.mulf %32, %32 : f64 + %39 = arith.addf %37, 
%38 : f64 + %40 = math.sqrt %39 : f64 + memref.store %29, %alloc_26[%8] : memref<4096xf64> + memref.store %30, %alloc_25[%8] : memref<4096xf64> + memref.store %31, %alloc_26[%9] : memref<4096xf64> + memref.store %32, %alloc_25[%9] : memref<4096xf64> + memref.store %36, %alloc_24[%8] : memref<4096xf64> + memref.store %40, %alloc_24[%9] : memref<4096xf64> + } + } + } + affine.store %cst_7, %alloc_23[] : memref + affine.store %cst_6, %alloc_22[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.cmpi sle, %6, %c2047_i64 : i64 + scf.if %8 { + %9 = arith.divf %7, %cst : f64 + memref.store %9, %alloc_21[%arg0] : memref<4096xf64> + } else { + %9 = arith.subf %7, %cst_7 : f64 + %10 = arith.divf %9, %cst : f64 + memref.store %10, %alloc_21[%arg0] : memref<4096xf64> + } + } + %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) { + %6 = memref.load %alloc_21[%arg0] : memref<4096xf64> + %7 = memref.load %alloc_24[%arg0] : memref<4096xf64> + %8 = arith.cmpf ogt, %6, %cst_10 : f64 + %9:4 = scf.if %8 -> (f64, f64, f64, f64) { + %10 = arith.cmpf ogt, %7, %arg1 : f64 + %11 = arith.select %10, %7, %arg1 : f64 + %12 = arith.select %10, %6, %arg3 : f64 + %13:2 = scf.if %10 -> (f64, f64) { + scf.yield %arg1, %arg3 : f64, f64 + } else { + %14 = arith.cmpf ogt, %7, %arg2 : f64 + %15 = arith.select %14, %7, %arg2 : f64 + %16 = arith.select %14, %6, %arg4 : f64 + scf.yield %15, %16 : f64, f64 + } + scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64 + } else { + scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64 + } + memref.store %0#2, %alloc_20[%c0] : memref<2xf64> + memref.store %0#3, %alloc_20[%c1] : memref<2xf64> + affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64> + 
affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64> + %1 = memref.load %alloc_20[%c0] : memref<2xf64> + %2 = memref.load %alloc_20[%c1] : memref<2xf64> + affine.store %c-1, %alloc[] : memref + scf.for %arg0 = %c0 to %c10 step %c1 { + %6 = memref.load %alloc[] : memref + %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64> + %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64> + %9 = arith.subf %7, %1 : f64 + %10 = arith.subf %8, %2 : f64 + %11 = math.absf %9 : f64 + %12 = math.absf %10 : f64 + %13 = arith.cmpf ole, %11, %cst_0 : f64 + %14 = arith.cmpf ole, %12, %cst_0 : f64 + %15 = arith.andi %13, %14 : i1 + %16 = arith.select %15, %arg0, %6 : index + memref.store %16, %alloc[] : memref + } + %3 = memref.load %alloc[] : memref + %4 = arith.index_cast %3 : index to i64 + %5 = arith.sitofp %4 : i64 to f64 + memref.store %5, %alloc_18[%c0] : memref<1xf64> +root@f68572e75858:/home/DSP_MLIR# /home/DSP_MLIR/build/bin/dsp1 /home/DSP_MLIR/mlir/test/Examples/DspExample/full_dtmf.py 
-emit=mlir-affine -affineOpt -canonOpt -opt +module { + func.func @main() { + %c2047_i64 = arith.constant 2047 : i64 + %cst = arith.constant 0.49971199035644531 : f64 + %c11 = arith.constant 11 : index + %c10 = arith.constant 10 : index + %cst_0 = arith.constant 3.000000e+00 : f64 + %c-1 = arith.constant -1 : index + %cst_1 = arith.constant 7.700000e+02 : f64 + %cst_2 = arith.constant 1.209000e+03 : f64 + %cst_3 = arith.constant 6.970000e+02 : f64 + %cst_4 = arith.constant 1.336000e+03 : f64 + %cst_5 = arith.constant 9.410000e+02 : f64 + %cst_6 = arith.constant 1.220000e-04 : f64 + %cst_7 = arith.constant 4.096000e+03 : f64 + %cst_8 = arith.constant -2.000000e+00 : f64 + %cst_9 = arith.constant 3.1415926535897931 : f64 + %c12 = arith.constant 12 : index + %cst_10 = arith.constant 0.000000e+00 : f64 + %c1_i64 = arith.constant 1 : i64 + %c0_i64 = arith.constant 0 : i64 + %c4096 = arith.constant 4096 : index + %c1 = arith.constant 1 : index + %cst_11 = arith.constant 1.477000e+03 : f64 + %cst_12 = arith.constant 8.520000e+02 : f64 + %cst_13 = arith.constant 1.000000e+01 : f64 + %cst_14 = arith.constant 6.2831853071800001 : f64 + %cst_15 = arith.constant 8.192000e+03 : f64 + %cst_16 = arith.constant 5.000000e-01 : f64 + %cst_17 = arith.constant 9.000000e+00 : f64 + %c0 = arith.constant 0 : index + %alloc = memref.alloc() : memref + %alloc_18 = memref.alloc() : memref<1xf64> + %alloc_19 = memref.alloc() : memref<10x2xf64> + %alloc_20 = memref.alloc() : memref<2xf64> + %alloc_21 = memref.alloc() : memref<4096xf64> + %alloc_22 = memref.alloc() : memref + %alloc_23 = memref.alloc() : memref + %alloc_24 = memref.alloc() : memref<4096xf64> + %alloc_25 = memref.alloc() : memref<4096xf64> + %alloc_26 = memref.alloc() : memref<4096xf64> + %alloc_27 = memref.alloc() : memref<4096xf64> + %alloc_28 = memref.alloc() : memref + %alloc_29 = memref.alloc() : memref + %alloc_30 = memref.alloc() : memref + affine.store %cst_17, %alloc_30[] : memref + affine.store %cst_16, %alloc_29[] : 
memref + affine.store %cst_15, %alloc_28[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.divf %7, %cst_15 : f64 + %9 = arith.mulf %8, %cst_12 : f64 + %10 = arith.mulf %9, %cst_14 : f64 + %11 = math.sin %10 : f64 + %12 = arith.mulf %8, %cst_11 : f64 + %13 = arith.mulf %12, %cst_14 : f64 + %14 = math.sin %13 : f64 + %15 = arith.addf %11, %14 : f64 + %16 = arith.mulf %15, %cst_13 : f64 + memref.store %16, %alloc_27[%arg0] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) { + %10 = arith.index_cast %arg1 : index to i64 + %11 = arith.shli %c1_i64, %10 : i64 + %12 = arith.andi %6, %11 : i64 + %13 = arith.cmpi ne, %12, %c0_i64 : i64 + %14 = arith.subi %c11, %arg1 : index + %15 = arith.index_cast %14 : index to i64 + %16 = arith.shli %c1_i64, %15 : i64 + %17 = arith.select %13, %16, %c0_i64 : i64 + %18 = arith.ori %arg2, %17 : i64 + scf.yield %18 : i64 + } + %8 = arith.index_cast %7 : i64 to index + %9 = memref.load %alloc_27[%arg0] : memref<4096xf64> + memref.store %9, %alloc_26[%8] : memref<4096xf64> + memref.store %cst_10, %alloc_25[%8] : memref<4096xf64> + } + scf.for %arg0 = %c0 to %c12 step %c1 { + %6 = arith.shli %c1, %arg0 : index + %7 = arith.shli %6, %c1 : index + scf.for %arg1 = %c0 to %c4096 step %7 { + scf.for %arg2 = %c0 to %6 step %c1 { + %8 = arith.addi %arg1, %arg2 : index + %9 = arith.addi %8, %6 : index + %10 = arith.index_cast %arg2 : index to i64 + %11 = arith.sitofp %10 : i64 to f64 + %12 = arith.index_cast %7 : index to i64 + %13 = arith.sitofp %12 : i64 to f64 + %14 = arith.divf %11, %13 : f64 + %15 = arith.mulf %14, %cst_8 : f64 + %16 = arith.mulf %15, %cst_9 : f64 + %17 = math.cos %16 : f64 + %18 = math.sin %16 : f64 + %19 = memref.load %alloc_26[%9] : memref<4096xf64> + %20 = memref.load %alloc_25[%9] : 
memref<4096xf64> + %21 = arith.mulf %19, %17 : f64 + %22 = arith.mulf %20, %18 : f64 + %23 = arith.subf %21, %22 : f64 + %24 = arith.mulf %19, %18 : f64 + %25 = arith.mulf %20, %17 : f64 + %26 = arith.addf %24, %25 : f64 + %27 = memref.load %alloc_26[%8] : memref<4096xf64> + %28 = memref.load %alloc_25[%8] : memref<4096xf64> + %29 = arith.addf %27, %23 : f64 + %30 = arith.addf %28, %26 : f64 + %31 = arith.subf %27, %23 : f64 + %32 = arith.subf %28, %26 : f64 + %33 = arith.mulf %29, %29 : f64 + %34 = arith.mulf %30, %30 : f64 + %35 = arith.addf %33, %34 : f64 + %36 = math.sqrt %35 : f64 + %37 = arith.mulf %31, %31 : f64 + %38 = arith.mulf %32, %32 : f64 + %39 = arith.addf %37, %38 : f64 + %40 = math.sqrt %39 : f64 + memref.store %29, %alloc_26[%8] : memref<4096xf64> + memref.store %30, %alloc_25[%8] : memref<4096xf64> + memref.store %31, %alloc_26[%9] : memref<4096xf64> + memref.store %32, %alloc_25[%9] : memref<4096xf64> + memref.store %36, %alloc_24[%8] : memref<4096xf64> + memref.store %40, %alloc_24[%9] : memref<4096xf64> + } + } + } + affine.store %cst_7, %alloc_23[] : memref + affine.store %cst_6, %alloc_22[] : memref + scf.for %arg0 = %c0 to %c4096 step %c1 { + %6 = arith.index_cast %arg0 : index to i64 + %7 = arith.sitofp %6 : i64 to f64 + %8 = arith.cmpi sle, %6, %c2047_i64 : i64 + scf.if %8 { + %9 = arith.divf %7, %cst : f64 + memref.store %9, %alloc_21[%arg0] : memref<4096xf64> + } else { + %9 = arith.subf %7, %cst_7 : f64 + %10 = arith.divf %9, %cst : f64 + memref.store %10, %alloc_21[%arg0] : memref<4096xf64> + } + } + %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) { + %6 = memref.load %alloc_21[%arg0] : memref<4096xf64> + %7 = memref.load %alloc_24[%arg0] : memref<4096xf64> + %8 = arith.cmpf ogt, %6, %cst_10 : f64 + %9:4 = scf.if %8 -> (f64, f64, f64, f64) { + %10 = arith.cmpf ogt, %7, %arg1 : f64 + %11 = arith.select %10, %7, %arg1 : f64 + %12 = 
arith.select %10, %6, %arg3 : f64 + %13:2 = scf.if %10 -> (f64, f64) { + scf.yield %arg1, %arg3 : f64, f64 + } else { + %14 = arith.cmpf ogt, %7, %arg2 : f64 + %15 = arith.select %14, %7, %arg2 : f64 + %16 = arith.select %14, %6, %arg4 : f64 + scf.yield %15, %16 : f64, f64 + } + scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64 + } else { + scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64 + } + scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64 + } + memref.store %0#2, %alloc_20[%c0] : memref<2xf64> + memref.store %0#3, %alloc_20[%c1] : memref<2xf64> + affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64> + affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64> + affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64> + affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64> + affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64> + affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64> + affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64> + %1 = memref.load %alloc_20[%c0] : memref<2xf64> + %2 = memref.load %alloc_20[%c1] : memref<2xf64> + affine.store %c-1, %alloc[] : memref + scf.for %arg0 = %c0 to %c10 step %c1 { + %6 = memref.load %alloc[] : memref + %7 = memref.load %alloc_19[%arg0, %c0] : 
memref<10x2xf64> + %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64> + %9 = arith.subf %7, %1 : f64 + %10 = arith.subf %8, %2 : f64 + %11 = math.absf %9 : f64 + %12 = math.absf %10 : f64 + %13 = arith.cmpf ole, %11, %cst_0 : f64 + %14 = arith.cmpf ole, %12, %cst_0 : f64 + %15 = arith.andi %13, %14 : i1 + %16 = arith.select %15, %arg0, %6 : index + memref.store %16, %alloc[] : memref + } + %3 = memref.load %alloc[] : memref + %4 = arith.index_cast %3 : index to i64 + %5 = arith.sitofp %4 : i64 to f64 + memref.store %5, %alloc_18[%c0] : memref<1xf64> + dsp.print %alloc_18 : memref<1xf64> + memref.dealloc %alloc_30 : memref + memref.dealloc %alloc_29 : memref + memref.dealloc %alloc_28 : memref + memref.dealloc %alloc_27 : memref<4096xf64> + memref.dealloc %alloc_26 : memref<4096xf64> + memref.dealloc %alloc_25 : memref<4096xf64> + memref.dealloc %alloc_24 : memref<4096xf64> + memref.dealloc %alloc_23 : memref + memref.dealloc %alloc_22 : memref + memref.dealloc %alloc_21 : memref<4096xf64> + memref.dealloc %alloc_20 : memref<2xf64> + memref.dealloc %alloc_19 : memref<10x2xf64> + memref.dealloc %alloc_18 : memref<1xf64> + memref.dealloc %alloc : memref + return + } +} \ No newline at end of file diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp index 47f76a2ff96c..93dfaa7fdc9c 100644 --- a/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp +++ b/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp @@ -10,9 +10,9 @@ // operation verification. // //===----------------------------------------------------------------------===// -#include #include "toy/Dialect.h" #include "toy/DebugConfig.h" +#include #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -216,7 +216,8 @@ void ConstantOp::print(mlir::OpAsmPrinter &printer) { mlir::LogicalResult ConstantOp::verify() { // If the return type of the constant is not an unranked tensor, the shape // must match the shape of the attribute holding the data. 
- auto resultType = llvm::dyn_cast(getResult().getType()); + auto resultType = + llvm::dyn_cast(getResult().getType()); if (!resultType) return success(); @@ -241,6 +242,18 @@ mlir::LogicalResult ConstantOp::verify() { return mlir::success(); } +//===----------------------------------------------------------------------===// +// ModuloOp +//===----------------------------------------------------------------------===// + +void ModuloOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +void ModuloOp::inferShapes() { getResult().setType(getLhs().getType()); } + //===----------------------------------------------------------------------===// // AddOp //===----------------------------------------------------------------------===// @@ -398,6 +411,27 @@ void DivOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } /// interface. void DivOp::inferShapes() { getResult().setType(getLhs().getType()); } +//===----------------------------------------------------------------------===// +// BitwiseAndOp +//===----------------------------------------------------------------------===// + +void BitwiseAndOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +mlir::ParseResult BitwiseAndOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + return parseBinaryOp(parser, result); +} + +void BitwiseAndOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } + +/// Infer the output shape of the BitwiseAndOp, this is required by the shape +/// inference interface. 
+void BitwiseAndOp::inferShapes() { getResult().setType(getLhs().getType()); } + //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// @@ -426,7 +460,8 @@ mlir::LogicalResult ReturnOp::verify() { auto resultType = results.front(); // Check that the result type of the function matches the operand type. - if (inputType == resultType || llvm::isa(inputType) || + if (inputType == resultType || + llvm::isa(inputType) || llvm::isa(resultType)) return mlir::success(); @@ -466,40 +501,40 @@ mlir::LogicalResult TransposeOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// // DelayOp //===----------------------------------------------------------------------===// // void DelayOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, // mlir::Value lhs, unsigned rhs){ void DelayOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs){ - // - // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), builder.getI32Type()); - state.addTypes(UnrankedTensorType::get(builder.getF64Type())); //working - state.addOperands({lhs, rhs}); - // state.addOperands(value); - - } + mlir::Value lhs, mlir::Value rhs) { + // + // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), + // builder.getI32Type()); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // working + state.addOperands({lhs, rhs}); + // state.addOperands(value); +} - mlir::LogicalResult DelayOp::verify(){ - // auto inputType1 = llvm::dyn_cast(getOperand(0).getType()); - // auto inputType2 = llvm::dyn_cast(getOperand(1).getType()); - // auto resultType = llvm::dyn_cast(getType()); - // if(!inputType || !resultType) - // return mlir::success(); +mlir::LogicalResult DelayOp::verify() { + // auto inputType1 = + // llvm::dyn_cast(getOperand(0).getType()); auto 
inputType2 + // = llvm::dyn_cast(getOperand(1).getType()); auto + // resultType = llvm::dyn_cast(getType()); if(!inputType || + // !resultType) + // return mlir::success(); - return mlir::success(); - } + return mlir::success(); +} // void DelayOp::inferShapes() { getResult().setType(getOperand(0).getType()) ;} -//getLHS defined with Operation as : -// fro addOp +// getLHS defined with Operation as : +// fro addOp // ::mlir::TypedValue<::mlir::TensorType> AddOp::getLhs() { -// return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin()); +// return +// ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin()); // } -void DelayOp::inferShapes() { getResult().setType(getLhs().getType()) ;} - +void DelayOp::inferShapes() { getResult().setType(getLhs().getType()); } //===----------------------------------------------------------------------===// // GainOp @@ -507,27 +542,31 @@ void DelayOp::inferShapes() { getResult().setType(getLhs().getType()) ;} // void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, // mlir::Value lhs, unsigned rhs){ // void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, -// mlir::Value lhs, mlir::Float64Type rhs){ +// mlir::Value lhs, mlir::Float64Type rhs){ void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs){ - // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), builder.getI32Type()); - // state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - // state.addTypes({UnrankedTensorType::get(builder.getF64Type()), builder.getF64Type()}); //working - state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands({lhs, rhs}); - // state.addOperands({rhs}); - // state.addTypes(); - // state.addAttribute("rhs", rhs); - // state.addAttribute("rhs", builder.getF64FloatAttr(builder.getF64Type())); - // state.addAttribute("rhs", builder.getF64Type()); - // 
state.addAttribute("rhs", builder.getFloatAttr(builder.getF64Type() , rhs)); - // state.addOperands(value); - } + mlir::Value lhs, mlir::Value rhs) { + // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), + // builder.getI32Type()); + // state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + // state.addTypes({UnrankedTensorType::get(builder.getF64Type()), + // builder.getF64Type()}); //working + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); + // state.addOperands({rhs}); + // state.addTypes(); + // state.addAttribute("rhs", rhs); + // state.addAttribute("rhs", builder.getF64FloatAttr(builder.getF64Type())); + // state.addAttribute("rhs", builder.getF64Type()); + // state.addAttribute("rhs", builder.getFloatAttr(builder.getF64Type() , + // rhs)); state.addOperands(value); +} // mlir::LogicalResult GainOp::verify(){ -// auto inputType1 = llvm::dyn_cast(getOperand(0).getType()); -// auto inputType2 = llvm::dyn_cast(getOperand(1).getType()); -// // auto inputType2 = llvm::dyn_cast(getOperand(1).getType()); +// auto inputType1 = +// llvm::dyn_cast(getOperand(0).getType()); auto +// inputType2 = llvm::dyn_cast(getOperand(1).getType()); +// // auto inputType2 = +// llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // // if(!inputType || !resultType) // // return mlir::success(); @@ -536,92 +575,360 @@ void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, // } // void GainOp::inferShapes() { getResult().setType(getOperand(0).getType()) ;} -//getLHS defined with Operation as : -// fro addOp +// getLHS defined with Operation as : +// fro addOp // ::mlir::TypedValue<::mlir::TensorType> AddOp::getLhs() { -// return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin()); +// return +// ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin()); // } -void GainOp::inferShapes() { 
getResult().setType(getLhs().getType()) ;} +void GainOp::inferShapes() { getResult().setType(getLhs().getType()); } //===----------------------------------------------------------------------===// - // SubOp - //===----------------------------------------------------------------------===// +// SubOp +//===----------------------------------------------------------------------===// - void SubOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands({lhs, rhs}); - } +void SubOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser, +// mlir::OperationState &result) { +// return parseBinaryOp(parser, result); +// } + +// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } + +/// Infer the output shape of the SubOp, this is required by the shape inference +/// interface. 
+void SubOp::inferShapes() { getResult().setType(getLhs().getType()); } + +//===----------------------------------------------------------------------===// +// FFTRealOp +//===----------------------------------------------------------------------===// + +void FFTRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs) { + state.addTypes(lhs.getType()); + state.addOperands({lhs}); +} + +void FFTRealOp::inferShapes() { getResult().setType(getLhs().getType()); } + +//===----------------------------------------------------------------------===// +// FFTImagOp +//===----------------------------------------------------------------------===// + +void FFTImagOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs) { + state.addTypes(lhs.getType()); + state.addOperands({lhs}); +} + +void FFTImagOp::inferShapes() { getResult().setType(getLhs().getType()); } + +//===----------------------------------------------------------------------===// +// MatmulOp +//===----------------------------------------------------------------------===// + +void MatmulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +// mlir::ParseResult MatmulOp::parse(mlir::OpAsmParser &parser, +// mlir::OperationState &result) { +// return parseBinaryOp(parser, result); +// } + +// void MatmulOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } + +mlir::LogicalResult MatmulOp::verify() { + + // auto resultType = + // llvm::dyn_cast(getResult().getType()); + + auto tensorLhs = getLhs().getType(); + auto shapeOfLhs = tensorLhs.getShape(); + + auto tensorRhs = getRhs().getType(); + auto shapeOfRhs = tensorRhs.getShape(); + + if (shapeOfLhs[1] != shapeOfRhs[0]) + return emitOpError("Matmul: the second dimension of LHS should be equal to " + "the first dimention of RHS."); + return 
mlir::success(); +} + +/// Infer the output shape of the MatmulOp, this is required by the shape +/// inference interface. +void MatmulOp::inferShapes() { + + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorLhs = getLhs().getType(); + auto shapeOfLhs = tensorLhs.getShape(); + + auto tensorRhs = getRhs().getType(); + auto shapeOfRhs = tensorRhs.getShape(); + + std::vector shapeForOutput; + + shapeForOutput.push_back(shapeOfLhs[0]); + shapeForOutput.push_back(shapeOfRhs[1]); + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// FindPeaksOp +//===----------------------------------------------------------------------===// + +void FindPeaksOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value signal, mlir::Value height, + mlir::Value distance) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({signal, height, distance}); +} + +void FindPeaksOp::inferShapes() { + // Maximum possible number of peaks = (length of signal -1) / distance + 1. + // We will return a tensor with size (length of signal -1) / distance + 1 + + // 1(last one to provide number of peaks). 
+ auto signalType = getSignal().getType(); + auto signalShape = signalType.getShape(); + int64_t len_signal = signalShape[0]; + + Value distanceArg = getOperand(2); + dsp::ConstantOp constantOpDistance = + distanceArg.getDefiningOp(); + DenseElementsAttr constantDistanceValue = constantOpDistance.getValue(); + + auto elements = constantDistanceValue.getValues(); + float distanceFloat = elements[0].getValueAsDouble(); + // SecondValueInt = (int64_t)SecondValue; + + int64_t sizeOfOutput = (len_signal - 1) / distanceFloat + 2; + + std::vector shapeForOutput; + shapeForOutput.push_back(sizeOfOutput); + + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get(shapeForOutput, signalType.getElementType()); + + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// MaxOp +//===----------------------------------------------------------------------===// + +void MaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({input}); +} + +/// Infer the output shape of the MaxOp, this is required by the shape inference +/// interface. 
+void MaxOp::inferShapes() { + auto tensorInput = getInput().getType(); + // auto shapeOfInput = tensorInput.getShape(); + + std::vector shapeForOutput; + + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get(shapeForOutput, tensorInput.getElementType()); + + getResult().setType(manipulatedType); +} - // mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser, - // mlir::OperationState &result) { - // return parseBinaryOp(parser, result); - // } +//===----------------------------------------------------------------------===// +// MeanOp +//===----------------------------------------------------------------------===// + +void MeanOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input, mlir::Value length) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({input, length}); +} + +void MeanOp::inferShapes() { + auto tensorInput = getInput().getType(); + + std::vector shapeForOutput; + + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get(shapeForOutput, tensorInput.getElementType()); + + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// DiffOp +//===----------------------------------------------------------------------===// + +void DiffOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input, mlir::Value length) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({input, length}); +} + +void DiffOp::inferShapes() { + auto tensorInput = getInput().getType(); + auto shapeOfInput = tensorInput.getShape(); + + std::vector shapeForOutput; + shapeForOutput.push_back(shapeOfInput[0] - 1); + + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get(shapeForOutput, tensorInput.getElementType()); + + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// AbsOp 
+//===----------------------------------------------------------------------===// + +void AbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({input}); +} + +void AbsOp::inferShapes() { getResult().setType(getInput().getType()); } + +//===----------------------------------------------------------------------===// +// ArgMaxOp +//===----------------------------------------------------------------------===// + +void ArgMaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input, int64_t axis) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addAttribute("axis", builder.getI64IntegerAttr(axis)); + state.addOperands({input}); +} + +void ArgMaxOp::inferShapes() { + + auto inputType = getInput().getType(); + auto inputRank = inputType.getRank(); + auto inputShape = inputType.getShape(); + + if (inputRank == 1) { + vector outputShape(1, 1); + auto outputType = + mlir::RankedTensorType::get(outputShape, inputType.getElementType()); + getResult().setType(outputType); + return; + } + + int64_t axis = getAxis(); + int64_t dim = axis == 1 ? 
0 : 1; + + auto outputType = + mlir::RankedTensorType::get(inputShape[dim], inputType.getElementType()); + + getResult().setType(outputType); +} + +//===----------------------------------------------------------------------===// +// PowOp +//===----------------------------------------------------------------------===// + +void PowOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +void PowOp::inferShapes() { getResult().setType(getLhs().getType()); } + +mlir::LogicalResult PowOp::verify() { + auto lhsType = llvm::dyn_cast(getLhs().getType()); + auto resultType = llvm::dyn_cast(getType()); + + if (!lhsType || !resultType) + return mlir::success(); - // void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } + // ensure result shape matches lhs shape + auto resultShape = resultType.getShape(); + if (!std::equal(lhsType.getShape().begin(), lhsType.getShape().end(), + resultShape.rbegin())) { + return emitError() + << "expected result shape to be the same as the lhs input operand."; + } - /// Infer the output shape of the SubOp, this is required by the shape inference - /// interface. 
- void SubOp::inferShapes() { getResult().setType(getLhs().getType()); } + return mlir::success(); +} //===----------------------------------------------------------------------===// // zeroCrossCountOp //===----------------------------------------------------------------------===// -void zeroCrossCountOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs) { +void zeroCrossCountOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value lhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // state.addTypes(builder.getF64Type())); // state.addTypes(builder.getI64Type()); state.addOperands({lhs}); } -/// Infer the output shape of the zeroCrossCountOp, this is required by the shape inference - /// interface. - void zeroCrossCountOp::inferShapes() { getResult().setType(getLhs().getType()); } - +/// Infer the output shape of the zeroCrossCountOp, this is required by the +/// shape inference interface. +void zeroCrossCountOp::inferShapes() { + getResult().setType(getLhs().getType()); +} //===----------------------------------------------------------------------===// // FIRFilterResponseOp //===----------------------------------------------------------------------===// -void FIRFilterResponseOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { +void FIRFilterResponseOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value lhs, + mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the FIRFilterResponseOp, this is required by the shape inference -/// interface. 
-//ToDo -- shape should be the length of Lhs + Rhs - 1 -void FIRFilterResponseOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the FIRFilterResponseOp, this is required by the +/// shape inference interface. +// ToDo -- shape should be the length of Lhs + Rhs - 1 +void FIRFilterResponseOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); auto shapeOfInput = tensorInput.getShape(); auto tensorFilter = getRhs().getType(); auto shapeOfFilter = tensorFilter.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; - for(size_t i=0; i < shapeOfInput.size() ; i++){ + for (size_t i = 0; i < shapeOfInput.size(); i++) { shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getLhs().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); - } +} -//get rank of Input & Filter -- make sure it is of rank 1 +// get rank of Input & Filter -- make sure it is of rank 1 mlir::LogicalResult FIRFilterResponseOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -634,41 +941,72 @@ mlir::LogicalResult FIRFilterResponseOp::verify() { // } return mlir::success(); -} +} + 
+//===----------------------------------------------------------------------===// +// MedianFilterOp +//===----------------------------------------------------------------------===// + +void MedianFilterOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands(value); +} + +void MedianFilterOp::inferShapes() { + // for each rank + // Get the shape/size of input + // output size = input_size - 2 + auto inputType = llvm::dyn_cast(getOperand().getType()); + + auto shapeOfInput = inputType.getShape(); + + std::vector shapeForOutput; + + // Iterate for each rank : tensor<1x2x3x2> = rank 4 + for (size_t i = 0; i < shapeOfInput.size(); i++) { + shapeForOutput.push_back(shapeOfInput[i] - 2); + } + + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + // getOperand().getType()); + // getOperand().getType().getElementType()); + getResult().setType(outputType); +} //===----------------------------------------------------------------------===// // SlidingWindowAvgOp //===----------------------------------------------------------------------===// -void SlidingWindowAvgOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { +void SlidingWindowAvgOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } void SlidingWindowAvgOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - 2 + // for each rank + // Get the shape/size of input + // output size = input_size - 2 auto inputType = llvm::dyn_cast(getOperand().getType()); auto shapeOfInput = inputType.getShape(); std::vector shapeForOutput; - //Iterate for each rank : tensor<1x2x3x2> = rank 4 - for(size_t i=0; i < shapeOfInput.size() ; i++){ + // Iterate for 
each rank : tensor<1x2x3x2> = rank 4 + for (size_t i = 0; i < shapeOfInput.size(); i++) { shapeForOutput.push_back(shapeOfInput[i] - 2); } - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); - // getOperand().getType()); - // getOperand().getType().getElementType()); + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + // getOperand().getType()); + // getOperand().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult SlidingWindowAvgOp::verify() { @@ -683,7 +1021,8 @@ mlir::LogicalResult SlidingWindowAvgOp::verify() { // for(size_t i=0; i < shapeOfInput.size() ; i++){ // if(shapeOfInput[i] < 3){ - // llvm::errs() << "Warning:SlidingWindowAvgOp = Input size < 3 " << "size= " << shapeOfInput[i] << "\n" ; + // llvm::errs() << "Warning:SlidingWindowAvgOp = Input size < 3 " << + // "size= " << shapeOfInput[i] << "\n" ; // } // } @@ -694,221 +1033,226 @@ mlir::LogicalResult SlidingWindowAvgOp::verify() { // DownsamplingOp //===----------------------------------------------------------------------===// -void DownsamplingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { +void DownsamplingOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value lhs, + mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the DownsamplingOp, this is required by the shape inference -/// interface. -//ToDo -- shape should be the length of Lhs + Rhs - 1 -void DownsamplingOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the DownsamplingOp, this is required by the shape +/// inference interface. 
+// ToDo -- shape should be the length of Lhs + Rhs - 1 +void DownsamplingOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); auto shapeOfInput = tensorInput.getShape(); - // auto tensorDownsampling = getRhs().getType(); - // auto shapeOfDownsampling = tensorDownsampling.getShape(); //shape is the dimension - + // auto tensorDownsampling = getRhs().getType(); + // auto shapeOfDownsampling = tensorDownsampling.getShape(); //shape is the + // dimension - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t SecondValueInt = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int Value downsampling2ndArg = getOperand(1); - dsp::ConstantOp constantOp2ndArg = downsampling2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + downsampling2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - SecondValueInt = (int64_t) SecondValue; - // llvm::errs() << "Downsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate - - - for(size_t i=0; i < shapeOfInput.size() ; i++){ - double GetLenForOutput = static_cast(shapeOfInput[i] )/ SecondValueInt ; - if(fmod(GetLenForOutput, 1.0) != 0) { - //if remainder remains + SecondValueInt = (int64_t)SecondValue; + // llvm::errs() << "Downsampling: SamplingRate: " << SecondValueInt << " \n"; + // //downsamplingRate + + for (size_t i = 0; 
i < shapeOfInput.size(); i++) { + double GetLenForOutput = + static_cast(shapeOfInput[i]) / SecondValueInt; + if (fmod(GetLenForOutput, 1.0) != 0) { + // if remainder remains GetLenForOutput = ceil(GetLenForOutput); } - int64_t OutlenInt = static_cast (GetLenForOutput); + int64_t OutlenInt = static_cast(GetLenForOutput); llvm::errs() << "Downsampling: OutlenInt: " << OutlenInt << " \n"; shapeForOutput.push_back(OutlenInt); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getLhs().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); - } +} -//get rank of Input & Downsampling -- make sure it is of rank 1 +// get rank of Input & Downsampling -- make sure it is of rank 1 mlir::LogicalResult DownsamplingOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto samplingRateType = llvm::dyn_cast(getOperand(1).getType()); + // auto samplingRateType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); // auto samplingRateRank = samplingRateType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // //once ensured only 1 rank from above -- also make sure there is just 1 elem - // if( inputRank != 1 || samplingRateRank != 0 ) + // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; + // //once ensured only 1 rank from above -- also make sure there is just 1 + // elem if( inputRank != 1 || samplingRateRank != 0 ) // { - // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // return emitError() + // llvm::errs() << "inputRank: " << 
inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; return emitError() // << "expected rank of input & Downsampling is 1"; // } return mlir::success(); -} +} //===----------------------------------------------------------------------===// // UpsamplingOp //===----------------------------------------------------------------------===// void UpsamplingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { + mlir::Value lhs, mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the UpsamplingOp, this is required by the shape inference -/// interface. -//ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs -void UpsamplingOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the UpsamplingOp, this is required by the shape +/// inference interface. 
+// ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs +void UpsamplingOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); auto shapeOfInput = tensorInput.getShape(); - // auto tensorUpsampling = getRhs().getType(); + // auto tensorUpsampling = getRhs().getType(); // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length - - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t SecondValueInt = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int Value upsampling2ndArg = getOperand(1); - dsp::ConstantOp constantOp2ndArg = upsampling2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + upsampling2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - SecondValueInt = (int64_t) SecondValue; - // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate - - - for(size_t i=0; i < shapeOfInput.size() ; i++){ - double GetLenForOutput = static_cast(shapeOfInput[i] ) * SecondValueInt ; - int64_t OutlenInt = static_cast (GetLenForOutput); + SecondValueInt = (int64_t)SecondValue; + // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; + // //downsamplingRate + + for (size_t i = 0; i < shapeOfInput.size(); i++) { + double GetLenForOutput = + static_cast(shapeOfInput[i]) * SecondValueInt; + 
int64_t OutlenInt = static_cast(GetLenForOutput); llvm::errs() << "Upsampling: OutlenInt: " << OutlenInt << " \n"; shapeForOutput.push_back(OutlenInt); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getLhs().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); - } +} -//get rank of Input & Upsampling -- make sure it is of rank 1 +// get rank of Input & Upsampling -- make sure it is of rank 1 mlir::LogicalResult UpsamplingOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto samplingRateType = llvm::dyn_cast(getOperand(1).getType()); + // auto samplingRateType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); // auto samplingRateRank = samplingRateType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // //once ensured only 1 rank from above -- also make sure there is just 1 elem - // if( inputRank != 1 || samplingRateRank != 0 ) + // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; + // //once ensured only 1 rank from above -- also make sure there is just 1 + // elem if( inputRank != 1 || samplingRateRank != 0 ) // { - // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // return emitError() + // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; return emitError() // << "expected rank of input is 1 & Upsampling is 0"; // } return mlir::success(); -} - +} //===----------------------------------------------------------------------===// // 
LowPassFilter1stOrderOp //===----------------------------------------------------------------------===// -void LowPassFilter1stOrderOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { +void LowPassFilter1stOrderOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the LowPassFilter1stOrderOp, this is required by the shape inference -/// interface. -void LowPassFilter1stOrderOp::inferShapes() { - //get the shape of Lhs & rhs - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the LowPassFilter1stOrderOp, this is required by +/// the shape inference interface. +void LowPassFilter1stOrderOp::inferShapes() { + // get the shape of Lhs & rhs + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); getResult().setType(tensorInput); } -//get rank of Input & alphaValue -- make sure it is of rank 1 +// get rank of Input & alphaValue -- make sure it is of rank 1 mlir::LogicalResult LowPassFilter1stOrderOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto alphaValueType = llvm::dyn_cast(getOperand(1).getType()); + // auto alphaValueType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); // auto alphaValueRank = alphaValueType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- also make sure there is just 1 elem - // if( inputRank != 1 || alphaValueRank != 0 ) + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- 
also make sure there is just 1 + // elem if( inputRank != 1 || alphaValueRank != 0 ) // { - // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // return emitError() + // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; return emitError() // << "expected rank of input & Upsampling is 1"; // } return mlir::success(); -} +} //===----------------------------------------------------------------------===// // HighPassFilterOp //===----------------------------------------------------------------------===// -void HighPassFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { +void HighPassFilterOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } void HighPassFilterOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); getResult().setType(tensorInput); - } mlir::LogicalResult HighPassFilterOp::verify() { @@ -916,7 +1260,7 @@ mlir::LogicalResult HighPassFilterOp::verify() { // auto inputRank = inputType.getRank(); // llvm::errs() << "inputRank: " << inputRank << "\n"; - // //once ensured only 1 rank from above -- + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -926,25 +1270,24 @@ mlir::LogicalResult HighPassFilterOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// // FFT1DOp //===----------------------------------------------------------------------===// void FFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - 
DEBUG_PRINT_NO_ARGS() ; - state.addTypes({UnrankedTensorType::get(builder.getF64Type()), - UnrankedTensorType::get(builder.getF64Type())}); + mlir::Value value) { + DEBUG_PRINT_NO_ARGS(); + state.addTypes({UnrankedTensorType::get(builder.getF64Type()), + UnrankedTensorType::get(builder.getF64Type())}); state.addOperands(value); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void FFT1DOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); // getResult().setType(tensorInput); getResult(0).setType(tensorInput); getResult(1).setType(tensorInput); @@ -952,12 +1295,13 @@ void FFT1DOp::inferShapes() { } mlir::LogicalResult FFT1DOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -967,36 +1311,36 @@ mlir::LogicalResult FFT1DOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// // IFFT1DOp //===----------------------------------------------------------------------===// void IFFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value real, mlir::Value img) { - DEBUG_PRINT_NO_ARGS() ; + mlir::Value real, mlir::Value img) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({real , img}); - DEBUG_PRINT_NO_ARGS() ; + 
state.addOperands({real, img}); + DEBUG_PRINT_NO_ARGS(); } void IFFT1DOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getReal().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getReal().getType(); getResult().setType(tensorInput); // getResult(0).setType(tensorInput); // getResult(1).setType(tensorInput); } mlir::LogicalResult IFFT1DOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1010,55 +1354,54 @@ mlir::LogicalResult IFFT1DOp::verify() { // HammingWindowOp //===----------------------------------------------------------------------===// -void HammingWindowOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { +void HammingWindowOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } void HammingWindowOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getOperand().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t FirstOpInt = 1; - //To extract value from the SSA value: - //get the 
Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; Value hammingLen = getOperand(); - dsp::ConstantOp constantOp1stArg = hammingLen.getDefiningOp(); + dsp::ConstantOp constantOp1stArg = + hammingLen.getDefiningOp(); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; auto elements = constantLhsValue.getValues(); float FirstValue = elements[0].getValueAsDouble(); - FirstOpInt = (int64_t) FirstValue; + FirstOpInt = (int64_t)FirstValue; // llvm::errs() << "FirstOpInt " << FirstOpInt << "\n" ; // llvm::errs() << "shapeOfInput.size() " << shapeOfInput.size() << "\n" ; // for(size_t i=0; i < shapeOfInput.size() ; i++){ - // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; - shapeForOutput.push_back(FirstOpInt); + // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; + shapeForOutput.push_back(FirstOpInt); // } - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); - // getOperand().getType()); - // getOperand().getType().getElementType()); + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + // getOperand().getType()); + // getOperand().getType().getElementType()); getResult().setType(outputType); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; - - } mlir::LogicalResult HammingWindowOp::verify() { @@ -1073,7 +1416,8 @@ mlir::LogicalResult HammingWindowOp::verify() 
{ // for(size_t i=0; i < shapeOfInput.size() ; i++){ // if(shapeOfInput[i] < 3){ - // llvm::errs() << "Warning:HammingWindowOp = Input size < 3 " << "size= " << shapeOfInput[i] << "\n" ; + // llvm::errs() << "Warning:HammingWindowOp = Input size < 3 " << "size= " + // << shapeOfInput[i] << "\n" ; // } // } @@ -1085,7 +1429,7 @@ mlir::LogicalResult HammingWindowOp::verify() { //===----------------------------------------------------------------------===// void DCTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { + mlir::Value value) { // DEBUG_PRINT_NO_ARGS() ; state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); @@ -1093,10 +1437,10 @@ void DCTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, } void DCTOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); getResult().setType(tensorInput); // getResult(0).setType(tensorInput); // getResult(1).setType(tensorInput); @@ -1107,46 +1451,42 @@ mlir::LogicalResult DCTOp::verify() { auto inputType = llvm::dyn_cast(getOperand().getType()); auto inputRank = inputType.getRank(); - // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - //once ensured only 1 rank from above -- - if( inputRank != 1 ) - { - llvm::errs() << "inputRank: " << inputRank << "\n"; - return emitError() - << "expected rank of input is 1"; + // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // once ensured only 1 rank from above -- + if (inputRank != 1) { + llvm::errs() << "inputRank: " << inputRank << "\n"; + return emitError() << "expected rank of input is 1"; } return mlir::success(); } - - 
//===----------------------------------------------------------------------===// // filterOp //===----------------------------------------------------------------------===// void filterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value b, mlir::Value a, mlir::Value x) { + mlir::Value b, mlir::Value a, mlir::Value x) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({b, a, x}); } +/// Infer the output shape of the filterOp, this is required by the shape +/// inference interface. +// ToDo -- shape should be the length of Lhs + Rhs - 1 +void filterOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getX().getType(); + getResult().setType(tensorInput); +} - -/// Infer the output shape of the filterOp, this is required by the shape inference -/// interface. -//ToDo -- shape should be the length of Lhs + Rhs - 1 -void filterOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getX().getType(); - getResult().setType(tensorInput ); - } - -//get rank of Input & Filter -- make sure it is of rank 1 +// get rank of Input & Filter -- make sure it is of rank 1 mlir::LogicalResult filterOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -1159,15 +1499,14 @@ mlir::LogicalResult filterOp::verify() { // } return mlir::success(); -} - +} //===----------------------------------------------------------------------===// // SumOp //===----------------------------------------------------------------------===// void SumOp::build(mlir::OpBuilder &builder, 
mlir::OperationState &state, - mlir::Value value) { + mlir::Value value) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } @@ -1179,8 +1518,8 @@ void SumOp::inferShapes() { shapeForOutput.push_back(1); - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); getResult().setType(manipulatedType); } @@ -1199,96 +1538,99 @@ mlir::LogicalResult SumOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// - // CosOp - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// CosOp +//===----------------------------------------------------------------------===// - void CosOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - // DEBUG_PRINT_NO_ARGS() ; - state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands(value); - // DEBUG_PRINT_NO_ARGS() ; - } +void CosOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value value) { + // DEBUG_PRINT_NO_ARGS() ; + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands(value); + // DEBUG_PRINT_NO_ARGS() ; +} - void CosOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); - getResult().setType(tensorInput); - // getResult(0).setType(tensorInput); - // getResult(1).setType(tensorInput); - } +void CosOp::inferShapes() { + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); + getResult().setType(tensorInput); + // getResult(0).setType(tensorInput); + // 
getResult(1).setType(tensorInput); +} - mlir::LogicalResult CosOp::verify() { - // DEBUG_PRINT_NO_ARGS() ; +mlir::LogicalResult CosOp::verify() { + // DEBUG_PRINT_NO_ARGS() ; // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; // return emitError() // << "expected rank of input is 1"; // } - return mlir::success(); - } + return mlir::success(); +} - //===----------------------------------------------------------------------===// - // SinOp - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// SinOp +//===----------------------------------------------------------------------===// - void SinOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - // DEBUG_PRINT_NO_ARGS() ; - state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands(value); - // DEBUG_PRINT_NO_ARGS() ; - } +void SinOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value value) { + // DEBUG_PRINT_NO_ARGS() ; + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands(value); + // DEBUG_PRINT_NO_ARGS() ; +} - void SinOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); - getResult().setType(tensorInput); - // getResult(0).setType(tensorInput); - // getResult(1).setType(tensorInput); - } +void SinOp::inferShapes() { + // for each rank + // Get the shape/size of input + // 
output size = input_size + auto tensorInput = getInput().getType(); + getResult().setType(tensorInput); + // getResult(0).setType(tensorInput); + // getResult(1).setType(tensorInput); +} - mlir::LogicalResult SinOp::verify() { - // DEBUG_PRINT_NO_ARGS() ; +mlir::LogicalResult SinOp::verify() { + // DEBUG_PRINT_NO_ARGS() ; // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; // return emitError() // << "expected rank of input is 1"; // } - return mlir::success(); - } + return mlir::success(); +} //===----------------------------------------------------------------------===// // SquareOp //===----------------------------------------------------------------------===// void SquareOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { + mlir::Value value) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } void SquareOp::inferShapes() { - auto tensorInput = getInput().getType(); - // mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - // getInput().getType().getElementType()); + auto tensorInput = getInput().getType(); + // mlir::TensorType manipulatedType = + // mlir::RankedTensorType::get(shapeForOutput, + // getInput().getType().getElementType()); getResult().setType(tensorInput); } @@ -1313,17 +1655,17 @@ mlir::LogicalResult SquareOp::verify() { void FFT1DRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); 
state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands(value); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void FFT1DRealOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); // getResult().setType(tensorInput); getResult().setType(tensorInput); // getResult(2).setType(tensorInput); @@ -1334,8 +1676,9 @@ mlir::LogicalResult FFT1DRealOp::verify() { // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1350,30 +1693,31 @@ mlir::LogicalResult FFT1DRealOp::verify() { //===----------------------------------------------------------------------===// void FFT1DImgOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - DEBUG_PRINT_NO_ARGS() ; + mlir::Value value) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands(value); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void FFT1DImgOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); // getResult().setType(tensorInput); getResult().setType(tensorInput); // getResult(2).setType(tensorInput); } mlir::LogicalResult FFT1DImgOp::verify() { - 
DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1388,30 +1732,30 @@ mlir::LogicalResult FFT1DImgOp::verify() { //===----------------------------------------------------------------------===// void SincOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value wc, mlir::Value n) { - DEBUG_PRINT_NO_ARGS() ; + mlir::Value wc, mlir::Value n) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({wc, n}); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void SincOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1419,26 +1763,25 @@ void 
SincOp::inferShapes() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (int64_t) LenN; + GetLen = (int64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getWc().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getWc().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult SincOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1452,11 +1795,12 @@ mlir::LogicalResult SincOp::verify() { // GetElemAtIndxOp //===----------------------------------------------------------------------===// -void GetElemAtIndxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input, mlir::Value indx) { +void GetElemAtIndxOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value indx) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input, indx} ); + state.addOperands({input, indx}); DEBUG_PRINT_NO_ARGS(); } @@ -1467,8 +1811,8 @@ void GetElemAtIndxOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); shapeForOutput.push_back(1); - mlir::TensorType manipulatedType = 
mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); getResult().setType(manipulatedType); DEBUG_PRINT_NO_ARGS(); } @@ -1488,16 +1832,151 @@ mlir::LogicalResult GetElemAtIndxOp::verify() { return mlir::success(); } +//===----------------------------------------------------------------------===// +// GetSingleElemAtIdxOp +//===----------------------------------------------------------------------===// + +void GetSingleElemAtIdxOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value indx) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input, indx}); +} + +void GetSingleElemAtIdxOp::inferShapes() { + std::vector shapeForOutput; + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// Diff2MeanOptimizedOp +//===----------------------------------------------------------------------===// + +void Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value length) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input, length}); +} + +void Diff2MeanOptimizedOp::inferShapes() { + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get({}, getInput().getType().getElementType()); + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// FindPeaks2Diff2MeanOptimizedOp +//===----------------------------------------------------------------------===// + +void FindPeaks2Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value 
signal, + mlir::Value height, + mlir::Value distance) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({signal, height, distance}); +} + +void FindPeaks2Diff2MeanOptimizedOp::inferShapes() { + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get({}, getSignal().getType().getElementType()); + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// Median2SlidingOptimizedOp +//===----------------------------------------------------------------------===// + +void Median2SlidingOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value input) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands(input); +} + +void Median2SlidingOptimizedOp::inferShapes() { + // for each rank + // Get the shape/size of input + // output size = input_size - 4 + auto inputType = llvm::dyn_cast(getOperand().getType()); + + auto shapeOfInput = inputType.getShape(); + + std::vector shapeForOutput; + + // Iterate for each rank : tensor<1x2x3x2> = rank 4 + for (size_t i = 0; i < shapeOfInput.size(); i++) { + shapeForOutput.push_back(shapeOfInput[i] - 4); + } + + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + // getOperand().getType()); + // getOperand().getType().getElementType()); + + getResult().setType(outputType); +} + +//===----------------------------------------------------------------------===// +// LMS2FindPeaksOptimizedOp +//===----------------------------------------------------------------------===// + +void LMS2FindPeaksOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs, + mlir::Value mu, mlir::Value filterLen, + mlir::Value height, mlir::Value distance) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({lhs, rhs, mu, 
filterLen, height, distance}); +} + +void LMS2FindPeaksOptimizedOp::inferShapes() { + + // getResult().setType(getLhs().getType()); + + // The above is for LMSFilterResponseOp + + // Maximum possible number of peaks = (length of signal -1) / distance + 1. + // We will return a tensor with size (length of signal -1) / distance + 1 + + // 1(last one to provide number of peaks). + auto signalType = getLhs().getType(); + auto signalShape = signalType.getShape(); + int64_t len_signal = signalShape[0]; + + Value distanceArg = getOperand(5); + dsp::ConstantOp constantOpDistance = + distanceArg.getDefiningOp(); + DenseElementsAttr constantDistanceValue = constantOpDistance.getValue(); + + auto elements = constantDistanceValue.getValues(); + float distanceFloat = elements[0].getValueAsDouble(); + // SecondValueInt = (int64_t)SecondValue; + + int64_t sizeOfOutput = (len_signal - 1) / distanceFloat + 2; + + std::vector shapeForOutput; + shapeForOutput.push_back(sizeOfOutput); + + mlir::TensorType manipulatedType = + mlir::RankedTensorType::get(shapeForOutput, signalType.getElementType()); + + getResult().setType(manipulatedType); +} //===----------------------------------------------------------------------===// // SetElemAtIndxOp //===----------------------------------------------------------------------===// -void SetElemAtIndxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input, mlir::Value indx, mlir::Value val) { +void SetElemAtIndxOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value indx, mlir::Value val) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input, indx, val} ); + state.addOperands({input, indx, val}); DEBUG_PRINT_NO_ARGS(); } @@ -1508,45 +1987,44 @@ void SetElemAtIndxOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); shapeForOutput.push_back(1); - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - 
getInput().getType().getElementType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); getResult().setType(manipulatedType); DEBUG_PRINT_NO_ARGS(); } -mlir::LogicalResult SetElemAtIndxOp::verify() { - return mlir::success(); -} +mlir::LogicalResult SetElemAtIndxOp::verify() { return mlir::success(); } //===----------------------------------------------------------------------===// // LowPassFIRFilterOp //===----------------------------------------------------------------------===// -void LowPassFIRFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value wc, mlir::Value n) { - DEBUG_PRINT_NO_ARGS() ; +void LowPassFIRFilterOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value wc, + mlir::Value n) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({wc, n}); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void LowPassFIRFilterOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1554,30 +2032,27 @@ void 
LowPassFIRFilterOp::inferShapes() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //int64_t N = tensorType.getShape()[0]; + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + // int64_t N = tensorType.getShape()[0]; shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getWc().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getWc().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult LowPassFIRFilterOp::verify() { uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1585,17 +2060,16 @@ mlir::LogicalResult LowPassFIRFilterOp::verify() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //filter-order even not supported -- so making it odd - if(GetLen % 2 == 0 ) - { + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + + // filter-order even not supported -- so making it odd + if (GetLen % 2 == 0) { // GetLen = GetLen + 1; llvm::errs() << "N for lowPassFilter must be odd but is " 
<< GetLen << "\n"; - // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); - return mlir::failure(); + // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); + return mlir::failure(); } return mlir::success(); } @@ -1605,19 +2079,20 @@ mlir::LogicalResult LowPassFIRFilterOp::verify() { //===----------------------------------------------------------------------===// void LMSFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs, mlir::Value mu, mlir::Value filterLen, mlir::Value iters) { - + mlir::Value lhs, mlir::Value rhs, mlir::Value mu, + mlir::Value filterLen, mlir::Value iters) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands({lhs, rhs, mu, filterLen, iters}); + state.addOperands({lhs, rhs, mu, filterLen, iters}); } - void LMSFilterOp::inferShapes() { getResult().setType(getLhs().getType()); } mlir::LogicalResult LMSFilterOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -1632,36 +2107,36 @@ mlir::LogicalResult LMSFilterOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// // HighPassFIRFilterOp //===----------------------------------------------------------------------===// -void HighPassFIRFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value wc, mlir::Value n) { - DEBUG_PRINT_NO_ARGS() ; +void HighPassFIRFilterOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value wc, + mlir::Value n) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({wc, n}); - 
DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void HighPassFIRFilterOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1669,26 +2144,25 @@ void HighPassFIRFilterOp::inferShapes() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (int64_t) LenN; + GetLen = (int64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getWc().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getWc().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult HighPassFIRFilterOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << 
"\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1702,31 +2176,32 @@ mlir::LogicalResult HighPassFIRFilterOp::verify() { // GetRangeOfVectorOp //===----------------------------------------------------------------------===// -void GetRangeOfVectorOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value first, mlir::Value N, mlir::Value step) { - DEBUG_PRINT_NO_ARGS() ; +void GetRangeOfVectorOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value first, + mlir::Value N, mlir::Value step) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({first, N, step}); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void GetRangeOfVectorOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1734,26 +2209,25 @@ void GetRangeOfVectorOp::inferShapes() { DenseElementsAttr 
constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (int64_t) LenN; + GetLen = (int64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getFirst().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getFirst().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult GetRangeOfVectorOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -1767,31 +2241,32 @@ mlir::LogicalResult GetRangeOfVectorOp::verify() { // FIRFilterHammingOptimizedOp //===----------------------------------------------------------------------===// -void FIRFilterHammingOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value wc, mlir::Value n) { - DEBUG_PRINT_NO_ARGS() ; +void FIRFilterHammingOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value wc, mlir::Value n) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({wc, n}); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void FIRFilterHammingOptimizedOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto 
inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1799,30 +2274,27 @@ void FIRFilterHammingOptimizedOp::inferShapes() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //int64_t N = tensorType.getShape()[0]; + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + // int64_t N = tensorType.getShape()[0]; shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getWc().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getWc().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() { uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to 
corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1830,17 +2302,16 @@ mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //filter-order even not supported -- so making it odd - if(GetLen % 2 == 0 ) - { + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + + // filter-order even not supported -- so making it odd + if (GetLen % 2 == 0) { // GetLen = GetLen + 1; llvm::errs() << "N for lowPassFilter must be odd but is " << GetLen << "\n"; - // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); - return mlir::failure(); + // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); + return mlir::failure(); } return mlir::success(); } @@ -1849,31 +2320,32 @@ mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() { // HighPassFIRHammingOptimizedOp //===----------------------------------------------------------------------===// -void HighPassFIRHammingOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value wc, mlir::Value n) { - DEBUG_PRINT_NO_ARGS() ; +void HighPassFIRHammingOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value wc, mlir::Value n) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands({wc, n}); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void HighPassFIRHammingOptimizedOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - // auto inputType = llvm::dyn_cast(getN().getType()); + // for each rank + // Get the 
shape/size of input + // output size = input_size + // auto inputType = llvm::dyn_cast(getN().getType()); // auto shapeOfInput = inputType.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1881,30 +2353,27 @@ void HighPassFIRHammingOptimizedOp::inferShapes() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //int64_t N = tensorType.getShape()[0]; + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + // int64_t N = tensorType.getShape()[0]; shapeForOutput.push_back(GetLen); - mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, - getWc().getType().getElementType()); - + mlir::TensorType outputType = mlir::RankedTensorType::get( + shapeForOutput, getWc().getType().getElementType()); getResult().setType(outputType); - } mlir::LogicalResult HighPassFIRHammingOptimizedOp::verify() { uint64_t GetLen = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then 
convert to int DEBUG_PRINT_NO_ARGS(); Value inputLen = getOperand(1); dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp(); @@ -1912,22 +2381,20 @@ mlir::LogicalResult HighPassFIRHammingOptimizedOp::verify() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float LenN = elements[0].getValueAsDouble(); - GetLen = (uint64_t) LenN; + GetLen = (uint64_t)LenN; DEBUG_PRINT_WITH_ARGS(GetLen); - DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen); - - //filter-order even not supported -- so making it odd - if(GetLen % 2 == 0 ) - { + DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen); + + // filter-order even not supported -- so making it odd + if (GetLen % 2 == 0) { // GetLen = GetLen + 1; llvm::errs() << "N for lowPassFilter must be odd but is " << GetLen << "\n"; - // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); - return mlir::failure(); + // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); + return mlir::failure(); } return mlir::success(); } - //===----------------------------------------------------------------------===// // ThresholdOp //===----------------------------------------------------------------------===// @@ -1936,23 +2403,23 @@ void ThresholdOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value input, mlir::Value threshld) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input, threshld} ); + state.addOperands({input, threshld}); DEBUG_PRINT_NO_ARGS(); } void ThresholdOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); - auto tensorInput = getInput().getType(); + auto tensorInput = getInput().getType(); getResult().setType(tensorInput); DEBUG_PRINT_NO_ARGS(); } mlir::LogicalResult ThresholdOp::verify() { - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + 
// To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value threshold = getOperand(1); dsp::ConstantOp constantOp1stArg = threshold.getDefiningOp(); @@ -1960,52 +2427,54 @@ mlir::LogicalResult ThresholdOp::verify() { DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); float GetThresholdVal = elements[0].getValueAsDouble(); - + DEBUG_PRINT_WITH_ARGS(GetThresholdVal); - DEBUG_PRINT_WITH_ARGS("GetThresholdVal= " , GetThresholdVal); - - //filter-order even not supported -- so making it odd - if(GetThresholdVal <= 0 ) - { + DEBUG_PRINT_WITH_ARGS("GetThresholdVal= ", GetThresholdVal); + + // filter-order even not supported -- so making it odd + if (GetThresholdVal <= 0) { // GetThresholdVal = GetThresholdVal + 1; - llvm::errs() << "threshold value must be >= 0 but got: " << GetThresholdVal << "\n"; - // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetThresholdVal); - return mlir::failure(); + llvm::errs() << "threshold value must be >= 0 but got: " << GetThresholdVal + << "\n"; + // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetThresholdVal); + return mlir::failure(); } return mlir::success(); - } //===----------------------------------------------------------------------===// // QuantizationOp //===----------------------------------------------------------------------===// -void QuantizationOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input, mlir::Value nLevels, mlir::Value max, mlir::Value min) { +void QuantizationOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value nLevels, mlir::Value max, + mlir::Value min) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input, nLevels, max, min} 
); + state.addOperands({input, nLevels, max, min}); DEBUG_PRINT_NO_ARGS(); } void QuantizationOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); - auto tensorInput = getInput().getType(); + auto tensorInput = getInput().getType(); getResult().setType(tensorInput); DEBUG_PRINT_NO_ARGS(); } mlir::LogicalResult QuantizationOp::verify() { - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int // DEBUG_PRINT_NO_ARGS(); - // check max > min && NoOfLevels = powerOf2 + // check max > min && NoOfLevels = powerOf2 Value maxOperand = getOperand(2); - dsp::ConstantOp constantOp1stArg = maxOperand.getDefiningOp(); + dsp::ConstantOp constantOp1stArg = + maxOperand.getDefiningOp(); DEBUG_PRINT_NO_ARGS(); DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); auto elements = constantLhsValue.getValues(); @@ -2014,45 +2483,48 @@ mlir::LogicalResult QuantizationOp::verify() { Value minOperand = getOperand(3); constantOp1stArg = minOperand.getDefiningOp(); - if(!constantOp1stArg){ - llvm::errs() << "QuantizationOp: unable to get Constant for minOp -- 4th opernad " << "\n"; - return mlir::failure(); + if (!constantOp1stArg) { + llvm::errs() + << "QuantizationOp: unable to get Constant for minOp -- 4th opernad " + << "\n"; + return mlir::failure(); } DEBUG_PRINT_NO_ARGS(); constantLhsValue = constantOp1stArg.getValue(); elements = constantLhsValue.getValues(); float getMin = elements[0].getValueAsDouble(); - if(getMax < getMin){ - llvm::errs() << "QuantizatnOp : Max < Min --" << " Max: " << getMax ; - llvm::errs() << " Min: " << getMin ; + if (getMax < getMin) { + llvm::errs() << "QuantizatnOp : Max < Min --" << " Max: " << getMax; + llvm::errs() << " 
Min: " << getMin; return mlir::failure(); } - return mlir::success(); - } - //===----------------------------------------------------------------------===// // LMSFilterResponseOp //===----------------------------------------------------------------------===// -void LMSFilterResponseOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs, mlir::Value mu, mlir::Value filterLen) { - +void LMSFilterResponseOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value lhs, + mlir::Value rhs, mlir::Value mu, + mlir::Value filterLen) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); - state.addOperands({lhs, rhs, mu, filterLen}); + state.addOperands({lhs, rhs, mu, filterLen}); } - -void LMSFilterResponseOp::inferShapes() { getResult().setType(getLhs().getType()); } +void LMSFilterResponseOp::inferShapes() { + getResult().setType(getLhs().getType()); +} mlir::LogicalResult LMSFilterResponseOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -2071,90 +2543,89 @@ mlir::LogicalResult LMSFilterResponseOp::verify() { // RunLenEncodingOp //===----------------------------------------------------------------------===// -void RunLenEncodingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input) { +void RunLenEncodingOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input} ); + state.addOperands({input}); DEBUG_PRINT_NO_ARGS(); } void RunLenEncodingOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); - auto tensorInput = getInput().getType(); + 
auto tensorInput = getInput().getType(); auto shapeOfInput = tensorInput.getShape(); - // auto tensorUpsampling = getRhs().getType(); + // auto tensorUpsampling = getRhs().getType(); // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length - //Assume rank is 1 , then get the shape of output + // Assume rank is 1 , then get the shape of output // shapeOfInput - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t LengthOfInput = shapeOfInput[0]; int64_t lenOfOutput = 2 * LengthOfInput; shapeForOutput.push_back(lenOfOutput); - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); getResult().setType(manipulatedType); DEBUG_PRINT_NO_ARGS(); } mlir::LogicalResult RunLenEncodingOp::verify() { - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int // DEBUG_PRINT_NO_ARGS(); - - return mlir::success(); + return mlir::success(); } //===----------------------------------------------------------------------===// // FIRFilterResSymmOptimizedOp //===----------------------------------------------------------------------===// -void FIRFilterResSymmOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { +void FIRFilterResSymmOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the 
FIRFilterResSymmOptimizedOp, this is required by the shape inference -/// interface. -//ToDo -- shape should be the length of Lhs + Rhs - 1 -void FIRFilterResSymmOptimizedOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the FIRFilterResSymmOptimizedOp, this is required +/// by the shape inference interface. +// ToDo -- shape should be the length of Lhs + Rhs - 1 +void FIRFilterResSymmOptimizedOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); auto shapeOfInput = tensorInput.getShape(); auto tensorFilter = getRhs().getType(); auto shapeOfFilter = tensorFilter.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; - for(size_t i=0; i < shapeOfInput.size() ; i++){ + for (size_t i = 0; i < shapeOfInput.size(); i++) { shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getLhs().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); } -//get rank of Input & Filter -- make sure it is of rank 1 +// get rank of Input & Filter -- make sure it is of rank 1 mlir::LogicalResult FIRFilterResSymmOptimizedOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -2169,16 +2640,15 @@ mlir::LogicalResult 
FIRFilterResSymmOptimizedOp::verify() { return mlir::success(); } - //===----------------------------------------------------------------------===// // LengthOp //===----------------------------------------------------------------------===// void LengthOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input) { + mlir::Value input) { DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); - state.addOperands({input} ); + state.addOperands({input}); DEBUG_PRINT_NO_ARGS(); } @@ -2189,8 +2659,8 @@ void LengthOp::inferShapes() { DEBUG_PRINT_NO_ARGS(); shapeForOutput.push_back(1); - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); getResult().setType(manipulatedType); DEBUG_PRINT_NO_ARGS(); } @@ -2214,16 +2684,17 @@ mlir::LogicalResult LengthOp::verify() { // ReverseInputOp //===----------------------------------------------------------------------===// -void ReverseInputOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input) { +void ReverseInputOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(input); } void ReverseInputOp::inferShapes() { - auto tensorInput = getInput().getType(); - // mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - // getInput().getType().getElementType()); + auto tensorInput = getInput().getType(); + // mlir::TensorType manipulatedType = + // mlir::RankedTensorType::get(shapeForOutput, + // getInput().getType().getElementType()); getResult().setType(tensorInput); } @@ -2242,129 +2713,131 @@ mlir::LogicalResult ReverseInputOp::verify() { return mlir::success(); } - 
//===----------------------------------------------------------------------===// // PaddingOp //===----------------------------------------------------------------------===// void PaddingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value input, mlir::Value PadValue, mlir::Value PadLen) { + mlir::Value input, mlir::Value PadValue, + mlir::Value PadLen) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({input, PadValue, PadLen}); } - - -/// Infer the output shape of the PaddingOp, this is required by the shape inference -/// interface. -//ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs -void PaddingOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getInput().getType(); +/// Infer the output shape of the PaddingOp, this is required by the shape +/// inference interface. +// ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs +void PaddingOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getInput().getType(); auto shapeOfInput = tensorInput.getShape(); - // auto tensorUpsampling = getRhs().getType(); + // auto tensorUpsampling = getRhs().getType(); // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length - - std::vector shapeForOutput ; + std::vector shapeForOutput; int64_t SecondValueInt = 1; - //To extract value from the SSA value: - //get the Operand - //convert it to ConstantOp - //convert it to corresponding elements attribute - //extract the value as float then convert to int + // To extract value from the SSA value: + // get the Operand + // convert it to ConstantOp + // convert it to corresponding elements attribute + // extract the value as float then convert to int DEBUG_PRINT_NO_ARGS(); Value padding3rdArg = 
getOperand(2); - dsp::ConstantOp constantOp2ndArg = padding3rdArg.getDefiningOp(); + dsp::ConstantOp constantOp2ndArg = + padding3rdArg.getDefiningOp(); DEBUG_PRINT_NO_ARGS(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - SecondValueInt = (int64_t) SecondValue; - // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate - - DEBUG_PRINT_NO_ARGS(); - for(size_t i=0; i < shapeOfInput.size() ; i++){ - double GetLenForOutput = static_cast(shapeOfInput[i] ) + SecondValueInt ; - int64_t OutlenInt = static_cast (GetLenForOutput); - DEBUG_PRINT_WITH_ARGS("PaddingLen= " , OutlenInt); + SecondValueInt = (int64_t)SecondValue; + // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; + // //downsamplingRate + + DEBUG_PRINT_NO_ARGS(); + for (size_t i = 0; i < shapeOfInput.size(); i++) { + double GetLenForOutput = + static_cast(shapeOfInput[i]) + SecondValueInt; + int64_t OutlenInt = static_cast(GetLenForOutput); + DEBUG_PRINT_WITH_ARGS("PaddingLen= ", OutlenInt); shapeForOutput.push_back(OutlenInt); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getInput().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); - } +} -//get rank of Input & Upsampling -- make sure it is of rank 1 +// get rank of Input & Upsampling -- make sure it is of rank 1 mlir::LogicalResult PaddingOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto samplingRateType = llvm::dyn_cast(getOperand(1).getType()); + // auto samplingRateType = + // 
llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); // auto samplingRateRank = samplingRateType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // //once ensured only 1 rank from above -- also make sure there is just 1 elem - // if( inputRank != 1 || samplingRateRank != 0 ) + // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; + // //once ensured only 1 rank from above -- also make sure there is just 1 + // elem if( inputRank != 1 || samplingRateRank != 0 ) // { - // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n"; - // return emitError() + // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << + // samplingRateRank << "\n"; return emitError() // << "expected rank of input is 1 & Upsampling is 0"; // } return mlir::success(); -} - +} //===----------------------------------------------------------------------===// // FIRFilterYSymmOptimizedOp //===----------------------------------------------------------------------===// -void FIRFilterYSymmOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value lhs, mlir::Value rhs) { +void FIRFilterYSymmOptimizedOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } - - -/// Infer the output shape of the FIRFilterYSymmOptimizedOp, this is required by the shape inference -/// interface. 
-//ToDo -- shape should be the length of Lhs + Rhs - 1 -void FIRFilterYSymmOptimizedOp::inferShapes() { - //get the shape of Lhs & rhs - //add the shape for each dimension - // auto tensorInput = llvm::cast(getLhs().getType()); - auto tensorInput = getLhs().getType(); +/// Infer the output shape of the FIRFilterYSymmOptimizedOp, this is required by +/// the shape inference interface. +// ToDo -- shape should be the length of Lhs + Rhs - 1 +void FIRFilterYSymmOptimizedOp::inferShapes() { + // get the shape of Lhs & rhs + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); auto shapeOfInput = tensorInput.getShape(); auto tensorFilter = getRhs().getType(); auto shapeOfFilter = tensorFilter.getShape(); - std::vector shapeForOutput ; + std::vector shapeForOutput; - for(size_t i=0; i < shapeOfInput.size() ; i++){ + for (size_t i = 0; i < shapeOfInput.size(); i++) { shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1); } - - mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, - getLhs().getType().getElementType()); - // getResult().setType(getLhs().getType()); + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); getResult().setType(manipulatedType); } -//get rank of Input & Filter -- make sure it is of rank 1 +// get rank of Input & Filter -- make sure it is of rank 1 mlir::LogicalResult FIRFilterYSymmOptimizedOp::verify() { // auto inputType = llvm::dyn_cast(getOperand(0).getType()); - // auto filterType = llvm::dyn_cast(getOperand(1).getType()); + // auto filterType = + // llvm::dyn_cast(getOperand(1).getType()); // // auto resultType = llvm::dyn_cast(getType()); // auto inputRank = inputType.getRank(); @@ -2383,19 +2856,19 @@ mlir::LogicalResult FIRFilterYSymmOptimizedOp::verify() { // FFT1DRealSymmOp 
//===----------------------------------------------------------------------===// -void FFT1DRealSymmOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - DEBUG_PRINT_NO_ARGS() ; +void FFT1DRealSymmOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands(value); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); } void FFT1DRealSymmOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); // getResult().setType(tensorInput); getResult().setType(tensorInput); // getResult(2).setType(tensorInput); @@ -2406,8 +2879,9 @@ mlir::LogicalResult FFT1DRealSymmOp::verify() { // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -2421,31 +2895,32 @@ mlir::LogicalResult FFT1DRealSymmOp::verify() { // FFT1DImgConjSymmOp //===----------------------------------------------------------------------===// -void FFT1DImgConjSymmOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, - mlir::Value value) { - DEBUG_PRINT_NO_ARGS() ; +void FFT1DImgConjSymmOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value value) { + DEBUG_PRINT_NO_ARGS(); state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); state.addOperands(value); - DEBUG_PRINT_NO_ARGS() 
; + DEBUG_PRINT_NO_ARGS(); } void FFT1DImgConjSymmOp::inferShapes() { - //for each rank - //Get the shape/size of input - //output size = input_size - auto tensorInput = getInput().getType(); + // for each rank + // Get the shape/size of input + // output size = input_size + auto tensorInput = getInput().getType(); // getResult().setType(tensorInput); getResult().setType(tensorInput); // getResult(2).setType(tensorInput); } mlir::LogicalResult FFT1DImgConjSymmOp::verify() { - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); // auto inputType = llvm::dyn_cast(getOperand().getType()); // auto inputRank = inputType.getRank(); - // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n"; - // //once ensured only 1 rank from above -- + // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << + // alphaValueRank << "\n"; + // //once ensured only 1 rank from above -- // if( inputRank != 1 ) // { // llvm::errs() << "inputRank: " << inputRank << "\n"; @@ -2455,6 +2930,815 @@ mlir::LogicalResult FFT1DImgConjSymmOp::verify() { return mlir::success(); } +//===----------------------------------------------------------------------===// +// ShiftRightOp +//===----------------------------------------------------------------------===// + +void ShiftRightOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); +} + +// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser, +// mlir::OperationState &result) { +// return parseBinaryOp(parser, result); +// } + +// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); } + +// Infer the output shape of the ShiftRightOp, this is required by the shape +// inference. interface. 
+void ShiftRightOp::inferShapes() { getResult().setType(getLhs().getType()); } + +//===----------------------------------------------------------------------===// +// Conv2DOp +//===----------------------------------------------------------------------===// + +void Conv2DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input, mlir::Value weight, mlir::Value bias) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input, weight, bias}); +} +void Conv2DOp::inferShapes() { + auto inputType = llvm::dyn_cast(getInput().getType()); + auto kernelType = llvm::dyn_cast(getKernel().getType()); + + int64_t IH = inputType.getShape()[0]; + int64_t IW = inputType.getShape()[1]; + int64_t KH = kernelType.getShape()[0]; + int64_t KW = kernelType.getShape()[1]; + int64_t OH = IH - KH + 1, OW = IW - KW + 1; + + SmallVector dims = {OH, OW}; + getResult().setType(RankedTensorType::get(dims, inputType.getElementType())); +} + +mlir::LogicalResult Conv2DOp::verify() { + + auto inputType = llvm::dyn_cast(getInput().getType()); + auto kernelType = llvm::dyn_cast(getKernel().getType()); + auto biasType = llvm::dyn_cast(getBias().getType()); + + if (!inputType) { + llvm::errs() << "expect a ranked tensor for input, get " << getInput(); + return mlir::failure(); + } + if (!kernelType) { + llvm::errs() << "expect a ranked tensor for kernel, get " << getKernel(); + return mlir::failure(); + } + if (!biasType) { + llvm::errs() << "expect a one dimensional ranked tensor for bias, get " + << getBias(); + return mlir::failure(); + } + + auto inputRank = inputType.getRank(); + auto kernelRank = kernelType.getRank(); + + if (inputRank != 2) { + llvm::errs() << "expect 2 dimensional input, format N IH IW IC, get " + << inputRank; + return mlir::failure(); + } + if (kernelRank != 2) { + llvm::errs() << "expect 2 dimensional kernel, format OC KH KW IC."; + return mlir::failure(); + } + + if (inputType.getShape()[0] < 
kernelType.getShape()[0]) { + llvm::errs() << "input shape < kernel shape at 1st dimension"; + return mlir::failure(); + } + + if (inputType.getShape()[1] < kernelType.getShape()[1]) { + llvm::errs() << "input shape < kernel shape at 2nd dimension"; + return mlir::failure(); + } + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// ThresholdUpOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult ThresholdUpOp::verify() { + int64_t returnOriginal = 5; + Value returnoriginal = getOperand(2); + dsp::ConstantOp constantOp1stArg = + returnoriginal.getDefiningOp(); + DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); + auto elements = constantLhsValue.getValues(); + float LenN = elements[0].getValueAsDouble(); + returnOriginal = (int64_t)LenN; + + // filter-order even not supported -- so making it odd + if (returnOriginal != 0 && returnOriginal != 1) { + return mlir::failure(); + } + return mlir::success(); +} + +void ThresholdUpOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input, mlir::Value threshold, + mlir::Value returnoriginal) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input, threshold, returnoriginal}); +} +void ThresholdUpOp::inferShapes() { getResult().setType(getInput().getType()); } + +//===----------------------------------------------------------------------===// +// GenerateDTMFOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult GenerateDTMFOp::verify() { + auto digitType = llvm::dyn_cast(getDigit().getType()); + auto durationType = llvm::dyn_cast(getDuration().getType()); + auto fsType = llvm::dyn_cast(getFs().getType()); + + if (!digitType) { + return emitError() << "Digit must be a ranked tensor"; + return mlir::failure(); + } + if (!durationType) { + return emitError() << "Duration must 
be a ranked tensor"; + return mlir::failure(); + } + if (!fsType) { + return emitError() << "Frequency must be a ranked tensor"; + return mlir::failure(); + } + + auto digitNoOfElements = digitType.getNumElements(); + auto durationNoOfElements = durationType.getNumElements(); + auto fsNoOfElements = fsType.getNumElements(); + + if (digitNoOfElements != 1) { + return emitError() << "Digit must contain exactly one element"; + return mlir::failure(); + } + if (durationNoOfElements != 1) { + return emitError() << "Duration must contain exactly one element"; + return mlir::failure(); + } + if (fsNoOfElements != 1) { + return emitError() << "Frequency must contain exactly one element"; + return mlir::failure(); + } + + auto digit = getDigit(); + auto digitConst = digit.getDefiningOp(); + auto digitValue = digitConst.getValue(); + auto digitFloat = digitValue.getValues(); + auto dig = digitFloat[0].getValueAsDouble(); + + if (dig != 0 && dig != 1 && dig != 2 && dig != 3 && dig != 4 && dig != 5 && + dig != 6 && dig != 7 && dig != 8 && dig != 9) { + return emitError() << "Digit can only take one of the following values: " + "0,1,2,3,4,5,6,7,8,9"; + return mlir::failure(); + } + + return mlir::success(); +} + +void GenerateDTMFOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value digit, + mlir::Value duration, mlir::Value fs) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({digit, duration, fs}); +} +void GenerateDTMFOp::inferShapes() { + auto digitType = llvm::dyn_cast(getDigit().getType()); + auto durationType = llvm::dyn_cast(getDuration().getType()); + auto fsType = llvm::dyn_cast(getFs().getType()); + // auto digitElementType = digitType.getElementType(); + + auto duration = getDuration(); + auto durationConst = duration.getDefiningOp(); + auto durationValue = durationConst.getValue(); + auto durationFloat = durationValue.getValues(); + auto dur = durationFloat[0].getValueAsDouble(); + + auto fs = 
getFs(); + auto fsConst = fs.getDefiningOp(); + auto fsValue = fsConst.getValue(); + auto fsFloat = fsValue.getValues(); + auto freq = fsFloat[0].getValueAsDouble(); + + auto output = dur * freq; + auto outputShape = (int64_t)output; + + getResult().setType( + RankedTensorType::get(outputShape, digitType.getElementType())); +} + +//===----------------------------------------------------------------------===// +// FFTFreqOp +//===----------------------------------------------------------------------===// + +void FFTFreqOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value length, mlir::Value distance) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({length, distance}); +} + +mlir::LogicalResult FFTFreqOp::verify() { return mlir::success(); } + +void FFTFreqOp::inferShapes() { + auto lengthType = llvm::dyn_cast(getLength().getType()); + auto length = getLength(); + auto lengthConst = length.getDefiningOp(); + auto lengthValue = lengthConst.getValue(); + auto lengthFloat = lengthValue.getValues(); + auto l = lengthFloat[0].getValueAsDouble(); + auto outputShape = (int64_t)l; + + getResult().setType( + RankedTensorType::get(outputShape, lengthType.getElementType())); +} + +//===----------------------------------------------------------------------===// +// FindDominantPeaksOp +//===----------------------------------------------------------------------===// + +void FindDominantPeaksOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value frequencies, + mlir::Value magnitudes) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({frequencies, magnitudes}); +} + +void FindDominantPeaksOp::inferShapes() { + auto frequenciesType = + llvm::dyn_cast(getFrequencies().getType()); + SmallVector resultShape{2}; + auto resultType = + RankedTensorType::get(resultShape, frequenciesType.getElementType()); + getResult().setType(resultType); +} + 
+mlir::LogicalResult FindDominantPeaksOp::verify() { + auto frequenciesType = + llvm::dyn_cast(getFrequencies().getType()); + auto magnitudesType = + llvm::dyn_cast(getMagnitudes().getType()); + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// RecoverDTMFDigitOp +//===----------------------------------------------------------------------===// + +void RecoverDTMFDigitOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value frequencies, mlir::Value freqPairs) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({frequencies, freqPairs}); +} + +void RecoverDTMFDigitOp::inferShapes() { + auto frequenciesType = + llvm::dyn_cast(getFrequencies().getType()); + SmallVector resultShape{1}; + auto resultType = + RankedTensorType::get(resultShape, frequenciesType.getElementType()); + getResult().setType(resultType); +} + +mlir::LogicalResult RecoverDTMFDigitOp::verify() { + auto frequenciesType = + llvm::dyn_cast(getFrequencies().getType()); + auto freqPairsType = + llvm::dyn_cast(getFreqPairs().getType()); + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// FFTCombineOp +//===----------------------------------------------------------------------===// + +void FFTCombineOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value real, mlir::Value imag) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({real, imag}); +} + +mlir::LogicalResult FFTCombineOp::verify() { + auto realType = llvm::dyn_cast(getReal().getType()); + auto imagType = llvm::dyn_cast(getImag().getType()); + + auto realNoOfElements = realType.getNumElements(); + auto imagNoOfElements = imagType.getNumElements(); + + if (realNoOfElements != imagNoOfElements) { + return emitError() + << "Real and Imaginary parts should have same number of elements.\n"; + return 
mlir::failure(); + } + return mlir::success(); +} + +void FFTCombineOp::inferShapes() { getResult().setType(getReal().getType()); } + +//===----------------------------------------------------------------------===// +// GenerateVoiceSignatureOp +//===----------------------------------------------------------------------===// + +void GenerateVoiceSignatureOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value f1, mlir::Value f2, + mlir::Value duration, mlir::Value fs) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({f1, f2, duration, fs}); +} + +mlir::LogicalResult GenerateVoiceSignatureOp::verify() { + auto f1Type = llvm::dyn_cast(getF1().getType()); + auto f2Type = llvm::dyn_cast(getF2().getType()); + auto durationType = llvm::dyn_cast(getDuration().getType()); + auto fsType = llvm::dyn_cast(getFs().getType()); + + if (!f1Type) { + return emitError() << "f1 must be a ranked tensor"; + return mlir::failure(); + } + if (!f2Type) { + return emitError() << "f2 must be a ranked tensor"; + return mlir::failure(); + } + if (!durationType) { + return emitError() << "Duration must be a ranked tensor"; + return mlir::failure(); + } + if (!fsType) { + return emitError() << "Frequency must be a ranked tensor"; + return mlir::failure(); + } + auto f1NoOfElements = f1Type.getNumElements(); + auto f2NoOfElements = f2Type.getNumElements(); + auto durationNoOfElements = durationType.getNumElements(); + auto fsNoOfElements = fsType.getNumElements(); + + if (f1NoOfElements != 1) { + return emitError() << "f1 must contain exactly one element"; + return mlir::failure(); + } + if (f2NoOfElements != 1) { + return emitError() << "f2 must contain exactly one element"; + return mlir::failure(); + } + if (durationNoOfElements != 1) { + return emitError() << "Duration must contain exactly one element"; + return mlir::failure(); + } + if (fsNoOfElements != 1) { + return emitError() << "Frequency must contain exactly one 
element"; + return mlir::failure(); + } + return mlir::success(); +} + +void GenerateVoiceSignatureOp::inferShapes() { + auto durationType = llvm::dyn_cast(getDuration().getType()); + auto fsType = llvm::dyn_cast(getFs().getType()); + // auto digitElementType = digitType.getElementType(); + + auto duration = getDuration(); + auto durationConst = duration.getDefiningOp(); + auto durationValue = durationConst.getValue(); + auto durationFloat = durationValue.getValues(); + auto dur = durationFloat[0].getValueAsDouble(); + + auto fs = getFs(); + auto fsConst = fs.getDefiningOp(); + auto fsValue = fsConst.getValue(); + auto fsFloat = fsValue.getValues(); + auto freq = fsFloat[0].getValueAsDouble(); + + auto output = dur * freq; + auto outputShape = (int64_t)output; + + getResult().setType( + RankedTensorType::get(outputShape, fsType.getElementType())); +} + +//===----------------------------------------------------------------------===// +// SqrtOp +//===----------------------------------------------------------------------===// + +void SqrtOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input}); +} + +mlir::LogicalResult SqrtOp::verify() { + auto inputType = llvm::dyn_cast(getInput().getType()); + return mlir::success(); +} + +void SqrtOp::inferShapes() { getResult().setType(getInput().getType()); } + +//===----------------------------------------------------------------------===// +// QamDemodulateOp +//===----------------------------------------------------------------------===// + +void QamDemodulateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value real, + mlir::Value imagine) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({real, imagine}); +} + +void QamDemodulateOp::inferShapes() { + auto realType = llvm::dyn_cast(getReal().getType()); + auto realShape = 
realType.getShape(); + SmallVector outputShape(realShape); + + for (size_t i = 0; i < realShape.size(); ++i) { + outputShape[i] = realShape[i] * 2; + } + getResult().setType( + RankedTensorType::get(outputShape, realType.getElementType())); +} + +mlir::LogicalResult QamDemodulateOp::verify() { + auto realType = llvm::dyn_cast(getReal().getType()); + auto imagineType = llvm::dyn_cast(getImagine().getType()); + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// QamModulateRealOp +//===----------------------------------------------------------------------===// + +void QamModulateRealOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value signal) { + auto tensorType = UnrankedTensorType::get(builder.getF64Type()); + state.addTypes({tensorType}); + + state.addOperands({signal}); +} +void QamModulateRealOp::inferShapes() { + auto signalType = llvm::dyn_cast(getSignal().getType()); + auto signalShape = signalType.getShape(); + + SmallVector outputShape(signalShape); + for (size_t i = 0; i < signalShape.size(); ++i) { + outputShape[i] = signalShape[i] / 2; + } + + getResult().setType( + RankedTensorType::get(outputShape, signalType.getElementType())); +} + +mlir::LogicalResult QamModulateRealOp::verify() { + + // auto signalType = llvm::dyn_cast(getSignal().getType()); + // + // if(!signalType) { + // llvm::errs() << "expect a ranked tensor for signal input, get " << + // getSignal(); return mlir::failure(); + //} + // + // auto signalRank = signalType.getRank(); + // + // if(signalRank != 1 ) { + // llvm::errs() << "expect 1 dimensional signal, get " << signalRank; + // return mlir::failure(); + //} + // + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// QamModulateImgOp +//===----------------------------------------------------------------------===// + +void QamModulateImgOp::build(mlir::OpBuilder &builder, + 
mlir::OperationState &state, mlir::Value signal) { + auto tensorType = UnrankedTensorType::get(builder.getF64Type()); + state.addTypes({tensorType}); + + state.addOperands({signal}); +} +void QamModulateImgOp::inferShapes() { + auto signalType = llvm::dyn_cast(getSignal().getType()); + auto signalShape = signalType.getShape(); + + SmallVector outputShape(signalShape); + for (size_t i = 0; i < signalShape.size(); ++i) { + outputShape[i] = signalShape[i] / 2; + } + + getResult().setType( + RankedTensorType::get(outputShape, signalType.getElementType())); +} + +mlir::LogicalResult QamModulateImgOp::verify() { + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// BeamFormOp +//===----------------------------------------------------------------------===// + +void BeamFormOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + int64_t antennas, int64_t freq, mlir::Value time, + mlir::Value weights) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addAttribute("antennas", builder.getI64IntegerAttr(antennas)); + state.addAttribute("freq", builder.getI64IntegerAttr(freq)); + state.addOperands({time, weights}); +} + +void BeamFormOp::inferShapes() { getResult().setType(getTime().getType()); } + +mlir::LogicalResult BeamFormOp::verify() { + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// SpaceModulateOp +//===----------------------------------------------------------------------===// + +void SpaceModulateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value signals) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({signals}); +} + +void SpaceModulateOp::inferShapes() { + getResult().setType(getSignal().getType()); +} + +mlir::LogicalResult SpaceModulateOp::verify() { return mlir::success(); } + 
+//===----------------------------------------------------------------------===// +// SpaceDemodulateOp +//===----------------------------------------------------------------------===// + +void SpaceDemodulateOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value binary) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({binary}); +} + +void SpaceDemodulateOp::inferShapes() { + getResult().setType(getBinary().getType()); +} + +mlir::LogicalResult SpaceDemodulateOp::verify() { return mlir::success(); } + +//===----------------------------------------------------------------------===// +// SpaceDemodulateOp +//===----------------------------------------------------------------------===// + +void SpaceErrCorrectionOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value signal) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({signal}); +} + +void SpaceErrCorrectionOp::inferShapes() { + getResult().setType(getSignal().getType()); +} + +mlir::LogicalResult SpaceErrCorrectionOp::verify() { return mlir::success(); } + +//===----------------------------------------------------------------------===// +// NormalizeOp +//===----------------------------------------------------------------------===// + +void NormalizeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value signal) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({signal}); +} + +void NormalizeOp::inferShapes() { getResult().setType(getSignal().getType()); } + +//===----------------------------------------------------------------------===// +// NormLMSFilterResponseOptimizeOp +//===----------------------------------------------------------------------===// + +void NormLMSFilterResponseOptimizeOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs, + mlir::Value mu, + mlir::Value 
filterLen) { + + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs, mu, filterLen}); +} + +void NormLMSFilterResponseOptimizeOp::inferShapes() { + getResult().setType(getLhs().getType()); +} + +mlir::LogicalResult NormLMSFilterResponseOptimizeOp::verify() { + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// FIRFilterResSymmThresholdUpOptimizedOp +//===----------------------------------------------------------------------===// + +void FIRFilterResSymmThresholdUpOptimizedOp::build( + mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, + mlir::Value rhs, mlir::Value threshold, mlir::Value returnoriginal) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs, threshold, returnoriginal}); +} + +/// Infer the output shape of the FIRFilterResSymmThresholdUpOptimizedOp, this +/// is required by the shape inference interface. +// ToDo -- shape should be the length of Lhs + Rhs - 1 +void FIRFilterResSymmThresholdUpOptimizedOp::inferShapes() { + // get the shape of Lhs & rh@id:github.copilot-chats + // add the shape for each dimension + // auto tensorInput = llvm::cast(getLhs().getType()); + auto tensorInput = getLhs().getType(); + auto shapeOfInput = tensorInput.getShape(); + + auto tensorFilter = getRhs().getType(); + auto shapeOfFilter = tensorFilter.getShape(); + std::vector shapeForOutput; + + for (size_t i = 0; i < shapeOfInput.size(); i++) { + shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1); + } + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getLhs().getType().getElementType()); + + // getResult().setType(getLhs().getType()); + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// FFTOp +//===----------------------------------------------------------------------===// + 
+void FFTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs) { + state.addTypes({lhs.getType(), lhs.getType()}); + state.addOperands({lhs}); +} + +void FFTOp::inferShapes() { + getResult(0).setType(getLhs().getType()); + getResult(1).setType(getLhs().getType()); +} + +//===----------------------------------------------------------------------===// +// FFTAbsOp +//===----------------------------------------------------------------------===// + +void FFTAbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input) { + state.addTypes(input.getType()); + state.addOperands({input}); +} + +void FFTAbsOp::inferShapes() { getResult().setType(getInput().getType()); } + +//===----------------------------------------------------------------------===// +// DFTAbsOp +//===----------------------------------------------------------------------===// + +void DFTAbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value input) { + state.addTypes(input.getType()); + state.addOperands({input}); +} + +void DFTAbsOp::inferShapes() { getResult().setType(getInput().getType()); } + +//===----------------------------------------------------------------------===// +// DFTAbsThresholdUpOp +//===----------------------------------------------------------------------===// + +void DFTAbsThresholdUpOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value threshold, + mlir::Value returnoriginal) { + state.addTypes(input.getType()); + state.addOperands({input, threshold, returnoriginal}); +} + +void DFTAbsThresholdUpOp::inferShapes() { + getResult().setType(getInput().getType()); +} + +mlir::LogicalResult DFTAbsThresholdUpOp::verify() { + int64_t returnOriginal = 5; + Value returnoriginal = getOperand(2); + dsp::ConstantOp constantOp1stArg = + returnoriginal.getDefiningOp(); + DenseElementsAttr constantLhsValue = constantOp1stArg.getValue(); + auto elements = 
constantLhsValue.getValues(); + float LenN = elements[0].getValueAsDouble(); + returnOriginal = (int64_t)LenN; + + // filter-order even not supported -- so making it odd + if (returnOriginal != 0 && returnOriginal != 1) { + return mlir::failure(); + } + return mlir::success(); +} + + +//===----------------------------------------------------------------------===// + // CorrelateOp + //===----------------------------------------------------------------------===// + + void CorrelateOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); + } + + void CorrelateOp::inferShapes() { + auto tensorLhs = getLhs().getType(); + auto shapeOfLhs = tensorLhs.getShape(); + + std::vector shapeForOutput; + shapeForOutput.push_back(shapeOfLhs[0]*2-1); + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, tensorLhs.getElementType()); + + getResult().setType(manipulatedType); +} + +//===----------------------------------------------------------------------===// +// SetSingleElemAtIdxOp +//===----------------------------------------------------------------------===// + +void SetSingleElemAtIdxOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value input, + mlir::Value indx, mlir::Value val) { + state.addTypes({UnrankedTensorType::get(builder.getF64Type())}); + state.addOperands({input, indx, val}); +} + +void SetSingleElemAtIdxOp::inferShapes() { + std::vector shapeForOutput; + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, getInput().getType().getElementType()); + getResult().setType(manipulatedType); +} + + +//===----------------------------------------------------------------------===// +// Correl2MaxOptimizedOp +//===----------------------------------------------------------------------===// + + void Correl2MaxOptimizedOp::build(mlir::OpBuilder 
&builder, mlir::OperationState &state, + mlir::Value lhs, mlir::Value rhs) { + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs}); + } + + void Correl2MaxOptimizedOp::inferShapes() { + auto tensorInput = getLhs().getType(); + std::vector shapeForOutput; + + mlir::TensorType manipulatedType = mlir::RankedTensorType::get( + shapeForOutput, tensorInput.getElementType()); + + getResult().setType(manipulatedType); +} + + +//===----------------------------------------------------------------------===// +// LMSFilterResponse2GainOp +//===----------------------------------------------------------------------===// + + +void LMSFilterResponse2GainOp::build(mlir::OpBuilder &builder, + mlir::OperationState &state, mlir::Value lhs, + mlir::Value rhs, mlir::Value mu, + mlir::Value filterLen, mlir::Value gain) { + + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); + state.addOperands({lhs, rhs, mu, filterLen, gain}); +} + +void LMSFilterResponse2GainOp::inferShapes() { + getResult().setType(getLhs().getType()); +} + + //===----------------------------------------------------------------------===// // TableGen'd op method definitions diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp index 537989becb84..d564d89b4a2e 100644 --- a/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp @@ -12,9 +12,16 @@ // //===----------------------------------------------------------------------===// -#pragma GCC diagnostic push +#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include 
"mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/BuiltinOps.h" @@ -23,20 +30,14 @@ #include "mlir/IR/DialectRegistry.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/ValueRange.h" +#include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Support/TypeID.h" -#include "toy/Dialect.h" +#include "mlir/Transforms/DialectConversion.h" #include "toy/DebugConfig.h" +#include "toy/Dialect.h" #include "toy/Passes.h" - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" @@ -47,9 +48,9 @@ #include #include -//For IntegerSet -#include "mlir/IR/IntegerSet.h" +// For IntegerSet #include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/IntegerSet.h" #include using namespace mlir; using namespace std; @@ -59,7 +60,6 @@ using namespace dsp; // ToyToAffine RewritePatterns //===----------------------------------------------------------------------===// - // #pragma warning(push, 0) /// Convert the given RankedTensorType into the corresponding MemRefType. static MemRefType convertTensorToMemRef(RankedTensorType type) { @@ -73,12 +73,15 @@ static Value insertAllocAndDealloc(MemRefType type, Location loc, // Make sure to allocate at the beginning of the block. auto *parentBlock = alloc->getBlock(); - alloc->moveBefore(&parentBlock->front()); //Abhinav-- move allock->block->front before alloc operation?? + alloc->moveBefore( + &parentBlock->front()); // Abhinav-- move allock->block->front before + // alloc operation?? // Make sure to deallocate this alloc at the end of the block. 
This is fine // as dsp functions have no control flow. auto dealloc = rewriter.create(loc, alloc); - dealloc->moveBefore(&parentBlock->back()); //move alloc->block->back before dealloc + dealloc->moveBefore( + &parentBlock->back()); // move alloc->block->back before dealloc return alloc; } @@ -94,29 +97,32 @@ static void lowerOpToLoops(Operation *op, ValueRange operands, PatternRewriter &rewriter, LoopIterationFn processIteration) { auto tensorType = llvm::cast((*op->result_type_begin())); - - // for (auto i : tensorType.getShape()) - // { - // llvm::errs() << "tensortype =" << i << "\n" ; - // } - // llvm::errs() << "tensortype.getElementType =" << tensorType.getElementType() << "\n" ; - // llvm::errs() << "op->getLoc = " << op->getLoc() << "\n"; //getDialect - // llvm::errs() << "op->getDialect = " << op->getDialect() << "\n"; - // llvm::errs() << "op->getName = " << op->getName() << "\n"; - // // llvm::errs() << "op->getType = " << op->getType() << "\n"; - // llvm::errs() << "op->getParentRegion = " << op->getParentRegion() << "\n"; - // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n"; - - // llvm::errs() << "op->getNumOperands = " << op->getNumOperands() << "\n"; - // for (auto i : op->getOperands()) - // { - // llvm::errs() << "op->Operand = " << i << "\n"; - // } - - // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n"; - // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n"; - // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n"; - + + // for (auto i : tensorType.getShape()) + // { + // llvm::errs() << "tensortype =" << i << "\n" ; + // } + // llvm::errs() << "tensortype.getElementType =" << + // tensorType.getElementType() << "\n" ; llvm::errs() << "op->getLoc = " << + // op->getLoc() << "\n"; //getDialect llvm::errs() << "op->getDialect = " << + // op->getDialect() << "\n"; llvm::errs() << "op->getName = " << op->getName() + // << "\n"; + // // 
llvm::errs() << "op->getType = " << op->getType() << "\n"; + // llvm::errs() << "op->getParentRegion = " << op->getParentRegion() << "\n"; + // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << + // "\n"; + + // llvm::errs() << "op->getNumOperands = " << op->getNumOperands() << "\n"; + // for (auto i : op->getOperands()) + // { + // llvm::errs() << "op->Operand = " << i << "\n"; + // } + + // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << + // "\n"; llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() + // << "\n"; llvm::errs() << "op->getParentOp = " << + // op->getParentOp()->getName() << "\n"; + auto loc = op->getLoc(); // Insert an allocation and deallocation for the result of this operation. @@ -156,22 +162,21 @@ static void lowerOpToLoops(Operation *op, ValueRange operands, rewriter.replaceOp(op, alloc); } - -#define TryJustAffineLoop 0 //working -#define TryAffineForAndAffineIf 0 // working +#define TryJustAffineLoop 0 // working +#define TryAffineForAndAffineIf 0 // working #define TryAffineIf2 0 -#define TryAffineMap 0 //working basic -- TO do --try with symbols -#define TrySumOfVector 0 //Working -#define TryMultiDimLoop 0 //Working -#define TryFIRFilter 1 -#define TryMultiDimForAndIf 0 // -#define TryMultiDimLoopAndAffineMap 0 //Working -#define TryMultiDimLoopAndAffineSet 0 //Working +#define TryAffineMap 0 // working basic -- TO do --try with symbols +#define TrySumOfVector 0 // Working +#define TryMultiDimLoop 0 // Working +#define TryFIRFilter 1 +#define TryMultiDimForAndIf 0 // +#define TryMultiDimLoopAndAffineMap 0 // Working +#define TryMultiDimLoopAndAffineSet 0 // Working static void lowerOpToLoopsFIR(Operation *op, ValueRange operands, - PatternRewriter &rewriter, - LoopIterationFn processIteration) { + PatternRewriter &rewriter, + LoopIterationFn processIteration) { auto tensorType = llvm::cast((*op->result_type_begin())); - + auto loc = op->getLoc(); // Insert an allocation and 
deallocation for the result of this operation. @@ -186,1000 +191,1044 @@ static void lowerOpToLoopsFIR(Operation *op, ValueRange operands, SmallVector steps(tensorType.getRank(), /*Value=*/1); // llvm::errs() << "tensorType.getRank() " << tensorType.getRank() << "\n"; - // cout << "tensorType.getRank() .. " << tensorType.getRank() << "\n"; - // for (auto i : tensorType.getRank()) - // { - // llvm::errs() << "tensorType.getRank() = " << i << "\n"; - // } - // for (auto i : tensorType.getShape()) - // { - // llvm::errs() << "tensorType.getShape() = " << i << "\n"; - // } - // llvm::errs() << "tensorType.getShape() " << tensorType.getShape() << "\n"; + // cout << "tensorType.getRank() .. " << tensorType.getRank() << "\n"; + // for (auto i : tensorType.getRank()) + // { + // llvm::errs() << "tensorType.getRank() = " << i << "\n"; + // } + // for (auto i : tensorType.getShape()) + // { + // llvm::errs() << "tensorType.getShape() = " << i << "\n"; + // } + // llvm::errs() << "tensorType.getShape() " << tensorType.getShape() << "\n"; - // affine::AffineForOp forOp = rewriter.create( - // loc, lowerBounds, tensorType.getShape() , steps, ValueRange()); - // mlir::IntegerSet set1 = mlir::IntegerSet::get(1, 0, map, {true}); + // affine::AffineForOp forOp = rewriter.create( + // loc, lowerBounds, tensorType.getShape() , steps, ValueRange()); + // mlir::IntegerSet set1 = mlir::IntegerSet::get(1, 0, map, {true}); - //create an affineFor - // affineFor It has one region containing its body & the region must contain a block terminating with affine.yield - //block has argument of index type - // + // create an affineFor + // affineFor It has one region containing its body & the region must contain + // a block terminating with affine.yield + // block has argument of index type + // #if TryJustAffineLoop - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + // create 
AffineMap and set + // %1 = affine.load + // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > + AffineExpr dimExpr = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); + // AffineMap map = AffineMap::get(1, 0, dimExpr); + // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); + IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(forOp1.getBody()); + + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + + Value constant15 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + + llvm::errs() << "LINE = " << __LINE__ << "\n"; + auto storeOp = rewriter.create( + loc, constant15, alloc, forOp1.getInductionVar()); - //create AffineMap and set - // %1 = affine.load - // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > - AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); - // AffineMap map = AffineMap::get(1, 0, dimExpr); - // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); - IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(forOp1.getBody()); - - //start adding operations like a arith::constant = 100.0 to the body of forOp1 - // Inside the loop body: - - Value constant15 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(15)); - - llvm::errs() << "LINE = " << __LINE__ << "\n"; - auto storeOp = rewriter.create(loc, constant15, alloc, forOp1.getInductionVar()); - -#endif +#endif #if 
TryAffineForAndAffineIf - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; - - //create AffineMap and set - // %1 = affine.load - // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > - AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); - // AffineExpr dimExpr2 = rewriter - // AffineMap map = AffineMap::get(1, 0, dimExpr); - // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); - IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); - - //affine.if %arg1 >= 0 and %5 <= %1 - 1 - // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 - // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - //start adding operations like a arith::constant = 100.0 to the body of forOp1 - // Inside the loop body: - - // #set affine_set<(d0) : (d0 - 5 <= 0)> - // affine.for %arg0 = 0 to 10 { - // %3 = affine.if #set (%arg0) { - // %1 = arith.const 25 - // affine.yield %1 - // } - // else{ - // %2 = arith.const 15 - // affine.yield %2 - // } - // affine.store %3, alloc[%arg0] - // } - - // auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} , false /*no else*/ ); - // auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} , true /*no else*/ ); - - //use typeRange too: - Type floatType = rewriter.getF64Type(); - auto ifOp = rewriter.create( loc, TypeRange{ floatType },set1 , ValueRange{iv} , true /*no else*/ ); - - rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - - FIRFilterResponseAdaptor firFilterOperands(operands); - - //load from the input - Value loadInput = rewriter.create(loc, firFilterOperands.getLhs(), iv); - Value constant25 = rewriter.create(loc, rewriter.getF64Type(), - 
rewriter.getF64FloatAttr(25)); - Value constsq25 = rewriter.create(loc, loadInput, constant25) ; - - rewriter.create(loc, constsq25 , alloc, iv); - rewriter.create(loc, ValueRange{constsq25}); - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + // create AffineMap and set + // %1 = affine.load + // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > + AffineExpr dimExpr = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); + // AffineExpr dimExpr2 = rewriter + // AffineMap map = AffineMap::get(1, 0, dimExpr); + // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); + IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); + + // affine.if %arg1 >= 0 and %5 <= %1 - 1 + // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 + // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + + // #set affine_set<(d0) : (d0 - 5 <= 0)> + // affine.for %arg0 = 0 to 10 { + // %3 = affine.if #set (%arg0) { + // %1 = arith.const 25 + // affine.yield %1 + // } + // else{ + // %2 = arith.const 15 + // affine.yield %2 + // } + // affine.store %3, alloc[%arg0] + // } - rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - Value loadInput2 = rewriter.create(loc, firFilterOperands.getRhs(), iv); - Value constant15 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(15)); - Value elseResult = rewriter.create(loc, loadInput2, constant15) ; - rewriter.create(loc, elseResult , alloc, iv); - rewriter.create(loc, ValueRange{elseResult}); - // 
rewriter.setInsertionPointToEnd(ifOp.getElseBlock()); - rewriter.setInsertionPointAfter(ifOp); - ifOp->dump(); - // forOp1->dump(); - rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - //getParentBlock then use - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp()); - // rewriter.setInsertionPointToEnd(ifOp->getBlock()); - // rewriter.setInsertionPoint(ifOp->getParentOp()); - // rewriter.create(loc, ValueRange{constant25}); - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); - - // rewriter.setInsertionPointAfter(ifOp); - // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - - //try to add the affine.If condition - //create affine.If , - // use integer set to represent the condition - //check the AffineArgs - // affine.if operation contains two regions for the “then” and “else” clauses - //each region of affine.if must contain a single block with no args and terminated by affine.yield op - // if affine.if defines no values --> no need for affine.yield - - // affineIf.setConditional(set1, forOp1.getInductionVar()); - //start then "block" - // "then" block - - // Value constant15 = rewriter.create(loc, rewriter.getF64Type(), - // rewriter.getF64FloatAttr(15)); - - // rewriter.create(loc, ValueRange{constant15}); - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); - //else block - // rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - - // Set insertion point to the end of the "then" block - // rewriter.setInsertionPointAfter(ifOp.getThenBlock()->getTerminator()); - - - // rewriter.create(loc, constant25); - llvm::errs() << "LINE = " << __LINE__ << "\n"; - //Back to parentOp -- ifOp stops here - // rewriter.setInsertionPointAfter(ifOp); - - - //also use affine::AffineStore to store at the loop induction variable - // auto storeOp = rewriter.create(loc, ifOp.getResult(0), alloc, forOp1.getInductionVar()); - // auto storeOp = rewriter.create(loc, constant25, alloc, forOp1.getInductionVar()); - // Back to parentOp -- forOp1 - 
// rewriter.setInsertionPointAfter(storeOp); - - llvm::errs() << "LINE = " << __LINE__ << " xx\n"; - //create affine yield for the loop - // rewriter.create(loc); + // auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} + // , false /*no else*/ ); auto ifOp = rewriter.create( + // loc, set1 , ValueRange{iv} , true /*no else*/ ); + + // use typeRange too: + Type floatType = rewriter.getF64Type(); + auto ifOp = rewriter.create( + loc, TypeRange{floatType}, set1, ValueRange{iv}, true /*no else*/); + + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + FIRFilterResponseAdaptor firFilterOperands(operands); + + // load from the input + Value loadInput = + rewriter.create(loc, firFilterOperands.getLhs(), iv); + Value constant25 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25)); + Value constsq25 = rewriter.create(loc, loadInput, constant25); + + rewriter.create(loc, constsq25, alloc, iv); + rewriter.create(loc, ValueRange{constsq25}); + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + + rewriter.setInsertionPointToStart(ifOp.getElseBlock()); + Value loadInput2 = + rewriter.create(loc, firFilterOperands.getRhs(), iv); + Value constant15 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + Value elseResult = + rewriter.create(loc, loadInput2, constant15); + rewriter.create(loc, elseResult, alloc, iv); + rewriter.create(loc, ValueRange{elseResult}); + // rewriter.setInsertionPointToEnd(ifOp.getElseBlock()); + rewriter.setInsertionPointAfter(ifOp); + ifOp->dump(); + // forOp1->dump(); + rewriter.create(loc, ifOp.getResult(0), alloc, iv); + // getParentBlock then use + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp()); + // rewriter.setInsertionPointToEnd(ifOp->getBlock()); + // rewriter.setInsertionPoint(ifOp->getParentOp()); + // rewriter.create(loc, ValueRange{constant25}); + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + + // 
rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); + + // try to add the affine.If condition + // create affine.If , + // use integer set to represent the condition + // check the AffineArgs + // affine.if operation contains two regions for the “then” and “else” clauses + // each region of affine.if must contain a single block with no args and + // terminated by affine.yield op + // if affine.if defines no values --> no need for affine.yield + + // affineIf.setConditional(set1, forOp1.getInductionVar()); + // start then "block" + // "then" block + + // Value constant15 = rewriter.create(loc, + // rewriter.getF64Type(), + // rewriter.getF64FloatAttr(15)); + + // rewriter.create(loc, ValueRange{constant15}); + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + // else block + // rewriter.setInsertionPointToStart(ifOp.getElseBlock()); + + // Set insertion point to the end of the "then" block + // rewriter.setInsertionPointAfter(ifOp.getThenBlock()->getTerminator()); + + // rewriter.create(loc, constant25); + llvm::errs() << "LINE = " << __LINE__ << "\n"; + // Back to parentOp -- ifOp stops here + // rewriter.setInsertionPointAfter(ifOp); + + // also use affine::AffineStore to store at the loop induction variable + // auto storeOp = rewriter.create(loc, + // ifOp.getResult(0), alloc, forOp1.getInductionVar()); auto storeOp = + // rewriter.create(loc, constant25, alloc, + // forOp1.getInductionVar()); Back to parentOp -- forOp1 + // rewriter.setInsertionPointAfter(storeOp); + + llvm::errs() << "LINE = " << __LINE__ << " xx\n"; + // create affine yield for the loop + // rewriter.create(loc); #endif #if TryAffineIf2 - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; - - //create AffineMap and set - // %1 = affine.load - // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > - AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); - // AffineExpr dimExpr2 
= rewriter - // AffineMap map = AffineMap::get(1, 0, dimExpr); - // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); - IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); - - //affine.if %arg1 >= 0 and %5 <= %1 - 1 - // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 - // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + // create AffineMap and set + // %1 = affine.load + // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > + AffineExpr dimExpr = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); + // AffineExpr dimExpr2 = rewriter + // AffineMap map = AffineMap::get(1, 0, dimExpr); + // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5); + IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); + + // affine.if %arg1 >= 0 and %5 <= %1 - 1 + // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 + // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + + // #set affine_set<(d0) : (d0 - 5 <= 0)> + // affine.for %arg0 = 0 to 10 { + // %3 = affine.if #set (%arg0) { + // %1 = arith.const 25 + // affine.yield %1 + // } + // affine.store %3, alloc[%arg0] + // } - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - //start adding operations like a arith::constant = 100.0 to the body of forOp1 
- // Inside the loop body: - - // #set affine_set<(d0) : (d0 - 5 <= 0)> - // affine.for %arg0 = 0 to 10 { - // %3 = affine.if #set (%arg0) { - // %1 = arith.const 25 - // affine.yield %1 - // } - // affine.store %3, alloc[%arg0] - // } + // auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} + // , false /*no else*/ ); + auto ifOp = rewriter.create(loc, set1, ValueRange{iv}, + true /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + Value constant25 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25)); + Value constsq25 = rewriter.create(loc, constant25, constant25); + + // ifOp.setR + // rewriter.create(loc, constant25 , alloc, iv); + // rewriter.setInsertionPointToStart(ifOp.getElseBlock()); + Value constant15 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + rewriter.create(loc, constsq25, alloc, iv); + + // getParentBlock then use + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp()); + // rewriter.setInsertionPointToEnd(ifOp->getBlock()); + rewriter.setInsertionPoint(ifOp->getParentOp()); + // rewriter.create(loc, ValueRange{constant25}); + // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); + + // rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); + // rewriter.cre - // auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} , false /*no else*/ ); - auto ifOp = rewriter.create( loc, set1 , ValueRange{iv} , true /*no else*/ ); - rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); - Value constant25 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(25)); - Value constsq25 = rewriter.create(loc, constant25, constant25) ; - - // ifOp.setR - // rewriter.create(loc, constant25 , alloc, iv); - // rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - Value constant15 = 
rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(15)); - rewriter.create(loc, constsq25 , alloc, iv); - - - //getParentBlock then use - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp()); - // rewriter.setInsertionPointToEnd(ifOp->getBlock()); - rewriter.setInsertionPoint(ifOp->getParentOp()); - // rewriter.create(loc, ValueRange{constant25}); - // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()); - - // rewriter.setInsertionPointAfter(ifOp); - // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - // rewriter.cre - #endif #if TryAffineMap - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0] - 2; - int64_t step = 1; - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - //start adding operations like a arith::constant = 100.0 to the body of forOp1 - // Inside the loop body: - //create affine for - // use affine-map expression for dimension then symbol then combination - // affine-map expression for dimension: affine_map (d0 , d1 + s0, d1 - s0) - // use affine map - // Define an affine map: #map2 = affine_map<(d0) -> (d0 + 2)> - auto symbol1 = tensorType.getShape()[0]; - AffineExpr indx = rewriter.getAffineDimExpr(0); - AffineExpr constantExpr = rewriter.getAffineConstantExpr(2); - AffineMap addMap = AffineMap::get(1, 0, symbol1 - indx); - auto outputIndex = rewriter.create(loc, addMap , iv); - - // Value constant15 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); - - - //try replace constant15 ie, with input & filter - FIRFilterResponseOpAdaptor firOpAdaptor(operands); - - Value inputForFilter = rewriter.create(loc, firOpAdaptor.getLhs() , iv); - // Value inputForFilterMapped = rewriter.create(loc, firOpAdaptor.getLhs() , addMap, iv); - - Value 
impulseFilter = rewriter.create(loc, firOpAdaptor.getRhs() , iv); - - auto storeOp = rewriter.create(loc, inputForFilter, alloc,ValueRange{outputIndex}); - - - llvm::errs() << "LINE = " << __LINE__ << "\n"; + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0] - 2; + int64_t step = 1; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + // create affine for + // use affine-map expression for dimension then symbol then combination + // affine-map expression for dimension: affine_map (d0 , d1 + + // s0, d1 - s0) use affine map Define an affine map: #map2 = affine_map<(d0) + // -> (d0 + 2)> + auto symbol1 = tensorType.getShape()[0]; + AffineExpr indx = rewriter.getAffineDimExpr(0); + AffineExpr constantExpr = rewriter.getAffineConstantExpr(2); + AffineMap addMap = AffineMap::get(1, 0, symbol1 - indx); + auto outputIndex = rewriter.create(loc, addMap, iv); + + // Value constant15 = rewriter.create(loc, + // rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + + // try replace constant15 ie, with input & filter + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + + Value inputForFilter = + rewriter.create(loc, firOpAdaptor.getLhs(), iv); + // Value inputForFilterMapped = rewriter.create(loc, + // firOpAdaptor.getLhs() , addMap, iv); + + Value impulseFilter = + rewriter.create(loc, firOpAdaptor.getRhs(), iv); + + auto storeOp = rewriter.create( + loc, inputForFilter, alloc, ValueRange{outputIndex}); + + llvm::errs() << "LINE = " << __LINE__ << "\n"; #endif #if TrySumOfVector - // here, we have to use iter - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0] ; - int64_t step = 1; - - Value constant0 = 
rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step , ValueRange{constant0} ); - - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - // Initial sum set to 0. - // %sum_0 = arith.constant 0.0 : f32 - // // iter_args binds initial values to the loop's region arguments. - // %sum = affine.for %i = 0 to 10 step 1 - // iter_args(%sum_iter = %sum_0) -> (f32) { - // %t = affine.load %buffer[%i] : memref<10xf32> - // %sum_next = arith.addf %sum_iter, %t : f32 - // // Yield current iteration sum to next iteration %sum_iter or to %sum - // // if final iteration. - // affine.yield %sum_next : f32 - // } - // return %sum : f32 - // } - - - // Inside the loop body: - - //try replace constant15 ie, with input & filter - FIRFilterResponseOpAdaptor firOpAdaptor(operands); - - Value inputForFilter = rewriter.create(loc, firOpAdaptor.getLhs() , iv); - - //Get iter_arg - auto getIterArg = forOp1.getBody()->getArgument(1); //forOp1.getIterOperands(); - Value sumNext = rewriter.create(loc, inputForFilter, getIterArg); - // Value sumNext = rewriter.create(loc, inputForFilter, constant0); - - //here, at indx 0 , o/p = in[0] - // at indx 1 , o/p = in[0] + in[1] & so on - //at indx last o/p[9] = sum of all input elements - auto storeOp = rewriter.create(loc, sumNext, alloc,ValueRange{iv}); - rewriter.create(loc, ValueRange{sumNext} ); - // rewriter.create(loc); - // auto result = forOp1.getResult(0); - llvm::errs() << "LINE = " << __LINE__ << "\n"; + // here, we have to use iter + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + affine::AffineForOp forOp1 = rewriter.create( + loc, lb, ub, step, 
ValueRange{constant0}); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + // Initial sum set to 0. + // %sum_0 = arith.constant 0.0 : f32 + // // iter_args binds initial values to the loop's region arguments. + // %sum = affine.for %i = 0 to 10 step 1 + // iter_args(%sum_iter = %sum_0) -> (f32) { + // %t = affine.load %buffer[%i] : memref<10xf32> + // %sum_next = arith.addf %sum_iter, %t : f32 + // // Yield current iteration sum to next iteration %sum_iter or to %sum + // // if final iteration. + // affine.yield %sum_next : f32 + // } + // return %sum : f32 + // } + + // Inside the loop body: + + // try replace constant15 ie, with input & filter + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + + Value inputForFilter = + rewriter.create(loc, firOpAdaptor.getLhs(), iv); + + // Get iter_arg + auto getIterArg = + forOp1.getBody()->getArgument(1); // forOp1.getIterOperands(); + Value sumNext = + rewriter.create(loc, inputForFilter, getIterArg); + // Value sumNext = rewriter.create(loc, inputForFilter, + // constant0); + + // here, at indx 0 , o/p = in[0] + // at indx 1 , o/p = in[0] + in[1] & so on + // at indx last o/p[9] = sum of all input elements + auto storeOp = rewriter.create(loc, sumNext, alloc, + ValueRange{iv}); + rewriter.create(loc, ValueRange{sumNext}); + // rewriter.create(loc); + // auto result = forOp1.getResult(0); + llvm::errs() << "LINE = " << __LINE__ << "\n"; #endif #if TryMultiDimLoop - // here, we have to use iter - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0] ; - int64_t step = 1; - - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - - //create loadOp - 
FIRFilterResponseOpAdaptor firOpAdaptor(operands); - - Value loadInput = rewriter.create(loc, firOpAdaptor.getLhs() , iv); - - //create another loop -- - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ub, step , ValueRange{loadInput} ); - - rewriter.setInsertionPointToStart(forOp2.getBody()); - auto iv2 = forOp2.getInductionVar(); - Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , iv2); - - // get iterArg - auto getIterArg = forOp2.getBody()->getArgument(1); - auto sumNext = rewriter.create(loc, loadInput, loadFilter); - - - - //store the result to output - // rewriter.create(loc, sumNext, alloc, iv ); - rewriter.create(loc, ValueRange{sumNext}); - rewriter.setInsertionPointAfter(forOp2); - rewriter.create(loc, forOp2.getResult(0), alloc, iv ); - // - //yield the - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - // Initial sum set to 0. - // affine.for %arg0 = 0 to 10 { - // %1 = affine.load input[%arg0] - // %4 = affine.for %arg1 = 0 to 10 step 1 - // iter_args(%sum_iter = %1) { - // %2 = affine.load filter[%arg1] - // %3 = arith.add sum_iter , %2 - // affine.yield %3 : f64 - // } - // affine.store %4, output[%arg0] - // } - - - // Inside the loop body: + // here, we have to use iter + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // create loadOp + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + + Value loadInput = + rewriter.create(loc, firOpAdaptor.getLhs(), iv); + + // create another loop -- + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ub, step, ValueRange{loadInput}); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = 
forOp2.getInductionVar(); + Value loadFilter = + rewriter.create(loc, firOpAdaptor.getRhs(), iv2); + + // get iterArg + auto getIterArg = forOp2.getBody()->getArgument(1); + auto sumNext = rewriter.create(loc, loadInput, loadFilter); + + // store the result to output + // rewriter.create(loc, sumNext, alloc, iv ); + rewriter.create(loc, ValueRange{sumNext}); + rewriter.setInsertionPointAfter(forOp2); + rewriter.create(loc, forOp2.getResult(0), alloc, iv); + // + // yield the + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + // Initial sum set to 0. + // affine.for %arg0 = 0 to 10 { + // %1 = affine.load input[%arg0] + // %4 = affine.for %arg1 = 0 to 10 step 1 + // iter_args(%sum_iter = %1) { + // %2 = affine.load filter[%arg1] + // %3 = arith.add sum_iter , %2 + // affine.yield %3 : f64 + // } + // affine.store %4, output[%arg0] + // } - - llvm::errs() << "LINE = " << __LINE__ << "\n"; + // Inside the loop body: + llvm::errs() << "LINE = " << __LINE__ << "\n"; #endif #if TryMultiDimForAndIf - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; - - //create AffineMap and set - // %1 = affine.load - // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > - - //affine.if %arg1 >= 0 and %5 <= %1 - 1 - // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 - // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - //start adding operations like a arith::constant = 100.0 to the body of forOp1 - // Inside the loop body: - - AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); - IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); + int64_t lb = 0; + 
int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + // create AffineMap and set + // %1 = affine.load + // if ( %arg0 >= 5) ie, integerSet <(d0) : (d0 - 5 >= 0) > + + // affine.if %arg1 >= 0 and %5 <= %1 - 1 + // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1 + // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0 + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + + AffineExpr dimExpr = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5); + IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false}); + + // create 2nd loop + // use loop inductn variable for 2nd loop + // use if condition on 2nd loop inductn variable + // get the result of inner for loop and store at output + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = forOp2.getInductionVar(); + AffineExpr dimExpr2 = + rewriter.getAffineDimExpr(1) - rewriter.getAffineConstantExpr(6); + IntegerSet set2 = IntegerSet::get(1, 0, {dimExpr, dimExpr2}, {false}); + + auto ifOp = rewriter.create(loc, set2, ValueRange{iv}, + false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + Value constant25 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25)); + Value resultFromInnerLoop = + rewriter.create(loc, constant25, constant25); + + // rewriter.setInsertionPointAfter(forOp2); + // rewriter.setInsertionPointToEnd(forOp2->getBlock()); + // rewriter.create(loc, constant25 , alloc, iv2); + // rewriter.create(loc, ValueRange{resultFromInnerLoop}); + // rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, 
ValueRange{resultFromInnerLoop}); + // rewriter.setInsertionPointAfter(forOp2); + rewriter.create(loc, constant25, alloc, iv); + // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) > + // affine.for %arg0 = 0 to 10 { + // %N = len(output) + // %4 = affine.for %arg1 = 0 to 10 { + // affine.if #set2(%arg0 , %arg1 )[%N] { + // %1 = const 5 + // %2 = const 3 + // %3 = arith.mulf %1 , %2 + // affine.yield %3 + // } + // } + // affine.store %4, alloc[%arg0] + // } + // rewriter.create(loc, ValueRange{constant25}); + // rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - // create 2nd loop - // use loop inductn variable for 2nd loop - // use if condition on 2nd loop inductn variable - // get the result of inner for loop and store at output + // try to add the affine.If condition + // create affine.If , + // use integer set to represent the condition + // check the AffineArgs + // affine.if operation contains two regions for the “then” and “else” clauses + // each region of affine.if must contain a single block with no args and + // terminated by affine.yield op + // if affine.if defines no values --> no need for affine.yield - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ub, step ); - rewriter.setInsertionPointToStart(forOp2.getBody()); - auto iv2 = forOp2.getInductionVar(); - AffineExpr dimExpr2 = rewriter.getAffineDimExpr(1) - rewriter.getAffineConstantExpr(6); - IntegerSet set2 = IntegerSet::get(1, 0, {dimExpr,dimExpr2}, {false}); + // affineIf.setConditional(set1, forOp1.getInductionVar()); + // start then "block" + // "then" block - auto ifOp = rewriter.create( loc, set2 , ValueRange{iv} , false /*no else*/ ); - rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - Value constant25 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(25)); - Value resultFromInnerLoop = rewriter.create(loc, constant25 , constant25); + // rewriter.create(loc, constant25); + llvm::errs() << "LINE 
= " << __LINE__ << "\n"; + // Back to parentOp -- ifOp stops here + // rewriter.setInsertionPointAfter(ifOp); - // rewriter.setInsertionPointAfter(forOp2); - // rewriter.setInsertionPointToEnd(forOp2->getBlock()); - // rewriter.create(loc, constant25 , alloc, iv2); - // rewriter.create(loc, ValueRange{resultFromInnerLoop}); - // rewriter.setInsertionPointAfter(ifOp); - // rewriter.create(loc, ValueRange{resultFromInnerLoop}); - // rewriter.setInsertionPointAfter(forOp2); - rewriter.create(loc, constant25 , alloc, iv); - // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) > - // affine.for %arg0 = 0 to 10 { - // %N = len(output) - // %4 = affine.for %arg1 = 0 to 10 { - // affine.if #set2(%arg0 , %arg1 )[%N] { - // %1 = const 5 - // %2 = const 3 - // %3 = arith.mulf %1 , %2 - // affine.yield %3 - // } - // } - // affine.store %4, alloc[%arg0] - // } - - - - // rewriter.create(loc, ValueRange{constant25}); - // rewriter.setInsertionPointAfter(ifOp); - // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - - //try to add the affine.If condition - //create affine.If , - // use integer set to represent the condition - //check the AffineArgs - // affine.if operation contains two regions for the “then” and “else” clauses - //each region of affine.if must contain a single block with no args and terminated by affine.yield op - // if affine.if defines no values --> no need for affine.yield - - // affineIf.setConditional(set1, forOp1.getInductionVar()); - //start then "block" - // "then" block - - // rewriter.create(loc, constant25); - llvm::errs() << "LINE = " << __LINE__ << "\n"; - //Back to parentOp -- ifOp stops here - // rewriter.setInsertionPointAfter(ifOp); - - llvm::errs() << "LINE = " << __LINE__ << " xx\n"; + llvm::errs() << "LINE = " << __LINE__ << " xx\n"; #endif #if TryMultiDimLoopAndAffineMap - // here, we have to use iter - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0] ; - int64_t step = 1; - - Value constant0 = rewriter.create(loc, 
rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - - //create loadOp - FIRFilterResponseOpAdaptor firOpAdaptor(operands); - - Value loadInput = rewriter.create(loc, firOpAdaptor.getLhs() , iv); - - //create another loop -- - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ub, step , ValueRange{loadInput} ); - - rewriter.setInsertionPointToStart(forOp2.getBody()); - auto iv2 = forOp2.getInductionVar(); - - //Use AffineMap for affine.load alloc_9[%arg0 - %arg1] - AffineExpr OuterIndx = rewriter.getAffineDimExpr(0); - AffineExpr InnerIndx = rewriter.getAffineDimExpr(1); - AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx); - // auto outputIndex = rewriter.create(loc, addMap , ValueRange{iv,iv2}); - - // Value constant15 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); - - - // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv2,iv}); - Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv,iv2}); - // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , outputIndex); - // get iterArg - auto getIterArg = forOp2.getBody()->getArgument(1); - auto sumNext = rewriter.create(loc, getIterArg, loadFilter); - - - - //store the result to output - // rewriter.create(loc, sumNext, alloc, iv ); - rewriter.create(loc, ValueRange{sumNext}); - rewriter.setInsertionPointAfter(forOp2); - rewriter.create(loc, forOp2.getResult(0), alloc, iv ); - // - //yield the - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - // Initial sum set to 0. 
- // affine.for %arg0 = 0 to 10 { - // %1 = affine.load input[%arg0] - // %4 = affine.for %arg1 = 0 to 10 step 1 - // iter_args(%sum_iter = %1) { - // %2 = affine.load filter[%arg1] - // %3 = arith.add sum_iter , %2 - // affine.yield %3 : f64 - // } - // affine.store %4, output[%arg0] - // } - - - // Inside the loop body: + // here, we have to use iter + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // create loadOp + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + + Value loadInput = + rewriter.create(loc, firOpAdaptor.getLhs(), iv); + + // create another loop -- + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ub, step, ValueRange{loadInput}); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = forOp2.getInductionVar(); + + // Use AffineMap for affine.load alloc_9[%arg0 - %arg1] + AffineExpr OuterIndx = rewriter.getAffineDimExpr(0); + AffineExpr InnerIndx = rewriter.getAffineDimExpr(1); + AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx); + // auto outputIndex = rewriter.create(loc, addMap , + // ValueRange{iv,iv2}); + + // Value constant15 = rewriter.create(loc, + // rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + + // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() + // , addMap, ValueRange{iv2,iv}); + Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs(), + addMap, ValueRange{iv, iv2}); + // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() + // , outputIndex); get iterArg + auto getIterArg = forOp2.getBody()->getArgument(1); + auto sumNext = rewriter.create(loc, getIterArg, loadFilter); + + // store the result to output + // rewriter.create(loc, sumNext, alloc, iv ); + 
rewriter.create(loc, ValueRange{sumNext}); + rewriter.setInsertionPointAfter(forOp2); + rewriter.create(loc, forOp2.getResult(0), alloc, iv); + // + // yield the + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + // Initial sum set to 0. + // affine.for %arg0 = 0 to 10 { + // %1 = affine.load input[%arg0] + // %4 = affine.for %arg1 = 0 to 10 step 1 + // iter_args(%sum_iter = %1) { + // %2 = affine.load filter[%arg1] + // %3 = arith.add sum_iter , %2 + // affine.yield %3 : f64 + // } + // affine.store %4, output[%arg0] + // } - - llvm::errs() << "LINE = " << __LINE__ << "\n"; + // Inside the loop body: + llvm::errs() << "LINE = " << __LINE__ << "\n"; #endif #if TryMultiDimLoopAndAffineSet - // here, we have to use iter - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0] ; - int64_t step = 1; - - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - - //create loadOp - FIRFilterResponseOpAdaptor firOpAdaptor(operands); - - Value loadInput = rewriter.create(loc, firOpAdaptor.getLhs() , iv); - - //create another loop -- - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ub, step , ValueRange{loadInput} ); - - rewriter.setInsertionPointToStart(forOp2.getBody()); - auto iv2 = forOp2.getInductionVar(); - - //Use AffineMap for affine.load alloc_9[%arg0 - %arg1] - AffineExpr OuterIndx = rewriter.getAffineDimExpr(0); - AffineExpr InnerIndx = rewriter.getAffineDimExpr(1); - AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx); - auto outputIndex = rewriter.create(loc, addMap , ValueRange{iv,iv2}); - - // Value constant15 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); - AffineExpr dimExpr = OuterIndx - InnerIndx; - IntegerSet set1 = 
IntegerSet::get(2, 0, {dimExpr}, {false}); - - auto ifOp = rewriter.create( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ ); - rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv2,iv}); - Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv,iv2}); - // get iterArg - auto getIterArg = forOp2.getBody()->getArgument(1); - auto sumNext = rewriter.create(loc, loadFilter, loadFilter); - // rewriter.create(loc, sumNext, alloc, iv ); - rewriter.create(loc, ValueRange{sumNext}); - - //store the result to output - // rewriter.create(loc, sumNext, alloc, iv ); - rewriter.setInsertionPointAfter(ifOp); - rewriter.create(loc, ValueRange{sumNext}); - rewriter.setInsertionPointAfter(forOp2); - rewriter.create(loc, forOp2.getResult(0), alloc, iv ); - // - //yield the - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - // Initial sum set to 0. 
- // affine.for %arg0 = 0 to 10 { - // %1 = affine.load input[%arg0] - // %4 = affine.for %arg1 = 0 to 10 step 1 - // iter_args(%sum_iter = %1) { - // %2 = affine.load filter[%arg1] - // %3 = arith.add sum_iter , %2 - // affine.yield %3 : f64 - // } - // affine.store %4, output[%arg0] - // } - - - // Inside the loop body: + // here, we have to use iter + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // create loadOp + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + + Value loadInput = + rewriter.create(loc, firOpAdaptor.getLhs(), iv); + + // create another loop -- + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ub, step, ValueRange{loadInput}); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = forOp2.getInductionVar(); + + // Use AffineMap for affine.load alloc_9[%arg0 - %arg1] + AffineExpr OuterIndx = rewriter.getAffineDimExpr(0); + AffineExpr InnerIndx = rewriter.getAffineDimExpr(1); + AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx); + auto outputIndex = + rewriter.create(loc, addMap, ValueRange{iv, iv2}); + + // Value constant15 = rewriter.create(loc, + // rewriter.getF64Type(), rewriter.getF64FloatAttr(15)); + AffineExpr dimExpr = OuterIndx - InnerIndx; + IntegerSet set1 = IntegerSet::get(2, 0, {dimExpr}, {false}); + + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + // Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() + // , addMap, ValueRange{iv2,iv}); + Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs(), + addMap, ValueRange{iv, iv2}); + // get iterArg + auto getIterArg = 
forOp2.getBody()->getArgument(1); + auto sumNext = rewriter.create(loc, loadFilter, loadFilter); + // rewriter.create(loc, sumNext, alloc, iv ); + rewriter.create(loc, ValueRange{sumNext}); + + // store the result to output + // rewriter.create(loc, sumNext, alloc, iv ); + rewriter.setInsertionPointAfter(ifOp); + rewriter.create(loc, ValueRange{sumNext}); + rewriter.setInsertionPointAfter(forOp2); + rewriter.create(loc, forOp2.getResult(0), alloc, iv); + // + // yield the + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + // Initial sum set to 0. + // affine.for %arg0 = 0 to 10 { + // %1 = affine.load input[%arg0] + // %4 = affine.for %arg1 = 0 to 10 step 1 + // iter_args(%sum_iter = %1) { + // %2 = affine.load filter[%arg1] + // %3 = arith.add sum_iter , %2 + // affine.yield %3 : f64 + // } + // affine.store %4, output[%arg0] + // } - - llvm::errs() << "LINE = " << __LINE__ << "\n"; + // Inside the loop body: + llvm::errs() << "LINE = " << __LINE__ << "\n"; #endif #if TryFIRFilter - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; - - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); - rewriter.setInsertionPointToStart(forOp1.getBody()); - auto iv = forOp1.getInductionVar(); - - // Value sum0 = rewriter.create(loc, rewriter.getF64Type(), - // rewriter.getF64FloatAttr(0)); - //get filter len - // auto tensorTypeFilter = llvm::cast((*op->getOperand(1))); //operand_type_end - // auto tensorTypeFilter = llvm::cast((*op->operand_type_begin())); - auto operandIt = op->operand_type_begin(); - auto tensorTypeInput = llvm::cast(*operandIt); - int64_t ubForInput = tensorTypeInput.getShape()[0]; - //get second operand - operandIt = operandIt + 1; - - // auto tensorTypeFilter = llvm::cast((*op->operand_type_begin())); //operandIt - auto tensorTypeFilter = llvm::cast(*operandIt); - int64_t ubForFilter = tensorTypeFilter.getShape()[0]; - - // llvm::errs() << 
"ubForFilter= " << ubForFilter << "\n"; - //create a constant for sum - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ubForFilter, step , ValueRange{constant0}); - rewriter.setInsertionPointToStart(forOp2.getBody()); - auto iv2 = forOp2.getInductionVar(); - - auto getIterArg = forOp2.getBody()->getArgument(1); //forOp1.getIterOperands(); - - // AffineExpr dimExpr = rewriter.getAffineDimExpr(0); - AffineExpr dimExpr2 = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); - //n-k <= inputLen -1 or, k-n >= 1 - inputLen ie, k - n + inputLen - 1 >= 0 - AffineExpr ExprForUpperBoundCheck = rewriter.getAffineConstantExpr(ubForInput) + rewriter.getAffineDimExpr(1) - - rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1) ; - IntegerSet set2 = IntegerSet::get(2, 0, {dimExpr2,ExprForUpperBoundCheck}, {false, false}); - - //use typeRange too: - Type floatType = rewriter.getF64Type(); - // if n-k >= 0 - auto ifOp = rewriter.create( loc, TypeRange{floatType}, set2 , ValueRange{iv,iv2} , true /*else*/ ); - rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - - AffineMap addMap = AffineMap::get(2, 0, dimExpr2); - // auto inputIndex = rewriter.create(loc, addMap , ValueRange{iv,iv2}); - - FIRFilterResponseOpAdaptor firOpAdaptor(operands); - Value loadInput = rewriter.create(loc, firOpAdaptor.getLhs(), addMap , ValueRange{iv,iv2}); + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // Value sum0 = rewriter.create(loc, rewriter.getF64Type(), + // rewriter.getF64FloatAttr(0)); + // get filter len + // auto tensorTypeFilter = llvm::cast((*op->getOperand(1))); + // //operand_type_end auto tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); + auto operandIt 
= op->operand_type_begin(); + auto tensorTypeInput = llvm::cast(*operandIt); + int64_t ubForInput = tensorTypeInput.getShape()[0]; + // get second operand + operandIt = operandIt + 1; + + // auto tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); //operandIt + auto tensorTypeFilter = llvm::cast(*operandIt); + int64_t ubForFilter = tensorTypeFilter.getShape()[0]; + + // llvm::errs() << "ubForFilter= " << ubForFilter << "\n"; + // create a constant for sum + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ubForFilter, step, ValueRange{constant0}); + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = forOp2.getInductionVar(); + + auto getIterArg = + forOp2.getBody()->getArgument(1); // forOp1.getIterOperands(); + + // AffineExpr dimExpr = rewriter.getAffineDimExpr(0); + AffineExpr dimExpr2 = + rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); + // n-k <= inputLen -1 or, k-n >= 1 - inputLen ie, k - n + inputLen - 1 >= 0 + AffineExpr ExprForUpperBoundCheck = + rewriter.getAffineConstantExpr(ubForInput) + + rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0) - + rewriter.getAffineConstantExpr(1); + IntegerSet set2 = + IntegerSet::get(2, 0, {dimExpr2, ExprForUpperBoundCheck}, {false, false}); + + // use typeRange too: + Type floatType = rewriter.getF64Type(); + // if n-k >= 0 + auto ifOp = rewriter.create( + loc, TypeRange{floatType}, set2, ValueRange{iv, iv2}, true /*else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + AffineMap addMap = AffineMap::get(2, 0, dimExpr2); + // auto inputIndex = rewriter.create(loc, addMap , + // ValueRange{iv,iv2}); + + FIRFilterResponseOpAdaptor firOpAdaptor(operands); + Value loadInput = rewriter.create(loc, firOpAdaptor.getLhs(), + addMap, ValueRange{iv, iv2}); + + rewriter.create(loc, ValueRange{loadInput}); + // else block + 
rewriter.setInsertionPointToStart(ifOp.getElseBlock()); + Value const0ForElse = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + rewriter.create(loc, ValueRange{const0ForElse}); + rewriter.setInsertionPointAfter(ifOp); + + // load filter and then mult and then sum + Value loadFilter = + rewriter.create(loc, firOpAdaptor.getRhs(), iv2); + // Value constant25 = rewriter.create(loc, + // rewriter.getF64Type(), + // rewriter.getF64FloatAttr(25)); + Value filterMulInput = + rewriter.create(loc, ifOp.getResult(0), loadFilter); + Value sumNext = + rewriter.create(loc, filterMulInput, getIterArg); + rewriter.create(loc, ValueRange{sumNext}); + // rewriter.setInsertionPointToEnd(forOp2->getBlock()); + rewriter.setInsertionPointAfter(forOp2); + rewriter.create(loc, forOp2.getResult(0), alloc, iv); + rewriter.setInsertionPointAfter(forOp1); + + // ifOp->dump(); + + // FIRFilterResponse code -- x[n] , h[n] + + // iterate for output + // start with sum=0 + // iterate for filter len + // check for input_indx must be within bounds + // load filter and input[indx] + // multiply them + // add this to sum + // update output with sum + + // inside the forOp body --> create the operations & then close the body + // OpBuilder::InsertionGuard guard(rewriter); + + // start adding operations like a arith::constant = 100.0 to the body of + // forOp1 + // Inside the loop body: + + // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) > + // affine.for %arg0 = 0 to 10 { + // %N = len(output) + // %4 = affine.for %arg1 = 0 to 10 { + // affine.if #set2(%arg0 , %arg1 )[%N] { + // %1 = const 5 + // %2 = const 3 + // %3 = arith.mulf %1 , %2 + // affine.yield %3 + // } + // } + // affine.store %4, alloc[%arg0] + // } - rewriter.create(loc, ValueRange{loadInput}); - //else block - rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - Value const0ForElse = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - rewriter.create(loc, 
ValueRange{const0ForElse}); - rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, ValueRange{constant25}); + // rewriter.setInsertionPointAfter(ifOp); + // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - //load filter and then mult and then sum - Value loadFilter = rewriter.create(loc, firOpAdaptor.getRhs() , iv2); - // Value constant25 = rewriter.create(loc, rewriter.getF64Type(), - // rewriter.getF64FloatAttr(25)); - Value filterMulInput = rewriter.create(loc, ifOp.getResult(0) , loadFilter); - Value sumNext = rewriter.create(loc, filterMulInput, getIterArg); - rewriter.create(loc, ValueRange{sumNext}); - // rewriter.setInsertionPointToEnd(forOp2->getBlock()); - rewriter.setInsertionPointAfter(forOp2); - rewriter.create(loc, forOp2.getResult(0) , alloc, iv); - rewriter.setInsertionPointAfter(forOp1); + // try to add the affine.If condition + // create affine.If , + // use integer set to represent the condition + // check the AffineArgs + // affine.if operation contains two regions for the “then” and “else” clauses + // each region of affine.if must contain a single block with no args and + // terminated by affine.yield op + // if affine.if defines no values --> no need for affine.yield - // ifOp->dump(); - - - //FIRFilterResponse code -- x[n] , h[n] - - //iterate for output - //start with sum=0 - //iterate for filter len - //check for input_indx must be within bounds - //load filter and input[indx] - //multiply them - //add this to sum - //update output with sum - - - - //inside the forOp body --> create the operations & then close the body - // OpBuilder::InsertionGuard guard(rewriter); - - //start adding operations like a arith::constant = 100.0 to the body of forOp1 - // Inside the loop body: - - - // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) > - // affine.for %arg0 = 0 to 10 { - // %N = len(output) - // %4 = affine.for %arg1 = 0 to 10 { - // affine.if #set2(%arg0 , %arg1 )[%N] { - // %1 = const 5 - // %2 = const 3 - // %3 = 
arith.mulf %1 , %2 - // affine.yield %3 - // } - // } - // affine.store %4, alloc[%arg0] - // } - - - - // rewriter.create(loc, ValueRange{constant25}); - // rewriter.setInsertionPointAfter(ifOp); - // rewriter.create(loc, ifOp.getResult(0) , alloc, iv); - - //try to add the affine.If condition - //create affine.If , - // use integer set to represent the condition - //check the AffineArgs - // affine.if operation contains two regions for the “then” and “else” clauses - //each region of affine.if must contain a single block with no args and terminated by affine.yield op - // if affine.if defines no values --> no need for affine.yield - - // affineIf.setConditional(set1, forOp1.getInductionVar()); - //start then "block" - // "then" block - - // rewriter.create(loc, constant25); - // llvm::errs() << "LINE = " << __LINE__ << "\n"; - //Back to parentOp -- ifOp stops here - // rewriter.setInsertionPointAfter(ifOp); - - // llvm::errs() << "LINE = " << __LINE__ << " xx\n"; + // affineIf.setConditional(set1, forOp1.getInductionVar()); + // start then "block" + // "then" block + // rewriter.create(loc, constant25); + // llvm::errs() << "LINE = " << __LINE__ << "\n"; + // Back to parentOp -- ifOp stops here + // rewriter.setInsertionPointAfter(ifOp); + // llvm::errs() << "LINE = " << __LINE__ << " xx\n"; #endif - // Terminate the loop body with affine.yield. - // rewriter.create(loc); - + // Terminate the loop body with affine.yield. + // rewriter.create(loc); // Replace this operation with the generated alloc. 
rewriter.replaceOp(op, alloc); } -namespace { - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FFT1DImg operations //===----------------------------------------------------------------------===// - struct FFT1DImgConjSymmOpLowering : public ConversionPattern { FFT1DImgConjSymmOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::FFT1DImgConjSymmOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::FFT1DImgConjSymmOp::getOperationName(), 1, ctx) { + } LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = y_real[k] + j *y_img[k] - // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 - - // For k=0: - //y[0] = 0 - - // for k=1 to (N+1)/2 - // sum = 0 - // for n=0 to N - // sum = sum + x[n] * sin(2*pi*k*n/N) - //y[k] = -1 * sum - //y[N-k] = sum - //init output mem for y_real & y_img as 0 - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k] - // - // replace this upsampling op with the output_mem_allocation op - - DEBUG_PRINT_NO_ARGS() ; - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y[k] = y_real[k] + j *y_img[k] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + + // For k=0: + // y[0] = 0 + + // for k=1 to (N+1)/2 + // sum = 0 + // for n=0 to N + // sum = sum + x[n] * sin(2*pi*k*n/N) + // y[k] = -1 * sum + // y[N-k] = sum + // init output mem for y_real & y_img as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op + + 
DEBUG_PRINT_NO_ARGS(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 1 to last - int64_t lb = 0 ; + // For loop -- iterate from 1 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; - int64_t ubBy2 = (ub+1)/2; + int64_t ubBy2 = (ub + 1) / 2; int64_t step = 1; - // affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); - // auto iv = forOp1.getInductionVar(); + // affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, + // step); auto iv = forOp1.getInductionVar(); // rewriter.setInsertionPointToStart(forOp1.getBody()); - // rewriter.create(loc, constant0, alloc_img, ValueRange{iv}); - // rewriter.setInsertionPointAfter(forOp1); - DEBUG_PRINT_NO_ARGS() ; - //for k=0 + // rewriter.create(loc, constant0, alloc_img, + // ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); + DEBUG_PRINT_NO_ARGS(); + // for k=0 Value Indx0 = rewriter.create(loc, 0); - 
rewriter.create(loc, constant0, alloc_img, ValueRange{Indx0}); + rewriter.create(loc, constant0, alloc_img, + ValueRange{Indx0}); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb+1, ubBy2, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb + 1, ubBy2, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step, ValueRange{constant0}); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step, ValueRange{constant0}); auto ivX = forOpX.getInductionVar(); - auto getIterArg = forOpX.getBody()->getArgument(1); + auto getIterArg = forOpX.getBody()->getArgument(1); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 + // load from X, & y1 & y2 FFT1DImgConjSymmOpAdaptor fft1DImgConjSymmAdaptor(operands); - Value inputX = rewriter.create(loc, fft1DImgConjSymmAdaptor.getInput(), ValueRange{ivX}); - // Value loadYImg = rewriter.create(loc, alloc_img, ValueRange{ivY}); + Value inputX = rewriter.create( + loc, fft1DImgConjSymmAdaptor.getInput(), ValueRange{ivX}); + // Value loadYImg = rewriter.create(loc, alloc_img, + // ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); - //get 2*pi * k * i / N - Value muli_k = 
rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Value N = inputTensorType.getShape()[0]; - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; - + Value divIndxByN = rewriter.create(loc, mul2piKI, N); + // Img part = -1 * Sum(x[i] * sin(div) ) Value GetSin = rewriter.create(loc, divIndxByN); - Value xMulSin = rewriter.create(loc, inputX , GetSin); - Value imgSum = rewriter.create(loc, getIterArg ,xMulSin) ; + Value xMulSin = rewriter.create(loc, inputX, GetSin); + Value imgSum = rewriter.create(loc, getIterArg, xMulSin); rewriter.create(loc, ValueRange{imgSum}); rewriter.setInsertionPointAfter(forOpX); - - //store imgSum at y[k] - rewriter.create(loc, forOpX.getResult(0), alloc_img, ValueRange{ivY}); - - //store -1 * imgSum at y[N-k] - AffineExpr ExprNminusK = rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0); - AffineMap mapNminusK = AffineMap::get(1, 0 , ExprNminusK); - Value constMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(-1)); - Value NegImgSum = rewriter.create(loc, constMinus1 , forOpX.getResult(0)); - - rewriter.create(loc, NegImgSum, alloc_img, mapNminusK, ValueRange{ivY}); + + // store imgSum at y[k] + rewriter.create(loc, forOpX.getResult(0), alloc_img, 
+ ValueRange{ivY}); + + // store -1 * imgSum at y[N-k] + AffineExpr ExprNminusK = + rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0); + AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK); + Value constMinus1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + Value NegImgSum = + rewriter.create(loc, constMinus1, forOpX.getResult(0)); + + rewriter.create(loc, NegImgSum, alloc_img, mapNminusK, + ValueRange{ivY}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc_real[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // 
%img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc_real[%y] : memref<4xf64> - // // dsp.print %alloc_real : memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc_real[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc_real[%y] : memref<4xf64> + // // dsp.print %alloc_real : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } // rewriter.replaceOp(op, alloc_real); rewriter.replaceOp(op, alloc_img); - + return 
success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FFT1DRealSymmOp operations //===----------------------------------------------------------------------===// - struct FFT1DRealSymmOpLowering : public ConversionPattern { FFT1DRealSymmOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::FFT1DRealSymmOp::getOperationName(), 1, ctx) {} @@ -1188,164 +1237,151 @@ struct FFT1DRealSymmOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = sumOver_n(x[n]*cos[2*pi * k *n/N ] , 0<=k < (N+1)/2 - // & y[N-k] = y[k] (N+1)/2<= k< N - // For k=0: - //sum=0 - // for n= 0 to N - //sum = sum + x[n] - //y[0] = sum - - // for k=1 to (N+1)/2 - // sum = 0 - // for n=0 to N - // sum = sum + x[n] * cos(2*pi*k*n/N) - //y[k] = sum - //y[N-k] = sum - - //Actual definition - // y[k] = y_real[k] + j *y_img[k] - // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] - // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 - //init output mem for y_real & y_img as 0 - // replace this upsampling op with the output_mem_allocation op + // Pseudo-code: + // y[k] = sumOver_n(x[n]*cos[2*pi * k *n/N ] , 0<=k < (N+1)/2 + // & y[N-k] = y[k] (N+1)/2<= k< N + // For k=0: + // sum=0 + // for n= 0 to N + // sum = sum + x[n] + // y[0] = sum + // for k=1 to (N+1)/2 + // sum = 0 + // for n=0 to N + // sum = sum + x[n] * cos(2*pi*k*n/N) + // y[k] = sum + // y[N-k] = sum + + // Actual definition + // y[k] = y_real[k] + j *y_img[k] + // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + // init output mem for y_real & y_img as 0 + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for 
future reference - // auto tensorType1 = llvm::cast(*std::next(op->result_type_begin(), 1)); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // auto tensorType1 = + // llvm::cast(*std::next(op->result_type_begin(), 1)); + + // DEBUG_PRINT_NO_ARGS() ; + // tensorType.getShape()[0] + // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] + // << " func= " << __func__ << "\n"; - // DEBUG_PRINT_NO_ARGS() ; - //tensorType.getShape()[0] - // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 1 to last - int64_t lb = 0 ; + // For loop -- iterate from 1 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; - int64_t ubBy2 = (ub+1)/2; + int64_t 
ubBy2 = (ub + 1) / 2; int64_t step = 1; - //load from X, & y1 & y2 - FFT1DRealSymmOpAdaptor fft1DRealSymmAdaptor(operands); - - // affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); - // auto iv = forOp1.getInductionVar(); + // affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, + // step); auto iv = forOp1.getInductionVar(); // rewriter.setInsertionPointToStart(forOp1.getBody()); - // rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); - // rewriter.setInsertionPointAfter(forOp1); - - //k=0 - //sum=0 - // for n= 0 to N - //sum = sum + x[n] - //y[0] = sum - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ub, step , ValueRange{constant0}); - auto iv2 = forOp2.getInductionVar(); - rewriter.setInsertionPointToStart(forOp2.getBody()); - //get previous sum - auto getIterArg1 = forOp2.getBody()->getArgument(1); - Value loadX = rewriter.create(loc, fft1DRealSymmAdaptor.getInput(), ValueRange{iv2}); - Value sumNext1 = rewriter.create(loc, loadX, getIterArg1); - rewriter.create(loc, ValueRange{sumNext1}); - rewriter.setInsertionPointAfter(forOp2); - - //store result for k=0 + // rewriter.create(loc, constant0, alloc_real, + // ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); + DEBUG_PRINT_NO_ARGS(); + // for k=0 Value Indx0 = rewriter.create(loc, 0); - rewriter.create(loc, forOp2.getResult(0), alloc_real, ValueRange{Indx0}); + rewriter.create(loc, constant0, alloc_real, + ValueRange{Indx0}); - // for k=1 to (N+1)/2 - // sum = 0 - // for n=0 to N - // sum = sum + x[n] * cos(2*pi*k*n/N) - //y[k] = sum - //y[N-k] = sum - //loop for Y ie, k - affine::AffineForOp forOpY = rewriter.create(loc, lb+1, ubBy2, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb + 1, ubBy2, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step, ValueRange{constant0}); + // loop for X + affine::AffineForOp 
forOpX = + rewriter.create(loc, lb, ub, step, ValueRange{constant0}); auto ivX = forOpX.getInductionVar(); - //get sum - auto getIterArg = forOpX.getBody()->getArgument(1); + auto getIterArg = forOpX.getBody()->getArgument(1); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 - Value inputX = rewriter.create(loc, fft1DRealSymmAdaptor.getInput(), ValueRange{ivX}); - // Value loadYReal = rewriter.create(loc, alloc_real, ValueRange{ivY}); - - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); - - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); - - //get 2*pi * k * i / N - Value muli_k = rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); - - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); - // Value N = inputTensorType.getShape()[0]; + // load from X, & y1 & y2 + FFT1DRealSymmOpAdaptor fft1DRealSymmAdaptor(operands); + Value inputX = rewriter.create( + loc, fft1DRealSymmAdaptor.getInput(), ValueRange{ivX}); + // Value loadYImg = rewriter.create(loc, alloc_img, + // ValueRange{ivY}); - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - // Real part = Sum(x[i] * cos(div) ) - Value GetCos = rewriter.create(loc, divIndxByN); - Value xMulCos = rewriter.create(loc, inputX , GetCos); + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); - //realSu - Value sumNext = 
rewriter.create(loc, getIterArg ,xMulCos) ; - // rewriter.create(loc, sumNext, alloc_real, ValueRange{ivX}); - - // DEBUG_PRINT_NO_ARGS() ; + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); + + // getOperand().getType() + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); + // Value N = inputTensorType.getShape()[0]; + + Value divIndxByN = rewriter.create(loc, mul2piKI, N); + + Value GetCos = rewriter.create(loc, divIndxByN); + Value xMulCos = rewriter.create(loc, inputX, GetCos); + + // realSu + Value sumNext = rewriter.create(loc, getIterArg, xMulCos); + // rewriter.create(loc, sumNext, alloc_real, + // ValueRange{ivX}); + + // DEBUG_PRINT_NO_ARGS() ; rewriter.create(loc, ValueRange{sumNext}); rewriter.setInsertionPointAfter(forOpX); // forOpX->dump(); - //store realSum at y[k] - rewriter.create(loc, forOpX.getResult(0) , alloc_real, ValueRange{ivY}); + // store realSum at y[k] + rewriter.create(loc, forOpX.getResult(0), alloc_real, + ValueRange{ivY}); - //store realSum at y[N-k] - AffineExpr ExprNminusK = rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0); - AffineMap mapNminusK = AffineMap::get(1, 0 , ExprNminusK); - rewriter.create(loc, forOpX.getResult(0), alloc_real, mapNminusK, ValueRange{ivY}); + // store realSum at y[N-k] + AffineExpr ExprNminusK = + rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0); + AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK); + + rewriter.create(loc, forOpX.getResult(0), alloc_real, + mapNminusK, ValueRange{ivY}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); rewriter.replaceOp(op, alloc_real); - + return 
success(); } }; @@ -1353,138 +1389,150 @@ struct FFT1DRealSymmOpLowering : public ConversionPattern { //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FIRFilterYSymmOptimizedOp operations //===----------------------------------------------------------------------===// -struct FIRFilterYSymmOptimizedOpLowering: public ConversionPattern { - FIRFilterYSymmOptimizedOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::FIRFilterYSymmOptimizedOp::getOperationName(), 1 , ctx) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.FIRFilterYSymmOptimizedOp has 2 operands -- both of type tensor f64 - - //Get the location of FIRFilterYSymmOptimizedOp - auto loc = op->getLoc(); - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation +struct FIRFilterYSymmOptimizedOpLowering : public ConversionPattern { + FIRFilterYSymmOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FIRFilterYSymmOptimizedOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.FIRFilterYSymmOptimizedOp has 2 operands -- both of type tensor f64 + + // Get the location of FIRFilterYSymmOptimizedOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //Pseudo-code: - //N=lenY , M=lenX here, output is symm ie, y[n] = y[N-1-n] - //y[n] = x[n] conv x[-n] ie, x[M-1-n] ie, x2[n] - //y[n] = SumOverAllk x[k] * x2[n-k] , 0<=k(loc, - lb, ubBy2, step ); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, 
ubBy2, step); rewriter.setInsertionPointToStart(forOp1.getBody()); auto iv = forOp1.getInductionVar(); - //for n=0 to N - // sum = 0, temp =0 - //for n=0 to (N+1)/2 - // sum =0 - //get filter len + // for n=0 to N + // sum = 0, temp =0 + // for n=0 to (N+1)/2 + // sum =0 + // get filter len auto operandIt = op->operand_type_begin(); auto tensorTypeInput = llvm::cast(*operandIt); int64_t ubForInput = tensorTypeInput.getShape()[0]; DEBUG_PRINT_NO_ARGS(); - DEBUG_PRINT_WITH_ARGS("ubForInput=" , ubForInput ); + DEBUG_PRINT_WITH_ARGS("ubForInput=", ubForInput); - //create a constant for sum - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ubForInput, step , ValueRange{constant0}); + // create a constant for sum + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ubForInput, step, ValueRange{constant0}); rewriter.setInsertionPointToStart(forOp2.getBody()); auto iv2 = forOp2.getInductionVar(); - //get sum - auto getIterArg = forOp2.getBody()->getArgument(1); + // get sum + auto getIterArg = forOp2.getBody()->getArgument(1); DEBUG_PRINT_NO_ARGS(); FIRFilterYSymmOptimizedOpAdaptor firFilterYSymmOpAdaptor(operands); - // if( 0<= M+k-n-1 =0 ie, 2 dimensions =n & k - //UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0 - - //LowerBound Expr: M+k-n-1 >=0 ie, M-1 + k -n >= 0 - AffineExpr ExprLowerBound = rewriter.getAffineConstantExpr(ubForInput - 1) + rewriter.getAffineDimExpr(1) - - rewriter.getAffineDimExpr(0); - //UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0 - AffineExpr ExprUpperBound = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1) ; - IntegerSet setForIf = IntegerSet::get(2,0, {ExprLowerBound , ExprUpperBound}, {false, false}); + // sum = sum + x[k] * x[M+k-n-1] + // For M+k-n-1 + // LowerBoundSet: M+k-n-1 >=0 ie, 2 dimensions =n & k + // UpperBoundSet: M+k-n-1 <= M-1 
ie, n-k>=0 + + // LowerBound Expr: M+k-n-1 >=0 ie, M-1 + k -n >= 0 + AffineExpr ExprLowerBound = rewriter.getAffineConstantExpr(ubForInput - 1) + + rewriter.getAffineDimExpr(1) - + rewriter.getAffineDimExpr(0); + // UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0 + AffineExpr ExprUpperBound = + rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); + IntegerSet setForIf = + IntegerSet::get(2, 0, {ExprLowerBound, ExprUpperBound}, {false, false}); DEBUG_PRINT_NO_ARGS(); // if( 0<= M+k-n-1 ( loc, TypeRange{floatType}, setForIf , ValueRange{iv,iv2} , true /*else*/ ); + auto ifOp = + rewriter.create(loc, TypeRange{floatType}, setForIf, + ValueRange{iv, iv2}, true /*else*/); rewriter.setInsertionPointToStart(ifOp.getThenBlock()); DEBUG_PRINT_NO_ARGS(); // sum = sum + x[k] * x[M+k-n-1] - //load x[M+k-n-1] - AffineMap mapMPlusKMinusNmin1 = AffineMap::get(2, 0 , ExprLowerBound); - Value loadInputIndx2 = rewriter.create(loc, firFilterYSymmOpAdaptor.getLhs(), mapMPlusKMinusNmin1 , ValueRange{iv,iv2}); + // load x[M+k-n-1] + AffineMap mapMPlusKMinusNmin1 = AffineMap::get(2, 0, ExprLowerBound); + Value loadInputIndx2 = + rewriter.create(loc, firFilterYSymmOpAdaptor.getLhs(), + mapMPlusKMinusNmin1, ValueRange{iv, iv2}); rewriter.create(loc, ValueRange{loadInputIndx2}); - //else return 0 + // else return 0 rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - Value const0ForElse = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value const0ForElse = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); rewriter.create(loc, ValueRange{const0ForElse}); rewriter.setInsertionPointAfter(ifOp); - //outside if - //Now, sum = sum + val2 * x[k] - Value loadX = rewriter.create(loc, firFilterYSymmOpAdaptor.getLhs(), ValueRange{iv2}); + // outside if + // Now, sum = sum + val2 * x[k] + Value loadX = rewriter.create( + loc, firFilterYSymmOpAdaptor.getLhs(), ValueRange{iv2}); DEBUG_PRINT_NO_ARGS(); - //x[k] * x[M+k-n-1] here, val2 = 
x[M+k-n-1] - Value XMulReverseXIndx = rewriter.create(loc, loadX , ifOp.getResult(0)); - //sum = sum + x[k] * x[M+k-n-1] - Value sumNext = rewriter.create(loc, XMulReverseXIndx, getIterArg); + // x[k] * x[M+k-n-1] here, val2 = x[M+k-n-1] + Value XMulReverseXIndx = + rewriter.create(loc, loadX, ifOp.getResult(0)); + // sum = sum + x[k] * x[M+k-n-1] + Value sumNext = + rewriter.create(loc, XMulReverseXIndx, getIterArg); rewriter.create(loc, ValueRange{sumNext}); - + DEBUG_PRINT_NO_ARGS(); rewriter.setInsertionPointAfter(forOp2); // forOp2->dump(); DEBUG_PRINT_NO_ARGS(); - //y[n] = sum ie, y[n] = sumNext - rewriter.create(loc, forOp2.getResult(0) , alloc, iv); - //y[N-1-n] = sum - AffineExpr ExprNminus1minYn = rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0); - AffineMap mapNminus1minYn = AffineMap::get(1, 0 , ExprNminus1minYn); + // y[n] = sum ie, y[n] = sumNext + rewriter.create(loc, forOp2.getResult(0), alloc, iv); + // y[N-1-n] = sum + AffineExpr ExprNminus1minYn = + rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0); + AffineMap mapNminus1minYn = AffineMap::get(1, 0, ExprNminus1minYn); - rewriter.create(loc, forOp2.getResult(0) , alloc, mapNminus1minYn , ValueRange{iv}); + rewriter.create(loc, forOp2.getResult(0), alloc, + mapNminus1minYn, ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); DEBUG_PRINT_NO_ARGS(); - + rewriter.replaceOp(op, alloc); return success(); - } + } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: PaddingOp operations //===----------------------------------------------------------------------===// @@ -1497,93 +1545,97 @@ struct PaddingOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[n] = x[n] 0<=n((*op->result_type_begin())); - - //allocation & deallocation for the result of this 
operation + + // Pseudo-code: + // y[n] = x[n] 0<=n((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - + DEBUG_PRINT_NO_ARGS(); - //construct affine loops for the input + // construct affine loops for the input PaddingOpAdaptor paddingOpAdaptor(operands); Value GetPadLenOperand = op->getOperand(2); - dsp::ConstantOp constantOp3rdArg = GetPadLenOperand.getDefiningOp(); + dsp::ConstantOp constantOp3rdArg = + GetPadLenOperand.getDefiningOp(); - if(!constantOp3rdArg){ + if (!constantOp3rdArg) { llvm::errs() << "Fail:padding op 3rd operand is not constant\n"; return failure(); } - DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();; + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + ; auto elements1 = constant3rdValue.getValues(); float Padlen = elements1[0].getValueAsDouble(); - DEBUG_PRINT_WITH_ARGS("Padlen is" , Padlen); - //first from 0 <= i < N - auto inputType = llvm::dyn_cast(op->getOperand(0).getType()); - int64_t lb = 0 ; - int64_t ub = inputType.getShape()[0]; + DEBUG_PRINT_WITH_ARGS("Padlen is", Padlen); + // first from 0 <= i < N + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + int64_t lb = 0; + int64_t ub = inputType.getShape()[0]; int64_t step = 1; - DEBUG_PRINT_NO_ARGS(); - - //loop from 0 <= i < N - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + DEBUG_PRINT_NO_ARGS(); + + // loop from 0 <= i < N + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - Value InputX = rewriter.create(loc, paddingOpAdaptor.getInput(), ivY); - rewriter.create(loc, InputX, alloc, ivY); + Value InputX = + rewriter.create(loc, paddingOpAdaptor.getInput(), ivY); + rewriter.create(loc, InputX, alloc, ivY); rewriter.setInsertionPointAfter(forOpY); - 
//loop from N to N+PadLen + // loop from N to N+PadLen int64_t lb2 = ub; - int64_t ub2 = ub + (int64_t) Padlen; + int64_t ub2 = ub + (int64_t)Padlen; - affine::AffineForOp forOp2 = rewriter.create(loc, lb2, ub2, step); + affine::AffineForOp forOp2 = + rewriter.create(loc, lb2, ub2, step); auto iv2 = forOp2.getInductionVar(); rewriter.setInsertionPointToStart(forOp2.getBody()); - Value PaddingValue = rewriter.create(loc, paddingOpAdaptor.getPadValue(), ValueRange{}); //getPadValue - rewriter.create(loc, PaddingValue, alloc, iv2); + Value PaddingValue = rewriter.create( + loc, paddingOpAdaptor.getPadValue(), ValueRange{}); // getPadValue + rewriter.create(loc, PaddingValue, alloc, iv2); rewriter.setInsertionPointAfter(forOp2); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf 
%1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -1600,70 +1652,68 @@ struct ReverseInputOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //output = 0 - //iterate for len = 0 to N - // output[i] = a[N-1-i] + // Pseudo-code: + // output = 0 + // iterate for len = 0 to N + // output[i] = a[N-1-i] + + DEBUG_PRINT_NO_ARGS(); - DEBUG_PRINT_NO_ARGS() ; + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); SmallVector steps(tensorType.getRank(), /*Value=*/1); - - //For loop + // For loop ReverseInputOpAdaptor reverseInputOpAdaptor(operands); // DEBUG_PRINT_NO_ARGS() ; - - int64_t lb = 0 ; + + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - //for loop - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + // for loop + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - + // DEBUG_PRINT_NO_ARGS() ; //: N-1 - i - AffineExpr reverseIndxExpr = rewriter.getAffineConstantExpr(ub - 
1) - rewriter.getAffineDimExpr(0); + AffineExpr reverseIndxExpr = + rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0); AffineMap addMap2 = AffineMap::get(1, 0, reverseIndxExpr); - //load x[N-1-i] + // load x[N-1-i] DEBUG_PRINT_NO_ARGS(); - Value loadInputFrmReverseIndx = rewriter.create(loc, reverseInputOpAdaptor.getInput(), addMap2 , ValueRange{iv}); - + Value loadInputFrmReverseIndx = rewriter.create( + loc, reverseInputOpAdaptor.getInput(), addMap2, ValueRange{iv}); - - //store the result at indx i + // store the result at indx i rewriter.create(loc, loadInputFrmReverseIndx, alloc, iv); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // affine.for %arg0 = 0 to 5 { - // %0 = affine.load %alloc_6[%arg0] : memref<5xf64> - // %1 = arith.mulf %0, %0 : f64 - // affine.store %1, %alloc_5[%arg0] : memref<5xf64> - // } + // debug + // forOp1->dump(); + // affine.for %arg0 = 0 to 5 { + // %0 = affine.load %alloc_6[%arg0] : memref<5xf64> + // %1 = arith.mulf %0, %0 : f64 + // affine.store %1, %alloc_5[%arg0] : memref<5xf64> + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: LengthOp operations //===----------------------------------------------------------------------===// @@ -1675,43 +1725,433 @@ struct LengthOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // output = len(input) + + // Pseudo-code: + // output = len(input) DEBUG_PRINT_NO_ARGS(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to 
result1 --not needed for now but for future reference + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - auto inputType = llvm::dyn_cast(op->getOperand(0).getType()); //op->getOperand( + auto inputType = llvm::dyn_cast( + op->getOperand(0).getType()); // op->getOperand( int64_t ub = inputType.getShape()[0]; - Value constantUb = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(ub)); - + Value constantUb = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(ub)); DEBUG_PRINT_WITH_ARGS("\nCheck for index --here"); - //load from X, using 2nd operand as index - // DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt); + // load from X, using 2nd operand as index + // DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt); Value constantIndx0 = rewriter.create(loc, 0); - rewriter.create(loc, constantUb, alloc, ValueRange{constantIndx0}); + rewriter.create(loc, constantUb, alloc, + ValueRange{constantIndx0}); + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - rewriter.replaceOp(op, alloc); - + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FFTRealOp operations +//===----------------------------------------------------------------------===// + +struct FFTRealOpLowering : public ConversionPattern { + FFTRealOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTRealOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + 
ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memrefType = convertTensorToMemRef(tensorType); + + auto alloc_temp_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_temp_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + FFTRealOpAdaptor fftRealOpAdaptor(operands); + + auto input = fftRealOpAdaptor.getLhs(); + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + // alloc memory for reversed and dealloc when not required + auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + // bits needed for bit reversal + auto ubInt = + rewriter.create(loc, rewriter.getI64Type(), ub); + auto ubFloat = + rewriter.create(loc, rewriter.getF64Type(), ubInt); + auto bitsNeededFloat = rewriter.create(loc, ubFloat); + auto bitsNeededInt = rewriter.create( + loc, rewriter.getI64Type(), bitsNeededFloat); + auto bitsNeeded = rewriter.create( + loc, rewriter.getIndexType(), bitsNeededInt); + + // bit reversal + auto bitReversalLoop = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(bitReversalLoop.getBody()); + auto i = bitReversalLoop.getInductionVar(); + auto iInt = rewriter.create(loc, rewriter.getI64Type(), + i); // check here + + // Calculate reversed index + // auto zero = rewriter.create(loc, 0); + auto initialRevIndex = rewriter.create(loc, 0, 64); + + auto innerLoop = rewriter.create(loc, lb, bitsNeeded, step, + ValueRange{initialRevIndex}); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + auto j = innerLoop.getInductionVar(); + auto jInt = + rewriter.create(loc, rewriter.getI64Type(), j); + auto carriedRevIndex = innerLoop.getRegionIterArgs()[0]; + + auto bitMask = rewriter.create( + loc, rewriter.create(loc, 1, 64), jInt); + auto 
iAndMask = rewriter.create(loc, iInt, bitMask); + auto isNonZero = rewriter.create( + loc, arith::CmpIPredicate::ne, iAndMask, + rewriter.create(loc, 0, 64)); + auto shiftAmount = rewriter.create( + loc, rewriter.create(loc, bitsNeeded, j), + rewriter.create(loc, 1)); + auto shiftAmountI64 = rewriter.create( + loc, rewriter.getI64Type(), shiftAmount); + auto bitToSet = rewriter.create( + loc, rewriter.create(loc, 1, 64), shiftAmountI64); + + // Update newRevIndex using a select operation + auto updatedRevIndex = rewriter.create( + loc, carriedRevIndex, + rewriter.create( + loc, isNonZero, bitToSet, + rewriter.create(loc, 0, 64))); + + // Yield the updated value to carry it forward + rewriter.create(loc, ValueRange{updatedRevIndex}); + + // auto revIndex = rewriter.create(loc, + // rewriter.getIndexType(), newRevIndex); + + rewriter.setInsertionPointAfter(innerLoop); + + auto finalRevIndex = innerLoop.getResult(0); + auto revIndex = rewriter.create( + loc, rewriter.getIndexType(), finalRevIndex); + + // Load from alloc_temp and store in alloc_reversed + auto realValue = rewriter.create(loc, input, ValueRange{i}); + auto imagValue = rewriter.create( + loc, llvm::APFloat(0.0), rewriter.getF64Type()); + rewriter.create(loc, realValue, alloc_reversed_real, + ValueRange{revIndex}); + rewriter.create(loc, imagValue, alloc_reversed_imag, + ValueRange{revIndex}); + + rewriter.setInsertionPointAfter(bitReversalLoop); + + // Cooley-Tukey FFT implementation + auto N = tensorType.getShape()[0]; + auto stages = static_cast(std::log2(N)); + auto stagesValue = rewriter.create(loc, stages); + + // Constants for complex arithmetic + auto pi = rewriter.create(loc, llvm::APFloat(M_PI), + rewriter.getF64Type()); + auto neg2 = rewriter.create( + loc, llvm::APFloat(-2.0), rewriter.getF64Type()); + + auto fftLoop = rewriter.create(loc, lb, stagesValue, step); + rewriter.setInsertionPointToStart(fftLoop.getBody()); + auto stage = fftLoop.getInductionVar(); + auto half_size = 
rewriter.create( + loc, rewriter.create(loc, 1), stage); + auto full_size = rewriter.create( + loc, half_size, rewriter.create(loc, 1)); + + auto outerLoop = rewriter.create(loc, lb, ub, full_size); + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto start = outerLoop.getInductionVar(); + + auto butterflyLoop = rewriter.create(loc, lb, half_size, step); + rewriter.setInsertionPointToStart(butterflyLoop.getBody()); + auto k = butterflyLoop.getInductionVar(); + + // Calculate indices for even and odd elements + auto even_index = rewriter.create(loc, start, k); + auto odd_index = rewriter.create(loc, even_index, half_size); + + // Calculate twiddle factor + auto k_i64 = + rewriter.create(loc, rewriter.getI64Type(), k); + auto k_f64 = + rewriter.create(loc, rewriter.getF64Type(), k_i64); + auto full_size_i64 = rewriter.create( + loc, rewriter.getI64Type(), full_size); + auto full_size_f64 = rewriter.create( + loc, rewriter.getF64Type(), full_size_i64); + auto angle_div = rewriter.create(loc, k_f64, full_size_f64); + auto angle_mul = rewriter.create(loc, neg2, angle_div); + auto angle_final = rewriter.create(loc, pi, angle_mul); + auto cos = rewriter.create(loc, angle_final); + auto sin = rewriter.create(loc, angle_final); + + // Load odd value + auto odd_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{odd_index}); + auto odd_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{odd_index}); + + // Multiply by twiddle factor + auto odd_real_cos = rewriter.create(loc, odd_real, cos); + auto odd_imag_sin = rewriter.create(loc, odd_imag, sin); + auto t_real = + rewriter.create(loc, odd_real_cos, odd_imag_sin); + + auto odd_real_sin = rewriter.create(loc, odd_real, sin); + auto odd_imag_cos = rewriter.create(loc, odd_imag, cos); + auto t_imag = + rewriter.create(loc, odd_real_sin, odd_imag_cos); + + // Load even value + auto even_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{even_index}); + auto even_imag = 
rewriter.create(loc, alloc_reversed_imag, + ValueRange{even_index}); + // Butterfly operation + auto new_even_real = rewriter.create(loc, even_real, t_real); + auto new_even_imag = rewriter.create(loc, even_imag, t_imag); + auto new_odd_real = rewriter.create(loc, even_real, t_real); + auto new_odd_imag = rewriter.create(loc, even_imag, t_imag); + + // Store results + rewriter.create(loc, new_even_real, alloc_reversed_real, + ValueRange{even_index}); + rewriter.create(loc, new_even_imag, alloc_reversed_imag, + ValueRange{even_index}); + rewriter.create(loc, new_odd_real, alloc_reversed_real, + ValueRange{odd_index}); + rewriter.create(loc, new_odd_imag, alloc_reversed_imag, + ValueRange{odd_index}); + + // replace the operation with the final value + rewriter.replaceOp(op, alloc_reversed_real); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FFTImagOp operations +//===----------------------------------------------------------------------===// + +struct FFTImagOpLowering : public ConversionPattern { + // constructor takes the mlir context and the operation as inputs + FFTImagOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTImagOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memrefType = convertTensorToMemRef(tensorType); + + auto alloc_temp_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_temp_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + FFTRealOpAdaptor fftRealOpAdaptor(operands); + + auto input = fftRealOpAdaptor.getLhs(); + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + // alloc memory for reversed and dealloc when not required + 
auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + // bits needed for bit reversal + auto ubInt = + rewriter.create(loc, rewriter.getI64Type(), ub); + auto ubFloat = + rewriter.create(loc, rewriter.getF64Type(), ubInt); + auto bitsNeededFloat = rewriter.create(loc, ubFloat); + auto bitsNeededInt = rewriter.create( + loc, rewriter.getI64Type(), bitsNeededFloat); + auto bitsNeeded = rewriter.create( + loc, rewriter.getIndexType(), bitsNeededInt); + + // bit reversal + auto bitReversalLoop = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(bitReversalLoop.getBody()); + auto i = bitReversalLoop.getInductionVar(); + auto iInt = rewriter.create(loc, rewriter.getI64Type(), + i); // check here + + // Calculate reversed index + // auto zero = rewriter.create(loc, 0); + auto initialRevIndex = rewriter.create(loc, 0, 64); + + auto innerLoop = rewriter.create(loc, lb, bitsNeeded, step, + ValueRange{initialRevIndex}); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + auto j = innerLoop.getInductionVar(); + auto jInt = + rewriter.create(loc, rewriter.getI64Type(), j); + auto carriedRevIndex = innerLoop.getRegionIterArgs()[0]; + + auto bitMask = rewriter.create( + loc, rewriter.create(loc, 1, 64), jInt); + auto iAndMask = rewriter.create(loc, iInt, bitMask); + auto isNonZero = rewriter.create( + loc, arith::CmpIPredicate::ne, iAndMask, + rewriter.create(loc, 0, 64)); + auto shiftAmount = rewriter.create( + loc, rewriter.create(loc, bitsNeeded, j), + rewriter.create(loc, 1)); + auto shiftAmountI64 = rewriter.create( + loc, rewriter.getI64Type(), shiftAmount); + auto bitToSet = rewriter.create( + loc, rewriter.create(loc, 1, 64), shiftAmountI64); + + // Update newRevIndex using a select operation + auto updatedRevIndex = rewriter.create( + loc, carriedRevIndex, + rewriter.create( + loc, isNonZero, bitToSet, + rewriter.create(loc, 0, 64))); + 
+ // Yield the updated value to carry it forward + rewriter.create(loc, ValueRange{updatedRevIndex}); + + // auto revIndex = rewriter.create(loc, + // rewriter.getIndexType(), newRevIndex); + + rewriter.setInsertionPointAfter(innerLoop); + + auto finalRevIndex = innerLoop.getResult(0); + auto revIndex = rewriter.create( + loc, rewriter.getIndexType(), finalRevIndex); + + // Load from alloc_temp and store in alloc_reversed + auto realValue = rewriter.create(loc, input, ValueRange{i}); + auto imagValue = rewriter.create( + loc, llvm::APFloat(0.0), rewriter.getF64Type()); + rewriter.create(loc, realValue, alloc_reversed_real, + ValueRange{revIndex}); + rewriter.create(loc, imagValue, alloc_reversed_imag, + ValueRange{revIndex}); + + rewriter.setInsertionPointAfter(bitReversalLoop); + + // Cooley-Tukey FFT implementation + auto N = tensorType.getShape()[0]; + auto stages = static_cast(std::log2(N)); + auto stagesValue = rewriter.create(loc, stages); + + // Constants for complex arithmetic + auto pi = rewriter.create(loc, llvm::APFloat(M_PI), + rewriter.getF64Type()); + auto neg2 = rewriter.create( + loc, llvm::APFloat(-2.0), rewriter.getF64Type()); + + auto fftLoop = rewriter.create(loc, lb, stagesValue, step); + rewriter.setInsertionPointToStart(fftLoop.getBody()); + auto stage = fftLoop.getInductionVar(); + auto half_size = rewriter.create( + loc, rewriter.create(loc, 1), stage); + auto full_size = rewriter.create( + loc, half_size, rewriter.create(loc, 1)); + + auto outerLoop = rewriter.create(loc, lb, ub, full_size); + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto start = outerLoop.getInductionVar(); + + auto butterflyLoop = rewriter.create(loc, lb, half_size, step); + rewriter.setInsertionPointToStart(butterflyLoop.getBody()); + auto k = butterflyLoop.getInductionVar(); + + // Calculate indices for even and odd elements + auto even_index = rewriter.create(loc, start, k); + auto odd_index = rewriter.create(loc, even_index, half_size); + + // 
Calculate twiddle factor + auto k_i64 = + rewriter.create(loc, rewriter.getI64Type(), k); + auto k_f64 = + rewriter.create(loc, rewriter.getF64Type(), k_i64); + auto full_size_i64 = rewriter.create( + loc, rewriter.getI64Type(), full_size); + auto full_size_f64 = rewriter.create( + loc, rewriter.getF64Type(), full_size_i64); + auto angle_div = rewriter.create(loc, k_f64, full_size_f64); + auto angle_mul = rewriter.create(loc, neg2, angle_div); + auto angle_final = rewriter.create(loc, pi, angle_mul); + auto cos = rewriter.create(loc, angle_final); + auto sin = rewriter.create(loc, angle_final); + + // Load odd value + auto odd_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{odd_index}); + auto odd_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{odd_index}); + + // Multiply by twiddle factor + auto odd_real_cos = rewriter.create(loc, odd_real, cos); + auto odd_imag_sin = rewriter.create(loc, odd_imag, sin); + auto t_real = + rewriter.create(loc, odd_real_cos, odd_imag_sin); + + auto odd_real_sin = rewriter.create(loc, odd_real, sin); + auto odd_imag_cos = rewriter.create(loc, odd_imag, cos); + auto t_imag = + rewriter.create(loc, odd_real_sin, odd_imag_cos); + + // Load even value + auto even_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{even_index}); + auto even_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{even_index}); + // Butterfly operation + auto new_even_real = rewriter.create(loc, even_real, t_real); + auto new_even_imag = rewriter.create(loc, even_imag, t_imag); + auto new_odd_real = rewriter.create(loc, even_real, t_real); + auto new_odd_imag = rewriter.create(loc, even_imag, t_imag); + + // Store results + rewriter.create(loc, new_even_real, alloc_reversed_real, + ValueRange{even_index}); + rewriter.create(loc, new_even_imag, alloc_reversed_imag, + ValueRange{even_index}); + rewriter.create(loc, new_odd_real, alloc_reversed_real, + ValueRange{odd_index}); + rewriter.create(loc, new_odd_imag, 
alloc_reversed_imag, + ValueRange{odd_index}); + + // replace the operation with the final value + rewriter.replaceOp(op, alloc_reversed_imag); return success(); } }; @@ -1719,227 +2159,260 @@ struct LengthOpLowering : public ConversionPattern { //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FIRFilterResSymmOptimizedOp operations //===----------------------------------------------------------------------===// -struct FIRFilterResSymmOptimizedOpLowering: public ConversionPattern { - FIRFilterResSymmOptimizedOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::FIRFilterResSymmOptimizedOp::getOperationName(), 1 , ctx) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.FIRFilterResSymmOptimizedOp has 2 operands -- both of type tensor f64 - - //Get the location of FIRFilterResSymmOptimizedOp - auto loc = op->getLoc(); - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation +struct FIRFilterResSymmOptimizedOpLowering : public ConversionPattern { + FIRFilterResSymmOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FIRFilterResSymmOptimizedOp::getOperationName(), + 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.FIRFilterResSymmOptimizedOp has 2 operands -- both of type tensor f64 + + // Get the location of FIRFilterResSymmOptimizedOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //Pseudo-Code - //y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 
to L-1/2 - // N = lenY , M = lenX , L = lenH - //for n=0 to N - // sum = 0, temp =0 - // for k = 0 to L-1/2 - //if 0 <= n-k < M - //val1 = x[n-k] else, val1 = 0 - // if 0 <= n+k - (L-1) < M - // val2 = x[n+k-(L-1)] else, val2 = 0 - //temp = val1 + val2 - // sum = sum + h[k] . temp - - //middle-one - // if 0 <= n - (L-1)/2 < M - // sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)] - // y[n] = sum2 - - - - int64_t lb = 0 ; + // Pseudo-Code + // y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 + // to L-1/2 + // N = lenY , M = lenX , L = lenH + // for n=0 to N + // sum = 0, temp =0 + // for k = 0 to L-1/2 + // if 0 <= n-k < M + // val1 = x[n-k] else, val1 = 0 + // if 0 <= n+k - (L-1) < M + // val2 = x[n+k-(L-1)] else, val2 = 0 + // temp = val1 + val2 + // sum = sum + h[k] . temp + + // middle-one + // if 0 <= n - (L-1)/2 < M + // sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)] + // y[n] = sum2 + + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - affine::AffineForOp forOp1 = rewriter.create(loc, - lb, ub, step ); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); rewriter.setInsertionPointToStart(forOp1.getBody()); auto iv = forOp1.getInductionVar(); - //for n=0 to N - // sum = 0, temp =0 - //get filter len - // auto tensorTypeFilter = llvm::cast((*op->getOperand(1))); //operand_type_end - // auto tensorTypeFilter = llvm::cast((*op->operand_type_begin())); + // for n=0 to N + // sum = 0, temp =0 + // get filter len + // auto tensorTypeFilter = + // llvm::cast((*op->getOperand(1))); //operand_type_end + // auto tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); auto operandIt = op->operand_type_begin(); auto tensorTypeInput = llvm::cast(*operandIt); int64_t ubForInput = tensorTypeInput.getShape()[0]; - //get second operand + // get second operand operandIt = operandIt + 1; - // auto tensorTypeFilter = llvm::cast((*op->operand_type_begin())); //operandIt + // auto 
tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); //operandIt auto tensorTypeFilter = llvm::cast(*operandIt); int64_t ubForFilter = tensorTypeFilter.getShape()[0]; DEBUG_PRINT_NO_ARGS(); // llvm::errs() << "ubForFilter= " << ubForFilter << "\n"; - //create a constant for sum - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - affine::AffineForOp forOp2 = rewriter.create(loc, - lb, ubForFilter/2, step , ValueRange{constant0}); + // create a constant for sum + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ubForFilter / 2, step, ValueRange{constant0}); rewriter.setInsertionPointToStart(forOp2.getBody()); auto iv2 = forOp2.getInductionVar(); - auto getIterArg = forOp2.getBody()->getArgument(1); //forOp1.getIterOperands(); + auto getIterArg = + forOp2.getBody()->getArgument(1); // forOp1.getIterOperands(); DEBUG_PRINT_NO_ARGS(); FIRFilterResSymmOptimizedOpAdaptor firFilterResSymmOpAdaptor(operands); - //if 0 <= n-k < M - //val1 = x[n-k] else, val1 = 0 - //For n-k - //if 0 <= n-k < M or, 0 <= n-k <= M -1 - AffineExpr d0, d1,s0, s1 ; + // if 0 <= n-k < M + // val1 = x[n-k] else, val1 = 0 + // For n-k + // if 0 <= n-k < M or, 0 <= n-k <= M -1 + AffineExpr d0, d1, s0, s1; bindDims(rewriter.getContext(), d0, d1); AffineExpr ExprNMinusK = d0 - d1; - AffineMap mapNMinusK = AffineMap::get(2, 0 , ExprNMinusK); + AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK); // n-k <= M -1 or, n-k-(M-1) <= 0 - bindSymbols(rewriter.getContext() , s0, s1); - Value constantMMinus1Indx = rewriter.create(loc, ubForInput -1); + bindSymbols(rewriter.getContext(), s0, s1); + Value constantMMinus1Indx = + rewriter.create(loc, ubForInput - 1); - AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1 ; - IntegerSet setForIf = IntegerSet::get(2,1, {ExprNMinusK , ExprNMinusKMinusMPlus1}, {false, false}); + AffineExpr ExprNMinusKMinusMPlus1 
= s0 - d0 + d1; + IntegerSet setForIf = IntegerSet::get( + 2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false}); DEBUG_PRINT_NO_ARGS(); - //if 0 <= n-k <= M -1 - //use typeRange too: + // if 0 <= n-k <= M -1 + // use typeRange too: Type floatType = rewriter.getF64Type(); // if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0 - auto ifOp = rewriter.create( loc, TypeRange{floatType}, setForIf , ValueRange{iv,iv2, constantMMinus1Indx} , true /*else*/ ); + auto ifOp = rewriter.create( + loc, TypeRange{floatType}, setForIf, + ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/); rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - //val1 = x[n-k] else, val1 = 0 - //load x[n-k] + // val1 = x[n-k] else, val1 = 0 + // load x[n-k] DEBUG_PRINT_NO_ARGS(); - Value loadInput = rewriter.create(loc, firFilterResSymmOpAdaptor.getLhs(), mapNMinusK , ValueRange{iv,iv2}); + Value loadInput = + rewriter.create(loc, firFilterResSymmOpAdaptor.getLhs(), + mapNMinusK, ValueRange{iv, iv2}); rewriter.create(loc, ValueRange{loadInput}); - //else block + // else block rewriter.setInsertionPointToStart(ifOp.getElseBlock()); - Value const0ForElse = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value const0ForElse = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); rewriter.create(loc, ValueRange{const0ForElse}); rewriter.setInsertionPointAfter(ifOp); - // if 0 <= n+k - (L-1) < M - // val2 = x[n+k-(L-1)] else, val2 = 0 - //val2 lower bound - // AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 - // AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 - //Val2 LowerBound: n+k - (L-1) >= 0 - AffineExpr ExprLowerBoundVal2 = rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) - - rewriter.getAffineConstantExpr(ubForFilter - 1); - //Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie, (M+L-2) - k -n >= 0 - // AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = 
M+L-2 = L-1 + M -1 - AffineExpr ExprUpperBoundVal2 = rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) - rewriter.getAffineDimExpr(1) - - rewriter.getAffineDimExpr(0); - //s0 = L -1 - // Value s0LMin1Indx = rewriter.create(loc, ubForFilter - 1); - // s1 = M + L -2 for val2 upperBound - // Value s1MPlusLPlus2Indx = rewriter.create(loc, ubForInput + ubForFilter - 2); - // Value s1MMin1Indx = rewriter.create(loc, ubForInput - 1); - - IntegerSet setForIf2 = IntegerSet::get(2,0, {ExprLowerBoundVal2 , ExprUpperBoundVal2}, {false, false}); - - auto ifOp2 = rewriter.create( loc, TypeRange{floatType}, setForIf2 , ValueRange{iv,iv2} , true /*else*/ ); + // val2 = x[n+k-(L-1)] else, val2 = 0 + // val2 lower bound + // AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 + // AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 + // Val2 LowerBound: n+k - (L-1) >= 0 + AffineExpr ExprLowerBoundVal2 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) - + rewriter.getAffineConstantExpr(ubForFilter - 1); + // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie, + // (M+L-2) - k -n >= 0 + // AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 + + // M -1 + AffineExpr ExprUpperBoundVal2 = + rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) - + rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0); + // s0 = L -1 + // Value s0LMin1Indx = rewriter.create(loc, + // ubForFilter - 1); s1 = M + L -2 for val2 upperBound Value + // s1MPlusLPlus2Indx = rewriter.create(loc, + // ubForInput + ubForFilter - 2); Value s1MMin1Indx = + // rewriter.create(loc, ubForInput - 1); + + IntegerSet setForIf2 = IntegerSet::get( + 2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false}); + + auto ifOp2 = rewriter.create( + loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2}, + true /*else*/); rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); - //val2 = x[n+k-(L-1)] else, val2 
= 0 + // val2 = x[n+k-(L-1)] else, val2 = 0 AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2); - //load x[n+k-(L-1)] + // load x[n+k-(L-1)] DEBUG_PRINT_NO_ARGS(); - Value loadInputForVal2 = rewriter.create(loc, firFilterResSymmOpAdaptor.getLhs(), addMap2 , ValueRange{iv,iv2 }); + Value loadInputForVal2 = rewriter.create( + loc, firFilterResSymmOpAdaptor.getLhs(), addMap2, ValueRange{iv, iv2}); rewriter.create(loc, ValueRange{loadInputForVal2}); - //else block + // else block rewriter.setInsertionPointToStart(ifOp2.getElseBlock()); - Value const0ForElse2 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value const0ForElse2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); rewriter.create(loc, ValueRange{const0ForElse2}); rewriter.setInsertionPointAfter(ifOp2); - //temp = val1 + val2 - // sum = sum + h[k] . temp + // temp = val1 + val2 + // sum = sum + h[k] . temp + + Value Val1Plus2 = rewriter.create(loc, ifOp.getResult(0), + ifOp2.getResult(0)); - Value Val1Plus2 = rewriter.create(loc, ifOp.getResult(0) , ifOp2.getResult(0)); + // load filter and then mult and then sum + Value loadFilter = rewriter.create( + loc, firFilterResSymmOpAdaptor.getRhs(), iv2); - //load filter and then mult and then sum - Value loadFilter = rewriter.create(loc, firFilterResSymmOpAdaptor.getRhs() , iv2); - - Value filterMulInput = rewriter.create(loc, Val1Plus2 , loadFilter); - Value sumNext = rewriter.create(loc, filterMulInput, getIterArg); + Value filterMulInput = + rewriter.create(loc, Val1Plus2, loadFilter); + Value sumNext = + rewriter.create(loc, filterMulInput, getIterArg); rewriter.create(loc, ValueRange{sumNext}); // rewriter.setInsertionPointToEnd(forOp2->getBlock()); rewriter.setInsertionPointAfter(forOp2); DEBUG_PRINT_NO_ARGS(); - // Middle - point - // if 0 <= n - (L-1)/2 < M - // sum2 = sum + h[L-1/2] . 
x[n-(L-1)/2)] - // y[n] = sum2 + // Middle - point + // if 0 <= n - (L-1)/2 < M + // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)] + // y[n] = sum2 // if 0 <= n - (L-1)/2 < M // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2 // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2 - int64_t midFilterLen = (ubForFilter - 1)/2; - AffineExpr ExprLowerBoundVal3 = rewriter.getAffineDimExpr(0) - - rewriter.getAffineConstantExpr(midFilterLen); - //UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n - AffineExpr ExprUpperBoundVal3 = rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) - - rewriter.getAffineDimExpr(0); + int64_t midFilterLen = (ubForFilter - 1) / 2; + AffineExpr ExprLowerBoundVal3 = + rewriter.getAffineDimExpr(0) - + rewriter.getAffineConstantExpr(midFilterLen); + // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n + AffineExpr ExprUpperBoundVal3 = + rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) - + rewriter.getAffineDimExpr(0); AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3); - - IntegerSet setForIf3 = IntegerSet::get(1,0, {ExprLowerBoundVal3 , ExprUpperBoundVal3}, {false, false}); - auto ifOp3 = rewriter.create( loc, TypeRange{floatType}, setForIf3 , ValueRange{iv} , true /*else*/ ); + IntegerSet setForIf3 = IntegerSet::get( + 1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false}); + + auto ifOp3 = rewriter.create( + loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/); rewriter.setInsertionPointToStart(ifOp3.getThenBlock()); - //val3 = x[n-(L-1)/2)] else, val3 = 0 - //load x[n-(L-1)/2)] + // val3 = x[n-(L-1)/2)] else, val3 = 0 + // load x[n-(L-1)/2)] DEBUG_PRINT_NO_ARGS(); - Value loadInputForVal3 = rewriter.create(loc, firFilterResSymmOpAdaptor.getLhs(), addMap3 , ValueRange{iv}); + Value loadInputForVal3 = rewriter.create( + loc, firFilterResSymmOpAdaptor.getLhs(), addMap3, ValueRange{iv}); rewriter.create(loc, ValueRange{loadInputForVal3}); - //else block + // else block 
rewriter.setInsertionPointToStart(ifOp3.getElseBlock()); - Value const0ForElse3 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value const0ForElse3 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); rewriter.create(loc, ValueRange{const0ForElse3}); rewriter.setInsertionPointAfter(ifOp3); - //sum2 = sum + h[L-1/2] . x[n-(L-1)/2)] - // y[n] = sum2 - //load filter and then mult and then sum - Value midFilterLenIndx = rewriter.create(loc, midFilterLen); - - Value loadFilterMid = rewriter.create(loc, firFilterResSymmOpAdaptor.getRhs() , midFilterLenIndx); - Value filterMulInput2 = rewriter.create(loc, ifOp3.getResult(0) , loadFilterMid); - Value sum2 = rewriter.create(loc, filterMulInput2, forOp2.getResult(0)); + // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)] + // y[n] = sum2 + // load filter and then mult and then sum + Value midFilterLenIndx = + rewriter.create(loc, midFilterLen); + + Value loadFilterMid = rewriter.create( + loc, firFilterResSymmOpAdaptor.getRhs(), midFilterLenIndx); + Value filterMulInput2 = + rewriter.create(loc, ifOp3.getResult(0), loadFilterMid); + Value sum2 = rewriter.create(loc, filterMulInput2, + forOp2.getResult(0)); // rewriter.create(loc, forOp2.getResult(0) , alloc, iv); - rewriter.create(loc, sum2 , alloc, iv); + rewriter.create(loc, sum2, alloc, iv); rewriter.setInsertionPointAfter(forOp1); DEBUG_PRINT_NO_ARGS(); // ifOp->dump(); rewriter.replaceOp(op, alloc); return success(); - } + } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: RunLenEncodingOp operations - //===----------------------------------------------------------------------===// #define TryWhileLoop 0 #define TryLoadStoreForWhile 0 -#define TryPassIterIndex 0 //Not working +#define TryPassIterIndex 0 // Not working #define TryScf 0 -#define TryRLE 1 +#define TryRLE 1 struct RunLenEncodingOpLowering : public ConversionPattern { 
RunLenEncodingOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::RunLenEncodingOp::getOperationName(), 1, ctx) {} @@ -1948,351 +2421,388 @@ struct RunLenEncodingOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_rle[i] = x[i] , if x[i] != x[i-1] , 1<=i 1 ie, for last element - // store the count value at k + N/2 + + // Pseudo-code: + // y_rle[i] = x[i] , if x[i] != x[i-1] , 1<=i 1 ie, for last element + // store the count value at k + N/2 DEBUG_PRINT_NO_ARGS(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); auto tensorType1 = RankedTensorType::get({1}, rewriter.getIndexType()); - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); auto allocK = insertAllocAndDealloc(memRefType2, loc, rewriter); - // count = 1 , y[0] = x[0] , + // count = 1 , y[0] = x[0] , // loop from 0 to len RunLenEncodingOpAdaptor runLenEncodingAdaptor(operands); DEBUG_PRINT_NO_ARGS(); - - - - // len/2,k = n ie, len/2 - int64_t lb = 1 ; - int64_t N = tensorType.getShape()[0]; - int64_t ub = N/2 ; //output len is twice the input len + int64_t lb = 1; + int64_t N = tensorType.getShape()[0]; + int64_t ub = N / 2; // output len is twice the input len int64_t step = 1; int64_t k = 0; int64_t lb1 = 0; - Value const0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); + Value const0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //init all output memory with zero - affine::AffineForOp forOp1 = rewriter.create(loc, lb1, N, step); + // 
init all output memory with zero + affine::AffineForOp forOp1 = + rewriter.create(loc, lb1, N, step); DEBUG_PRINT_NO_ARGS(); auto iv1 = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - rewriter.create(loc,const0, alloc, iv1 ); + rewriter.create(loc, const0, alloc, iv1); rewriter.setInsertionPointAfter(forOp1); DEBUG_PRINT_NO_ARGS(); - //load from X, + // load from X, Value constantIndx0 = rewriter.create(loc, 0); - Value inputX0 = rewriter.create(loc, runLenEncodingAdaptor.getInput(), ValueRange{constantIndx0}); - rewriter.create(loc, inputX0, alloc, ValueRange{constantIndx0}); - - -#if TryRLE - - // Initial count and k values as SSA values, count = 1 , k = 0 - // for i=1 to len/2 - // load prev = a[i-1] , current = a[i] - // if prev == current - // count = count + 1 - // else - // store count at index k + N/2 - // y[k + N/2] = count - // k = k +1 - // y[k] = current - // count = 1 - //for last element - // store the count value at k + N/2 - //y[k + N/2] = count - Value countVal = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); + Value inputX0 = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), ValueRange{constantIndx0}); + rewriter.create(loc, inputX0, alloc, + ValueRange{constantIndx0}); + +#if TryRLE + + // Initial count and k values as SSA values, count = 1 , k = 0 + // for i=1 to len/2 + // load prev = a[i-1] , current = a[i] + // if prev == current + // count = count + 1 + // else + // store count at index k + N/2 + // y[k + N/2] = count + // k = k +1 + // y[k] = current + // count = 1 + // for last element + // store the count value at k + N/2 + // y[k + N/2] = count + Value countVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); Value Indx0 = rewriter.create(loc, 0); Value IndxNBy2 = rewriter.create(loc, ub); Value kVal = rewriter.create(loc, k); - rewriter.create(loc, kVal, allocK , ValueRange{Indx0}); - + rewriter.create(loc, kVal, allocK, 
ValueRange{Indx0}); + Type floatType = rewriter.getF64Type(); // Type indexType = rewriter.getIndexType(); - //// // for i=1 to len/2 - // load prev = a[i-1] , current = a[i] - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{countVal}); + //// // for i=1 to len/2 + // load prev = a[i-1] , current = a[i] + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step, ValueRange{countVal}); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); DEBUG_PRINT_NO_ARGS(); auto countArg = forOpY.getRegionIterArgs()[0]; - - Value current = rewriter.create(loc, runLenEncodingAdaptor.getInput(), ivY ); + + Value current = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), ivY); // AffineExpr d0; bindDims(rewriter.getContext(), d0); AffineExpr ExprIMinus1 = d0 - rewriter.getAffineConstantExpr(1); - AffineMap mapExprIMinus1 = AffineMap::get(1,0, ExprIMinus1); - Value prev = rewriter.create(loc, runLenEncodingAdaptor.getInput(),mapExprIMinus1, ValueRange{ivY} ); + AffineMap mapExprIMinus1 = AffineMap::get(1, 0, ExprIMinus1); + Value prev = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), mapExprIMinus1, ValueRange{ivY}); DEBUG_PRINT_NO_ARGS(); - // for i=1 to len/2 - // load prev = a[i-1] , current = a[i] - // if prev == current - // count = count + 1 - // else - // store count at index k + N/2 - // y[k + N/2] = count - // k = k +1 - // y[k] = current - // count = 1 - //for last element - // store the count value at k + N/2 - //y[k + N/2] = count - // TypeRange typeRange = TypeRange{rewriter.getF64Type() , rewriter.getIndexType()}; - // TypeRange typeRange = TypeRange({rewriter.getF64Type(), rewriter.getIndexType()}); - - // auto ifOp = rewriter.create(loc, TypeRange{rewriter.getF64Type(), rewriter.getIndexType()}, rewriter.create(loc, arith::CmpFPredicate::OEQ, prev, current), true, true); - auto CmpPrevCurrent = rewriter.create(loc, arith::CmpFPredicate::OEQ, prev, current); - - - //create if 
block with else condition + // for i=1 to len/2 + // load prev = a[i-1] , current = a[i] // if prev == current - // count = count + 1 - // auto ifOp = rewriter.create(loc, TypeRange{floatType , indexType}, CmpPrevCurrent , true /* else=1 */); - auto ifOp = rewriter.create(loc, TypeRange{floatType }, CmpPrevCurrent , true /* else=1 */); + // count = count + 1 + // else + // store count at index k + N/2 + // y[k + N/2] = count + // k = k +1 + // y[k] = current + // count = 1 + // for last element + // store the count value at k + N/2 + // y[k + N/2] = count + // TypeRange typeRange = TypeRange{rewriter.getF64Type() , + // rewriter.getIndexType()}; TypeRange typeRange = + // TypeRange({rewriter.getF64Type(), rewriter.getIndexType()}); + + // auto ifOp = rewriter.create(loc, + // TypeRange{rewriter.getF64Type(), rewriter.getIndexType()}, + // rewriter.create(loc, arith::CmpFPredicate::OEQ, prev, + // current), true, true); + auto CmpPrevCurrent = rewriter.create( + loc, arith::CmpFPredicate::OEQ, prev, current); + + // create if block with else condition + // if prev == current + // count = count + 1 + // auto ifOp = rewriter.create(loc, TypeRange{floatType , + // indexType}, CmpPrevCurrent , true /* else=1 */); + auto ifOp = rewriter.create(loc, TypeRange{floatType}, + CmpPrevCurrent, true /* else=1 */); rewriter.setInsertionPointToStart(ifOp.thenBlock()); DEBUG_PRINT_NO_ARGS(); - + auto CountPlusOne = rewriter.create(loc, countArg, countVal); DEBUG_PRINT_NO_ARGS(); - rewriter.create(loc, ValueRange{CountPlusOne} ); - // else - // store count at index k + N/2 - // y[k + N/2] = count - // k = k +1 - // y[k] = current - // count = 1 + rewriter.create(loc, ValueRange{CountPlusOne}); + // else + // store count at index k + N/2 + // y[k + N/2] = count + // k = k +1 + // y[k] = current + // count = 1 rewriter.setInsertionPointToStart(ifOp.elseBlock()); - // // out[k + N/2]= count - Value loadKVal = rewriter.create(loc, allocK, ValueRange{Indx0} ); + // // out[k + N/2]= 
count + Value loadKVal = + rewriter.create(loc, allocK, ValueRange{Indx0}); - Value kPlusNBy2 = rewriter.create(loc,rewriter.getIndexType(), loadKVal, IndxNBy2); + Value kPlusNBy2 = rewriter.create( + loc, rewriter.getIndexType(), loadKVal, IndxNBy2); rewriter.create(loc, countArg, alloc, kPlusNBy2); - //k = k+1 + // k = k+1 Value Indx1 = rewriter.create(loc, 1); - Value kPlusOne = rewriter.create(loc,rewriter.getIndexType(), loadKVal, Indx1); + Value kPlusOne = rewriter.create( + loc, rewriter.getIndexType(), loadKVal, Indx1); rewriter.create(loc, kPlusOne, allocK, ValueRange{Indx0}); // y[k + 1] = current rewriter.create(loc, current, alloc, kPlusOne); - + DEBUG_PRINT_NO_ARGS(); rewriter.create(loc, ValueRange{countVal}); rewriter.setInsertionPointAfter(ifOp); // ifOp.dump(); Value countRes = ifOp.getResult(0); - - - rewriter.create(loc, ValueRange{countRes }); + + rewriter.create(loc, ValueRange{countRes}); rewriter.setInsertionPointAfter(forOpY); // forOpY->dump(); - //check for last countArg value if countArg > 1, then store it at last + // check for last countArg value if countArg > 1, then store it at last Value finalCountArg = forOpY.getResult(0); - Value finalkArg = rewriter.create(loc, allocK, ValueRange{Indx0} ); - + Value finalkArg = + rewriter.create(loc, allocK, ValueRange{Indx0}); + // //if count>1 ,then store count at index k + N/2 - // auto ifOp1 = rewriter.create(loc, CmpCountGt1 , false /* else=0 */); + // auto ifOp1 = rewriter.create(loc, CmpCountGt1 , false /* + // else=0 */); // rewriter.setInsertionPointToStart(ifOp1.thenBlock()); DEBUG_PRINT_NO_ARGS(); - Value finalkPlusNBy2 = rewriter.create(loc,rewriter.getIndexType(), finalkArg, IndxNBy2); + Value finalkPlusNBy2 = rewriter.create( + loc, rewriter.getIndexType(), finalkArg, IndxNBy2); rewriter.create(loc, finalCountArg, alloc, finalkPlusNBy2); DEBUG_PRINT_NO_ARGS(); - // rewriter.setInsertionPointAfter(ifOp1); + // rewriter.setInsertionPointAfter(ifOp1); #endif #if TryPassIterIndex - 
//store k at its location & load and do addition to 1 and - Value kVal = rewriter.create(loc, ub-1); + // store k at its location & load and do addition to 1 and + Value kVal = rewriter.create(loc, ub - 1); Value Indx0 = rewriter.create(loc, 0); - auto kValStore = rewriter.create(loc, kVal, alloc2 , ValueRange{Indx0}); - + auto kValStore = + rewriter.create(loc, kVal, alloc2, ValueRange{Indx0}); + Type floatType = rewriter.getF64Type(); Type indexType = rewriter.getIndexType(); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{inputX0, kVal}); - // affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{countVal, kVal}); + affine::AffineForOp forOpY = rewriter.create( + loc, lb, ub, step, ValueRange{inputX0, kVal}); + // affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, + // step, ValueRange{countVal, kVal}); auto ivY = forOpY.getInductionVar(); auto prev = forOpY.getRegionIterArgs()[0]; auto kArg = forOpY.getRegionIterArgs()[1]; rewriter.setInsertionPointToStart(forOpY.getBody()); - - Value Indx00 = rewriter.create(loc, 0); - Value current = rewriter.create(loc, runLenEncodingAdaptor.getInput(), ivY ); - Value loadKVal = rewriter.create(loc, alloc2, ValueRange{Indx0} ); - Value const1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); + + Value Indx00 = rewriter.create(loc, 0); + Value current = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), ivY); + Value loadKVal = + rewriter.create(loc, alloc2, ValueRange{Indx0}); + Value const1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); Value currentPlus1 = rewriter.create(loc, prev, const1); - auto CmpPrevCurrent = rewriter.create(loc, arith::CmpFPredicate::OGE, current , const1 ); + auto CmpPrevCurrent = rewriter.create( + loc, arith::CmpFPredicate::OGE, current, const1); + // create if block with else condition + // if prev == current, count++ + auto ifOp = rewriter.create(loc, TypeRange{floatType}, 
+ CmpPrevCurrent, true /* else=1 */); + // auto ifOp = rewriter.create(loc, CmpPrevCurrent , true /* + // else=1 */); - //create if block with else condition - // if prev == current, count++ - auto ifOp = rewriter.create(loc, TypeRange{floatType }, CmpPrevCurrent , true /* else=1 */); - // auto ifOp = rewriter.create(loc, CmpPrevCurrent , true /* else=1 */); - rewriter.setInsertionPointToStart(ifOp.thenBlock()); DEBUG_PRINT_NO_ARGS(); - //store count at N+i - // Value countPlus1 = rewriter.create(loc, countArg, countVal); + // store count at N+i + // Value countPlus1 = rewriter.create(loc, countArg, + // countVal); Value Indx1 = rewriter.create(loc, 1); - Value kPlusOne = rewriter.create(loc, rewriter.getIndexType() , kArg , Indx1); + Value kPlusOne = rewriter.create( + loc, rewriter.getIndexType(), kArg, Indx1); rewriter.create(loc, current, alloc, ValueRange{kArg}); - // rewriter.create(loc, current, alloc, ValueRange{kPlusOne}); + // rewriter.create(loc, current, alloc, + // ValueRange{kPlusOne}); rewriter.create(loc, current, alloc, ValueRange{kPlusOne}); rewriter.create(loc, kPlusOne, alloc2, ValueRange{Indx0}); rewriter.create(loc, ValueRange{currentPlus1}); rewriter.setInsertionPointToStart(ifOp.elseBlock()); rewriter.create(loc, currentPlus1, alloc, ValueRange{ivY}); - //yield the values - // rewriter.create(loc, ValueRange{kPlusOne }); + // yield the values + // rewriter.create(loc, ValueRange{kPlusOne }); rewriter.create(loc, ValueRange{currentPlus1}); rewriter.setInsertionPointAfter(ifOp); Value countRes = ifOp.getResult(0); - // Value kRes = ifOp.getResult(1); + // Value kRes = ifOp.getResult(1); // rewriter.create(loc, ValueRange{countRes,kRes }); - rewriter.create(loc, ValueRange{countRes, Indx00 }); + rewriter.create(loc, ValueRange{countRes, Indx00}); rewriter.setInsertionPointAfter(forOpY); - #endif #if TryWhileLoop auto kVal = rewriter.create(loc, k); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{kVal}); + 
affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step, ValueRange{kVal}); auto ivY = forOpY.getInductionVar(); // auto countArg = forOpY.getRegionIterArgs()[0]; auto kArg = forOpY.getRegionIterArgs()[0]; rewriter.setInsertionPointToStart(forOpY.getBody()); - - Value current = rewriter.create(loc, runLenEncodingAdaptor.getInput(), ivY ); - //store count at N+i - // Value countPlus1 = rewriter.create(loc, countArg, countVal); + Value current = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), ivY); + + // store count at N+i + // Value countPlus1 = rewriter.create(loc, countArg, + // countVal); Value Indx1 = rewriter.create(loc, 1); - Value kPlusOne = rewriter.create(loc,rewriter.getIndexType(), kArg, Indx1); - // Value constInt1 = rewriter.create(loc,rewriter.getI64IntegerAttr(1), rewriter.getI64Type() ); + Value kPlusOne = rewriter.create( + loc, rewriter.getIndexType(), kArg, Indx1); + // Value constInt1 = + // rewriter.create(loc,rewriter.getI64IntegerAttr(1), + // rewriter.getI64Type() ); - // Value kPlusOneIndex = rewriter.create(loc, rewriter.getIndexType(), kPlusOne); + // Value kPlusOneIndex = rewriter.create(loc, + // rewriter.getIndexType(), kPlusOne); // kPlusOne.dump(); - // Value kArg1 = rewriter.create(loc, rewriter.getIndexType(), kArg); + // Value kArg1 = rewriter.create(loc, + // rewriter.getIndexType(), kArg); - // rewriter.create(loc, countPlus1, alloc, mapExprNPlusI, ValueRange{kPlusOne}); - // rewriter.create(loc, countPlus1, alloc, ValueRange{kArg}); - // Store the result + // rewriter.create(loc, countPlus1, alloc, mapExprNPlusI, + // ValueRange{kPlusOne}); rewriter.create(loc, countPlus1, + // alloc, ValueRange{kArg}); Store the result // rewriter.create(loc, current, alloc, ivY); //working rewriter.create(loc, current, alloc, ValueRange{kArg}); - //yield the values - rewriter.create(loc, ValueRange{kPlusOne }); + // yield the values + rewriter.create(loc, ValueRange{kPlusOne}); // rewriter.create(loc, ValueRange{countPlus1 
, kPlusOne}); rewriter.setInsertionPointAfter(forOpY); #endif #if TryLoadStoreForWhile - //store k at its location & load and do addition to 1 and - Value kVal = rewriter.create(loc, ub-1); + // store k at its location & load and do addition to 1 and + Value kVal = rewriter.create(loc, ub - 1); Value Indx0 = rewriter.create(loc, 0); - auto kValStore = rewriter.create(loc, kVal, alloc2 , ValueRange{Indx0}); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{inputX0}); + auto kValStore = + rewriter.create(loc, kVal, alloc2, ValueRange{Indx0}); + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step, ValueRange{inputX0}); auto ivY = forOpY.getInductionVar(); auto prev = forOpY.getRegionIterArgs()[0]; // auto kArg = forOpY.getRegionIterArgs()[0]; rewriter.setInsertionPointToStart(forOpY.getBody()); - - Value Indx00 = rewriter.create(loc, 0); - Value current = rewriter.create(loc, runLenEncodingAdaptor.getInput(), ivY ); - Value loadKVal = rewriter.create(loc, alloc2, ValueRange{Indx0} ); - Value const1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); + + Value Indx00 = rewriter.create(loc, 0); + Value current = rewriter.create( + loc, runLenEncodingAdaptor.getInput(), ivY); + Value loadKVal = + rewriter.create(loc, alloc2, ValueRange{Indx0}); + Value const1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); Value currentPlus1 = rewriter.create(loc, prev, const1); - auto CmpPrevCurrent = rewriter.create(loc, arith::CmpFPredicate::OGE, current , const1 ); + auto CmpPrevCurrent = rewriter.create( + loc, arith::CmpFPredicate::OGE, current, const1); + // create if block with else condition + // if prev == current, count++ + // auto ifOp = rewriter.create(loc, TypeRange{floatType , + // indexType}, CmpPrevCurrent , true /* else=1 */); + auto ifOp = + rewriter.create(loc, CmpPrevCurrent, true /* else=1 */); - //create if block with else condition - // if prev == current, count++ - // 
auto ifOp = rewriter.create(loc, TypeRange{floatType , indexType}, CmpPrevCurrent , true /* else=1 */); - auto ifOp = rewriter.create(loc, CmpPrevCurrent , true /* else=1 */); - rewriter.setInsertionPointToStart(ifOp.thenBlock()); DEBUG_PRINT_NO_ARGS(); - //store count at N+i - // Value countPlus1 = rewriter.create(loc, countArg, countVal); + // store count at N+i + // Value countPlus1 = rewriter.create(loc, countArg, + // countVal); Value Indx1 = rewriter.create(loc, 1); - Value kPlusOne = rewriter.create(loc, rewriter.getIndexType() , loadKVal , Indx1); + Value kPlusOne = rewriter.create( + loc, rewriter.getIndexType(), loadKVal, Indx1); rewriter.create(loc, current, alloc, ValueRange{ivY}); - // rewriter.create(loc, current, alloc, ValueRange{kPlusOne}); + // rewriter.create(loc, current, alloc, + // ValueRange{kPlusOne}); rewriter.create(loc, current, alloc, ValueRange{kPlusOne}); rewriter.create(loc, kPlusOne, alloc2, ValueRange{Indx0}); rewriter.setInsertionPointToStart(ifOp.elseBlock()); rewriter.create(loc, currentPlus1, alloc, ValueRange{ivY}); - //yield the values - // rewriter.create(loc, ValueRange{kPlusOne }); + // yield the values + // rewriter.create(loc, ValueRange{kPlusOne }); rewriter.setInsertionPointAfter(ifOp); - rewriter.create(loc, ValueRange{current }); + rewriter.create(loc, ValueRange{current}); rewriter.setInsertionPointAfter(forOpY); - #endif - //debug - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - + // debug + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> + rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -2303,26 +2813,29 @@ struct RunLenEncodingOpLowering : public ConversionPattern { struct LMSFilterResponseOpLowering : public ConversionPattern { LMSFilterResponseOpLowering(MLIRContext 
*ctx) - : ConversionPattern(dsp::LMSFilterResponseOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::LMSFilterResponseOp::getOperationName(), 1, + ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // for (int n = 0; n < NUM_SAMPLES; n++) { - // // Calculate the filter output y[n] - // y[n] = 0; - // for (int i = 0; i < FILTER_LENGTH; i++) { - // if (n - i >= 0) { // affine if - // y[n] = y[n] + (w[i] * x[n - i]); - // } - // } - + + // Pseudo-code: + // for (int n = 0; n < NUM_SAMPLES; n++) { + // // we also need to initialize w + // // w[n] = 0; + // // Calculate the filter output y[n] + // y[n] = 0; + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { // affine if + // y[n] = y[n] + (w[i] * x[n - i]); + // } + // } + // // Calculate the error e[n] // e[n] = d[n] - y[n]; - + // // Update the filter weights w[i] // for (int i = 0; i < FILTER_LENGTH; i++) { // if (n - i >= 0) { @@ -2331,130 +2844,137 @@ struct LMSFilterResponseOpLowering : public ConversionPattern { // } // } - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); LMSFilterOpAdaptor lmsFilterAdaptor(operands); - // Value alpha = rewriter.create(loc, rewriter.getF64Type(), + // Value alpha = rewriter.create(loc, + // rewriter.getF64Type(), 
// rewriter.getF64FloatAttr(1)); - Value zeroval = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - Value mu = rewriter.create(loc, lmsFilterAdaptor.getMu()); - - //For loop -- iterate from 0 to last - int64_t lb = 0 ; + Value zeroval = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value mu = rewriter.create(loc, lmsFilterAdaptor.getMu()); + + // For loop -- iterate from 0 to last + int64_t lb = 0; int64_t numSamples = tensorType.getShape()[0]; int64_t step = 1; Value GetFilterLOp = op->getOperand(3); - dsp::ConstantOp constantOp3rdArg = GetFilterLOp.getDefiningOp(); - DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();; + dsp::ConstantOp constantOp3rdArg = + GetFilterLOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + ; auto elements1 = constant3rdValue.getValues(); float filterlenval = elements1[0].getValueAsDouble(); - auto FilterLength = (uint64_t) filterlenval; + auto FilterLength = (uint64_t)filterlenval; auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type()); auto wAlloc = rewriter.create(loc, yMemRefType); - affine::AffineForOp forOp1 = rewriter.create(loc, lb, numSamples, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, numSamples, step); auto iv = forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); - //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) AffineExpr d0, d1, s0; bindDims(rewriter.getContext(), d0, d1); - // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; + // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - + // rewriter.getAffineDimExpr(1); //d0 - d1; AffineExpr ExprForXSlice = d0 - d1; AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice); IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false}); + // 
w[n] = 0; // y[n] = 0; // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); // Allocate and initialize array for y // Value constantIndx0 = rewriter.create(loc, 0); - + + rewriter.create(loc, zeroval, wAlloc, ValueRange{iv}); rewriter.create(loc, zeroval, alloc, ValueRange{iv}); - affine::AffineForOp forOp2 = rewriter.create(loc, lb, FilterLength, step); + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); auto iv2 = forOp2.getInductionVar(); rewriter.setInsertionPointToStart(forOp2.getBody()); - auto ifOp = rewriter.create( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ ); + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - Value inputX = rewriter.create(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter, - ValueRange{iv,iv2}); - Value w = rewriter.create(loc, wAlloc, - ValueRange{iv2}); //memRefType + Value inputX = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType - Value wmulx = rewriter.create(loc, inputX ,w ); + Value wmulx = rewriter.create(loc, inputX, w); Value ybefore = rewriter.create(loc, alloc, ValueRange{iv}); Value sumNext = rewriter.create(loc, wmulx, ybefore); rewriter.create(loc, sumNext, alloc, ValueRange{iv}); rewriter.setInsertionPointAfter(ifOp); rewriter.setInsertionPointAfter(forOp2); - // get e[n] = d[n] - y[n] - Value desiredX = rewriter.create(loc, lmsFilterAdaptor.getRhs(), ValueRange{iv}); - Value ynew = rewriter.create(loc, alloc, - ValueRange{iv}); - - Value err = rewriter.create(loc, desiredX ,ynew ); + Value desiredX = rewriter.create( + loc, lmsFilterAdaptor.getRhs(), ValueRange{iv}); + Value ynew = rewriter.create(loc, alloc, ValueRange{iv}); - + Value err = rewriter.create(loc, desiredX, ynew); - affine::AffineForOp forOp3 = rewriter.create(loc, lb, FilterLength, step); - auto iv3 = 
forOp3.getInductionVar(); - - rewriter.setInsertionPointToStart(forOp3.getBody()); - - auto ifOp2 = rewriter.create( loc, set1 , ValueRange{iv,iv3} , false /*no else*/ ); - rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); - - Value inputX2 = rewriter.create(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter, - ValueRange{iv,iv3}); - - Value Prevw2 = rewriter.create(loc, wAlloc, - ValueRange{iv3}); - - // f(u(n),e(n),μ)=μe(n)u∗(n) - Value mul1 = rewriter.create(loc, err ,inputX2 ); - Value mul2 = rewriter.create(loc, mu ,mul1 ); - - // FInal w[n] - Value answer = rewriter.create(loc, Prevw2 ,mul2 ); - - rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); - rewriter.setInsertionPointAfter(ifOp2); - rewriter.setInsertionPointAfter(forOp3); - - rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{iv, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv3}); + + Value Prevw2 = rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + + rewriter.setInsertionPointAfter(forOp1); + // debug + // forOp1->dump(); rewriter.replaceOp(op, alloc); - + return success(); } -}; +}; //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: Quantization operations 
//===----------------------------------------------------------------------===// - struct QuantizationOpLowering : public ConversionPattern { QuantizationOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::QuantizationOp::getOperationName(), 1, ctx) {} @@ -2463,103 +2983,112 @@ struct QuantizationOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_quantized[i] = Round(a[i] - min) / step) * step + min - // where, step = (max-min)/ NoOfLevels , NoOLevels = 2^NoOfBits - // steps: - // 1) given NoOfLevels - // 2) Then calculate stepSize = (Max-Min)/NoOfLevels - // 3) iterate for all the elements and calculate quantizedCoeff + // Pseudo-code: + // y_quantized[i] = Round(a[i] - min) / step) * step + min + // where, step = (max-min)/ NoOfLevels , NoOLevels = 2^NoOfBits + + // steps: + // 1) given NoOfLevels + // 2) Then calculate stepSize = (Max-Min)/NoOfLevels + // 3) iterate for all the elements and calculate quantizedCoeff - // GetLevelForVal = (a[i] - min)/step - // RoundedVal = arith.FPToSI(GetLevelForVal) - // QuantVal = RoundedVal * step + min_val + // GetLevelForVal = (a[i] - min)/step + // RoundedVal = arith.FPToSI(GetLevelForVal) + // QuantVal = RoundedVal * step + min_val DEBUG_PRINT_NO_ARGS(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //create another memory location for getting NoOfLevels + // create another memory location for getting NoOfLevels - // Value constant1 = rewriter.create(loc, rewriter.getF64Type(), + // Value constant1 = 
rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(1)); - - //1) Then calculate stepSize = (Max-Min)/NoOfLevels - + // 1) Then calculate stepSize = (Max-Min)/NoOfLevels + QuantizationOpAdaptor quantizationAdaptor(operands); DEBUG_PRINT_NO_ARGS(); Value getMaxMemref = quantizationAdaptor.getMax(); - auto getMax = rewriter.create(loc, getMaxMemref, ValueRange{}); + auto getMax = + rewriter.create(loc, getMaxMemref, ValueRange{}); Value getMinMemref = quantizationAdaptor.getMin(); - auto getMin = rewriter.create(loc, getMinMemref, ValueRange{}); + auto getMin = + rewriter.create(loc, getMinMemref, ValueRange{}); Value getNLevelsMemref = quantizationAdaptor.getNlevels(); - auto getNlevels = rewriter.create(loc, getNLevelsMemref, ValueRange{}); - - Value MaxMinusMin = rewriter.create(loc, getMax ,getMin ); - Value StepSize = rewriter.create(loc, MaxMinusMin, getNlevels); + auto getNlevels = + rewriter.create(loc, getNLevelsMemref, ValueRange{}); + Value MaxMinusMin = rewriter.create(loc, getMax, getMin); + Value StepSize = + rewriter.create(loc, MaxMinusMin, getNlevels); // iterate for all the elements and calculate quantizedCoeff // GetLevelForVal = (a[i] - min)/step // RoundedVal = arith.FPToSI(GetLevelForVal) // QuantVal = RoundedVal * step + min_val - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - - //for loop from 0 to len - // use iter_arg as passing value for the loop - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + + // for loop from 0 to len + // use iter_arg as passing value for the loop + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //Use iter_arg for taking prev_val - //Get iter_arg - + // Use iter_arg for taking prev_val + // Get iter_arg + // GetLevelForVal = (a[i] - min)/step - - // 
QuantVal = RoundedVal * step + min_val - Value inputX = rewriter.create(loc, quantizationAdaptor.getInput(), ivY ); - Value inputMinusMin = rewriter.create(loc, inputX, getMin ); - Value aMinusMinDivStep = rewriter.create(loc, inputMinusMin, StepSize ); + // QuantVal = RoundedVal * step + min_val + + Value inputX = + rewriter.create(loc, quantizationAdaptor.getInput(), ivY); + Value inputMinusMin = rewriter.create(loc, inputX, getMin); + Value aMinusMinDivStep = + rewriter.create(loc, inputMinusMin, StepSize); // RoundedVal = arith.FPToSI(GetLevelForVal) - Value RoundedVal = rewriter.create(loc,rewriter.getI64Type(), aMinusMinDivStep); - Value RoundValFloat = rewriter.create(loc, rewriter.getF64Type() , RoundedVal); + Value RoundedVal = rewriter.create( + loc, rewriter.getI64Type(), aMinusMinDivStep); + Value RoundValFloat = rewriter.create( + loc, rewriter.getF64Type(), RoundedVal); // QuantVal = RoundedVal * step + min_val - Value RoundedMulStep = rewriter.create(loc, RoundValFloat , StepSize); - Value QuantVal = rewriter.create(loc, RoundedMulStep, getMin); - rewriter.create(loc, QuantVal, alloc, ValueRange{ivY}); + Value RoundedMulStep = + rewriter.create(loc, RoundValFloat, StepSize); + Value QuantVal = + rewriter.create(loc, RoundedMulStep, getMin); + rewriter.create(loc, QuantVal, alloc, ValueRange{ivY}); rewriter.setInsertionPointAfter(forOpY); - - //debug - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - + + // debug + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> + rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: lmsFilter operations 
//===----------------------------------------------------------------------===// @@ -2572,20 +3101,20 @@ struct LMSFilterOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // for (int n = 0; n < NUM_SAMPLES; n++) { - // // Calculate the filter output y[n] - // y[n] = 0; - // for (int i = 0; i < FILTER_LENGTH; i++) { - // if (n - i >= 0) { // affine if - // y[n] = y[n] + (w[i] * x[n - i]); - // } - // } - + + // Pseudo-code: + // for (int n = 0; n < NUM_SAMPLES; n++) { + // // Calculate the filter output y[n] + // y[n] = 0; + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { // affine if + // y[n] = y[n] + (w[i] * x[n - i]); + // } + // } + // // Calculate the error e[n] // e[n] = d[n] - y[n]; - + // // Update the filter weights w[i] // for (int i = 0; i < FILTER_LENGTH; i++) { // if (n - i >= 0) { @@ -2594,59 +3123,64 @@ struct LMSFilterOpLowering : public ConversionPattern { // } // } - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); LMSFilterOpAdaptor lmsFilterAdaptor(operands); - // Value alpha = rewriter.create(loc, rewriter.getF64Type(), + // Value alpha = rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(1)); - Value zeroval = rewriter.create(loc, 
rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - Value mu = rewriter.create(loc, lmsFilterAdaptor.getMu()); - - //For loop -- iterate from 0 to last - int64_t lb = 0 ; + Value zeroval = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value mu = rewriter.create(loc, lmsFilterAdaptor.getMu()); + + // For loop -- iterate from 0 to last + int64_t lb = 0; int64_t numSamples = tensorType.getShape()[0]; int64_t step = 1; Value GetFilterLOp = op->getOperand(3); - dsp::ConstantOp constantOp3rdArg = GetFilterLOp.getDefiningOp(); - DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();; + dsp::ConstantOp constantOp3rdArg = + GetFilterLOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + ; auto elements1 = constant3rdValue.getValues(); float filterlenval = elements1[0].getValueAsDouble(); - auto FilterLength = (uint64_t) filterlenval; + auto FilterLength = (uint64_t)filterlenval; Value GetItersLOp = op->getOperand(4); - dsp::ConstantOp constantOp4thArg = GetItersLOp.getDefiningOp(); - DenseElementsAttr constant4thValue = constantOp4thArg.getValue();; + dsp::ConstantOp constantOp4thArg = + GetItersLOp.getDefiningOp(); + DenseElementsAttr constant4thValue = constantOp4thArg.getValue(); + ; auto elements = constant4thValue.getValues(); float interationsval = elements[0].getValueAsDouble(); - auto TotalIterations = (uint64_t) interationsval; - - - + auto TotalIterations = (uint64_t)interationsval; + auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type()); auto yAlloc = rewriter.create(loc, yMemRefType); - affine::AffineForOp forOpiter = rewriter.create(loc, lb, TotalIterations, step); + affine::AffineForOp forOpiter = + rewriter.create(loc, lb, TotalIterations, step); rewriter.setInsertionPointToStart(forOpiter.getBody()); - affine::AffineForOp forOp1 = rewriter.create(loc, lb, numSamples, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, numSamples, step); auto iv 
= forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); - //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) AffineExpr d0, d1, s0; bindDims(rewriter.getContext(), d0, d1); - // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; + // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - + // rewriter.getAffineDimExpr(1); //d0 - d1; AffineExpr ExprForXSlice = d0 - d1; AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice); IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false}); @@ -2655,72 +3189,74 @@ struct LMSFilterOpLowering : public ConversionPattern { // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); // Allocate and initialize array for y // Value constantIndx0 = rewriter.create(loc, 0); - + rewriter.create(loc, zeroval, yAlloc, ValueRange{iv}); - affine::AffineForOp forOp2 = rewriter.create(loc, lb, FilterLength, step); + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); auto iv2 = forOp2.getInductionVar(); rewriter.setInsertionPointToStart(forOp2.getBody()); - auto ifOp = rewriter.create( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ ); + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); rewriter.setInsertionPointToStart(ifOp.getThenBlock()); - Value inputX = rewriter.create(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter, - ValueRange{iv,iv2}); - Value Prevw = rewriter.create(loc, alloc, - ValueRange{iv2}); //memRefType + Value inputX = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv2}); + Value Prevw = rewriter.create(loc, alloc, + ValueRange{iv2}); // memRefType - Value wmulx = rewriter.create(loc, inputX ,Prevw ); + Value wmulx = rewriter.create(loc, inputX, Prevw); Value ybefore = rewriter.create(loc, yAlloc, ValueRange{iv}); Value sumNext = 
rewriter.create(loc, wmulx, ybefore); rewriter.create(loc, sumNext, yAlloc, ValueRange{iv}); rewriter.setInsertionPointAfter(ifOp); rewriter.setInsertionPointAfter(forOp2); - // get e[n] = d[n] - y[n] - Value desiredX = rewriter.create(loc, lmsFilterAdaptor.getRhs(), ValueRange{iv}); - Value ynew = rewriter.create(loc, yAlloc, - ValueRange{iv}); - - Value err = rewriter.create(loc, desiredX ,ynew ); + Value desiredX = rewriter.create( + loc, lmsFilterAdaptor.getRhs(), ValueRange{iv}); + Value ynew = rewriter.create(loc, yAlloc, ValueRange{iv}); - + Value err = rewriter.create(loc, desiredX, ynew); - affine::AffineForOp forOp3 = rewriter.create(loc, lb, FilterLength, step); - auto iv3 = forOp3.getInductionVar(); - - rewriter.setInsertionPointToStart(forOp3.getBody()); - - auto ifOp2 = rewriter.create( loc, set1 , ValueRange{iv,iv3} , false /*no else*/ ); - rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); - - Value inputX2 = rewriter.create(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter, - ValueRange{iv,iv3}); - - Value Prevw2 = rewriter.create(loc, alloc, - ValueRange{iv3}); - - // f(u(n),e(n),μ)=μe(n)u∗(n) - Value mul1 = rewriter.create(loc, err ,inputX2 ); - Value mul2 = rewriter.create(loc, mu ,mul1 ); - - // FInal w[n] - Value answer = rewriter.create(loc, Prevw2 ,mul2 ); - - rewriter.create(loc, answer, alloc, ValueRange{iv3}); - rewriter.setInsertionPointAfter(ifOp2); - rewriter.setInsertionPointAfter(forOp3); - - rewriter.setInsertionPointAfter(forOp1); - rewriter.setInsertionPointAfter(forOpiter); - //debug - // forOp1->dump(); + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{iv, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, 
ValueRange{iv, iv3}); + + Value Prevw2 = rewriter.create(loc, alloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, alloc, ValueRange{iv3}); + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + + rewriter.setInsertionPointAfter(forOp1); + rewriter.setInsertionPointAfter(forOpiter); + // debug + // forOp1->dump(); rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -2729,7 +3265,6 @@ struct LMSFilterOpLowering : public ConversionPattern { // ToyToAffine RewritePatterns: Threshold operations //===----------------------------------------------------------------------===// - struct ThresholdOpLowering : public ConversionPattern { ThresholdOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::ThresholdOp::getOperationName(), 1, ctx) {} @@ -2738,207 +3273,227 @@ struct ThresholdOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld - // = 0 , else + + // Pseudo-code: + // y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld + // = 0 , else DEBUG_PRINT_NO_ARGS(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), 
rewriter.getF64FloatAttr(0)); + // y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld + // loop from 0 to len - //y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld - //loop from 0 to len - - //load from X, + // load from X, ThresholdOpAdaptor thresholdAdaptor(operands); DEBUG_PRINT_NO_ARGS(); - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - - //for loop from 0 to len(Output) - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + + // for loop from 0 to len(Output) + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - - Value inputX = rewriter.create(loc, thresholdAdaptor.getInput(), ivY ); - + + Value inputX = + rewriter.create(loc, thresholdAdaptor.getInput(), ivY); + // Load the threshold value from the memref auto thresholdMemRef = thresholdAdaptor.getThreshld(); - auto threshold = rewriter.create(loc, thresholdMemRef, ValueRange{}); + auto threshold = + rewriter.create(loc, thresholdMemRef, ValueRange{}); // Compare a[i] <= threshold - auto cmp1 = rewriter.create(loc, arith::CmpFPredicate::OLE, inputX, threshold); - + auto cmp1 = rewriter.create(loc, arith::CmpFPredicate::OLE, + inputX, threshold); + // Compare a[i] >= -threshold auto negThreshold = rewriter.create(loc, threshold); - auto cmp2 = rewriter.create(loc, arith::CmpFPredicate::OGE, inputX, negThreshold); + auto cmp2 = rewriter.create(loc, arith::CmpFPredicate::OGE, + inputX, negThreshold); // Combine the comparisons using AND auto cmpAnd = rewriter.create(loc, cmp1, cmp2); // Use select to choose between 0 and a[i] - auto selectOp = rewriter.create(loc, cmpAnd, constant0, inputX); + auto selectOp = + rewriter.create(loc, cmpAnd, constant0, inputX); // Store the result rewriter.create(loc, selectOp, alloc, ivY); rewriter.setInsertionPointAfter(forOpY); - 
//debug - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - + // debug + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> + rewriter.replaceOp(op, alloc); - + return success(); } }; - - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: HighPassFIRHammingOptimizedOp operations //===----------------------------------------------------------------------===// struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern { HighPassFIRHammingOptimizedOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::HighPassFIRHammingOptimizedOp::getOperationName(), 1, ctx) {} + : ConversionPattern( + dsp::HighPassFIRHammingOptimizedOp::getOperationName(), 1, ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_highFIRHamming[n] = -1 * [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : - // = 1 - wc/pi , n = (N-1)/2 - // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n + // Pseudo-code: + // y_highFIRHamming[n] = -1 * [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - + // 0.46 cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : = 1 - wc/pi , n = (N-1)/2 - // 1 loops : first from 0 <= n < (N-1)/2 - 1 - // + // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // 1 loops : first from 0 <= n < (N-1)/2 - 1 + // + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this 
operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //first from 0 <= i < (N-1)/2 - 1 - int64_t lb = 0 ; - int64_t N = tensorType.getShape()[0]; - int64_t ub = (N-1) / 2 ; + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // first from 0 <= i < (N-1)/2 - 1 + int64_t lb = 0; + int64_t N = tensorType.getShape()[0]; + int64_t ub = (N - 1) / 2; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - HighPassFIRHammingOptimizedOpAdaptor highPassFIRHammingOptimizedOpAdaptor(operands); - //Handle middle y[mid] = wc / pi - int64_t midIndx = ub ; - Value constantIndxMid = rewriter.create(loc, midIndx); - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - Value wc = rewriter.create(loc, highPassFIRHammingOptimizedOpAdaptor.getWc(), ValueRange{}); - Value constant1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); - Value constantMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(-1)); - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(3.14159265359)); + HighPassFIRHammingOptimizedOpAdaptor highPassFIRHammingOptimizedOpAdaptor( + operands); + // Handle middle y[mid] = wc / pi + int64_t midIndx = ub; + Value constantIndxMid = + rewriter.create(loc, midIndx); + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx0}); + Value wc = rewriter.create( + loc, highPassFIRHammingOptimizedOpAdaptor.getWc(), ValueRange{}); + Value constant1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + Value constantMinus1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); 
+ Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359)); Value wcByPi = rewriter.create(loc, wc, constpi); - Value OneMinusWcByPi = rewriter.create(loc, constant1, wcByPi); - rewriter.create(loc, OneMinusWcByPi, alloc, ValueRange{constantIndxMid}); - - //first from 0 <= i < (N-1)/2 - 1 - - //calculate i-(N-1)/2 - - Value Nminus1By2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) ub)); - - //calculate 0.54 - 0.46 cos(2 *pi * n/N-1) - Value constant0_54 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.54)); - Value constant0_46 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.46)); - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value NMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) N - 1)); - - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + Value OneMinusWcByPi = + rewriter.create(loc, constant1, wcByPi); + rewriter.create(loc, OneMinusWcByPi, alloc, + ValueRange{constantIndxMid}); + + // first from 0 <= i < (N-1)/2 - 1 + + // calculate i-(N-1)/2 + + Value Nminus1By2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub)); + + // calculate 0.54 - 0.46 cos(2 *pi * n/N-1) + Value constant0_54 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54)); + Value constant0_46 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46)); + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value NMinus1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)N - 1)); + + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = 
rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //get sin(wc * (i - (N-1)/ 2)) + // get sin(wc * (i - (N-1)/ 2)) Value iMinusMid = rewriter.create(loc, i, Nminus1By2); - Value mulwc_iMinusMid = rewriter.create(loc, wc , iMinusMid); + Value mulwc_iMinusMid = rewriter.create(loc, wc, iMinusMid); Value GetSin = rewriter.create(loc, mulwc_iMinusMid); - + // sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) - Value piMuliMinusMid = rewriter.create(loc, constpi , iMinusMid); - Value GetDiv = rewriter.create(loc, GetSin ,piMuliMinusMid) ; + Value piMuliMinusMid = + rewriter.create(loc, constpi, iMinusMid); + Value GetDiv = rewriter.create(loc, GetSin, piMuliMinusMid); // [sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)] * [0.54-0.46 cos(2*pi*i/N-1) - //get 2*pi * k / (N -1) - Value mul2pi_k = rewriter.create(loc, const2pi , i); - Value divIndxByNMinus1 = rewriter.create(loc, mul2pi_k, NMinus1 ) ; + // get 2*pi * k / (N -1) + Value mul2pi_k = rewriter.create(loc, const2pi, i); + Value divIndxByNMinus1 = + rewriter.create(loc, mul2pi_k, NMinus1); // get cos(2*pi * k/(N-1) Value GetCos = rewriter.create(loc, divIndxByNMinus1); - Value MulCos0_46 = rewriter.create(loc, constant0_46 , GetCos); - Value Sub0_54_Cos = rewriter.create(loc, constant0_54 ,MulCos0_46) ; - - //Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) - Value MulFilterHamming = rewriter.create(loc, GetDiv , Sub0_54_Cos); - Value MulByMinus1 = rewriter.create(loc, constantMinus1 ,MulFilterHamming) ; - rewriter.create(loc, MulByMinus1, alloc, ValueRange{ivY}); - - //also , store same value at N-1-i using affine-Map - //For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0) + Value MulCos0_46 = + rewriter.create(loc, constant0_46, GetCos); + Value 
Sub0_54_Cos = + rewriter.create(loc, constant0_54, MulCos0_46); + + // Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) + Value MulFilterHamming = + rewriter.create(loc, GetDiv, Sub0_54_Cos); + Value MulByMinus1 = + rewriter.create(loc, constantMinus1, MulFilterHamming); + rewriter.create(loc, MulByMinus1, alloc, ValueRange{ivY}); + + // also , store same value at N-1-i using affine-Map + // For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); bindSymbols(rewriter.getContext(), s0); - //calulate N - 1 - i - AffineExpr ExprForNMinus1minusI = s0 - d0 ; - AffineMap addMapForNMinus1minusI = AffineMap::get(1, 1, ExprForNMinus1minusI); - - //store at N-1-i index , result - Value constantNMinus1Indx = rewriter.create(loc, N -1); - rewriter.create(loc, MulByMinus1, alloc, addMapForNMinus1minusI, - ValueRange{ivY,constantNMinus1Indx}); + // calulate N - 1 - i + AffineExpr ExprForNMinus1minusI = s0 - d0; + AffineMap addMapForNMinus1minusI = + AffineMap::get(1, 1, ExprForNMinus1minusI); + + // store at N-1-i index , result + Value constantNMinus1Indx = + rewriter.create(loc, N - 1); + rewriter.create(loc, MulByMinus1, alloc, + addMapForNMinus1minusI, + ValueRange{ivY, constantNMinus1Indx}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); // affine.for %arg0 = 0 to 3 { // %12 = arith.index_castui %arg0 : index to i32 @@ -2959,11 +3514,10 @@ struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern { // affine.store %25, %alloc[-%arg0 + 6] : memref<7xf64> // } - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -2974,203 +3528,222 @@ struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern { struct FIRFilterHammingOptimizedOpLowering : public ConversionPattern { FIRFilterHammingOptimizedOpLowering(MLIRContext *ctx) - 
: ConversionPattern(dsp::FIRFilterHammingOptimizedOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::FIRFilterHammingOptimizedOp::getOperationName(), + 1, ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_FIRHamming[n] = [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : - // = wc/pi * 1 , n = (N-1)/2 - // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n + // Pseudo-code: + // y_FIRHamming[n] = [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 + // cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : + // = wc/pi * 1 , n = (N-1)/2 + + // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n - // 1 loops : first from 0 <= n < (N-1)/2 - 1 - // + // 1 loops : first from 0 <= n < (N-1)/2 - 1 + // + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //first from 0 <= i < (N-1)/2 - 1 - int64_t lb = 0 ; - int64_t N = tensorType.getShape()[0]; - int64_t ub = (N-1) / 2 ; + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // first from 0 <= i < (N-1)/2 - 1 + int64_t lb = 0; + int64_t N = tensorType.getShape()[0]; + int64_t ub = (N - 1) / 2; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - FIRFilterHammingOptimizedOpAdaptor 
firFilterHammingOptimizedOpAdaptor(operands); - //Handle middle y[mid] = wc / pi - int64_t midIndx = ub ; - Value constantIndxMid = rewriter.create(loc, midIndx); - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - Value wc = rewriter.create(loc, firFilterHammingOptimizedOpAdaptor.getWc(), ValueRange{}); - - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(3.14159265359)); + FIRFilterHammingOptimizedOpAdaptor firFilterHammingOptimizedOpAdaptor( + operands); + // Handle middle y[mid] = wc / pi + int64_t midIndx = ub; + Value constantIndxMid = + rewriter.create(loc, midIndx); + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx0}); + Value wc = rewriter.create( + loc, firFilterHammingOptimizedOpAdaptor.getWc(), ValueRange{}); + + Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359)); Value wcByPi = rewriter.create(loc, wc, constpi); - rewriter.create(loc, wcByPi, alloc, ValueRange{constantIndxMid}); + rewriter.create(loc, wcByPi, alloc, + ValueRange{constantIndxMid}); + + // first from 0 <= i < (N-1)/2 - 1 - //first from 0 <= i < (N-1)/2 - 1 + // calculate i-(N-1)/2 - //calculate i-(N-1)/2 + Value Nminus1By2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub)); - Value Nminus1By2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) ub)); - - //calculate 0.54 - 0.46 cos(2 *pi * n/N-1) - Value constant0_54 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.54)); - Value constant0_46 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.46)); - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value NMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) N - 1)); + // calculate 0.54 - 0.46 cos(2 *pi * n/N-1) + Value constant0_54 = rewriter.create( + 
loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54)); + Value constant0_46 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46)); + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value NMinus1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)N - 1)); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //get sin(wc * (i - (N-1)/ 2)) + // get sin(wc * (i - (N-1)/ 2)) Value iMinusMid = rewriter.create(loc, i, Nminus1By2); - Value mulwc_iMinusMid = rewriter.create(loc, wc , iMinusMid); + Value mulwc_iMinusMid = rewriter.create(loc, wc, iMinusMid); Value GetSin = rewriter.create(loc, mulwc_iMinusMid); - + // sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) - Value piMuliMinusMid = rewriter.create(loc, constpi , iMinusMid); - Value GetDiv = rewriter.create(loc, GetSin ,piMuliMinusMid) ; + Value piMuliMinusMid = + rewriter.create(loc, constpi, iMinusMid); + Value GetDiv = rewriter.create(loc, GetSin, piMuliMinusMid); // [sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)] * [0.54-0.46 cos(2*pi*i/N-1) - //get 2*pi * k / (N -1) - Value mul2pi_k = rewriter.create(loc, const2pi , i); - Value divIndxByNMinus1 = rewriter.create(loc, mul2pi_k, NMinus1 ) ; + // get 2*pi * k / (N -1) + Value mul2pi_k = rewriter.create(loc, const2pi, i); + Value divIndxByNMinus1 = + rewriter.create(loc, mul2pi_k, NMinus1); // get cos(2*pi * k/(N-1) Value GetCos = rewriter.create(loc, 
divIndxByNMinus1); - Value MulCos0_46 = rewriter.create(loc, constant0_46 , GetCos); - Value Sub0_54_Cos = rewriter.create(loc, constant0_54 ,MulCos0_46) ; - - //Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) - Value MulFilterHamming = rewriter.create(loc, GetDiv , Sub0_54_Cos); - rewriter.create(loc, MulFilterHamming, alloc, ValueRange{ivY}); - - //also , store same value at N-1-i using affine-Map - //For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0) + Value MulCos0_46 = + rewriter.create(loc, constant0_46, GetCos); + Value Sub0_54_Cos = + rewriter.create(loc, constant0_54, MulCos0_46); + + // Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2) + Value MulFilterHamming = + rewriter.create(loc, GetDiv, Sub0_54_Cos); + rewriter.create(loc, MulFilterHamming, alloc, + ValueRange{ivY}); + + // also , store same value at N-1-i using affine-Map + // For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); bindSymbols(rewriter.getContext(), s0); - //calulate N - 1 - i - AffineExpr ExprForNMinus1minusI = s0 - d0 ; - AffineMap addMapForNMinus1minusI = AffineMap::get(1, 1, ExprForNMinus1minusI); - - //store at N-1-i index , result - Value constantNMinus1Indx = rewriter.create(loc, N -1); - rewriter.create(loc, MulFilterHamming, alloc, addMapForNMinus1minusI, - ValueRange{ivY,constantNMinus1Indx}); + // calulate N - 1 - i + AffineExpr ExprForNMinus1minusI = s0 - d0; + AffineMap addMapForNMinus1minusI = + AffineMap::get(1, 1, ExprForNMinus1minusI); + + // store at N-1-i index , result + Value constantNMinus1Indx = + rewriter.create(loc, N - 1); + rewriter.create(loc, MulFilterHamming, alloc, + addMapForNMinus1minusI, + ValueRange{ivY, constantNMinus1Indx}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 
6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: GetRangeOfVectorOp operations //===----------------------------------------------------------------------===// struct GetRangeOfVectorOpLowering : public ConversionPattern { GetRangeOfVectorOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::GetRangeOfVectorOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::GetRangeOfVectorOp::getOperationName(), 1, ctx) { + } LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, 
ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[0] = first: - // y[i] = y[i-1] + step for 1<=i((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y[0] = first: + // y[i] = y[i-1] + step for 1<=i((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); GetRangeOfVectorOpAdaptor getRangeOfVectorOpOpAdaptor(operands); Value GetValueAtIndx2ndArg = op->getOperand(0); - dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + GetValueAtIndx2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float FirstValue = elements[0].getValueAsDouble(); - - DEBUG_PRINT_WITH_ARGS("FirstValue is" , FirstValue); + + DEBUG_PRINT_WITH_ARGS("FirstValue is", FirstValue); Value GetStepOp = op->getOperand(2); - dsp::ConstantOp constantOp3rdArg = GetStepOp.getDefiningOp(); - DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();; + dsp::ConstantOp constantOp3rdArg = + GetStepOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + ; auto elements1 = constant3rdValue.getValues(); float StepValue = elements1[0].getValueAsDouble(); - //first from 1 <= i < N - int64_t lb = 1 ; - int64_t ub = tensorType.getShape()[0]; + // first from 1 <= i < N + int64_t lb = 
1; + int64_t ub = tensorType.getShape()[0]; // int64_t ub = (N-1) / 2 ; int64_t step = 1; @@ -3179,58 +3752,58 @@ struct GetRangeOfVectorOpLowering : public ConversionPattern { float valAtIndxI = FirstValue; Value constantIndx0 = rewriter.create(loc, 0); - Value constantFirst = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(valAtIndxI)); - Value constantStep = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(StepValue)); + Value constantFirst = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(valAtIndxI)); + Value constantStep = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(StepValue)); - rewriter.create(loc, constantFirst, alloc, ValueRange{constantIndx0}); + rewriter.create(loc, constantFirst, alloc, + ValueRange{constantIndx0}); - //loop from 1 <= i < N + // loop from 1 <= i < N - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step, ValueRange{constantFirst}); + affine::AffineForOp forOpY = rewriter.create( + loc, lb, ub, step, ValueRange{constantFirst}); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //Use iter_arg for taking prev_val - //Get iter_arg - auto getIterArg = forOpY.getBody()->getArgument(1); + // Use iter_arg for taking prev_val + // Get iter_arg + auto getIterArg = forOpY.getBody()->getArgument(1); // getIterArg.dump(); - Value sumNext = rewriter.create(loc, getIterArg,constantStep ); - rewriter.create(loc, sumNext, alloc, ValueRange{ivY}); + Value sumNext = + rewriter.create(loc, getIterArg, constantStep); + rewriter.create(loc, sumNext, alloc, ValueRange{ivY}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; - rewriter.create(loc, ValueRange{sumNext} ); + rewriter.create(loc, ValueRange{sumNext}); rewriter.setInsertionPointAfter(forOpY); + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 
4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - //debug - // forOpX->dump(); - // forOpY->dump(); - - - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -3241,283 +3814,309 @@ struct GetRangeOfVectorOpLowering : public ConversionPattern { struct HighPassFIRFilterOpLowering : public ConversionPattern { HighPassFIRFilterOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::HighPassFIRFilterOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::HighPassFIRFilterOp::getOperationName(), 1, + ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_lpf[n] = wc/pi * sinc(wc * (n- 
(N-1)/2)) , n!= (N-1)/2 : - // = wc/pi , n = (N-1)/2 - // y_hpf[n] = dirac(n- (N-1)/2) - y_lpf[n] = -1 * wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : - // = 1 - wc/pi , n = (N-1)/2 - - // 2 loops : first from 0 <= n <= (N-1)/2 - 1 - // 2nd from (N-1)/2 +1 <= n < N - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : + // = wc/pi , n = (N-1)/2 + // y_hpf[n] = dirac(n- (N-1)/2) - y_lpf[n] = -1 * wc/pi * sinc(wc * (n- + // (N-1)/2)) , n!= (N-1)/2 : + // = 1 - wc/pi , n = (N-1)/2 + + // 2 loops : first from 0 <= n <= (N-1)/2 - 1 + // 2nd from (N-1)/2 +1 <= n < N + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //first from 0 <= i <= (N-1)/2 - 1 - int64_t lb = 0 ; - int64_t N = tensorType.getShape()[0]; - int64_t ub = (N-1) / 2 ; + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // first from 0 <= i <= (N-1)/2 - 1 + int64_t lb = 0; + int64_t N = tensorType.getShape()[0]; + int64_t ub = (N - 1) / 2; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); HighPassFIRFilterOpAdaptor highPassfirFilterOpAdaptor(operands); - //Handle middle y[mid] = wc / pi - int64_t midIndx = ub ; - Value constantIndxMid = rewriter.create(loc, midIndx); - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - Value constant1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); - Value 
constantMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(-1)); - - Value wc = rewriter.create(loc, highPassfirFilterOpAdaptor.getWc(), ValueRange{}); - - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(3.14159265359)); + // Handle middle y[mid] = wc / pi + int64_t midIndx = ub; + Value constantIndxMid = + rewriter.create(loc, midIndx); + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx0}); + Value constant1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + Value constantMinus1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + + Value wc = rewriter.create( + loc, highPassfirFilterOpAdaptor.getWc(), ValueRange{}); + + Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359)); Value wcByPi = rewriter.create(loc, wc, constpi); - Value OneMinusWcByPi = rewriter.create(loc, constant1, wcByPi); - rewriter.create(loc, OneMinusWcByPi, alloc, ValueRange{constantIndxMid}); - - //first from 0 <= i <= (N-1)/2 - 1 - - //calculate i-(N-1)/2 - Value Nminus1By2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) ub)); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + Value OneMinusWcByPi = + rewriter.create(loc, constant1, wcByPi); + rewriter.create(loc, OneMinusWcByPi, alloc, + ValueRange{constantIndxMid}); + + // first from 0 <= i <= (N-1)/2 - 1 + + // calculate i-(N-1)/2 + Value Nminus1By2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub)); + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = 
rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //get sin(wc * (i - (N-1)/ 2)) + // get sin(wc * (i - (N-1)/ 2)) Value iMinusMid = rewriter.create(loc, i, Nminus1By2); - Value mulwc_iMinusMid = rewriter.create(loc, wc , iMinusMid); + Value mulwc_iMinusMid = rewriter.create(loc, wc, iMinusMid); Value GetSin = rewriter.create(loc, mulwc_iMinusMid); - + // get sin(wc*i) / pi * i - Value piMuliMinusMid = rewriter.create(loc, constpi , iMinusMid); - Value GetDiv = rewriter.create(loc, GetSin ,piMuliMinusMid) ; - Value MulByMinus1 = rewriter.create(loc, constantMinus1 ,GetDiv) ; - rewriter.create(loc, MulByMinus1, alloc, ValueRange{ivY}); + Value piMuliMinusMid = + rewriter.create(loc, constpi, iMinusMid); + Value GetDiv = rewriter.create(loc, GetSin, piMuliMinusMid); + Value MulByMinus1 = + rewriter.create(loc, constantMinus1, GetDiv); + rewriter.create(loc, MulByMinus1, alloc, ValueRange{ivY}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; rewriter.setInsertionPointAfter(forOpY); - //2nd loop from (N-1)/2 + 1 <= i < N - lb = ub + 1 ; - ub = N ; + // 2nd loop from (N-1)/2 + 1 <= i < N + lb = ub + 1; + ub = N; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv1 = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - //convert index to f64 - Value Indx1 = rewriter.create(loc, rewriter.getIntegerType(32), iv1); - Value i1 = rewriter.create(loc, rewriter.getF64Type(), Indx1); + // convert index to f64 + Value Indx1 = rewriter.create( + loc, rewriter.getIntegerType(32), iv1); + Value i1 = + rewriter.create(loc, rewriter.getF64Type(), Indx1); - //get sin(wc * (i1 - (N-1)/ 2)) + // get sin(wc * (i1 - (N-1)/ 2)) Value iMinusMid1 = rewriter.create(loc, i1, Nminus1By2); - Value mulwc_iMinusMid1 = rewriter.create(loc, wc , iMinusMid1); + Value 
mulwc_iMinusMid1 = + rewriter.create(loc, wc, iMinusMid1); Value GetSin1 = rewriter.create(loc, mulwc_iMinusMid1); - //get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi - // get sin(wc*i1) / pi * i1 + // get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi + // get sin(wc*i1) / pi * i1 - Value piMuliMinusMid1 = rewriter.create(loc, constpi , iMinusMid1); - Value GetDiv1 = rewriter.create(loc, GetSin1 ,piMuliMinusMid1) ; + Value piMuliMinusMid1 = + rewriter.create(loc, constpi, iMinusMid1); + Value GetDiv1 = + rewriter.create(loc, GetSin1, piMuliMinusMid1); - Value GetDiv1MulNeg1 = rewriter.create(loc, constantMinus1 ,GetDiv1) ; + Value GetDiv1MulNeg1 = + rewriter.create(loc, constantMinus1, GetDiv1); - rewriter.create(loc, GetDiv1MulNeg1, alloc, ValueRange{iv1}); + rewriter.create(loc, GetDiv1MulNeg1, alloc, ValueRange{iv1}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; rewriter.setInsertionPointAfter(forOp1); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = 
memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: LowPassFIRFilterOp operations //===----------------------------------------------------------------------===// struct LowPassFIRFilterOpLowering : public ConversionPattern { LowPassFIRFilterOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::LowPassFIRFilterOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::LowPassFIRFilterOp::getOperationName(), 1, ctx) { + } LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : - // = wc/pi , n = (N-1)/2 - - // 2 loops : first from 0 <= n <= (N-1)/2 - 1 - // 2nd from (N-1)/2 +1 <= n < N - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : + // = wc/pi , n = (N-1)/2 + + // 2 loops : first from 0 <= n <= (N-1)/2 - 1 + // 2nd from (N-1)/2 +1 <= n < N + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for 
the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //first from 0 <= i <= (N-1)/2 - 1 - int64_t lb = 0 ; - int64_t N = tensorType.getShape()[0]; - int64_t ub = (N-1) / 2 ; + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // first from 0 <= i <= (N-1)/2 - 1 + int64_t lb = 0; + int64_t N = tensorType.getShape()[0]; + int64_t ub = (N - 1) / 2; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); LowPassFIRFilterOpAdaptor lowPassfirFilterOpAdaptor(operands); - //Handle middle y[mid] = wc / pi - int64_t midIndx = ub ; - Value constantIndxMid = rewriter.create(loc, midIndx); - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - Value wc = rewriter.create(loc, lowPassfirFilterOpAdaptor.getWc(), ValueRange{}); - - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(3.14159265359)); + // Handle middle y[mid] = wc / pi + int64_t midIndx = ub; + Value constantIndxMid = + rewriter.create(loc, midIndx); + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx0}); + Value wc = rewriter.create( + loc, lowPassfirFilterOpAdaptor.getWc(), ValueRange{}); + + Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359)); Value wcByPi = rewriter.create(loc, wc, constpi); - rewriter.create(loc, wcByPi, alloc, ValueRange{constantIndxMid}); + rewriter.create(loc, wcByPi, alloc, + ValueRange{constantIndxMid}); - //first from 0 <= i <= (N-1)/2 - 1 + // first from 0 <= i <= (N-1)/2 - 1 - //calculate i-(N-1)/2 - Value Nminus1By2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr((float) ub)); - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // calculate i-(N-1)/2 + Value Nminus1By2 = rewriter.create( + loc, rewriter.getF64Type(), 
rewriter.getF64FloatAttr((float)ub)); + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //get sin(wc * (i - (N-1)/ 2)) + // get sin(wc * (i - (N-1)/ 2)) Value iMinusMid = rewriter.create(loc, i, Nminus1By2); - Value mulwc_iMinusMid = rewriter.create(loc, wc , iMinusMid); + Value mulwc_iMinusMid = rewriter.create(loc, wc, iMinusMid); Value GetSin = rewriter.create(loc, mulwc_iMinusMid); - + // get sin(wc*i) / pi * i - Value piMuliMinusMid = rewriter.create(loc, constpi , iMinusMid); - Value GetDiv = rewriter.create(loc, GetSin ,piMuliMinusMid) ; - rewriter.create(loc, GetDiv, alloc, ValueRange{ivY}); + Value piMuliMinusMid = + rewriter.create(loc, constpi, iMinusMid); + Value GetDiv = rewriter.create(loc, GetSin, piMuliMinusMid); + rewriter.create(loc, GetDiv, alloc, ValueRange{ivY}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; rewriter.setInsertionPointAfter(forOpY); - //2nd loop from (N-1)/2 + 1 <= i < N - lb = ub + 1 ; - ub = N ; + // 2nd loop from (N-1)/2 + 1 <= i < N + lb = ub + 1; + ub = N; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv1 = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - //convert index to f64 - Value Indx1 = rewriter.create(loc, rewriter.getIntegerType(32), iv1); - Value i1 = rewriter.create(loc, rewriter.getF64Type(), Indx1); + // convert index to f64 + Value Indx1 = rewriter.create( + loc, rewriter.getIntegerType(32), iv1); + Value i1 = + 
rewriter.create(loc, rewriter.getF64Type(), Indx1); - //get sin(wc * (i1 - (N-1)/ 2)) + // get sin(wc * (i1 - (N-1)/ 2)) Value iMinusMid1 = rewriter.create(loc, i1, Nminus1By2); - Value mulwc_iMinusMid1 = rewriter.create(loc, wc , iMinusMid1); + Value mulwc_iMinusMid1 = + rewriter.create(loc, wc, iMinusMid1); Value GetSin1 = rewriter.create(loc, mulwc_iMinusMid1); - //get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi - // get sin(wc*i1) / pi * i1 + // get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi + // get sin(wc*i1) / pi * i1 - Value piMuliMinusMid1 = rewriter.create(loc, constpi , iMinusMid1); - Value GetDiv1 = rewriter.create(loc, GetSin1 ,piMuliMinusMid1) ; - rewriter.create(loc, GetDiv1, alloc, ValueRange{iv1}); + Value piMuliMinusMid1 = + rewriter.create(loc, constpi, iMinusMid1); + Value GetDiv1 = + rewriter.create(loc, GetSin1, piMuliMinusMid1); + rewriter.create(loc, GetDiv1, alloc, ValueRange{iv1}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; rewriter.setInsertionPointAfter(forOp1); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = 
arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: SetElemAtIndx operations //===----------------------------------------------------------------------===// @@ -3530,71 +4129,82 @@ struct SetElemAtIndxOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // output = input[index] - // replace this upsampling op with the output_mem_allocation op + // Pseudo-code: + // output = input[index] + + // replace this upsampling op with the output_mem_allocation op DEBUG_PRINT_NO_ARGS(); - //output for result type + // output for result type SetElemAtIndxOpAdaptor setElemAtIndxAdaptor(operands); - auto tensorType = llvm::cast((*op->result_type_begin())); - // auto tensorType = llvm::cast(setElemAtIndxAdaptor.getInput()); - //iterate to result1 --not needed for now but for future reference - - //allocation & deallocation for the result of this operation + auto tensorType = llvm::cast((*op->result_type_begin())); + // auto tensorType = + // llvm::cast(setElemAtIndxAdaptor.getInput()); + // iterate to result1 --not needed for now but for future reference + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = 
insertAllocAndDealloc(memRefType, loc, rewriter); - //For loop -- iterate from 1 to last - // int64_t lb = 0 ; - // int64_t ub = tensorType.getShape()[0]; - // int64_t step = 1; - // affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); - // auto ivY = forOpY.getInductionVar(); - // rewriter.setInsertionPointToStart(forOpY.getBody()); + // For loop -- iterate from 1 to last + // int64_t lb = 0 ; + // int64_t ub = tensorType.getShape()[0]; + // int64_t step = 1; + // affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, + // step); auto ivY = forOpY.getInductionVar(); + // rewriter.setInsertionPointToStart(forOpY.getBody()); - // Value inputX = rewriter.create(loc, setElemAtIndxAdaptor.getInput(), ValueRange{ivY}); + // Value inputX = rewriter.create(loc, + // setElemAtIndxAdaptor.getInput(), ValueRange{ivY}); // rewriter.create(loc, inputX, alloc, ValueRange{ivY}); // rewriter.setInsertionPointAfter(forOpY); DEBUG_PRINT_WITH_ARGS("\nCheck for index --here"); - //load from X, using 2nd operand as index + // load from X, using 2nd operand as index - // Value GetValueAtIndx2ndArg = setElemAtIndxAdaptor.getIndx(); // getOperand(1); + // Value GetValueAtIndx2ndArg = setElemAtIndxAdaptor.getIndx(); // + // getOperand(1); DEBUG_PRINT_NO_ARGS(); Value GetValueAtIndx2ndArg = op->getOperand(1); - dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + GetValueAtIndx2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - int SecondValueInt = (int64_t) SecondValue; - DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt); + int SecondValueInt = (int64_t)SecondValue; + DEBUG_PRINT_WITH_ARGS("Indx is", SecondValueInt); - Value constantIndx2Indx = rewriter.create(loc, SecondValueInt); + Value constantIndx2Indx 
= + rewriter.create(loc, SecondValueInt); Value constantIndx0 = rewriter.create(loc, 0); - // Value constant0 = rewriter.create(loc, rewriter.getF64Type(), + // Value constant0 = rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(15)); - // Value ValToStore = setElemAtIndxAdaptor.getVal(); + // Value ValToStore = setElemAtIndxAdaptor.getVal(); // Value ValToStore = op->getOperand(2); - Value ValToStore = rewriter.create(loc, setElemAtIndxAdaptor.getVal(), ValueRange{constantIndx0}); - // Value ValToStore = rewriter.create(loc, setElemAtIndxAdaptor.getVal(), ValueRange{}); - - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx2Indx}); - rewriter.create(loc, ValToStore, setElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx}); + Value ValToStore = rewriter.create( + loc, setElemAtIndxAdaptor.getVal(), ValueRange{constantIndx0}); + // Value ValToStore = rewriter.create(loc, + // setElemAtIndxAdaptor.getVal(), ValueRange{}); + + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx2Indx}); + rewriter.create(loc, ValToStore, + setElemAtIndxAdaptor.getInput(), + ValueRange{constantIndx2Indx}); + + // debug + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> - - //debug - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -3603,7 +4213,6 @@ struct SetElemAtIndxOpLowering : public ConversionPattern { // ToyToAffine RewritePatterns: GetElemAtIndx operations //===----------------------------------------------------------------------===// - struct GetElemAtIndxOpLowering : public ConversionPattern { GetElemAtIndxOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::GetElemAtIndxOp::getOperationName(), 1, ctx) {} @@ -3612,62 
+4221,65 @@ struct GetElemAtIndxOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // output = input[index] - // replace this upsampling op with the output_mem_allocation op + // Pseudo-code: + // output = input[index] + + // replace this upsampling op with the output_mem_allocation op DEBUG_PRINT_NO_ARGS(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - // Value constant0 = rewriter.create(loc, rewriter.getF64Type(), + // Value constant0 = rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(0)); - - DEBUG_PRINT_WITH_ARGS("\nCheck for index --here"); - //load from X, using 2nd operand as index + // load from X, using 2nd operand as index GetElemAtIndxOpAdaptor getElemAtIndxAdaptor(operands); - // Value GetValueAtIndx2ndArg = getElemAtIndxAdaptor.getIndx(); // getOperand(1); + // Value GetValueAtIndx2ndArg = getElemAtIndxAdaptor.getIndx(); // + // getOperand(1); DEBUG_PRINT_NO_ARGS(); Value GetValueAtIndx2ndArg = op->getOperand(1); - dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + GetValueAtIndx2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = 
constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - int SecondValueInt = (int64_t) SecondValue; - DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt); + int SecondValueInt = (int64_t)SecondValue; + DEBUG_PRINT_WITH_ARGS("Indx is", SecondValueInt); - Value constantIndx2Indx = rewriter.create(loc, SecondValueInt); + Value constantIndx2Indx = + rewriter.create(loc, SecondValueInt); Value constantIndx0 = rewriter.create(loc, 0); - - Value inputX = rewriter.create(loc, getElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx}); - rewriter.create(loc, inputX, alloc, ValueRange{constantIndx0}); + Value inputX = rewriter.create( + loc, getElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx}); + rewriter.create(loc, inputX, alloc, + ValueRange{constantIndx0}); + + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.store %cst, %alloc_10[] : memref - // %0 = affine.load %alloc_11[4] : memref<10xf64> - // affine.store %0, %alloc[0] : memref<1xf64> - rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: SincOp operations //===----------------------------------------------------------------------===// @@ -3680,100 +4292,100 @@ struct SincOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y = sinc(wc * n) = [1, sin(wc)/pi , sin(2* wc)/2*pi , ... 
sin(n * wc)/n*pi] , 0<=n<=N - - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y = sinc(wc * n) = [1, sin(wc)/pi , sin(2* wc)/2*pi , ... sin(n * + // wc)/n*pi] , 0<=n<=N + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //For loop -- iterate from 1 to last - int64_t lb = 1 ; - int64_t ub = tensorType.getShape()[0]; + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // For loop -- iterate from 1 to last + int64_t lb = 1; + int64_t ub = tensorType.getShape()[0]; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - //get constants -- 0.54 & 0.46 + // get constants -- 0.54 & 0.46 Value constantIndx0 = rewriter.create(loc, 0); - // rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - - Value constant1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(1)); - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(3.14159265359)); - rewriter.create(loc, constant1, alloc, ValueRange{constantIndx0}); - - //For loop + // rewriter.create(loc, constant0, alloc, + // ValueRange{constantIndx0}); + + Value constant1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359)); + rewriter.create(loc, constant1, alloc, + ValueRange{constantIndx0}); + + // For loop SincOpAdaptor 
sincOpAdaptor(operands); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + // get wc * i + Value wc = + rewriter.create(loc, sincOpAdaptor.getWc(), ValueRange{}); - //get wc * i - Value wc = rewriter.create(loc, sincOpAdaptor.getWc(), ValueRange{}); - - Value mulwc_i = rewriter.create(loc, wc , i); + Value mulwc_i = rewriter.create(loc, wc, i); // get sin(wc*i) / pi * i Value GetSin = rewriter.create(loc, mulwc_i); - Value piMuli = rewriter.create(loc, constpi , i); - Value GetDiv = rewriter.create(loc, GetSin ,piMuli) ; - rewriter.create(loc, GetDiv, alloc, ValueRange{ivY}); + Value piMuli = rewriter.create(loc, constpi, i); + Value GetDiv = rewriter.create(loc, GetSin, piMuli); + rewriter.create(loc, GetDiv, alloc, ValueRange{ivY}); // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ; rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 
= arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FFT1DImg operations //===----------------------------------------------------------------------===// - struct FFT1DImgOpLowering : public ConversionPattern { FFT1DImgOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::FFT1DImgOp::getOperationName(), 1, ctx) {} @@ -3782,171 +4394,181 @@ struct FFT1DImgOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = y_real[k] + j *y_img[k] - // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 - //init output mem for y_real & y_img as 0 - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k] - // - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // y[k] = y_real[k] + j 
*y_img[k] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + // init output mem for y_real & y_img as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - // auto tensorType1 = llvm::cast(*std::next(op->result_type_begin(), 1)); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // auto tensorType1 = + // llvm::cast(*std::next(op->result_type_begin(), 1)); + + // DEBUG_PRINT_NO_ARGS() ; + // tensorType.getShape()[0] + // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] + // << " func= " << __func__ << "\n"; - // DEBUG_PRINT_NO_ARGS() ; - //tensorType.getShape()[0] - // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 
= rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 1 to last - int64_t lb = 0 ; + // For loop -- iterate from 1 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); rewriter.create(loc, constant0, alloc_img, ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); auto ivX = forOpX.getInductionVar(); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 + // load from X, & y1 & y2 FFT1DImgOpAdaptor fft1DImgAdaptor(operands); - Value inputX = rewriter.create(loc, fft1DImgAdaptor.getInput(), ValueRange{ivX}); - Value loadYImg = rewriter.create(loc, alloc_img, ValueRange{ivY}); + Value inputX = rewriter.create( + loc, fft1DImgAdaptor.getInput(), ValueRange{ivX}); + Value loadYImg = + rewriter.create(loc, alloc_img, ValueRange{ivY}); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = 
+ rewriter.create(loc, rewriter.getF64Type(), IndxY); - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); - //get 2*pi * k * i / N - Value muli_k = rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Value N = inputTensorType.getShape()[0]; - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; - + Value divIndxByN = rewriter.create(loc, mul2piKI, N); + // Img part = -1 * Sum(x[i] * sin(div) ) Value GetSin = rewriter.create(loc, divIndxByN); - Value xMulSin = rewriter.create(loc, inputX , GetSin); - Value imgSum = rewriter.create(loc, loadYImg ,xMulSin) ; + Value xMulSin = rewriter.create(loc, inputX, GetSin); + Value imgSum = rewriter.create(loc, loadYImg, xMulSin); - // Value constMinus1 = rewriter.create(loc, rewriter.getF64Type(), + // Value constMinus1 = rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(-1)); - // Value NegImgSum = rewriter.create(loc, constMinus1 , imgSum); - 
rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); + // Value NegImgSum = rewriter.create(loc, constMinus1 , + // imgSum); + rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); // x[n-1] rewriter.setInsertionPointAfter(forOpX); // Calculate y[k] = 1/N * y[k] - + rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc_real[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc_real[%y] : memref<4xf64> - // // dsp.print %alloc_real : memref<4xf64> - // affine.store %img, %alloc_img[%y] : 
memref<4xf64> - - // } - // } + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc_real[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc_real[%y] : memref<4xf64> + // // dsp.print %alloc_real : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } // rewriter.replaceOp(op, alloc_real); rewriter.replaceOp(op, alloc_img); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FFT1DReal operations 
//===----------------------------------------------------------------------===// - struct FFT1DRealOpLowering : public ConversionPattern { FFT1DRealOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::FFT1DRealOp::getOperationName(), 1, ctx) {} @@ -3955,160 +4577,170 @@ struct FFT1DRealOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = y_real[k] + j *y_img[k] - // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] - // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 - //init output mem for y_real & y_img as 0 - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k] - // - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // y[k] = y_real[k] + j *y_img[k] + // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + // init output mem for y_real & y_img as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - // auto tensorType1 = llvm::cast(*std::next(op->result_type_begin(), 1)); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // auto tensorType1 = + // llvm::cast(*std::next(op->result_type_begin(), 1)); + + // DEBUG_PRINT_NO_ARGS() ; + // tensorType.getShape()[0] + // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] + // << " 
func= " << __func__ << "\n"; - // DEBUG_PRINT_NO_ARGS() ; - //tensorType.getShape()[0] - // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 1 to last - int64_t lb = 0 ; + // For loop -- iterate from 1 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto 
ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); auto ivX = forOpX.getInductionVar(); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 + // load from X, & y1 & y2 FFT1DRealOpAdaptor fft1DrealAdaptor(operands); - Value inputX = rewriter.create(loc, fft1DrealAdaptor.getInput(), ValueRange{ivX}); - Value loadYReal = rewriter.create(loc, alloc_real, ValueRange{ivY}); - - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); - - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); - - //get 2*pi * k * i / N - Value muli_k = rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); + Value inputX = rewriter.create( + loc, fft1DrealAdaptor.getInput(), ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); + + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub; - Value N 
= rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Value N = inputTensorType.getShape()[0]; - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; + Value divIndxByN = rewriter.create(loc, mul2piKI, N); // Real part = Sum(x[i] * cos(div) ) Value GetCos = rewriter.create(loc, divIndxByN); - Value xMulCos = rewriter.create(loc, inputX , GetCos); - Value realSum = rewriter.create(loc, loadYReal ,xMulCos) ; - rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); - + Value xMulCos = rewriter.create(loc, inputX, GetCos); + Value realSum = rewriter.create(loc, loadYReal, xMulCos); + rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); // DEBUG_PRINT_NO_ARGS() ; - + rewriter.setInsertionPointAfter(forOpX); // forOpX->dump(); // rewriter.create(loc, ValueRange{alloc_real, alloc_img}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc_real[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : 
index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc_real[%y] : memref<4xf64> - // // dsp.print %alloc_real : memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc_real[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 
+ // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc_real[%y] : memref<4xf64> + // // dsp.print %alloc_real : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } // rewriter.replaceOp(op, alloc_real); rewriter.replaceOp(op, alloc_real); - + return success(); } }; @@ -4125,64 +4757,64 @@ struct SquareOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //output = 0 - //iterate for len = 0 to inputLen - // elem = a[i] - // output[i] = elem * elem - // store output - - //DEBUG_PRINT_NO_ARGS() ; - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // output = 0 + // iterate for len = 0 to inputLen + // elem = a[i] + // output[i] = elem * elem + // store output + + // DEBUG_PRINT_NO_ARGS() ; + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); SmallVector steps(tensorType.getRank(), /*Value=*/1); - - //For loop + // For loop SquareOpAdaptor squareOpAdaptor(operands); // DEBUG_PRINT_NO_ARGS() ; - - int64_t lb = 0 ; + + int64_t lb = 0; int64_t ub = 
tensorType.getShape()[0]; int64_t step = 1; - //for loop - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + // for loop + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - + // DEBUG_PRINT_NO_ARGS() ; - Value elemIn = rewriter.create(loc, squareOpAdaptor.getInput(), iv); - Value square = rewriter.create(loc, elemIn , elemIn); - - //store the result + Value elemIn = + rewriter.create(loc, squareOpAdaptor.getInput(), iv); + Value square = rewriter.create(loc, elemIn, elemIn); + + // store the result rewriter.create(loc, square, alloc, iv); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // affine.for %arg0 = 0 to 5 { - // %0 = affine.load %alloc_6[%arg0] : memref<5xf64> - // %1 = arith.mulf %0, %0 : f64 - // affine.store %1, %alloc_5[%arg0] : memref<5xf64> - // } + // debug + // forOp1->dump(); + // affine.for %arg0 = 0 to 5 { + // %0 = affine.load %alloc_6[%arg0] : memref<5xf64> + // %1 = arith.mulf %0, %0 : f64 + // affine.store %1, %alloc_5[%arg0] : memref<5xf64> + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: SumOp operations //===----------------------------------------------------------------------===// @@ -4195,128 +4827,134 @@ struct SumOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //output = 0 - //iterate for len = 0 to inputLen - // output = load output - // elem = a[i] - // output = output + elem - // store output + + // Pseudo-code: + // output = 0 + // iterate for len = 0 to inputLen + // output = load output + // elem = a[i] + // output = output + elem + // store output // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto 
tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); SmallVector steps(tensorType.getRank(), /*Value=*/1); - - //For loop + // For loop SumOpAdaptor sumOpAdaptor(operands); // DEBUG_PRINT_NO_ARGS() ; - auto inputType = llvm::dyn_cast(op->getOperand(0).getType()); //op->getOperand( - // auto inputType = llvm::dyn_cast(sumOpAdaptor.getInput().getType()); + auto inputType = llvm::dyn_cast( + op->getOperand(0).getType()); // op->getOperand( + // auto inputType = + // llvm::dyn_cast(sumOpAdaptor.getInput().getType()); // DEBUG_PRINT_NO_ARGS() ; - int64_t lb = 0 ; + int64_t lb = 0; int64_t ub = inputType.getShape()[0]; int64_t step = 1; - //init 0 for output + // init 0 for output Value constantIndx0 = rewriter.create(loc, 0); - // Value GetInputX0 = rewriter.create(loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0}); - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - // Value elemIn = rewriter.create(loc, upsamplingAdaptor.getLhs(), iv); - // DEBUG_PRINT_NO_ARGS() ; - rewriter.create(loc, constant0, alloc, ValueRange{constantIndx0}); - - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + // Value GetInputX0 = rewriter.create(loc, + // lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0}); + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + // Value elemIn = rewriter.create(loc, + // 
upsamplingAdaptor.getLhs(), iv); DEBUG_PRINT_NO_ARGS() ; + rewriter.create(loc, constant0, alloc, + ValueRange{constantIndx0}); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - + // DEBUG_PRINT_NO_ARGS() ; - Value elemIn = rewriter.create(loc, sumOpAdaptor.getInput(), iv); - Value loadSum = rewriter.create(loc, alloc, ValueRange{constantIndx0}); - - Value sum = rewriter.create(loc, elemIn , loadSum); - - //store the result + Value elemIn = + rewriter.create(loc, sumOpAdaptor.getInput(), iv); + Value loadSum = + rewriter.create(loc, alloc, ValueRange{constantIndx0}); + + Value sum = rewriter.create(loc, elemIn, loadSum); + + // store the result rewriter.create(loc, sum, alloc, ValueRange{constantIndx0}); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // %cont3 = arith.const 3 : f64 - // affine.for %arg0 = 0 to 8 { - // %elem1 = affine.load input[%arg0] - // #map1 = affine_map<(%arg0)[] : (%arg0 + 1) - // #map2 = affine_map<(%arg0)[] : (%arg0 + 2) - // %elem2 = affine.load input[#map1] <-- affine apply - // %elem3 = affine.load input[#map2] - - // %sum1 = arith.addf %elem1 , %elem2 - // %sum2 = arith.addf %sum1, %elem3 - // %res = arith.divf %sum2 , - // affine.store %sum2, out[%arg0] - // } + // debug + // forOp1->dump(); + // %cont3 = arith.const 3 : f64 + // affine.for %arg0 = 0 to 8 { + // %elem1 = affine.load input[%arg0] + // #map1 = affine_map<(%arg0)[] : (%arg0 + 1) + // #map2 = affine_map<(%arg0)[] : (%arg0 + 2) + // %elem2 = affine.load input[#map1] <-- affine apply + // %elem3 = affine.load input[#map2] + + // %sum1 = arith.addf %elem1 , %elem2 + // %sum2 = arith.addf %sum1, %elem3 + // %res = arith.divf %sum2 , + // affine.store %sum2, out[%arg0] + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine 
RewritePatterns: FIRFilterResponse operations //===----------------------------------------------------------------------===// -struct filterOpLowering: public ConversionPattern { - filterOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::filterOp::getOperationName(), 1 , ctx) {} +struct filterOpLowering : public ConversionPattern { + filterOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::filterOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.filterOp has 3 operands -- both of type tensor f64 - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.filterOp has 3 operands -- both of type tensor f64 + // Pseudo-code: + // y[i] = sum(b[j] * x(i-j) - a[j] *x[i-j] ) j=1 to i and i=1 to len(x) + // also, y[0] = b[0] * x[0] - //Pseudo-code: - // y[i] = sum(b[j] * x(i-j) - a[j] *x[i-j] ) j=1 to i and i=1 to len(x) - // also, y[0] = b[0] * x[0] - // 1) calculate y[0] // 2) iterate for indx=1 to input_len: // load y[indx] = b[0] * x[indx] - // 3) iterate for j=1 to indx : + // 3) iterate for j=1 to indx : // load b[j] , x[i-j] , a[j] , y[i-j] // y[indx] = y[indx] + b[j] * x[i-j] - a[j]*y[i-j] auto loc = op->getLoc(); - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); filterOpAdaptor filterOpAdaptor1(operands); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + 
SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); // IR: - // ConstantIndx0 + // ConstantIndx0 // b0 = affine.load(b, ConstantIndx0) // x0 = affine.load(x, ConstantIndx0) // tempY0 = arith.mulf(b0,x0) - // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() + // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() // forLoopY // xIvY = affine.load(x,ivY ) // tempYIndx = affine.mulf(b0, xIvY) @@ -4324,7 +4962,7 @@ struct filterOpLowering: public ConversionPattern { // forloopJ , ivJ = forloopJ.inductionVariable() // //optional get min ivY and len(b) -- iterate for this - // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , + // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , // (y, map(ivY - ivJ) ), (y , ivJ) // tempBxX = arith.mulf(b , x) @@ -4333,83 +4971,95 @@ struct filterOpLowering: public ConversionPattern { // sumY_A = arith.addf( Y , tempB_A ) // affine.store(sumY_A , y , ivY) - // ConstantIndx0 + // ConstantIndx0 // b0 = affine.load(b, ConstantIndx0) // x0 = affine.load(x, ConstantIndx0) // tempY0 = arith.mulf(b0,x0) Value constantIndx0 = rewriter.create(loc, 0); - Value b0 = rewriter.create(loc, filterOpAdaptor1.getB() ,ValueRange{constantIndx0} ); - Value x0 = rewriter.create(loc, filterOpAdaptor1.getX() ,ValueRange{constantIndx0} ); + Value b0 = rewriter.create( + loc, filterOpAdaptor1.getB(), ValueRange{constantIndx0}); + Value x0 = rewriter.create( + loc, filterOpAdaptor1.getX(), ValueRange{constantIndx0}); Value tempY0 = rewriter.create(loc, b0, x0); - //store at Y0 - rewriter.create(loc, tempY0 , alloc,ValueRange{constantIndx0} ); + // store at Y0 + rewriter.create(loc, tempY0, alloc, + ValueRange{constantIndx0}); - //For loop -- iterate from 1 to last - // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() - // forLoopY - // xIvY = affine.load(x,ivY ) - // tempYIndx = affine.mulf(b0, xIvY) - // affine.store(tempYIndx, y, ivY) + // For loop -- iterate from 1 to last 
+ // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() + // forLoopY + // xIvY = affine.load(x,ivY ) + // tempYIndx = affine.mulf(b0, xIvY) + // affine.store(tempYIndx, y, ivY) - int64_t lb = 1 ; + int64_t lb = 1; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; // DEBUG_PRINT_NO_ARGS() ; - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - Value xIvY = rewriter.create(loc, filterOpAdaptor1.getX() , ivY); + Value xIvY = rewriter.create( + loc, filterOpAdaptor1.getX(), ivY); Value b0mulxIvY = rewriter.create(loc, b0, xIvY); - rewriter.create(loc, b0mulxIvY , alloc,ivY ); + rewriter.create(loc, b0mulxIvY, alloc, ivY); - //loop for X-- 1 to upperIndx ie, ivY - // forloopJ , ivJ = forloopJ.inductionVariable() - // //optional get min ivY and len(b) -- iterate for this - // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , - // (y, map(ivY - ivJ) ), (y , ivJ) + // loop for X-- 1 to upperIndx ie, ivY + // forloopJ , ivJ = forloopJ.inductionVariable() + // //optional get min ivY and len(b) -- iterate for this + // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , + // (y, map(ivY - ivJ) ), (y , ivJ) - // tempBxX = arith.mulf(b , x) - // tempAxY = arith.mulf(a , Y_i-j) - // tempB_A = arith.subf( tempBxX - tempAxY) - // sumY_A = arith.addf( Y , tempB_A ) - // affine.store(sumY_A , y , ivY) + // tempBxX = arith.mulf(b , x) + // tempAxY = arith.mulf(a , Y_i-j) + // tempB_A = arith.subf( tempBxX - tempAxY) + // sumY_A = arith.addf( Y , tempB_A ) + // affine.store(sumY_A , y , ivY) - //look for here - // DEBUG_PRINT_NO_ARGS() ; - //Future -- try to loop - // Value forlb = rewriter.create(loc, 1); + // look for here + // DEBUG_PRINT_NO_ARGS() ; + // Future -- try to loop + // Value forlb = rewriter.create(loc, 1); AffineExpr expr0; bindDims(rewriter.getContext(), 
expr0); // AffineMap lbMap = AffineMap::get(1, 0, expr0); - // affine::AffineForOp forOpJ = rewriter.create(loc, lbMap, ValueRange{forlb} ,lbMap , ValueRange{ivY}, step); - affine::AffineForOp forOpJ = rewriter.create(loc, lb, ub, step); + // affine::AffineForOp forOpJ = rewriter.create(loc, lbMap, + // ValueRange{forlb} ,lbMap , ValueRange{ivY}, step); + affine::AffineForOp forOpJ = + rewriter.create(loc, lb, ub, step); auto ivJ = forOpJ.getInductionVar(); rewriter.setInsertionPointToStart(forOpJ.getBody()); - //load from X, & Y - // DCTOpAdaptor dctAdaptor(operands); - //For affine expression: #map1 = affine_map<(%ivY , ivJ)[] : (%ivY - ivJ) + // load from X, & Y + // DCTOpAdaptor dctAdaptor(operands); + // For affine expression: #map1 = affine_map<(%ivY , ivJ)[] : (%ivY - ivJ) AffineExpr d0, d1, s0; bindDims(rewriter.getContext(), d0, d1); - // AffineExpr ExprForIndxYminusX = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; - AffineExpr ExprForIndxYminusX = d0 - d1; + // AffineExpr ExprForIndxYminusX = rewriter.getAffineDimExpr(0) - + // rewriter.getAffineDimExpr(1); //d0 - d1; + AffineExpr ExprForIndxYminusX = d0 - d1; AffineMap addMapForYminusX = AffineMap::get(2, 0, ExprForIndxYminusX); - // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , + // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , // (y, map(ivY - ivJ) ), (y , ivJ) - Value inputX = rewriter.create(loc, filterOpAdaptor1.getX(),addMapForYminusX, ValueRange{ivY,ivJ}); - Value inputB = rewriter.create(loc, filterOpAdaptor1.getB(), ValueRange{ivJ}); - Value inputA = rewriter.create(loc, filterOpAdaptor1.getA(), ValueRange{ivJ}); - Value inputPrevY = rewriter.create(loc, alloc,addMapForYminusX, ValueRange{ivY,ivJ}); + Value inputX = rewriter.create( + loc, filterOpAdaptor1.getX(), addMapForYminusX, ValueRange{ivY, ivJ}); + Value inputB = rewriter.create(loc, filterOpAdaptor1.getB(), + ValueRange{ivJ}); + Value inputA = rewriter.create(loc, filterOpAdaptor1.getA(), + 
ValueRange{ivJ}); + Value inputPrevY = rewriter.create( + loc, alloc, addMapForYminusX, ValueRange{ivY, ivJ}); Value outY = rewriter.create(loc, alloc, ValueRange{ivY}); // tempBxX = arith.mulf(b , x) @@ -4422,77 +5072,71 @@ struct filterOpLowering: public ConversionPattern { Value tempAxY = rewriter.create(loc, inputA, inputPrevY); Value tempBminusA = rewriter.create(loc, tempBxX, tempAxY); Value sumY_A = rewriter.create(loc, outY, tempBminusA); - rewriter.create(loc, sumY_A , alloc,ivY ); + rewriter.create(loc, sumY_A, alloc, ivY); - rewriter.setInsertionPointAfter(forOpJ); rewriter.setInsertionPointAfter(forOpY); // forOpJ->dump(); - - //debug - // forOpJ->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, 
%res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc[%y] : memref<4xf64> - // // dsp.print %alloc : memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + + // debug + // forOpJ->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc[%y] : memref<4xf64> + // // dsp.print %alloc : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // 
} rewriter.replaceOp(op, alloc); // rewriter.replaceOp(op, ValueRange{alloc,alloc_img}); - - return success(); - } - + return success(); + } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: DCT operations //===----------------------------------------------------------------------===// - struct DCTOpLowering : public ConversionPattern { DCTOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::DCTOp::getOperationName(), 1, ctx) {} @@ -4501,187 +5145,201 @@ struct DCTOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) , 0<=n<=N-1 : - // for y[0] , the answer will be multiplied by 1/sqrt(2) - - //init output mem for y as 0 - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x[n] cos(pi * k * (n +0.5)/N) and sum and store them at y[k] - // - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) , + // 0<=n<=N-1 : + // for y[0] , the answer will be multiplied by 1/sqrt(2) + + // init output mem for y as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] cos(pi * k * (n +0.5)/N) and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = 
insertAllocAndDealloc(memRefType, loc, rewriter); DCTOpAdaptor dctAdaptor(operands); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); - //constant values: + // constant values: const float sqrt2 = 1.41421356237; const float pi = 3.14159265358; // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - + // affine.store %cst_3, %alloc[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 0 to last - int64_t lb = 0 ; + // For loop -- iterate from 0 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); rewriter.create(loc, constant0, alloc, ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); // DEBUG_PRINT_NO_ARGS() ; - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); auto ivX = forOpX.getInductionVar(); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & Y - // DCTOpAdaptor dctAdaptor(operands); - Value inputX = rewriter.create(loc, 
dctAdaptor.getInput(), ValueRange{ivX}); - Value loadYReal = rewriter.create(loc, alloc, ValueRange{ivY}); + // load from X, & Y + // DCTOpAdaptor dctAdaptor(operands); + Value inputX = rewriter.create(loc, dctAdaptor.getInput(), + ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc, ValueRange{ivY}); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); - //get pi * k * (i + 0.5) / N - Value constant0_5 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.5)); + // get pi * k * (i + 0.5) / N + Value constant0_5 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.5)); Value add_i_half = rewriter.create(loc, i, constant0_5); - Value muli_k = rewriter.create(loc, k , add_i_half); - - Value constpi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(pi)); - Value mulpiKI_half = rewriter.create(loc, constpi , muli_k); + Value muli_k = rewriter.create(loc, k, add_i_half); + + Value constpi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(pi)); + Value mulpiKI_half = rewriter.create(loc, constpi, muli_k); // Get N // DEBUG_PRINT_NO_ARGS() ; - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); - Value divIndxByN 
= rewriter.create(loc, mulpiKI_half, N ) ; + Value divIndxByN = rewriter.create(loc, mulpiKI_half, N); // Get cos ( pi * k * (n +0.5)/N)) // DEBUG_PRINT_NO_ARGS() ; Value GetCos = rewriter.create(loc, divIndxByN); - Value xMulCos = rewriter.create(loc, inputX , GetCos); - Value realSum = rewriter.create(loc, loadYReal ,xMulCos) ; - rewriter.create(loc, realSum, alloc, ValueRange{ivY}); - + Value xMulCos = rewriter.create(loc, inputX, GetCos); + Value realSum = rewriter.create(loc, loadYReal, xMulCos); + rewriter.create(loc, realSum, alloc, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpX); - //multiply Y(k) with sqrt(2) / sqrt(N) - // DEBUG_PRINT_NO_ARGS() ; - Value loadYReal1 = rewriter.create(loc, alloc, ValueRange{ivY}); - Value constSqrt2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(sqrt2)); + // multiply Y(k) with sqrt(2) / sqrt(N) + // DEBUG_PRINT_NO_ARGS() ; + Value loadYReal1 = + rewriter.create(loc, alloc, ValueRange{ivY}); + Value constSqrt2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2)); // Type floatType = rewriter.getF64Type(); - Value N2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + Value N2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Define fast math flags // auto fastMathFlags = arith::FastMathFlagsAttr::get( // rewriter.getContext(), arith::FastMathFlags::none); - // arith::FastMathFlags::ApproximateSqrt | - // arith::FastMathFlags::AllowReciprocal); - Value sqrtN = rewriter.create(loc, N2 ); - // Value sqrtN = rewriter.create(loc, TypeRange{ floatType } , N2 , fastMathFlags ); - - Value mulSqrt2ByN = rewriter.create(loc, constSqrt2 , sqrtN); - Value mulSqrt2ByNByY = rewriter.create(loc, mulSqrt2ByN , loadYReal1); + // arith::FastMathFlags::ApproximateSqrt | + // arith::FastMathFlags::AllowReciprocal); + Value sqrtN = rewriter.create(loc, N2); + // Value sqrtN = rewriter.create(loc, 
TypeRange{ floatType } + // , N2 , fastMathFlags ); + + Value mulSqrt2ByN = rewriter.create(loc, constSqrt2, sqrtN); + Value mulSqrt2ByNByY = + rewriter.create(loc, mulSqrt2ByN, loadYReal1); // DEBUG_PRINT_NO_ARGS() ; - rewriter.create(loc, mulSqrt2ByNByY, alloc, ValueRange{ivY}); + rewriter.create(loc, mulSqrt2ByNByY, alloc, ValueRange{ivY}); rewriter.setInsertionPointAfter(forOpY); - //get Y0 multiplied by sqrt(2) + // get Y0 multiplied by sqrt(2) Value constantIndx0 = rewriter.create(loc, 0); - Value GetY0 = rewriter.create(loc, alloc, /* iv */ ValueRange{constantIndx0}); - Value valSqrt2 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(sqrt2)); + Value GetY0 = rewriter.create( + loc, alloc, /* iv */ ValueRange{constantIndx0}); + Value valSqrt2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2)); Value Y0MulSqrt2 = rewriter.create(loc, GetY0, valSqrt2); - rewriter.create(loc, Y0MulSqrt2, alloc, ValueRange{constantIndx0}); - - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = 
arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc[%y] : memref<4xf64> - // // dsp.print %alloc : memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + rewriter.create(loc, Y0MulSqrt2, alloc, + ValueRange{constantIndx0}); + + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // 
%res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc[%y] : memref<4xf64> + // // dsp.print %alloc : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } rewriter.replaceOp(op, alloc); // rewriter.replaceOp(op, ValueRange{alloc,alloc_img}); - + return success(); } }; @@ -4698,95 +5356,98 @@ struct HammingWindowOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = 0.54 - 0.46 cos(2 *pi * k/N-1) , 0<=n((*op->result_type_begin())); - // llvm::errs() << "tensorType " << tensorType.get; - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // llvm::errs() << "tensorType " << tensorType.get; + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); - - - //For loop -- iterate from 1 to last - DEBUG_PRINT_NO_ARGS() ; - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; - int64_t step = 1; - DEBUG_PRINT_NO_ARGS() ; - //get constants -- 0.54 & 0.46 - Value constant0_54 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.54)); - Value constant0_46 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0.46)); - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); + // construct affine 
loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + // For loop -- iterate from 1 to last + DEBUG_PRINT_NO_ARGS(); + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + DEBUG_PRINT_NO_ARGS(); + // get constants -- 0.54 & 0.46 + Value constant0_54 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54)); + Value constant0_46 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46)); + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); - + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); - //get 2*pi * k / (N -1) - Value mul2pi_k = rewriter.create(loc, const2pi , k); + // get 2*pi * k / (N -1) + Value mul2pi_k = rewriter.create(loc, const2pi, k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub ; - Value NMinus1 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput - 1)); - - Value divIndxByNMinus1 = rewriter.create(loc, mul2pi_k, NMinus1 ) ; + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value NMinus1 = rewriter.create( + loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(LengthOfInput - 1)); + + Value divIndxByNMinus1 = + rewriter.create(loc, 
mul2pi_k, NMinus1); // get cos(2*pi * k/(N-1) Value GetCos = rewriter.create(loc, divIndxByNMinus1); - Value MulCos0_46 = rewriter.create(loc, constant0_46 , GetCos); - Value Sub0_54_Cos = rewriter.create(loc, constant0_54 ,MulCos0_46) ; - rewriter.create(loc, Sub0_54_Cos, alloc, ValueRange{ivY}); - DEBUG_PRINT_NO_ARGS() ; + Value MulCos0_46 = + rewriter.create(loc, constant0_46, GetCos); + Value Sub0_54_Cos = + rewriter.create(loc, constant0_54, MulCos0_46); + rewriter.create(loc, Sub0_54_Cos, alloc, ValueRange{ivY}); + DEBUG_PRINT_NO_ARGS(); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf 
%cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } + // } + // } rewriter.replaceOp(op, alloc); - //rewriter.replaceOp(op, ValueRange{alloc,alloc_img}); - + // rewriter.replaceOp(op, ValueRange{alloc,alloc_img}); + return success(); } }; @@ -4803,187 +5464,194 @@ struct IFFT1DOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = y_real[k] + j *y_img[k] - // y_real = sumOver_n(x[k]*cos[2*pi * k *n/N ] - // y_img = sumOver_n(x[k]*sin[2*pi * k *n/N ] - // here, x[k] is complex ie, x_real[k] + x_complex[k] - //so, y[k] = sumOver_n(x[k]e^(2*pi * k *n/N)) - // ==> = x_real[k]cos(2*pi * k *n/N) - x_complex[k]sin(2*pi * k *n/N) - - //init output mem for y_real - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x_real[k]cos(2*pi * k *n/N) - x_complex[k]sin(2*pi * k *n/N) and - //sum and store them at y[k] - // - - DEBUG_PRINT_NO_ARGS() ; - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - // DEBUG_PRINT_NO_ARGS() ; - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // y[k] = y_real[k] + j *y_img[k] + // y_real = sumOver_n(x[k]*cos[2*pi * k *n/N ] + // y_img = sumOver_n(x[k]*sin[2*pi * k *n/N ] + // here, x[k] is complex ie, x_real[k] + x_complex[k] + // so, y[k] = sumOver_n(x[k]e^(2*pi * k *n/N)) + // ==> = x_real[k]cos(2*pi * k *n/N) - x_complex[k]sin(2*pi * k *n/N) + + // init output mem for y_real + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x_real[k]cos(2*pi * k *n/N) - + // x_complex[k]sin(2*pi * k *n/N) and sum and store them at y[k] + // + + DEBUG_PRINT_NO_ARGS(); + + // output for result type + auto tensorType = 
llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // DEBUG_PRINT_NO_ARGS() ; + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); - - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - - DEBUG_PRINT_NO_ARGS() ; - //For loop -- iterate from 0 to last - int64_t lb = 0 ; + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + DEBUG_PRINT_NO_ARGS(); + // For loop -- iterate from 0 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, 
step); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); auto ivX = forOpX.getInductionVar(); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 + // load from X, & y1 & y2 IFFT1DOpAdaptor ifft1DAdaptor(operands); - Value inputReal = rewriter.create(loc, ifft1DAdaptor.getReal(), ValueRange{ivX}); - Value loadYReal = rewriter.create(loc, alloc_real, ValueRange{ivY}); - - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); - - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); - - //get 2*pi * k * i / N - Value muli_k = rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); + Value inputReal = rewriter.create( + loc, ifft1DAdaptor.getReal(), ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); + + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + 
float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Value N = inputTensorType.getShape()[0]; - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; + Value divIndxByN = rewriter.create(loc, mul2piKI, N); - // Real Cos part = x_real[i] * cos(div) + // Real Cos part = x_real[i] * cos(div) Value GetCos = rewriter.create(loc, divIndxByN); - Value xMulCos = rewriter.create(loc, inputReal , GetCos); + Value xMulCos = rewriter.create(loc, inputReal, GetCos); - // Real Sin part = x_complex[i] * sin(div) - Value inputImg = rewriter.create(loc, ifft1DAdaptor.getImg(), ValueRange{ivX}); + // Real Sin part = x_complex[i] * sin(div) + Value inputImg = rewriter.create(loc, ifft1DAdaptor.getImg(), + ValueRange{ivX}); Value GetSin = rewriter.create(loc, divIndxByN); - Value xMulSin = rewriter.create(loc, inputImg , GetSin); - - //Get real Ans = x_real[i] * cos(div) - x_complex[i] * sin(div) - //Then sum over real_Ans by loading YReal - Value realAns = rewriter.create(loc, xMulCos ,xMulSin) ; - Value realSum = rewriter.create(loc, loadYReal ,realAns) ; - rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); - - //x[n-1] - DEBUG_PRINT_NO_ARGS() ; + Value xMulSin = rewriter.create(loc, inputImg, GetSin); + + // Get real Ans = x_real[i] * cos(div) - x_complex[i] * sin(div) + // Then sum over real_Ans by loading YReal + Value realAns = rewriter.create(loc, xMulCos, xMulSin); + Value realSum = rewriter.create(loc, loadYReal, realAns); + rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); + + // x[n-1] + DEBUG_PRINT_NO_ARGS(); // Value xMinusPrevX = rewriter.create(loc, inputX ,PrevX ); rewriter.setInsertionPointAfter(forOpX); // Calculate y[k] = 1/N * y[k] - Value loadY = rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value loadY = + rewriter.create(loc, alloc_real, ValueRange{ivY}); // float LengthOfInput = (float) ub; - Value N1 = rewriter.create(loc, rewriter.getF64Type(), - 
rewriter.getF64FloatAttr(LengthOfInput)); - Value SumDivByN = rewriter.create(loc,loadY , N1 ); - rewriter.create(loc, SumDivByN, alloc_real, ValueRange{ivY}); - + Value N1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); + Value SumDivByN = rewriter.create(loc, loadY, N1); + rewriter.create(loc, SumDivByN, alloc_real, ValueRange{ivY}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc_real[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc_real[%y] : 
memref<4xf64> - // // dsp.print %alloc_real : memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc_real[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc_real[%y] : memref<4xf64> + // // dsp.print %alloc_real : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } rewriter.replaceOp(op, alloc_real); // rewriter.replaceOp(op, ValueRange{alloc_real,alloc_img}); - + return success(); } }; - - 
//===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FFT1D operations //===----------------------------------------------------------------------===// - struct FFT1DOpLowering : public ConversionPattern { FFT1DOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::FFT1DOp::getOperationName(), 1, ctx) {} @@ -4992,173 +5660,187 @@ struct FFT1DOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - // y[k] = y_real[k] + j *y_img[k] - // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] - // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 - //init output mem for y_real & y_img as 0 - //iterate for output from k=0 to last - //iterate for all x from n=0 to last - //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k] - // - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // y[k] = y_real[k] + j *y_img[k] + // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + // init output mem for y_real & y_img as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - //iterate to result1 --not needed for now but for future reference - // auto tensorType1 = llvm::cast(*std::next(op->result_type_begin(), 1)); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // auto tensorType1 = + // llvm::cast(*std::next(op->result_type_begin(), 1)); + + // DEBUG_PRINT_NO_ARGS() ; + // 
tensorType.getShape()[0] + // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] + // << " func= " << __func__ << "\n"; - // DEBUG_PRINT_NO_ARGS() ; - //tensorType.getShape()[0] - // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; - - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); // auto memRefType2 = convertTensorToMemRef(tensorType1); auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(0)); - + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //For loop -- iterate from 1 to last - int64_t lb = 0 ; + // For loop -- iterate from 1 to last + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); rewriter.create(loc, constant0, alloc_img, ValueRange{iv}); 
rewriter.setInsertionPointAfter(forOp1); - //loop for Y - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); auto ivY = forOpY.getInductionVar(); rewriter.setInsertionPointToStart(forOpY.getBody()); - //loop for X - affine::AffineForOp forOpX = rewriter.create(loc, lb, ub, step); + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); auto ivX = forOpX.getInductionVar(); rewriter.setInsertionPointToStart(forOpX.getBody()); - //load from X, & y1 & y2 + // load from X, & y1 & y2 FFT1DOpAdaptor fft1DAdaptor(operands); - Value inputX = rewriter.create(loc, fft1DAdaptor.getInput(), ValueRange{ivX}); - Value loadYReal = rewriter.create(loc, alloc_real, ValueRange{ivY}); - Value loadYImg = rewriter.create(loc, alloc_img, ValueRange{ivY}); - - //convert index to f64 - Value IndxY = rewriter.create(loc, rewriter.getIntegerType(32), ivY); - Value k = rewriter.create(loc, rewriter.getF64Type(), IndxY); - - Value IndxX = rewriter.create(loc, rewriter.getIntegerType(32), ivX); - Value i = rewriter.create(loc, rewriter.getF64Type(), IndxX); - - //get 2*pi * k * i / N - Value muli_k = rewriter.create(loc, k , i); - - Value const2pi = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(6.28318530718)); - Value mul2piKI = rewriter.create(loc, const2pi , muli_k); + Value inputX = rewriter.create(loc, fft1DAdaptor.getInput(), + ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value loadYImg = + rewriter.create(loc, alloc_img, ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); + + // get 2*pi * k * i / N + Value muli_k = 
rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); // getOperand().getType() - // auto inputTensorType = llvm::cast(op->getOperand(0).getType()); - float LengthOfInput = (float) ub; - Value N = rewriter.create(loc, rewriter.getF64Type(), - rewriter.getF64FloatAttr(LengthOfInput)); + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); // Value N = inputTensorType.getShape()[0]; - Value divIndxByN = rewriter.create(loc, mul2piKI, N ) ; + Value divIndxByN = rewriter.create(loc, mul2piKI, N); // Real part = Sum(x[i] * cos(div) ) Value GetCos = rewriter.create(loc, divIndxByN); - Value xMulCos = rewriter.create(loc, inputX , GetCos); - Value realSum = rewriter.create(loc, loadYReal ,xMulCos) ; - rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); - + Value xMulCos = rewriter.create(loc, inputX, GetCos); + Value realSum = rewriter.create(loc, loadYReal, xMulCos); + rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); + // Img part = -1 * Sum(x[i] * sin(div) ) Value GetSin = rewriter.create(loc, divIndxByN); - Value xMulSin = rewriter.create(loc, inputX , GetSin); - Value imgSum = rewriter.create(loc, loadYImg ,xMulSin) ; + Value xMulSin = rewriter.create(loc, inputX, GetSin); + Value imgSum = rewriter.create(loc, loadYImg, xMulSin); - // Value constMinus1 = rewriter.create(loc, rewriter.getF64Type(), + // Value constMinus1 = rewriter.create(loc, + // rewriter.getF64Type(), // rewriter.getF64FloatAttr(-1)); - // Value NegImgSum = rewriter.create(loc, constMinus1 , imgSum); - rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); - //x[n-1] - // DEBUG_PRINT_NO_ARGS() ; - // Value xMinusPrevX = rewriter.create(loc, inputX ,PrevX ); + // Value NegImgSum = 
rewriter.create(loc, constMinus1 , + // imgSum); + rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); + // x[n-1] + // DEBUG_PRINT_NO_ARGS() ; + // Value xMinusPrevX = rewriter.create(loc, inputX ,PrevX ); rewriter.setInsertionPointAfter(forOpX); // forOpX->dump(); // rewriter.create(loc, ValueRange{alloc_real, alloc_img}); rewriter.setInsertionPointAfter(forOpY); - //debug - // forOpX->dump(); - // forOpY->dump(); - // affine.for %y = 0 to 4 { - // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> - // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> - // } - - - // affine.for %y = 0 to 4 { - // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> - // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> - // affine.for %x = 0 to 4 { - // // CAcluations - // %1 = affine.load %alloc_3[%x] : memref<4xf64> - // %2 = affine.load %alloc_real[%y] : memref<4xf64> - // %3 = affine.load %alloc_img[%y] : memref<4xf64> - // // index cast for multiply - // %4 = arith.index_castui %y : index to i32 - // %k = arith.uitofp %4 : i32 to f64 - // %6 = arith.index_castui %x : index to i32 - // %i = arith.uitofp %6 : i32 to f64 - // // %8 = arith.index_castui %arg3 : index to i32 - // // %9 = arith.uitofp %8 : i32 to f64 - // // %10 = arith.index_castui %arg4 : index to i32 - // // %11 = arith.uitofp %10 : i32 to f64 - - // %mul_1 = arith.mulf %i, %k : f64 - // %mul = arith.mulf %mul_1, %cst_2pi : f64 - // // ixk / N - // %div = arith.divf %mul, %N : f64 - // // cos of the above - // %res_cos = math.cos %div : f64 - // // %16 = arith.addf %14, %15 : f64 - // // %res_sin = arith.mulf %16, %cst_0 : f64 - - // %res_sin = math.sin %div : f64 - // %real_prod = arith.mulf %1, %res_cos : f64 - // %img_prod_1 = arith.mulf %1, %res_sin : f64 - // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 - - // %real = arith.addf %2, %real_prod : f64 - // %img = arith.addf %3, %img_prod : f64 - // affine.store %real, %alloc_real[%y] : memref<4xf64> - // // dsp.print %alloc_real : 
memref<4xf64> - // affine.store %img, %alloc_img[%y] : memref<4xf64> - - // } - // } + // debug + // forOpX->dump(); + // forOpY->dump(); + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + + // affine.for %y = 0 to 4 { + // // %0 = affine.load %alloc_3[%arg0] : memref<4xf64> + // // affine.store %0, %alloc_real[%arg0] : memref<4xf64> + // affine.for %x = 0 to 4 { + // // CAcluations + // %1 = affine.load %alloc_3[%x] : memref<4xf64> + // %2 = affine.load %alloc_real[%y] : memref<4xf64> + // %3 = affine.load %alloc_img[%y] : memref<4xf64> + // // index cast for multiply + // %4 = arith.index_castui %y : index to i32 + // %k = arith.uitofp %4 : i32 to f64 + // %6 = arith.index_castui %x : index to i32 + // %i = arith.uitofp %6 : i32 to f64 + // // %8 = arith.index_castui %arg3 : index to i32 + // // %9 = arith.uitofp %8 : i32 to f64 + // // %10 = arith.index_castui %arg4 : index to i32 + // // %11 = arith.uitofp %10 : i32 to f64 + + // %mul_1 = arith.mulf %i, %k : f64 + // %mul = arith.mulf %mul_1, %cst_2pi : f64 + // // ixk / N + // %div = arith.divf %mul, %N : f64 + // // cos of the above + // %res_cos = math.cos %div : f64 + // // %16 = arith.addf %14, %15 : f64 + // // %res_sin = arith.mulf %16, %cst_0 : f64 + + // %res_sin = math.sin %div : f64 + // %real_prod = arith.mulf %1, %res_cos : f64 + // %img_prod_1 = arith.mulf %1, %res_sin : f64 + // %img_prod = arith.mulf %cst_5, %img_prod_1 : f64 + + // %real = arith.addf %2, %real_prod : f64 + // %img = arith.addf %3, %img_prod : f64 + // affine.store %real, %alloc_real[%y] : memref<4xf64> + // // dsp.print %alloc_real : memref<4xf64> + // affine.store %img, %alloc_img[%y] : memref<4xf64> + + // } + // } // rewriter.replaceOp(op, alloc_real); - rewriter.replaceOp(op, ValueRange{alloc_real,alloc_img}); - + rewriter.replaceOp(op, ValueRange{alloc_real, alloc_img}); + return success(); } }; @@ -5167,7 +5849,6 @@ 
struct FFT1DOpLowering : public ConversionPattern { // ToyToAffine RewritePatterns: HighPassFilter operations //===----------------------------------------------------------------------===// - struct HighPassFilterOpLowering : public ConversionPattern { HighPassFilterOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::HighPassFilterOp::getOperationName(), 1, ctx) {} @@ -5176,83 +5857,88 @@ struct HighPassFilterOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //init first value of output with first value of input: y[0] = x[0] - //iterate for output from 1st to last - //y[i] = x[i] - x[i -1 ] - // replace this upsampling op with the output_mem_allocation op - - DEBUG_PRINT_NO_ARGS() ; - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + + // Pseudo-code: + // init first value of output with first value of input: y[0] = x[0] + // iterate for output from 1st to last + // y[i] = x[i] - x[i -1 ] + // replace this upsampling op with the output_mem_allocation op + + DEBUG_PRINT_NO_ARGS(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); - //Init y for the first index ie, index0 + // Init y for the first index ie, index0 Value constantIndx0 = rewriter.create(loc, 0); HighPassFilterOpAdaptor 
highPassFilterAdaptor(operands); - Value GetInputX0 = rewriter.create(loc, highPassFilterAdaptor.getInput(), /* iv */ ValueRange{constantIndx0}); - rewriter.create(loc, GetInputX0, alloc, ValueRange{constantIndx0}); - - //For loop -- iterate from 1 to last - int64_t lb = 1 ; + Value GetInputX0 = + rewriter.create(loc, highPassFilterAdaptor.getInput(), + /* iv */ ValueRange{constantIndx0}); + rewriter.create(loc, GetInputX0, alloc, + ValueRange{constantIndx0}); + + // For loop -- iterate from 1 to last + int64_t lb = 1; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); - - - //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); AffineExpr ExprForPrevX = d0 - 1; AffineMap addMapForHighPassFilter = AffineMap::get(1, 0, ExprForPrevX); - //x[n-1] - DEBUG_PRINT_NO_ARGS() ; - Value PrevX = rewriter.create(loc, highPassFilterAdaptor.getInput(), addMapForHighPassFilter, - ValueRange{iv}); //memRefType + // x[n-1] + DEBUG_PRINT_NO_ARGS(); + Value PrevX = rewriter.create( + loc, highPassFilterAdaptor.getInput(), addMapForHighPassFilter, + ValueRange{iv}); // memRefType // PrevX.dump(); - Value inputX = rewriter.create(loc, highPassFilterAdaptor.getInput(), ValueRange{iv}); - - //get y[i] = x[i] - x[i -1 ] - Value xMinusPrevX = rewriter.create(loc, inputX ,PrevX ); + Value inputX = rewriter.create( + loc, highPassFilterAdaptor.getInput(), ValueRange{iv}); + + // get y[i] = x[i] - x[i -1 ] + Value xMinusPrevX = rewriter.create(loc, inputX, PrevX); // Value cosRes = rewriter.create(loc, xMinusPrevX); - rewriter.create(loc, xMinusPrevX, alloc, ValueRange{iv}); //PrevX //AddmulAlphaXAndPreYAlphaMinus1 + 
rewriter.create( + loc, xMinusPrevX, alloc, + ValueRange{iv}); // PrevX //AddmulAlphaXAndPreYAlphaMinus1 rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // init first value of output with first value of input: y[0] = x[0] - // iterate for output from 1st to last - // y[i] = x[i] - x[i -1 ] - // replace this upsampling op with the output_mem_allocation op - // %indx0 = arith.constantIndex 0 : index - // %0 = affine.load in[indx0 ] : f64 - // affine.store %0 ,out[indx0] - // affine.for %arg0 = 1 to len_y { - // #map1 = affine_map<(%arg0)[] : (%arg0 - 1) - // %1 = affine.load in[#map1] - // %load_in = affine.load in[%arg0] - // %2 = arith.subf %const1 , alpha - // affine.store %2, out[%arg0] - // } + // debug + // forOp1->dump(); + // init first value of output with first value of input: y[0] = x[0] + // iterate for output from 1st to last + // y[i] = x[i] - x[i -1 ] + // replace this upsampling op with the output_mem_allocation op + // %indx0 = arith.constantIndex 0 : index + // %0 = affine.load in[indx0 ] : f64 + // affine.store %0 ,out[indx0] + // affine.for %arg0 = 1 to len_y { + // #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // %1 = affine.load in[#map1] + // %load_in = affine.load in[%arg0] + // %2 = arith.subf %const1 , alpha + // affine.store %2, out[%arg0] + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -5261,127 +5947,134 @@ struct HighPassFilterOpLowering : public ConversionPattern { // ToyToAffine RewritePatterns: LowPassFilter operations //===----------------------------------------------------------------------===// - struct LowPassFilter1stOrderOpLowering : public ConversionPattern { LowPassFilter1stOrderOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::LowPassFilter1stOrderOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::LowPassFilter1stOrderOp::getOperationName(), 1, + ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const 
final { auto loc = op->getLoc(); - - //Pseudo-code: - //init first value of output with first value of input: y[0] = x[0] - //iterate for output from 1st to last - //y[i] = (1 - alpha) * y[i-1] + alpha * x[i] - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // init first value of output with first value of input: y[0] = x[0] + // iterate for output from 1st to last + // y[i] = (1 - alpha) * y[i-1] + alpha * x[i] + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); - //Init y for the first index ie, index0 + // Init y for the first index ie, index0 Value constantIndx0 = rewriter.create(loc, 0); LowPassFilter1stOrderOpAdaptor lowPassFilterAdaptor(operands); - Value GetInputX0 = rewriter.create(loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0}); - rewriter.create(loc, GetInputX0, alloc, ValueRange{constantIndx0}); + Value GetInputX0 = rewriter.create( + loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0}); + rewriter.create(loc, GetInputX0, alloc, + ValueRange{constantIndx0}); - //For loop -- iterate from 1 to last - int64_t lb = 1 ; + // For loop -- iterate from 1 to last + int64_t lb = 1; int64_t ub = 
tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); - - - //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); AffineExpr ExprForPrevY = d0 - 1; AffineMap addMapForLowPassFilter = AffineMap::get(1, 0, ExprForPrevY); - //y[n-1] - // DEBUG_PRINT_NO_ARGS() ; - // Value PrevY = rewriter.create(loc, lowPassFilterAdaptor.getLhs(), addMapForLowPassFilter, - // ValueRange{iv}); - // Value PrevY = rewriter.create(loc, (*op->result_type_begin()), addMapForLowPassFilter, - // ValueRange{iv}); //memRefType - Value PrevY = rewriter.create(loc, alloc, addMapForLowPassFilter, - ValueRange{iv}); //memRefType + // y[n-1] + // DEBUG_PRINT_NO_ARGS() ; + // Value PrevY = rewriter.create(loc, + // lowPassFilterAdaptor.getLhs(), addMapForLowPassFilter, + // ValueRange{iv}); + // Value PrevY = rewriter.create(loc, + // (*op->result_type_begin()), addMapForLowPassFilter, + // ValueRange{iv}); //memRefType + Value PrevY = rewriter.create( + loc, alloc, addMapForLowPassFilter, ValueRange{iv}); // memRefType // PrevY.dump(); - Value constant1 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + Value constant1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); // Value alpha = lowPassFilterAdaptor.getRhs(); //op->getOperand(1); - Value alpha = rewriter.create(loc, lowPassFilterAdaptor.getRhs(), /* iv */ ValueRange{}); - //get y[n] = (1- alpha ) * y[n-1] + alpha * x[n] - Value oneMinusAlpha = rewriter.create(loc, constant1 ,alpha ); - Value mulPrevYAlphaMinus1 = rewriter.create(loc, oneMinusAlpha ,PrevY); - - Value inputX = rewriter.create(loc, lowPassFilterAdaptor.getLhs(), 
ValueRange{iv}); - Value mulAlphaX = rewriter.create(loc, alpha ,inputX); - - Value AddmulAlphaXAndPreYAlphaMinus1 = rewriter.create(loc, mulPrevYAlphaMinus1 ,mulAlphaX); + Value alpha = rewriter.create( + loc, lowPassFilterAdaptor.getRhs(), /* iv */ ValueRange{}); + // get y[n] = (1- alpha ) * y[n-1] + alpha * x[n] + Value oneMinusAlpha = rewriter.create(loc, constant1, alpha); + Value mulPrevYAlphaMinus1 = + rewriter.create(loc, oneMinusAlpha, PrevY); + + Value inputX = rewriter.create( + loc, lowPassFilterAdaptor.getLhs(), ValueRange{iv}); + Value mulAlphaX = rewriter.create(loc, alpha, inputX); + + Value AddmulAlphaXAndPreYAlphaMinus1 = + rewriter.create(loc, mulPrevYAlphaMinus1, mulAlphaX); // DEBUG_PRINT_NO_ARGS() ; // AddmulAlphaXAndPreYAlphaMinus1.dump(); // forOp1->dump(); - rewriter.create(loc, AddmulAlphaXAndPreYAlphaMinus1, alloc, ValueRange{iv}); //PrevY //AddmulAlphaXAndPreYAlphaMinus1 + rewriter.create( + loc, AddmulAlphaXAndPreYAlphaMinus1, alloc, + ValueRange{iv}); // PrevY //AddmulAlphaXAndPreYAlphaMinus1 rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // init first value of output with first value of input: y[0] = x[0] - // iterate for output from 1st to last - // y[i] = (1 - alpha) * y[i-1] + alpha * x[i] - // replace this upsampling op with the output_mem_allocation op - // %indx0 = arith.constantIndex 0 : index - // %0 = affine.load in[indx0 ] : f64 - // affine.store %0 ,out[indx0] - // affine.for %arg0 = 1 to len_y { - // #map1 = affine_map<(%arg0)[] : (%arg0 - 1) - // %1 = affine.load out[#map1] - // %2 = arith.subf %const1 , alpha - // %3 = arith.mulf %2 , %1 - - // %load_in = affine.load in[%arg0] - // %4 = arith.mulf alpha, %load_in - // %5 = arith.addf %4, %3 - // affine.store %5, out[%arg0] - // } - // %2ndOperand = arith.const 3 : f64 - // affine.for %arg0 = 0 to input_len { - // %elem1 = affine.load input[%arg0] <-- affine apply - // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) - // - // 
affine.store %elem1, out[#map1] - // } + // debug + // forOp1->dump(); + // init first value of output with first value of input: y[0] = x[0] + // iterate for output from 1st to last + // y[i] = (1 - alpha) * y[i-1] + alpha * x[i] + // replace this upsampling op with the output_mem_allocation op + // %indx0 = arith.constantIndex 0 : index + // %0 = affine.load in[indx0 ] : f64 + // affine.store %0 ,out[indx0] + // affine.for %arg0 = 1 to len_y { + // #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + // %1 = affine.load out[#map1] + // %2 = arith.subf %const1 , alpha + // %3 = arith.mulf %2 , %1 + + // %load_in = affine.load in[%arg0] + // %4 = arith.mulf alpha, %load_in + // %5 = arith.addf %4, %3 + // affine.store %5, out[%arg0] + // } + // %2ndOperand = arith.const 3 : f64 + // affine.for %arg0 = 0 to input_len { + // %elem1 = affine.load input[%arg0] <-- affine apply + // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) + // + // affine.store %elem1, out[#map1] + // } rewriter.replaceOp(op, alloc); - + return success(); } }; - //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: Upsampling operations //===----------------------------------------------------------------------===// - struct UpSamplingOpLowering : public ConversionPattern { UpSamplingOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::UpsamplingOp::getOperationName(), 1, ctx) {} @@ -5390,111 +6083,124 @@ struct UpSamplingOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //init all out values with 0 using affine loop - //Update certain y_values with corresponding x - //iterate for input : i = 0 to len - //get the corresponding output mapping index = M * i - // store in y at that index - // replace this upsampling op with the output_mem_allocation op + + // Pseudo-code: + // init all out values 
with 0 using affine loop + // Update certain y_values with corresponding x + // iterate for input : i = 0 to len + // get the corresponding output mapping index = M * i + // store in y at that index + // replace this upsampling op with the output_mem_allocation op // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); - //For loop - int64_t lb = 0 ; + // For loop + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - //init all the output mem location with 0 - affine::AffineForOp forOpSetOut0Loop = rewriter.create(loc, lb, ub, step); + // init all the output mem location with 0 + affine::AffineForOp forOpSetOut0Loop = + rewriter.create(loc, lb, ub, step); auto ivforOpSetOut0Loop = forOpSetOut0Loop.getInductionVar(); - rewriter.setInsertionPointToStart(forOpSetOut0Loop.getBody()); - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); - //store the result + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + // store the result rewriter.create(loc, constant0, alloc, ivforOpSetOut0Loop); rewriter.setInsertionPointAfter(forOpSetOut0Loop); Value upsampling2ndArg = op->getOperand(1); UpsamplingOpAdaptor upsamplingAdaptor(operands); - auto inputType = 
llvm::dyn_cast(op->getOperand(0).getType()); - int64_t ub2 = inputType.getShape()[0]; // tensorType.getShape()[0]; - //create another for loop for updating corresponding y with x - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub2, step); + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + int64_t ub2 = inputType.getShape()[0]; // tensorType.getShape()[0]; + // create another for loop for updating corresponding y with x + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub2, step); auto iv = forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); - //Load input elem - - Value elemIn = rewriter.create(loc, upsamplingAdaptor.getLhs(), iv); + // Load input elem - // Value elemIn = rewriter.create(loc, upsamplingAdaptor.getLhs(), addMapForUpSampling, + Value elemIn = + rewriter.create(loc, upsamplingAdaptor.getLhs(), iv); + + // Value elemIn = rewriter.create(loc, + // upsamplingAdaptor.getLhs(), addMapForUpSampling, // ValueRange{iv,constantSamplingRateIndx}); - - - //For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) + // For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * + // 2ndOperand) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); bindSymbols(rewriter.getContext(), s0); - // AffineExpr ExprForUpSampling = rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0); + // AffineExpr ExprForUpSampling = rewriter.getAffineDimExpr(0) * + // rewriter.getAffineSymbolExpr(0); AffineExpr ExprForUpSampling = d0 * s0; - // Value constant3 = rewriter.create(loc, rewriter.getI64Type(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3)); - Value constant3 = rewriter.create(loc, 3); //working + // Value constant3 = rewriter.create(loc, + // rewriter.getI64Type(), + // rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3)); + Value constant3 = + rewriter.create(loc, 3); // working constant3.dump(); int64_t SecondValueInt = 1; - - dsp::ConstantOp 
constantOp2ndArg = upsampling2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + + dsp::ConstantOp constantOp2ndArg = + upsampling2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - SecondValueInt = (int64_t) SecondValue; + SecondValueInt = (int64_t)SecondValue; - // Value downSamplingRateAsIndex = rewriter.create(loc, rewriter.getIndexType(),UpsamplingRate); - Value constantSamplingRateIndx = rewriter.create(loc, SecondValueInt); + // Value downSamplingRateAsIndex = rewriter.create(loc, + // rewriter.getIndexType(),UpsamplingRate); + Value constantSamplingRateIndx = + rewriter.create(loc, SecondValueInt); constantSamplingRateIndx.dump(); - + AffineMap addMapForUpSampling = AffineMap::get(1, 1, ExprForUpSampling); // DEBUG_PRINT_NO_ARGS() ; - // Value elem2 = rewriter.create(loc, upsamplingAdaptor.getLhs(), addMapForUpSampling, + // Value elem2 = rewriter.create(loc, + // upsamplingAdaptor.getLhs(), addMapForUpSampling, // ValueRange{iv,constantSamplingRateIndx}); // elem2.dump(); - //store the result - rewriter.create(loc, elemIn, alloc, addMapForUpSampling, ValueRange{iv,constantSamplingRateIndx}); + // store the result + rewriter.create(loc, elemIn, alloc, addMapForUpSampling, + ValueRange{iv, constantSamplingRateIndx}); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // %0 = arith.const 0 : f64 - // affine.for %arg0 = 0 to out_y { - // affine.store %0, out[%arg0] - // } - // %2ndOperand = arith.const 3 : f64 - // affine.for %arg0 = 0 to input_len { - // %elem1 = affine.load input[%arg0] <-- affine apply - // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) - // - // affine.store %elem1, out[#map1] - // } + // debug + // forOp1->dump(); + // %0 = arith.const 0 : f64 + // affine.for %arg0 = 0 to out_y { + // affine.store %0, out[%arg0] + // 
} + // %2ndOperand = arith.const 3 : f64 + // affine.for %arg0 = 0 to input_len { + // %elem1 = affine.load input[%arg0] <-- affine apply + // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) + // + // affine.store %elem1, out[#map1] + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -5503,7 +6209,6 @@ struct UpSamplingOpLowering : public ConversionPattern { // ToyToAffine RewritePatterns: Downsampling operations //===----------------------------------------------------------------------===// - struct DownSamplingOpLowering : public ConversionPattern { DownSamplingOpLowering(MLIRContext *ctx) : ConversionPattern(dsp::DownsamplingOp::getOperationName(), 1, ctx) {} @@ -5512,83 +6217,162 @@ struct DownSamplingOpLowering : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //iterate for output len : i = 0 to len - //get the input elem using input mapping index = M* i - // store in y - // replace this op with the output_mem + + // Pseudo-code: + // iterate for output len : i = 0 to len + // get the input elem using input mapping index = M* i + // store in y + // replace this op with the output_mem // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector 
steps(tensorType.getRank(), /*Value=*/1); - //For loop - int64_t lb = 0 ; + // For loop + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); - rewriter.setInsertionPointToStart(forOp1.getBody()); DownsamplingOpAdaptor downsamplingAdaptor(operands); - - //For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) + + // For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * + // 2ndOperand) AffineExpr d0, s0; bindDims(rewriter.getContext(), d0); bindSymbols(rewriter.getContext(), s0); - // AffineExpr ExprForDownSampling = rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0); + // AffineExpr ExprForDownSampling = rewriter.getAffineDimExpr(0) * + // rewriter.getAffineSymbolExpr(0); AffineExpr ExprForDownSampling = d0 * s0; - // Value constant3 = rewriter.create(loc, rewriter.getI64Type(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3)); - Value constant3 = rewriter.create(loc, 3); //working + // Value constant3 = rewriter.create(loc, + // rewriter.getI64Type(), + // rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3)); + Value constant3 = + rewriter.create(loc, 3); // working constant3.dump(); int64_t SecondValueInt = 1; Value downsampling2ndArg = op->getOperand(1); - dsp::ConstantOp constantOp2ndArg = downsampling2ndArg.getDefiningOp(); - DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; + dsp::ConstantOp constantOp2ndArg = + downsampling2ndArg.getDefiningOp(); + DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue(); + ; auto elements = constantRhsValue.getValues(); float SecondValue = elements[0].getValueAsDouble(); - SecondValueInt = (int64_t) SecondValue; + SecondValueInt = (int64_t)SecondValue; - // Value downSamplingRateAsIndex = rewriter.create(loc, 
rewriter.getIndexType(),DownsamplingRate); - Value constantSamplingRateIndx = rewriter.create(loc, SecondValueInt); + // Value downSamplingRateAsIndex = rewriter.create(loc, + // rewriter.getIndexType(),DownsamplingRate); + Value constantSamplingRateIndx = + rewriter.create(loc, SecondValueInt); constantSamplingRateIndx.dump(); - + AffineMap addMapForDownSampling = AffineMap::get(1, 1, ExprForDownSampling); - // AffineMap addMapForDownSampling = AffineMap::get(1, 1, ValueRange{d0,s0 }); - // AffineMap addMapForDownSampling = AffineMap::get(1, 1, ExprForDownSampling, rewriter.getContext()); - // AffineMap addMapForDownSampling = AffineMap::get(1, 0, { d0}); //Working + // AffineMap addMapForDownSampling = AffineMap::get(1, 1, ValueRange{d0,s0 + // }); AffineMap addMapForDownSampling = AffineMap::get(1, 1, + // ExprForDownSampling, rewriter.getContext()); AffineMap + // addMapForDownSampling = AffineMap::get(1, 0, { d0}); //Working // DEBUG_PRINT_NO_ARGS() ; - Value elem2 = rewriter.create(loc, downsamplingAdaptor.getLhs(), addMapForDownSampling, - ValueRange{iv,constantSamplingRateIndx}); + Value elem2 = rewriter.create( + loc, downsamplingAdaptor.getLhs(), addMapForDownSampling, + ValueRange{iv, constantSamplingRateIndx}); elem2.dump(); - //store the result + // store the result rewriter.create(loc, elem2, alloc, iv); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // %2ndOperand = arith.const 3 : f64 - // affine.for %arg0 = 0 to 10 { - // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) - // %elem1 = affine.load input[#map1] <-- affine apply - // affine.store %elem1, out[%arg0] - // } + // debug + // forOp1->dump(); + // %2ndOperand = arith.const 3 : f64 + // affine.for %arg0 = 0 to 10 { + // #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand) + // %elem1 = affine.load input[#map1] <-- affine apply + // affine.store %elem1, out[%arg0] + // } + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + 
+//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: MedianFilterOp operations +//===----------------------------------------------------------------------===// + +struct MedianFilterOpLowering : public ConversionPattern { + MedianFilterOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::MedianFilterOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + SmallVector lowerBounds(tensorType.getRank(), 0); + SmallVector steps(tensorType.getRank(), 1); + + // For loop + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + auto iv = forOp1.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + MedianFilterOpAdaptor medianFilterOpAdaptor(operands); + + Value elem1 = rewriter.create( + loc, medianFilterOpAdaptor.getInput(), iv); + AffineExpr ExprForElem2 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + AffineExpr ExprForElem3 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2); + AffineMap addMapForElem2 = AffineMap::get(1, 0, ExprForElem2); + AffineMap addMapForElem3 = AffineMap::get(1, 0, ExprForElem3); + Value elem2 = rewriter.create( + loc, medianFilterOpAdaptor.getInput(), addMapForElem2, ValueRange{iv}); + Value elem3 = rewriter.create( + loc, medianFilterOpAdaptor.getInput(), addMapForElem3, ValueRange{iv}); + + // sum + Value sum1 = rewriter.create(loc, elem1, elem2); + Value sum = rewriter.create(loc, sum1, elem3); + + // min + Value minElem1Elem2 = rewriter.create(loc, elem1, elem2); + Value min = rewriter.create(loc, minElem1Elem2, 
elem3); + + // max + Value maxElem1Elem2 = rewriter.create(loc, elem1, elem2); + Value max = rewriter.create(loc, maxElem1Elem2, elem3); + + // median + Value min_plus_max = rewriter.create(loc, min, max); + Value median = rewriter.create(loc, sum, min_plus_max); + + // store in alloc + rewriter.create(loc, median, alloc, iv); + rewriter.setInsertionPointAfter(forOp1); rewriter.replaceOp(op, alloc); - return success(); } }; @@ -5599,84 +6383,90 @@ struct DownSamplingOpLowering : public ConversionPattern { struct SlidingWindowAvgOpLowering : public ConversionPattern { SlidingWindowAvgOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::SlidingWindowAvgOp::getOperationName(), 1, ctx) {} + : ConversionPattern(dsp::SlidingWindowAvgOp::getOperationName(), 1, ctx) { + } LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { auto loc = op->getLoc(); - - //Pseudo-code: - //iterate for len = len - 2 - //get 3 elements - //get the sum - //get the avg = sum / 3 - // store the result to output_mem - // replace this op with the output_mem + + // Pseudo-code: + // iterate for len = len - 2 + // get 3 elements + // get the sum + // get the avg = sum / 3 + // store the result to output_mem + // replace this op with the output_mem // DEBUG_PRINT_NO_ARGS() ; - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); SmallVector steps(tensorType.getRank(), 
/*Value=*/1); - Value constant3 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3)); - //For loop - int64_t lb = 0 ; + Value constant3 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3)); + // For loop + int64_t lb = 0; int64_t ub = tensorType.getShape()[0]; int64_t step = 1; - affine::AffineForOp forOp1 = rewriter.create(loc, lb, ub, step); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); SlidingWindowAvgOpAdaptor slidingWinAvgAdaptor(operands); - - Value elem1 = rewriter.create(loc, slidingWinAvgAdaptor.getInput(), iv); - //affine-maps for elem2 and elem3 - AffineExpr ExprForElem2 = rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); - AffineExpr ExprForElem3 = rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2); + Value elem1 = + rewriter.create(loc, slidingWinAvgAdaptor.getInput(), iv); + + // affine-maps for elem2 and elem3 + AffineExpr ExprForElem2 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + AffineExpr ExprForElem3 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2); AffineMap addMapForElem2 = AffineMap::get(1, 0, ExprForElem2); AffineMap addMapForElem3 = AffineMap::get(1, 0, ExprForElem3); - Value elem2 = rewriter.create(loc, slidingWinAvgAdaptor.getInput(), addMapForElem2, - ValueRange{iv}); - Value elem3 = rewriter.create(loc, slidingWinAvgAdaptor.getInput(), addMapForElem3, - ValueRange{iv}); + Value elem2 = rewriter.create( + loc, slidingWinAvgAdaptor.getInput(), addMapForElem2, ValueRange{iv}); + Value elem3 = rewriter.create( + loc, slidingWinAvgAdaptor.getInput(), addMapForElem3, ValueRange{iv}); - Value sum1 = rewriter.create(loc, elem1 , elem2); - Value sum2 = rewriter.create(loc, sum1 , elem3); + Value sum1 = rewriter.create(loc, elem1, elem2); + Value sum2 = rewriter.create(loc, sum1, elem3); Value avg = 
rewriter.create(loc, sum2, constant3); - //store the result + // store the result rewriter.create(loc, avg, alloc, iv); rewriter.setInsertionPointAfter(forOp1); - //debug - // forOp1->dump(); - // %cont3 = arith.const 3 : f64 - // affine.for %arg0 = 0 to 8 { - // %elem1 = affine.load input[%arg0] - // #map1 = affine_map<(%arg0)[] : (%arg0 + 1) - // #map2 = affine_map<(%arg0)[] : (%arg0 + 2) - // %elem2 = affine.load input[#map1] <-- affine apply - // %elem3 = affine.load input[#map2] - - // %sum1 = arith.addf %elem1 , %elem2 - // %sum2 = arith.addf %sum1, %elem3 - // %res = arith.divf %sum2 , - // affine.store %sum2, out[%arg0] - // } + // debug + // forOp1->dump(); + // %cont3 = arith.const 3 : f64 + // affine.for %arg0 = 0 to 8 { + // %elem1 = affine.load input[%arg0] + // #map1 = affine_map<(%arg0)[] : (%arg0 + 1) + // #map2 = affine_map<(%arg0)[] : (%arg0 + 2) + // %elem2 = affine.load input[#map1] <-- affine apply + // %elem3 = affine.load input[#map2] + + // %sum1 = arith.addf %elem1 , %elem2 + // %sum2 = arith.addf %sum1, %elem3 + // %res = arith.divf %sum2 , + // affine.store %sum2, out[%arg0] + // } rewriter.replaceOp(op, alloc); - + return success(); } }; @@ -5684,146 +6474,157 @@ struct SlidingWindowAvgOpLowering : public ConversionPattern { //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: FIRFilterResponse operations //===----------------------------------------------------------------------===// -struct FIRFilterResponseOpLowering: public ConversionPattern { - FIRFilterResponseOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::FIRFilterResponseOp::getOperationName(), 1 , ctx) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.FIRFilterResponseOp has 2 operands -- both of type tensor f64 - - //Get the location of FIRFilterResponseOp - auto loc = op->getLoc(); - - //Pseudo-Code - // y[n] = sum( h[k] * 
x[n-k]) k = 0 to lenOfh - - //Range for each element of the output tensor -- i = %arg0 - // Create a tempValue = 0 - // Range for each of the elements of filter len -- k = %arg1 - // check for the condition that %arg0 - %arg1 >= 0 && < inputLen - // get elem1 = filter[k] , elem2 = x[i-k] - // use affine-map expression for calculating i-k - // tempValue = tempValue + elem1 * elem2 - // y[i] = tempValue - - lowerOpToLoopsFIR(op, operands, rewriter, - [loc, op ] (OpBuilder &builder, ValueRange memRefOperands, - ValueRange loopIvs) { - // ValueRange loopIvs) { - - // Generate an adaptor for the remapped operands of the - // BinaryOp. This allows for using the nice named accessors - // that are generated by the ODS. - dsp::FIRFilterResponseOpAdaptor firFilterAdaptor(memRefOperands); +struct FIRFilterResponseOpLowering : public ConversionPattern { + FIRFilterResponseOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FIRFilterResponseOp::getOperationName(), 1, + ctx) {} - // Generate loads for the element of 'lhs' and 'rhs' at the - // inner loop. 
- // auto lhsTensor = delayAdaptor.getLhs(); - auto lhsTensor = builder.create( - loc, firFilterAdaptor.getLhs(), loopIvs); + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.FIRFilterResponseOp has 2 operands -- both of type tensor f64 + + // Get the location of FIRFilterResponseOp + auto loc = op->getLoc(); - // auto rhsScalar = op->getOperand(1); - auto rhsScalar = builder.create( - loc, firFilterAdaptor.getRhs(), loopIvs); + // Pseudo-Code + // y[n] = sum( h[k] * x[n-k]) k = 0 to lenOfh + + // Range for each element of the output tensor -- i = %arg0 + // Create a tempValue = 0 + // Range for each of the elements of filter len -- k = %arg1 + // check for the condition that %arg0 - %arg1 >= 0 && < inputLen + // get elem1 = filter[k] , elem2 = x[i-k] + // use affine-map expression for calculating i-k + // tempValue = tempValue + elem1 * elem2 + // y[i] = tempValue + + lowerOpToLoopsFIR( + op, operands, rewriter, + [loc, op](OpBuilder &builder, ValueRange memRefOperands, + ValueRange loopIvs) { + // ValueRange loopIvs) { - auto resultMulOp = builder.create(loc, lhsTensor, - rhsScalar); + // Generate an adaptor for the remapped operands of the + // BinaryOp. This allows for using the nice named accessors + // that are generated by the ODS. + dsp::FIRFilterResponseOpAdaptor firFilterAdaptor(memRefOperands); - return resultMulOp; + // Generate loads for the element of 'lhs' and 'rhs' at the + // inner loop. 
+ // auto lhsTensor = delayAdaptor.getLhs(); + auto lhsTensor = builder.create( + loc, firFilterAdaptor.getLhs(), loopIvs); - }); + // auto rhsScalar = op->getOperand(1); + auto rhsScalar = builder.create( + loc, firFilterAdaptor.getRhs(), loopIvs); - return success(); - } + auto resultMulOp = + builder.create(loc, lhsTensor, rhsScalar); + return resultMulOp; + }); + return success(); + } }; //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: Delay operations //===----------------------------------------------------------------------===// -struct DelayOpLowering: public ConversionPattern { - DelayOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::DelayOp::getOperationName(), 1 , ctx) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.DelayOp has 2 operands -- both of type tensor f64 - - //Get the location of delayop - auto loc = op->getLoc(); - - //Pseudo-code - //2 affine loops -- - //first from 0 to delay_2ndArg - // here, inside AffineNest - // create affine:load from the arith.const operation with value 0 - // use affine:store to store at result_op at indx - // - //2nd from delay_2ndArg to lengthOfOperand0 of delayOp - // here, inside AffineNest - // create affine:load from input memref & indx = indx - delay_2ndArg - // create affine:store at result_op indx - - //output for result type - auto tensorType = llvm::cast((*op->result_type_begin())); - - //allocation & deallocation for the result of this operation +struct DelayOpLowering : public ConversionPattern { + DelayOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::DelayOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.DelayOp has 2 operands -- both of type tensor f64 + + // Get the location of delayop + auto loc = op->getLoc(); + + // 
Pseudo-code + // 2 affine loops -- + // first from 0 to delay_2ndArg + // here, inside AffineNest + // create affine:load from the arith.const operation with value 0 + // use affine:store to store at result_op at indx + // + // 2nd from delay_2ndArg to lengthOfOperand0 of delayOp + // here, inside AffineNest + // create affine:load from input memref & indx = indx - + // delay_2ndArg create affine:store at result_op indx + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); SmallVector steps(tensorType.getRank(), /*Value=*/1); - //For loop + // For loop int64_t ub = tensorType.getShape()[0]; - //Get 2nd Arg + // Get 2nd Arg DelayOpAdaptor delayOpAdaptor(operands); - Value constant0 = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); DEBUG_PRINT_NO_ARGS(); // Creating SSA values for the lower bound and upper bound - Value lowerBound = rewriter.create(loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), 0)); + Value lowerBound = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), 0)); // Cast the f64 value directly to the index type - Value inputUnit = rewriter.create(loc, delayOpAdaptor.getRhs(), ValueRange{} ); - Value i64UpperBound = rewriter.create(loc, rewriter.getI64Type(), inputUnit); + Value inputUnit = rewriter.create( + loc, delayOpAdaptor.getRhs(), ValueRange{}); + Value i64UpperBound = + rewriter.create(loc, rewriter.getI64Type(), inputUnit); // 
Cast the i64 value to index type - Value delay2ndArg = rewriter.create(loc, rewriter.getIndexType(), i64UpperBound); - // Value inputLen = rewriter.create(loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), ub)); + Value delay2ndArg = rewriter.create( + loc, rewriter.getIndexType(), i64UpperBound); + // Value inputLen = rewriter.create(loc, + // rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), + // ub)); DEBUG_PRINT_WITH_ARGS("print delay2ndArg.dump() for debugging"); - + DEBUG_PRINT_NO_ARGS(); // Create an empty affine map list // SmallVector lbMaps, ubMaps; // Create identity affine maps for bounds - // AffineMap lbMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0, rewriter.getContext()); - // AffineMap ubMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0, rewriter.getContext()); + // AffineMap lbMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0, + // rewriter.getContext()); AffineMap ubMap = AffineMap::get(/*dimCount=*/0, + // /*symbolCount=*/0, rewriter.getContext()); // Create an AffineForOp with SSA values for the bounds Value step1 = rewriter.create(loc, 1); - scf::ForOp forOp1 = rewriter.create(loc, lowerBound, delay2ndArg, step1); - //Affine loop with non-int loop indices - // affine::AffineForOp forOp1 = rewriter.create(loc, lowerBound, lbMap, inputLen, ubMap, 1); + scf::ForOp forOp1 = + rewriter.create(loc, lowerBound, delay2ndArg, step1); + // Affine loop with non-int loop indices + // affine::AffineForOp forOp1 = rewriter.create(loc, + // lowerBound, lbMap, inputLen, ubMap, 1); DEBUG_PRINT_NO_ARGS(); - + auto iv = forOp1.getInductionVar(); rewriter.setInsertionPointToStart(forOp1.getBody()); - //store the result - // rewriter.create(loc, constant0, alloc, iv); + // store the result + // rewriter.create(loc, constant0, alloc, iv); rewriter.create(loc, constant0, alloc, iv); rewriter.setInsertionPointAfter(forOp1); // Create the constants for lb2, step1, and calculate ub2 Value lb2 = 
rewriter.create(loc, 0); - Value lenOfInput = rewriter.create(loc, /*length of input*/ub); // Replace with the actual length + Value lenOfInput = rewriter.create( + loc, /*length of input*/ ub); // Replace with the actual length Value ub2 = rewriter.create(loc, lenOfInput, delay2ndArg); Value step2 = rewriter.create(loc, 1); @@ -5835,7 +6636,8 @@ struct DelayOpLowering: public ConversionPattern { rewriter.setInsertionPointToStart(forOp2.getBody()); // Load value from allocIP[iv2] - Value loadedVal = rewriter.create(loc, delayOpAdaptor.getLhs(), iv2); + Value loadedVal = + rewriter.create(loc, delayOpAdaptor.getLhs(), iv2); // Calculate the index iv2 + delaySecondArg Value newIndex = rewriter.create(loc, iv2, delay2ndArg); @@ -5844,135 +6646,340 @@ struct DelayOpLowering: public ConversionPattern { rewriter.create(loc, loadedVal, alloc, newIndex); rewriter.setInsertionPointAfter(forOp2); DEBUG_PRINT_NO_ARGS(); - //For 2nd loop -- - //loop from 0 to lenOfInput - 2ndArg - // load from index - // store at index + 2ndArg + // For 2nd loop -- + // loop from 0 to lenOfInput - 2ndArg + // load from index + // store at index + 2ndArg // forOp1.dump(); - //Expected MLIR-Affine - // %0 = affine.load %alloc_0[] : memref - // %1 = arith.fptosi %0 : f64 to i64 - // %2 = arith.index_cast %1 : i64 to index - // %c1_15 = arith.constant 1 : index - // scf.for %arg0 = %c0_14 to %2 step %c1_15 { - // memref.store %cst_13, %alloc[%arg0] : memref<10xf64> - // } - // %c0_16 = arith.constant 0 : index - // %c10 = arith.constant 10 : index - // %3 = arith.subi %c10, %2 : index - // %c1_17 = arith.constant 1 : index - // scf.for %arg0 = %c0_16 to %3 step %c1_17 { - // %4 = memref.load %alloc_1[%arg0] : memref<10xf64> - // %5 = arith.addi %arg0, %2 : index - // memref.store %4, %alloc[%5] : memref<10xf64> - // } - - + // Expected MLIR-Affine + // %0 = affine.load %alloc_0[] : memref + // %1 = arith.fptosi %0 : f64 to i64 + // %2 = arith.index_cast %1 : i64 to index + // %c1_15 = 
arith.constant 1 : index + // scf.for %arg0 = %c0_14 to %2 step %c1_15 { + // memref.store %cst_13, %alloc[%arg0] : memref<10xf64> + // } + // %c0_16 = arith.constant 0 : index + // %c10 = arith.constant 10 : index + // %3 = arith.subi %c10, %2 : index + // %c1_17 = arith.constant 1 : index + // scf.for %arg0 = %c0_16 to %3 step %c1_17 { + // %4 = memref.load %alloc_1[%arg0] : memref<10xf64> + // %5 = arith.addi %arg0, %2 : index + // memref.store %4, %alloc[%5] : memref<10xf64> + // } rewriter.replaceOp(op, alloc); DEBUG_PRINT_NO_ARGS(); return success(); - } - - + } }; //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: Gain operations //===----------------------------------------------------------------------===// -struct GainOpLowering: public ConversionPattern { - GainOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::GainOp::getOperationName(), 1 , ctx) {} +struct GainOpLowering : public ConversionPattern { + GainOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::GainOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.GainOp has 2 operands -- both of type tensor f64 , 2ndOperand should + // have only 1 element - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - //dsp.GainOp has 2 operands -- both of type tensor f64 , 2ndOperand should have only 1 element + // Get the location of GainOp + auto loc = op->getLoc(); - //Get the location of GainOp - auto loc = op->getLoc(); - - - //Pseudo-code: - // y[i] = y[i] * gain for 0<=i((*op->result_type_begin())); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); - //allocation & deallocation for the result of this operation + // allocation & deallocation for the result of this operation auto memRefType = 
convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - //construct affine loops for the input - SmallVector lowerBounds(tensorType.getRank(), /*Value*/0); - SmallVector steps(tensorType.getRank(), /*Value=*/1); + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); GainOpAdaptor gainOpOpAdaptor(operands); // Value GetValueAtIndx2ndArg = op->getOperand(1); - // dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp(); - // DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();; - // auto elements = constantRhsValue.getValues(); - // float gain = elements[0].getValueAsDouble(); + // dsp::ConstantOp constantOp2ndArg = + // GetValueAtIndx2ndArg.getDefiningOp(); DenseElementsAttr + // constantRhsValue = constantOp2ndArg.getValue();; auto elements = + // constantRhsValue.getValues(); float gain = + // elements[0].getValueAsDouble(); // Value gain = gainOpOpAdaptor.getRhs(); - + DEBUG_PRINT_NO_ARGS(); - //first from 1 <= i < N - int64_t lb = 0 ; - int64_t ub = tensorType.getShape()[0]; + // first from 1 <= i < N + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; int64_t step = 1; DEBUG_PRINT_NO_ARGS(); - - //loop from 0 <= i < N + // loop from 0 <= i < N - affine::AffineForOp forOpY = rewriter.create(loc, lb, ub, step); - auto ivY = forOpY.getInductionVar(); + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); rewriter.setInsertionPointToStart(forOpY.getBody()); + auto ivY = forOpY.getInductionVar(); - Value getLhs = rewriter.create(loc, gainOpOpAdaptor.getLhs() , ivY); - Value getRhs = rewriter.create(loc, gainOpOpAdaptor.getRhs() , ValueRange{}); - Value mulProd = rewriter.create(loc, getLhs, getRhs ); - rewriter.create(loc, mulProd, alloc, ValueRange{ivY}); + Value getLhs = + rewriter.create(loc, gainOpOpAdaptor.getLhs(), ValueRange{ivY}); + Value getRhs = 
rewriter.create(loc, gainOpOpAdaptor.getRhs(), + ValueRange{}); + Value mulProd = rewriter.create(loc, getLhs, getRhs); + rewriter.create(loc, mulProd, alloc, ValueRange{ivY}); DEBUG_PRINT_NO_ARGS(); rewriter.setInsertionPointAfter(forOpY); + // debug + // forOpX->dump(); + // forOpY->dump(); + + // %cst = arith.constant 6.2831853071800001 : f64 + // %cst_0 = arith.constant 4.600000e-01 : f64 + // %cst_1 = arith.constant 5.400000e-01 : f64 + // %cst_2 = arith.constant 4.000000e+00 : f64 + // %alloc = memref.alloc() : memref<4xf64> + // %alloc_3 = memref.alloc() : memref + // affine.store %cst_2, %alloc_3[] : memref + // affine.for %arg0 = 0 to 4 { + // %0 = arith.index_castui %arg0 : index to i32 + // %1 = arith.uitofp %0 : i32 to f64 + // %2 = arith.mulf %1, %cst : f64 + // %3 = arith.divf %2, %cst_2 : f64 + // %4 = math.cos %3 : f64 + // %5 = arith.mulf %4, %cst_0 : f64 + // %6 = arith.subf %cst_1, %5 : f64 + // affine.store %6, %alloc[%arg0] : memref<4xf64> + // } - //debug - // forOpX->dump(); - // forOpY->dump(); + // } + // } + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: BitwiseAndOp operations +//===----------------------------------------------------------------------===// + +struct BitwiseAndOpLowering : public ConversionPattern { + BitwiseAndOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::BitwiseAndOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.bitwiseandop has 2 operands -- both of type tensor f64 , of the same + // size + + // Get the location of BitwiseAndOp + auto loc = op->getLoc(); + + // Pseudo-code: + // y[i] = bitwiseand(lhs[i], rhs[i]) for 0<=i((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = 
convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + BitwiseAndOpAdaptor bitwiseandOpAdaptor(operands); + + DEBUG_PRINT_NO_ARGS(); + + // first from 0 <= i < N + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + DEBUG_PRINT_NO_ARGS(); + + // loop from 0 <= i < N + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); + auto ivY = forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); + + Value getLhs = + rewriter.create(loc, bitwiseandOpAdaptor.getLhs(), ivY); + Value getRhs = + rewriter.create(loc, bitwiseandOpAdaptor.getRhs(), ivY); + Value lhsInt = + rewriter.create(loc, rewriter.getI64Type(), getLhs); + Value rhsInt = + rewriter.create(loc, rewriter.getI64Type(), getRhs); + Value andiResult = rewriter.create(loc, lhsInt, rhsInt); + Value resultFp = rewriter.create( + loc, rewriter.getF64Type(), andiResult); + + rewriter.create(loc, resultFp, alloc, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpY); + // debug + forOpY->dump(); - // %cst = arith.constant 6.2831853071800001 : f64 - // %cst_0 = arith.constant 4.600000e-01 : f64 - // %cst_1 = arith.constant 5.400000e-01 : f64 - // %cst_2 = arith.constant 4.000000e+00 : f64 - // %alloc = memref.alloc() : memref<4xf64> - // %alloc_3 = memref.alloc() : memref - // affine.store %cst_2, %alloc_3[] : memref - // affine.for %arg0 = 0 to 4 { - // %0 = arith.index_castui %arg0 : index to i32 - // %1 = arith.uitofp %0 : i32 to f64 - // %2 = arith.mulf %1, %cst : f64 - // %3 = arith.divf %2, %cst_2 : f64 - // %4 = math.cos %3 : f64 - // %5 = arith.mulf %4, %cst_0 : f64 - // %6 = arith.subf %cst_1, %5 : f64 - // affine.store %6, %alloc[%arg0] : memref<4xf64> - // } - - - // } - // } rewriter.replaceOp(op, alloc); - + return success(); - } + 
}; +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: BitwiseAndOp operations +//===----------------------------------------------------------------------===// + +struct zeroCrossCountOpLowering : public ConversionPattern { + zeroCrossCountOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::zeroCrossCountOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.zerocrosscount has 1 operand -- of type tensor f64 + + // Get the location of zeroCrossCountOp + auto loc = op->getLoc(); + + // Pseudo-code is based on the C++ implementation here: + // https://toto-share.com/2011/05/cc-zero-crossing-code/ + // for 1<=i((*op->result_type_begin())); + Type integerType = rewriter.getI64Type(); + + // allocation & deallocation for the result of this operation + // auto memRefType = convertTensorToMemRef(tensorType); + // Force the result to be a tensor of size 1 + auto alloc = insertAllocAndDealloc( + MemRefType::get(ArrayRef(1), tensorType.getElementType()), loc, + rewriter); + zeroCrossCountOpAdaptor zeroCrossCountOpAdaptor(operands); + DEBUG_PRINT_NO_ARGS(); + + // Define constants + Value constant0 = rewriter.create( + loc, rewriter.getI64Type(), + rewriter.getIntegerAttr(rewriter.getI64Type(), 0)); + Value constant1 = rewriter.create( + loc, rewriter.getI64Type(), + rewriter.getIntegerAttr(rewriter.getI64Type(), 1)); + Value Indx0 = rewriter.create(loc, 0); + + // Define bounds + Value lb = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); + Value ub = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIntegerAttr(rewriter.getIndexType(), + tensorType.getShape()[0])); + Value step = rewriter.create(loc, 1); + + // Set up for loop + auto forOpY = + rewriter.create(loc, lb, ub, step, ValueRange{constant0}); + auto ivY = 
forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); + auto countArg = forOpY.getRegionIterArgs()[0]; + + // Get the current and previous elements + Value ivYPrev = rewriter.create(loc, ivY, step); + Value getLhsPrev = rewriter.create( + loc, zeroCrossCountOpAdaptor.getLhs(), ivYPrev); + Value getLhs = rewriter.create( + loc, zeroCrossCountOpAdaptor.getLhs(), ivY); + + // Convert from float to integer + Value lhsPrevInt = rewriter.create( + loc, rewriter.getI64Type(), getLhsPrev); + Value lhsInt = + rewriter.create(loc, rewriter.getI64Type(), getLhs); + + // Check whether the elements are less than zero + Value signLhsPrev = rewriter.create( + loc, arith::CmpIPredicate::slt, lhsPrevInt, constant0); + Value signLhs = rewriter.create( + loc, arith::CmpIPredicate::slt, lhsInt, constant0); + Value equal = rewriter.create(loc, arith::CmpIPredicate::eq, + signLhsPrev, signLhs); + + // If the signs aren't the same, increment the zero cross counter + auto ifOp = + rewriter.create(loc, TypeRange{integerType}, equal, true); + + // If block + rewriter.setInsertionPointToStart(ifOp.thenBlock()); + rewriter.create(loc, ValueRange{countArg}); + + // Else block + rewriter.setInsertionPointToStart(ifOp.elseBlock()); + auto countPlusOne = + rewriter.create(loc, countArg, constant1); + rewriter.create(loc, ValueRange{countPlusOne}); + + rewriter.setInsertionPointAfter(ifOp); + auto countRes = ifOp.getResults()[0]; + rewriter.create(loc, ValueRange{countRes}); + + rewriter.setInsertionPointAfter(forOpY); + + // debug + // forOpY->dump(); + // %15 = "scf.for"(%12, %13, %14, %9) ({ + // ^bb0(%arg0: index, %arg1: i64): + // %17 = "arith.subi"(%arg0, %14) <{overflowFlags = + // #arith.overflow}> + // : (index, index) -> index %18 = "memref.load"(%1, %17) <{nontemporal = + // false}> : (memref<3xf64>, index) -> f64 %19 = "memref.load"(%1, %arg0) + // <{nontemporal = false}> : (memref<3xf64>, index) -> f64 %20 = + // "arith.fptosi"(%18) : (f64) -> i64 %21 = 
"arith.fptosi"(%19) : (f64) -> + // i64 + // %22 = "arith.cmpi"(%20, %9) <{predicate = 2 : i64}> : (i64, i64) -> + // i1 %23 = "arith.cmpi"(%21, %9) <{predicate = 2 : i64}> : (i64, i64) + // -> i1 %24 = "arith.cmpi"(%22, %23) <{predicate = 0 : i64}> : (i1, i1) + // -> i1 %25 = "scf.if"(%24) ({ + // "scf.yield"(%arg1) : (i64) -> () + // }, { + // %26 = "arith.addi"(%arg1, %10) <{overflowFlags = + // #arith.overflow}> : (i64, i64) -> i64 "scf.yield"(%26) : (i64) -> + // () + // }) : (i1) -> i64 + // "scf.yield"(%25) : (i64) -> () + // }) : (index, index, index, i64) -> i64 + + auto finalCountArg = forOpY.getResults()[0]; + Value finalCountArgFloat = rewriter.create( + loc, rewriter.getF64Type(), finalCountArg); + + rewriter.create(loc, finalCountArgFloat, alloc, Indx0); + rewriter.replaceOp(op, alloc); + return success(); + }; }; + //===----------------------------------------------------------------------===// // ToyToAffine RewritePatterns: Binary operations //===----------------------------------------------------------------------===// @@ -6010,217 +7017,5057 @@ struct BinaryOpLowering : public ConversionPattern { } }; - - //===----------------------------------------------------------------------===// - // ToyToAffine RewritePatterns: Unary operations - //===----------------------------------------------------------------------===// - - template - struct UnaryOpLowering : public ConversionPattern { - UnaryOpLowering(MLIRContext *ctx) - : ConversionPattern(UnaryOp::getOperationName(), 1, ctx) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - auto loc = op->getLoc(); - lowerOpToLoops(op, operands, rewriter, - [loc](OpBuilder &builder, ValueRange memRefOperands, - ValueRange loopIvs) { - // Generate an adaptor for the remapped operands of the - // UnaryOp. This allows for using the nice named accessors - // that are generated by the ODS. 
- typename UnaryOp::Adaptor unaryAdaptor(memRefOperands); - - // Generate loads for the element of 'lhs' and 'rhs' at the - // inner loop. - auto loadedInput = builder.create( - loc, unaryAdaptor.getInput(), loopIvs); - - // Create the unary operation performed on the loaded - // values. - return builder.create(loc, loadedInput); - }); - return success(); - } - }; - -using AddOpLowering = BinaryOpLowering; -using SubOpLowering = BinaryOpLowering; -using MulOpLowering = BinaryOpLowering; -using DivOpLowering = BinaryOpLowering; -using SinOpLowering = UnaryOpLowering; -using CosOpLowering = UnaryOpLowering; //===----------------------------------------------------------------------===// -// ToyToAffine RewritePatterns: Constant operations +// ToyToAffine AdditionalPatterns: Shift operations //===----------------------------------------------------------------------===// -struct ConstantOpLowering : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct ShiftRightOpLowering : public ConversionPattern { + ShiftRightOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::ShiftRightOp::getOperationName(), 1, ctx) {} - LogicalResult matchAndRewrite(dsp::ConstantOp op, - PatternRewriter &rewriter) const final { - DenseElementsAttr constantValue = op.getValue(); - Location loc = op.getLoc(); + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { - // When lowering the constant operation, we allocate and assign the constant - // values to a corresponding memref allocation. 
- auto tensorType = llvm::cast(op.getType()); + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation auto memRefType = convertTensorToMemRef(tensorType); auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); - // We will be generating constant indices up-to the largest dimension. - // Create these constants up-front to avoid large amounts of redundant - // operations. - auto valueShape = memRefType.getShape(); - SmallVector constantIndices; + // first from 1 <= i < N + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; - if (!valueShape.empty()) { - for (auto i : llvm::seq( - 0, *std::max_element(valueShape.begin(), valueShape.end()))) - constantIndices.push_back( - rewriter.create(loc, i)); - } else { - // This is the case of a tensor of rank 0. - constantIndices.push_back( - rewriter.create(loc, 0)); - } + typename dsp::ShiftRightOp::Adaptor binaryAdaptor(operands); - // The constant operation represents a multi-dimensional constant, so we - // will need to generate a store for each of the elements. The following - // functor recursively walks the dimensions of the constant shape, - // generating a store when the recursion hits the base case. - SmallVector indices; - auto valueIt = constantValue.value_begin(); - std::function storeElements = [&](uint64_t dimension) { - // The last dimension is the base case of the recursion, at this point - // we store the element at the given index. 
- if (dimension == valueShape.size()) { - rewriter.create( - loc, rewriter.create(loc, *valueIt++), alloc, - llvm::ArrayRef(indices)); - return; - } + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); + auto ivY = forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); - // Otherwise, iterate over the current dimension and add the indices to - // the list. - for (uint64_t i = 0, e = valueShape[dimension]; i != e; ++i) { - indices.push_back(constantIndices[i]); - storeElements(dimension + 1); - indices.pop_back(); - } - }; + auto loadedLhs = + rewriter.create(loc, binaryAdaptor.getLhs(), ivY); + Value IntegerLhs = + rewriter.create(loc, rewriter.getI64Type(), loadedLhs); - // Start the element storing recursion from the first dimension. - storeElements(/*dimension=*/0); + auto loadedRhs = + rewriter.create(loc, binaryAdaptor.getRhs(), ivY); + Value IntegerRhs = + rewriter.create(loc, rewriter.getI64Type(), loadedRhs); - // Replace this operation with the generated alloc. 
+ auto LoweredOp = + rewriter.create(loc, IntegerLhs, IntegerRhs); + + Value FloatOp = + rewriter.create(loc, rewriter.getF64Type(), LoweredOp); + + rewriter.create(loc, FloatOp, alloc, ValueRange{ivY}); + + rewriter.setInsertionPointAfter(forOpY); + + DEBUG_PRINT_NO_ARGS(); + + // rewriter.replaceOp(op, FloatOp); rewriter.replaceOp(op, alloc); + return success(); } }; //===----------------------------------------------------------------------===// -// ToyToAffine RewritePatterns: Func operations +// ToyToAffine AdditionalPatterns: Matmul operations //===----------------------------------------------------------------------===// -struct FuncOpLowering : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +// template + +struct MatmulOpLowering : public ConversionPattern { + MatmulOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::MatmulOp::getOperationName(), 1, ctx) {} LogicalResult - matchAndRewrite(dsp::FuncOp op, OpAdaptor adaptor, + matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - // We only lower the main function as we expect that all other functions - // have been inlined. - if (op.getName() != "main") - return failure(); - // Verify that the given main has no inputs and results. 
- if (op.getNumArguments() || op.getFunctionType().getNumResults()) { - return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { - diag << "expected 'main' to have 0 inputs and 0 results"; - }); - } + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::MatmulOp::Adaptor binaryAdaptor(operands); + + auto lhsType = + llvm::dyn_cast(op->getOperand(0).getType()); + // auto rhsType = + // llvm::dyn_cast(op->getOperand(1).getType()); + + // first from 1 <= i < N + int64_t lb = 0; + int64_t ub_0 = lhsType.getShape()[0]; + int64_t ub_1 = lhsType.getShape()[1]; + int64_t step = 1; + + Value constantZero = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + // NOTE: matrix [y, x] --> y means row, x means column + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub_0, step); + auto ivY = forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); + + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub_1, step); + auto ivX = forOpX.getInductionVar(); + // auto getIterArg = forOpX.getBody()->getArgument(1); //HWISOO: Find this + // to check how previous codes did + rewriter.setInsertionPointToStart(forOpX.getBody()); + + rewriter.create(loc, constantZero, alloc_output, + ValueRange{ivY, ivX}); + + affine::AffineForOp forOpIndex = + rewriter.create(loc, lb, ub_1, step); + auto ivIndex = forOpIndex.getInductionVar(); + rewriter.setInsertionPointToStart(forOpIndex.getBody()); + + auto loadedLhs = rewriter.create( + loc, binaryAdaptor.getLhs(), ValueRange{ivY, ivIndex}); + + auto loadedRhs = rewriter.create( + loc, binaryAdaptor.getRhs(), ValueRange{ivIndex, ivX}); + + Value mulLhsRhs = rewriter.create(loc, 
loadedLhs, loadedRhs); + + auto loadedResult = rewriter.create( + loc, alloc_output, ValueRange{ivY, ivX}); + + Value addResultAndMul = + rewriter.create(loc, loadedResult, mulLhsRhs); + + rewriter.create(loc, addResultAndMul, alloc_output, + ValueRange{ivY, ivX}); + + /* + auto loadedLhs = rewriter.create(loc, +binaryAdaptor.getLhs(), ivY); Value IntegerLhs = +rewriter.create(loc, rewriter.getI64Type(), loadedLhs); + +auto loadedRhs = rewriter.create(loc, +binaryAdaptor.getRhs(), ivY); Value IntegerRhs = +rewriter.create(loc, rewriter.getI64Type(), loadedRhs); + + auto LoweredOp = rewriter.create(loc, IntegerLhs, +IntegerRhs); + + Value FloatOp = rewriter.create(loc, rewriter.getF64Type(), +LoweredOp); + + rewriter.create(loc, FloatOp, alloc, ValueRange{ivY}); + + */ + + rewriter.setInsertionPointAfter(forOpY); + + DEBUG_PRINT_NO_ARGS(); + + // rewriter.replaceOp(op, FloatOp); + rewriter.replaceOp(op, alloc_output); - // Create a new non-dsp function, with the same region. - auto func = rewriter.create(op.getLoc(), op.getName(), - op.getFunctionType()); - rewriter.inlineRegionBefore(op.getRegion(), func.getBody(), func.end()); - rewriter.eraseOp(op); return success(); } }; //===----------------------------------------------------------------------===// -// ToyToAffine RewritePatterns: Print operations +// ToyToAffine AdditionalPatterns: Find peaks operations //===----------------------------------------------------------------------===// -struct PrintOpLowering : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +// template + +struct FindPeaksOpLowering : public ConversionPattern { + FindPeaksOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FindPeaksOp::getOperationName(), 1, ctx) {} LogicalResult - matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor, + matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - // We don't lower "dsp.print" in this pass, but we need to update its - // 
operands. - rewriter.modifyOpInPlace(op, - [&] { op->setOperands(adaptor.getOperands()); }); + + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto countMemRefType = MemRefType::get({}, rewriter.getIndexType()); + auto alloc_peaks_count = + insertAllocAndDealloc(countMemRefType, loc, rewriter); + + typename dsp::FindPeaksOp::Adaptor findPeaksOpAdaptor(operands); + + Value constant_minus_one = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + + Value constant_index_zero = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(0)); + Value constant_index_one = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(1)); + + rewriter.create(loc, constant_index_zero, alloc_peaks_count, + ValueRange{}); + + auto heightArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int heightArgShape = heightArgType.getShape().size(); + + ValueRange heightValueRange; + + if (heightArgShape == 0) + heightValueRange = ValueRange{}; + else + heightValueRange = ValueRange{constant_index_zero}; + + auto distanceArgType = + llvm::dyn_cast(op->getOperand(2).getType()); + + int distanceArgShape = distanceArgType.getShape().size(); + + ValueRange distanceValueRange; + + if (distanceArgShape == 0) + distanceValueRange = ValueRange{}; + else + distanceValueRange = ValueRange{constant_index_zero}; + + auto signalType = + llvm::dyn_cast(op->getOperand(0).getType()); + int64_t lb = 1; + int64_t ub = signalType.getShape()[0] - 1; + int64_t step = 1; + + //%distance = affine.load %alloc_distance[] : memref + auto distance_fp = rewriter.create( + loc, findPeaksOpAdaptor.getDistance(), distanceValueRange); + // f64 to index + Value 
distance_ui = rewriter.create( + loc, rewriter.getIntegerType(32), distance_fp); + Value distance = rewriter.create( + loc, rewriter.getIndexType(), distance_ui); + + auto height = rewriter.create( + loc, findPeaksOpAdaptor.getHeight(), heightValueRange); + + affine::AffineForOp forOpInit = + rewriter.create(loc, 0, tensorType.getShape()[0], step); + auto init_iter = forOpInit.getInductionVar(); + rewriter.setInsertionPointToStart(forOpInit.getBody()); + + rewriter.create(loc, constant_minus_one, alloc_output, + ValueRange{init_iter}); + + rewriter.setInsertionPointAfter(forOpInit); + + affine::AffineForOp forOpSignal = + rewriter.create(loc, lb, ub, step); + auto current_index = forOpSignal.getInductionVar(); + rewriter.setInsertionPointToStart(forOpSignal.getBody()); + + // %prev_index = arith.subi %current_index, %cst_one_index : index + // %signal_prev = memref.load %alloc_signal[%prev_index] : memref<10xf64> + // %signal_current = affine.load %alloc_signal[%current_index] : + // memref<10xf64> %signal_next = affine.load %alloc_signal[%current_index+1] + // : memref<10xf64> Q. How can I do this? 
%height = affine.load + // %alloc_height[] : memref + + AffineExpr ExprForPrev = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1); + AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev); + + AffineExpr ExprForNext = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext); + + auto signal_prev = + rewriter.create(loc, findPeaksOpAdaptor.getSignal(), + addMapForPrev, ValueRange{current_index}); + auto signal_current = rewriter.create( + loc, findPeaksOpAdaptor.getSignal(), ValueRange{current_index}); + auto signal_next = + rewriter.create(loc, findPeaksOpAdaptor.getSignal(), + addMapForNext, ValueRange{current_index}); + + //%cmp_current_prev = arith.cmpf ogt, %signal_current, %signal_prev : f64 + //%cmp_current_next = arith.cmpf ogt, %signal_current, %signal_next : f64 + //%cmp_current_height = arith.cmpf oge, %signal_current, %signal_next : f64 + auto cmp_current_prev = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_prev); + auto cmp_current_next = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_next); + auto cmp_current_height = rewriter.create( + loc, arith::CmpFPredicate::OGE, signal_current, height); + + //%and_two_cmps = arith.andi %cmp_current_prev, %cmp_current_next : index + //%and_three_cmps = arith.andi %and_two_cmps, cmp_current_height : index + auto and_two_cmps = + rewriter.create(loc, cmp_current_prev, cmp_current_next); + auto and_three_cmps = + rewriter.create(loc, and_two_cmps, cmp_current_height); + + // scf.if %and_three_cmps { + auto firstIfOp = + rewriter.create(loc, and_three_cmps, false /* else=1 */); + rewriter.setInsertionPointToStart(firstIfOp.thenBlock()); + + //%peaks_count = affine.load %alloc_peaks_count[] : memref + //%cmp_new_peak = arith.cmpi eq, %peaks_count, %cst_zero_index : index + auto peaks_count = rewriter.create( + loc, alloc_peaks_count, ValueRange{}); + auto cmp_new_peak = 
rewriter.create( + loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero); + + // scf.if %cmp_new_peak { + // memref.store %current_index, %alloc_peaks[%peaks_count] : + // memref<10xindex> %peaks_count_inc = arith.addi %peaks_count, + // %cst_one_index : index affine.store %peaks_count_inc, + // %alloc_peaks_count[] : memref + // } + auto secondIfOp = + rewriter.create(loc, cmp_new_peak, true /* else=1 */); + rewriter.setInsertionPointToStart(secondIfOp.thenBlock()); + // index to f64 + Value current_index_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value current_index_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui); + rewriter.create(loc, current_index_to_f64, alloc_output, + ValueRange{peaks_count}); + auto peaks_count_inc = + rewriter.create(loc, peaks_count, constant_index_one); + rewriter.create(loc, peaks_count_inc, alloc_peaks_count, + ValueRange{}); + + /* + else { + %last_peaks_count = arith.subi %peaks_count, %cst_one_index : index + %last_peak_index = memref.load %alloc_peaks[%last_peaks_count] : + memref<10xindex> %subtract_current_index_last_peak = arith.subi + %current_index, %last_peak_index : index %cmp_sub_distance = arith.cmpi sge, + %subtract_current_index_last_peak, %distance : index + */ + rewriter.setInsertionPointToStart(secondIfOp.elseBlock()); + // auto last_peak_index = rewriter.create(loc, alloc_output, + // addMapForPrev, ValueRange{peaks_count}); HWISOO: It does not work since + // it gives "error: 'affine.load' op index must be a valid dimension or + // symbol identifier" here. 
+ Value last_peaks_count = + rewriter.create(loc, peaks_count, constant_index_one); + auto last_peak_index_fp = rewriter.create( + loc, alloc_output, ValueRange{last_peaks_count}); + // f64 to index + Value last_peak_index_ui = rewriter.create( + loc, rewriter.getIntegerType(32), last_peak_index_fp); + Value last_peak_index = rewriter.create( + loc, rewriter.getIndexType(), last_peak_index_ui); + Value subtract_current_index_last_peak = + rewriter.create(loc, current_index, last_peak_index); + auto cmp_sub_distance = rewriter.create( + loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak, + distance); + + /* + scf.if %cmp_sub_distance { + memref.store %current_index, %alloc_peaks[%peaks_count] : memref<10xindex> + %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index + affine.store %peaks_count_inc, %alloc_peaks_count[] : memref + } + } + */ + auto thirdIfOp = + rewriter.create(loc, cmp_sub_distance, true /* else=1 */); + rewriter.setInsertionPointToStart(thirdIfOp.thenBlock()); + // index to f64 + Value current_index_to_ui_2 = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value current_index_to_f64_2 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui_2); + rewriter.create(loc, current_index_to_f64_2, alloc_output, + ValueRange{peaks_count}); + auto peaks_count_inc_2 = + rewriter.create(loc, peaks_count, constant_index_one); + rewriter.create(loc, peaks_count_inc_2, alloc_peaks_count, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOpSignal); + + /* Setting last element of the output as the count of peaks. + Note that last-last ([-2]) should be always -1. 
*/ + auto peaks_count_final = rewriter.create( + loc, alloc_peaks_count, ValueRange{}); + // index to f64 + Value peaks_count_final_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), peaks_count_final); + Value peaks_count_final_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), peaks_count_final_to_ui); + + Value result_size = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIndexAttr(tensorType.getShape()[0])); + Value result_size_minusOne = + rewriter.create(loc, result_size, constant_index_one); + rewriter.create(loc, peaks_count_final_to_f64, alloc_output, + ValueRange{result_size_minusOne}); + + rewriter.replaceOp(op, alloc_output); + return success(); } }; -//===----------------------------------------------------------------------===// -// ToyToAffine RewritePatterns: Return operations -//===----------------------------------------------------------------------===// +struct MaxOpLowering : public ConversionPattern { + MaxOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::MaxOp::getOperationName(), 1, ctx) {} -struct ReturnOpLowering : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { - LogicalResult matchAndRewrite(dsp::ReturnOp op, - PatternRewriter &rewriter) const final { - // During this lowering, we expect that all function calls have been - // inlined. 
- if (op.hasOperand()) - return failure(); + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::MaxOp::Adaptor maxOpAdaptor(operands); + + Value constantZero = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + // Value cst_idx_zero = rewriter.create(loc, 0); + + rewriter.create(loc, constantZero, alloc_output, + ValueRange{}); + + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + + // loop for 0 <= i < N + int64_t lb = 0; + int64_t ub = inputType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp = rewriter.create(loc, lb, ub, step); + auto idx = forOp.getInductionVar(); + rewriter.setInsertionPointToStart(forOp.getBody()); + + auto loadedInput = rewriter.create( + loc, maxOpAdaptor.getInput(), ValueRange{idx}); + auto loadedOutput = + rewriter.create(loc, alloc_output, ValueRange{}); + auto compare_input_output = rewriter.create( + loc, arith::CmpFPredicate::OGT, loadedInput, loadedOutput); + + auto ifOp = rewriter.create(loc, compare_input_output, false); + + rewriter.setInsertionPointToStart(ifOp.thenBlock()); + + rewriter.create(loc, loadedInput, alloc_output, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc_output); - // We lower "dsp.return" directly to "func.return". 
- rewriter.replaceOpWithNewOp(op); return success(); } }; -//===----------------------------------------------------------------------===// -// ToyToAffine RewritePatterns: Transpose operations -//===----------------------------------------------------------------------===// +struct MeanOpLowering : public ConversionPattern { + MeanOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::MeanOp::getOperationName(), 1, ctx) {} -struct TransposeOpLowering : public ConversionPattern { - TransposeOpLowering(MLIRContext *ctx) - : ConversionPattern(dsp::TransposeOp::getOperationName(), 1, ctx) {} + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::MeanOp::Adaptor meanOpAdaptor(operands); + + Value constantZero = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value cst_idx_zero = rewriter.create(loc, 0); + + rewriter.create(loc, constantZero, alloc_output, + ValueRange{}); + + auto lengthArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int lengthArgShape = lengthArgType.getShape().size(); + + ValueRange lengthValueRange; + + if (lengthArgShape == 0) + lengthValueRange = ValueRange{}; + else + lengthValueRange = ValueRange{cst_idx_zero}; + + auto loadedLength = rewriter.create( + loc, meanOpAdaptor.getLength(), lengthValueRange); + + // f64 to index + Value length_ui = rewriter.create( + loc, rewriter.getIntegerType(32), loadedLength); + Value length_index = rewriter.create( + loc, rewriter.getIndexType(), length_ui); + + // loop for 0 <= i < length + // Note: we need to use scf.for and 
memref::LoadOp/StoreOp (can we use + // dynamic ub for affine.for?) + auto lb = rewriter.create(loc, 0); + auto step = rewriter.create(loc, 1); + auto forOp = rewriter.create(loc, lb, length_index, step); + auto idx = forOp.getInductionVar(); + rewriter.setInsertionPointToStart(forOp.getBody()); + + auto loadedInput = rewriter.create( + loc, meanOpAdaptor.getInput(), ValueRange{idx}); + auto loadedOutput = + rewriter.create(loc, alloc_output, ValueRange{}); + auto added_output = + rewriter.create(loc, loadedInput, loadedOutput); + rewriter.create(loc, added_output, alloc_output, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOp); + + auto loadedOutput2 = + rewriter.create(loc, alloc_output, ValueRange{}); + auto divided_output = + rewriter.create(loc, loadedOutput2, loadedLength); + rewriter.create(loc, divided_output, alloc_output, + ValueRange{}); + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + +struct DiffOpLowering : public ConversionPattern { + DiffOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::DiffOp::getOperationName(), 1, ctx) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { + + // Get the location of GainOp auto loc = op->getLoc(); - lowerOpToLoops(op, operands, rewriter, - [loc](OpBuilder &builder, ValueRange memRefOperands, - ValueRange loopIvs) { - // Generate an adaptor for the remapped operands of the - // TransposeOp. This allows for using the nice named - // accessors that are generated by the ODS. - dsp::TransposeOpAdaptor transposeAdaptor(memRefOperands); - Value input = transposeAdaptor.getInput(); - // Transpose the elements by generating a load from the - // reverse indices. 
- SmallVector reverseIvs(llvm::reverse(loopIvs)); - return builder.create(loc, input, - reverseIvs); - }); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::DiffOp::Adaptor diffOpAdaptor(operands); + + Value cst_idx_zero = rewriter.create(loc, 0); + Value cst_idx_one = rewriter.create(loc, 1); + + auto lengthArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int lengthArgShape = lengthArgType.getShape().size(); + + ValueRange lengthValueRange; + + if (lengthArgShape == 0) + lengthValueRange = ValueRange{}; + else + lengthValueRange = ValueRange{cst_idx_zero}; + + auto loadedLength = rewriter.create( + loc, diffOpAdaptor.getLength(), lengthValueRange); + + // f64 to index + Value length_ui = rewriter.create( + loc, rewriter.getIntegerType(32), loadedLength); + Value length_index = rewriter.create( + loc, rewriter.getIndexType(), length_ui); + Value length_index_minus = + rewriter.create(loc, length_index, cst_idx_one); + + // loop for 0 <= i < N-1 + // Note: we need to use scf.for and memref::LoadOp/StoreOp (can we use + // dynamic ub for affine.for?) 
+ auto lb = rewriter.create(loc, 0); + auto step = rewriter.create(loc, 1); + auto forOp = rewriter.create(loc, lb, length_index_minus, step); + auto idx = forOp.getInductionVar(); + rewriter.setInsertionPointToStart(forOp.getBody()); + + Value constant_index_one = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(1)); + Value idx_next = + rewriter.create(loc, idx, constant_index_one); + + auto input_current = rewriter.create( + loc, diffOpAdaptor.getInput(), ValueRange{idx}); + auto input_next = rewriter.create( + loc, diffOpAdaptor.getInput(), ValueRange{idx_next}); + + auto diff_input = + rewriter.create(loc, input_next, input_current); + rewriter.create(loc, diff_input, alloc_output, + ValueRange{idx}); + + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc_output); + return success(); } }; -} // namespace +struct GetSingleElemAtIdxOpLowering : public ConversionPattern { + GetSingleElemAtIdxOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::GetSingleElemAtIdxOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // auto tensorType = + // llvm::cast((*op->result_type_begin())); auto + // memRefType = convertTensorToMemRef(tensorType); + auto memRefType = MemRefType::get({}, rewriter.getF64Type()); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::GetSingleElemAtIdxOp::Adaptor getSingleElemAtIdxAdaptor( + operands); + + auto indxArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int indxArgShape = indxArgType.getShape().size(); + + ValueRange indexValueRange; + + if (indxArgShape == 0) + indexValueRange = ValueRange{}; + else { + Value cst_idx_zero = rewriter.create(loc, 0); + indexValueRange = ValueRange{cst_idx_zero}; + } + + Value loadedIndx = rewriter.create( + loc, getSingleElemAtIdxAdaptor.getIndx(), indexValueRange); + + // f64 to 
index + Value indx_ui = rewriter.create( + loc, rewriter.getIntegerType(32), loadedIndx); + Value indx_index = rewriter.create( + loc, rewriter.getIndexType(), indx_ui); + + Value loadedElement = rewriter.create( + loc, getSingleElemAtIdxAdaptor.getInput(), ValueRange{indx_index}); + + rewriter.create(loc, loadedElement, alloc, ValueRange{}); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +struct Diff2MeanOptimizedOpLowering : public ConversionPattern { + Diff2MeanOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::Diff2MeanOptimizedOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::Diff2MeanOptimizedOp::Adaptor diff2MeanOptimizedOpAdaptor( + operands); + + Value cst_idx_zero = rewriter.create(loc, 0); + + auto lengthArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int lengthArgShape = lengthArgType.getShape().size(); + + ValueRange lengthValueRange; + + if (lengthArgShape == 0) + lengthValueRange = ValueRange{}; + else + lengthValueRange = ValueRange{cst_idx_zero}; + + auto loadedLength = rewriter.create( + loc, diff2MeanOptimizedOpAdaptor.getLength(), lengthValueRange); + + // f64 to index + Value length_ui = rewriter.create( + loc, rewriter.getIntegerType(32), loadedLength); + Value length_index = rewriter.create( + loc, rewriter.getIndexType(), length_ui); + + auto input_first = rewriter.create( + loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{cst_idx_zero}); + auto input_last = rewriter.create( + loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{length_index}); + + auto diff_input = + 
rewriter.create(loc, input_last, input_first); + + auto div_input = + rewriter.create(loc, diff_input, loadedLength); + + rewriter.create(loc, div_input, alloc_output, + ValueRange{}); + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + +struct FindPeaks2Diff2MeanOptimizedOpLowering : public ConversionPattern { + FindPeaks2Diff2MeanOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern( + dsp::FindPeaks2Diff2MeanOptimizedOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + // Get the location of GainOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto alloc_output_last = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto countMemRefType = MemRefType::get({}, rewriter.getIndexType()); + auto alloc_peaks_count = + insertAllocAndDealloc(countMemRefType, loc, rewriter); + + typename dsp::FindPeaks2Diff2MeanOptimizedOp::Adaptor + findPeaks2Diff2MeanOptOpAdaptor(operands); + + Value constant_minus_one = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + + Value constant_index_zero = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(0)); + Value constant_index_one = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(1)); + + rewriter.create(loc, constant_index_zero, alloc_peaks_count, + ValueRange{}); + + auto heightArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int heightArgShape = heightArgType.getShape().size(); + + ValueRange heightValueRange; + + if (heightArgShape == 0) + heightValueRange = ValueRange{}; + else + heightValueRange = ValueRange{constant_index_zero}; + + auto 
distanceArgType = + llvm::dyn_cast(op->getOperand(2).getType()); + + int distanceArgShape = distanceArgType.getShape().size(); + + ValueRange distanceValueRange; + + if (distanceArgShape == 0) + distanceValueRange = ValueRange{}; + else + distanceValueRange = ValueRange{constant_index_zero}; + + auto signalType = + llvm::dyn_cast(op->getOperand(0).getType()); + int64_t lb = 1; + int64_t ub = signalType.getShape()[0] - 1; + int64_t step = 1; + + //%distance = affine.load %alloc_distance[] : memref + auto distance_fp = rewriter.create( + loc, findPeaks2Diff2MeanOptOpAdaptor.getDistance(), distanceValueRange); + // f64 to index + Value distance_ui = rewriter.create( + loc, rewriter.getIntegerType(32), distance_fp); + Value distance = rewriter.create( + loc, rewriter.getIndexType(), distance_ui); + + auto height = rewriter.create( + loc, findPeaks2Diff2MeanOptOpAdaptor.getHeight(), heightValueRange); + + rewriter.create(loc, constant_minus_one, alloc_output, + ValueRange{}); + + rewriter.create(loc, constant_minus_one, alloc_output_last, + ValueRange{}); + + affine::AffineForOp forOpSignal = + rewriter.create(loc, lb, ub, step); + auto current_index = forOpSignal.getInductionVar(); + rewriter.setInsertionPointToStart(forOpSignal.getBody()); + + // %prev_index = arith.subi %current_index, %cst_one_index : index + // %signal_prev = memref.load %alloc_signal[%prev_index] : memref<10xf64> + // %signal_current = affine.load %alloc_signal[%current_index] : + // memref<10xf64> %signal_next = affine.load %alloc_signal[%current_index+1] + // : memref<10xf64> Q. How can I do this? 
%height = affine.load + // %alloc_height[] : memref + + AffineExpr ExprForPrev = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1); + AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev); + + AffineExpr ExprForNext = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext); + + auto signal_prev = rewriter.create( + loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(), addMapForPrev, + ValueRange{current_index}); + auto signal_current = rewriter.create( + loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(), + ValueRange{current_index}); + auto signal_next = rewriter.create( + loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(), addMapForNext, + ValueRange{current_index}); + + //%cmp_current_prev = arith.cmpf ogt, %signal_current, %signal_prev : f64 + //%cmp_current_next = arith.cmpf ogt, %signal_current, %signal_next : f64 + //%cmp_current_height = arith.cmpf oge, %signal_current, %signal_next : f64 + auto cmp_current_prev = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_prev); + auto cmp_current_next = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_next); + auto cmp_current_height = rewriter.create( + loc, arith::CmpFPredicate::OGE, signal_current, height); + + //%and_two_cmps = arith.andi %cmp_current_prev, %cmp_current_next : index + //%and_three_cmps = arith.andi %and_two_cmps, cmp_current_height : index + auto and_two_cmps = + rewriter.create(loc, cmp_current_prev, cmp_current_next); + auto and_three_cmps = + rewriter.create(loc, and_two_cmps, cmp_current_height); + + // scf.if %and_three_cmps { + auto firstIfOp = + rewriter.create(loc, and_three_cmps, false /* else=1 */); + rewriter.setInsertionPointToStart(firstIfOp.thenBlock()); + + //%peaks_count = affine.load %alloc_peaks_count[] : memref + //%cmp_new_peak = arith.cmpi eq, %peaks_count, %cst_zero_index : index + auto peaks_count = rewriter.create( + loc, 
alloc_peaks_count, ValueRange{}); + auto cmp_new_peak = rewriter.create( + loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero); + + // scf.if %cmp_new_peak { + // memref.store %current_index, %alloc_peaks[%peaks_count] : + // memref<10xindex> %peaks_count_inc = arith.addi %peaks_count, + // %cst_one_index : index affine.store %peaks_count_inc, + // %alloc_peaks_count[] : memref + // } + auto secondIfOp = + rewriter.create(loc, cmp_new_peak, true /* else=1 */); + rewriter.setInsertionPointToStart(secondIfOp.thenBlock()); + // index to f64 + Value current_index_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value current_index_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui); + rewriter.create(loc, current_index_to_f64, alloc_output, + ValueRange{}); + rewriter.create(loc, current_index_to_f64, + alloc_output_last, ValueRange{}); + + auto peaks_count_inc = + rewriter.create(loc, peaks_count, constant_index_one); + rewriter.create(loc, peaks_count_inc, alloc_peaks_count, + ValueRange{}); + + /* + else { + %last_peaks_count = arith.subi %peaks_count, %cst_one_index : index + %last_peak_index = memref.load %alloc_peaks[%last_peaks_count] : + memref<10xindex> %subtract_current_index_last_peak = arith.subi + %current_index, %last_peak_index : index %cmp_sub_distance = arith.cmpi sge, + %subtract_current_index_last_peak, %distance : index + */ + rewriter.setInsertionPointToStart(secondIfOp.elseBlock()); + // auto last_peak_index = rewriter.create(loc, alloc_output, + // addMapForPrev, ValueRange{peaks_count}); HWISOO: It does not work since + // it gives "error: 'affine.load' op index must be a valid dimension or + // symbol identifier" here. 
+ Value last_peaks_count = + rewriter.create(loc, peaks_count, constant_index_one); + auto last_peak_index_fp = + rewriter.create(loc, alloc_output_last, ValueRange{}); + // f64 to index + Value last_peak_index_ui = rewriter.create( + loc, rewriter.getIntegerType(32), last_peak_index_fp); + Value last_peak_index = rewriter.create( + loc, rewriter.getIndexType(), last_peak_index_ui); + Value subtract_current_index_last_peak = + rewriter.create(loc, current_index, last_peak_index); + auto cmp_sub_distance = rewriter.create( + loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak, + distance); + + /* + scf.if %cmp_sub_distance { + memref.store %current_index, %alloc_peaks[%peaks_count] : memref<10xindex> + %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index + affine.store %peaks_count_inc, %alloc_peaks_count[] : memref + } + } + */ + auto thirdIfOp = + rewriter.create(loc, cmp_sub_distance, true /* else=1 */); + rewriter.setInsertionPointToStart(thirdIfOp.thenBlock()); + // index to f64 + Value current_index_to_ui_2 = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value current_index_to_f64_2 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui_2); + rewriter.create(loc, current_index_to_f64_2, + alloc_output_last, ValueRange{}); + auto peaks_count_inc_2 = + rewriter.create(loc, peaks_count, constant_index_one); + rewriter.create(loc, peaks_count_inc_2, alloc_peaks_count, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOpSignal); + + auto final_loaded_peak_first = + rewriter.create(loc, alloc_output, ValueRange{}); + + auto final_loaded_peak_last = + rewriter.create(loc, alloc_output_last, ValueRange{}); + Value difference = rewriter.create( + loc, final_loaded_peak_last, final_loaded_peak_first); + auto peaks_count_final = rewriter.create( + loc, alloc_peaks_count, ValueRange{}); + // index to f64 + Value peaks_count_final_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), 
peaks_count_final); + Value peaks_count_final_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), peaks_count_final_to_ui); + Value peaks_count_minus = rewriter.create( + loc, peaks_count_final_to_f64, constant_minus_one); + + Value final_output = + rewriter.create(loc, difference, peaks_count_minus); + + rewriter.create(loc, final_output, alloc_output, + ValueRange{}); + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + +struct LMS2FindPeaksOptimizedOpLowering : public ConversionPattern { + LMS2FindPeaksOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::LMS2FindPeaksOptimizedOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // Pseudo-code: + // for (int n = 0; n < NUM_SAMPLES; n++) { + // // Calculate the filter output y[n] + // y[n] = 0; + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { // affine if + // y[n] = y[n] + (w[i] * x[n - i]); + // } + // } + + // // Calculate the error e[n] + // e[n] = d[n] - y[n]; + + // // Update the filter weights w[i] + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { + // w[i] += MU * e[n] * x[n - i]; + // } + // } + // } + + auto tensorType = llvm::cast((*op->result_type_begin())); + auto lhsType = + llvm::dyn_cast(op->getOperand(0).getType()); + + ArrayRef lhsShape = lhsType.getShape(); + + // allocation & deallocation for the result of this operation + auto memRefType = MemRefType::get(lhsShape, rewriter.getF64Type()); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto memRefTypeOutput = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefTypeOutput, loc, rewriter); + + auto countMemRefType = MemRefType::get({}, rewriter.getIndexType()); + auto alloc_peaks_count = + insertAllocAndDealloc(countMemRefType, loc, rewriter); + + // construct affine loops for the 
input + SmallVector lowerBounds(lhsType.getRank(), /*Value*/ 0); + SmallVector steps(lhsType.getRank(), /*Value=*/1); + + typename dsp::LMS2FindPeaksOptimizedOp::Adaptor lfr2fpAdaptor(operands); + + // Value alpha = rewriter.create(loc, + // rewriter.getF64Type(), + // rewriter.getF64FloatAttr(1)); + Value zeroval = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value mu = rewriter.create(loc, lfr2fpAdaptor.getMu()); + + Value cst_idx_zero = rewriter.create(loc, 0); + Value cst_idx_one = rewriter.create(loc, 1); + Value constant_minus_one = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + + // initialization for findPeaks + rewriter.create(loc, cst_idx_zero, alloc_peaks_count, + ValueRange{}); + + auto heightArgType = + llvm::dyn_cast(op->getOperand(4).getType()); + + int heightArgShape = heightArgType.getShape().size(); + + ValueRange heightValueRange; + + if (heightArgShape == 0) + heightValueRange = ValueRange{}; + else + heightValueRange = ValueRange{cst_idx_zero}; + + auto distanceArgType = + llvm::dyn_cast(op->getOperand(5).getType()); + + int distanceArgShape = distanceArgType.getShape().size(); + + ValueRange distanceValueRange; + + if (distanceArgShape == 0) + distanceValueRange = ValueRange{}; + else + distanceValueRange = ValueRange{cst_idx_zero}; + + auto distance_fp = rewriter.create( + loc, lfr2fpAdaptor.getDistance(), distanceValueRange); + Value distance_ui = rewriter.create( + loc, rewriter.getIntegerType(32), distance_fp); + Value distance = rewriter.create( + loc, rewriter.getIndexType(), distance_ui); + + auto height = rewriter.create( + loc, lfr2fpAdaptor.getHeight(), heightValueRange); + + affine::AffineForOp forOpInit = + rewriter.create(loc, 0, tensorType.getShape()[0], 1); + auto init_iter = forOpInit.getInductionVar(); + rewriter.setInsertionPointToStart(forOpInit.getBody()); + + rewriter.create(loc, constant_minus_one, alloc_output, + ValueRange{init_iter}); + + 
rewriter.setInsertionPointAfter(forOpInit); + + // unrolled two iterations. + int64_t lb = 0; + int64_t step = 1; + + Value GetFilterLOp = op->getOperand(3); + dsp::ConstantOp constantOp3rdArg = + GetFilterLOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + + auto elements1 = constant3rdValue.getValues(); + float filterlenval = elements1[0].getValueAsDouble(); + auto FilterLength = (uint64_t)filterlenval; + + int64_t numSamples = lhsType.getShape()[0]; + + auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type()); + // auto wAlloc = rewriter.create(loc, yMemRefType); + auto wAlloc = insertAllocAndDealloc(yMemRefType, loc, rewriter); + + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + AffineExpr d0, d1, s0; + bindDims(rewriter.getContext(), d0, d1); + // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - + // rewriter.getAffineDimExpr(1); //d0 - d1; + AffineExpr ExprForXSlice = d0 - d1; + AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice); + IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false}); + + { + + // w[n] = 0; + // y[n] = 0; + // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + // Allocate and initialize array for y + // Value constantIndx0 = rewriter.create(loc, 0); + rewriter.create(loc, zeroval, wAlloc, + ValueRange{cst_idx_zero}); + rewriter.create(loc, zeroval, alloc, + ValueRange{cst_idx_zero}); + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); + auto iv2 = forOp2.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + + auto ifOp = rewriter.create( + loc, set1, ValueRange{cst_idx_zero, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + Value inputX = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, + ValueRange{cst_idx_zero, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType + + Value wmulx = 
rewriter.create(loc, inputX, w); + Value ybefore = + rewriter.create(loc, alloc, ValueRange{cst_idx_zero}); + Value sumNext = rewriter.create(loc, wmulx, ybefore); + rewriter.create(loc, sumNext, alloc, + ValueRange{cst_idx_zero}); + rewriter.setInsertionPointAfter(ifOp); + rewriter.setInsertionPointAfter(forOp2); + + // get e[n] = d[n] - y[n] + + Value desiredX = rewriter.create( + loc, lfr2fpAdaptor.getRhs(), ValueRange{cst_idx_zero}); + Value ynew = + rewriter.create(loc, alloc, ValueRange{cst_idx_zero}); + + Value err = rewriter.create(loc, desiredX, ynew); + + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{cst_idx_zero, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, + ValueRange{cst_idx_zero, iv3}); + + Value Prevw2 = + rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + } + + { + // w[n] = 0; + // y[n] = 0; + // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + // Allocate and initialize array for y + // Value constantIndx0 = rewriter.create(loc, 0); + rewriter.create(loc, zeroval, wAlloc, + ValueRange{cst_idx_one}); + rewriter.create(loc, zeroval, alloc, + ValueRange{cst_idx_one}); + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); + auto iv2 = forOp2.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + + auto ifOp = rewriter.create( + loc, set1, 
ValueRange{cst_idx_one, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + Value inputX = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, + ValueRange{cst_idx_one, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType + + Value wmulx = rewriter.create(loc, inputX, w); + Value ybefore = + rewriter.create(loc, alloc, ValueRange{cst_idx_one}); + Value sumNext = rewriter.create(loc, wmulx, ybefore); + rewriter.create(loc, sumNext, alloc, + ValueRange{cst_idx_one}); + rewriter.setInsertionPointAfter(ifOp); + rewriter.setInsertionPointAfter(forOp2); + + // get e[n] = d[n] - y[n] + + Value desiredX = rewriter.create( + loc, lfr2fpAdaptor.getRhs(), ValueRange{cst_idx_one}); + Value ynew = + rewriter.create(loc, alloc, ValueRange{cst_idx_one}); + + Value err = rewriter.create(loc, desiredX, ynew); + + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{cst_idx_one, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, + ValueRange{cst_idx_one, iv3}); + + Value Prevw2 = + rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + } + + // Outer for loop -- iterate from 2 to last + int64_t lb_outer = 2; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb_outer, numSamples, step); + auto iv = forOp1.getInductionVar(); + + 
rewriter.setInsertionPointToStart(forOp1.getBody()); + // w[n] = 0; + // y[n] = 0; + // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + // Allocate and initialize array for y + // Value constantIndx0 = rewriter.create(loc, 0); + + rewriter.create(loc, zeroval, wAlloc, ValueRange{iv}); + rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); + auto iv2 = forOp2.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + Value inputX = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, ValueRange{iv, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType + + Value wmulx = rewriter.create(loc, inputX, w); + Value ybefore = rewriter.create(loc, alloc, ValueRange{iv}); + Value sumNext = rewriter.create(loc, wmulx, ybefore); + rewriter.create(loc, sumNext, alloc, ValueRange{iv}); + rewriter.setInsertionPointAfter(ifOp); + rewriter.setInsertionPointAfter(forOp2); + + // get e[n] = d[n] - y[n] + + Value desiredX = rewriter.create(loc, lfr2fpAdaptor.getRhs(), + ValueRange{iv}); + Value ynew = rewriter.create(loc, alloc, ValueRange{iv}); + + Value err = rewriter.create(loc, desiredX, ynew); + + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{iv, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = rewriter.create( + loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, ValueRange{iv, iv3}); + + Value Prevw2 = rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = 
rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + + // HERE WE SHOULD INSERT FIND_PEAKS FOR FUSING LOOP + + AffineExpr ExprForPrev = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(2); + AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev); + + AffineExpr ExprForCurrent = + rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1); + AffineMap addMapForCurrent = AffineMap::get(1, 0, ExprForCurrent); + + auto signal_prev = rewriter.create(loc, alloc, addMapForPrev, + ValueRange{iv}); + auto signal_current = rewriter.create( + loc, alloc, addMapForCurrent, ValueRange{iv}); + auto signal_next = + rewriter.create(loc, alloc, ValueRange{iv}); + + auto cmp_current_prev = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_prev); + auto cmp_current_next = rewriter.create( + loc, arith::CmpFPredicate::OGT, signal_current, signal_next); + auto cmp_current_height = rewriter.create( + loc, arith::CmpFPredicate::OGE, signal_current, height); + + auto and_two_cmps = + rewriter.create(loc, cmp_current_prev, cmp_current_next); + auto and_three_cmps = + rewriter.create(loc, and_two_cmps, cmp_current_height); + + auto firstIfOp = + rewriter.create(loc, and_three_cmps, false /* else=1 */); + rewriter.setInsertionPointToStart(firstIfOp.thenBlock()); + + auto peaks_count = rewriter.create( + loc, alloc_peaks_count, ValueRange{}); + auto cmp_new_peak = rewriter.create( + loc, arith::CmpIPredicate::eq, peaks_count, cst_idx_zero); + + auto current_index = rewriter.create(loc, iv, cst_idx_one); + + auto secondIfOp = + rewriter.create(loc, cmp_new_peak, true /* else=1 */); + rewriter.setInsertionPointToStart(secondIfOp.thenBlock()); + Value current_index_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value 
current_index_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui); + rewriter.create(loc, current_index_to_f64, alloc_output, + ValueRange{peaks_count}); + auto peaks_count_inc = + rewriter.create(loc, peaks_count, cst_idx_one); + rewriter.create(loc, peaks_count_inc, alloc_peaks_count, + ValueRange{}); + + rewriter.setInsertionPointToStart(secondIfOp.elseBlock()); + + Value last_peaks_count = + rewriter.create(loc, peaks_count, cst_idx_one); + auto last_peak_index_fp = rewriter.create( + loc, alloc_output, ValueRange{last_peaks_count}); + Value last_peak_index_ui = rewriter.create( + loc, rewriter.getIntegerType(32), last_peak_index_fp); + Value last_peak_index = rewriter.create( + loc, rewriter.getIndexType(), last_peak_index_ui); + Value subtract_current_index_last_peak = + rewriter.create(loc, current_index, last_peak_index); + auto cmp_sub_distance = rewriter.create( + loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak, + distance); + + auto thirdIfOp = + rewriter.create(loc, cmp_sub_distance, true /* else=1 */); + rewriter.setInsertionPointToStart(thirdIfOp.thenBlock()); + Value current_index_to_ui_2 = rewriter.create( + loc, rewriter.getIntegerType(32), current_index); + Value current_index_to_f64_2 = rewriter.create( + loc, rewriter.getF64Type(), current_index_to_ui_2); + rewriter.create(loc, current_index_to_f64_2, alloc_output, + ValueRange{peaks_count}); + auto peaks_count_inc_2 = + rewriter.create(loc, peaks_count, cst_idx_one); + rewriter.create(loc, peaks_count_inc_2, alloc_peaks_count, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOp1); + // debug + // forOp1->dump(); + + /* Setting last element of the output as the count of peaks. 
*/ + auto peaks_count_final = rewriter.create( + loc, alloc_peaks_count, ValueRange{}); + // index to f64 + Value peaks_count_final_to_ui = rewriter.create( + loc, rewriter.getIntegerType(32), peaks_count_final); + Value peaks_count_final_to_f64 = rewriter.create( + loc, rewriter.getF64Type(), peaks_count_final_to_ui); + + Value result_size = rewriter.create( + loc, rewriter.getIndexType(), + rewriter.getIndexAttr(tensorType.getShape()[0])); + + rewriter.create(loc, peaks_count_final_to_f64, alloc_output, + addMapForCurrent, ValueRange{result_size}); + + // auto testValue = rewriter.create( + // loc, alloc, ValueRange{cst_idx_zero}); + + // rewriter.create(loc, testValue, alloc_output, + // addMapForCurrent, ValueRange{result_size}); + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Unary operations +//===----------------------------------------------------------------------===// + +template +struct UnaryOpLowering : public ConversionPattern { + UnaryOpLowering(MLIRContext *ctx) + : ConversionPattern(UnaryOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + lowerOpToLoops(op, operands, rewriter, + [loc](OpBuilder &builder, ValueRange memRefOperands, + ValueRange loopIvs) { + // Generate an adaptor for the remapped operands of the + // UnaryOp. This allows for using the nice named accessors + // that are generated by the ODS. + typename UnaryOp::Adaptor unaryAdaptor(memRefOperands); + + // Generate loads for the element of 'lhs' and 'rhs' at the + // inner loop. + auto loadedInput = builder.create( + loc, unaryAdaptor.getInput(), loopIvs); + + // Create the unary operation performed on the loaded + // values. 
+ return builder.create(loc, loadedInput); + }); + return success(); + } +}; + +using AddOpLowering = BinaryOpLowering; +using ModuloOpLowering = BinaryOpLowering; +using SubOpLowering = BinaryOpLowering; +using MulOpLowering = BinaryOpLowering; +using DivOpLowering = BinaryOpLowering; +using AbsOpLowering = UnaryOpLowering; +using SinOpLowering = UnaryOpLowering; +using CosOpLowering = UnaryOpLowering; +using SqrtOpLowering = UnaryOpLowering; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Constant operations +//===----------------------------------------------------------------------===// + +struct ConstantOpLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(dsp::ConstantOp op, + PatternRewriter &rewriter) const final { + DenseElementsAttr constantValue = op.getValue(); + Location loc = op.getLoc(); + + // When lowering the constant operation, we allocate and assign the constant + // values to a corresponding memref allocation. + auto tensorType = llvm::cast(op.getType()); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // We will be generating constant indices up-to the largest dimension. + // Create these constants up-front to avoid large amounts of redundant + // operations. + auto valueShape = memRefType.getShape(); + SmallVector constantIndices; + + if (!valueShape.empty()) { + for (auto i : llvm::seq( + 0, *std::max_element(valueShape.begin(), valueShape.end()))) + constantIndices.push_back( + rewriter.create(loc, i)); + } else { + // This is the case of a tensor of rank 0. + constantIndices.push_back( + rewriter.create(loc, 0)); + } + + // The constant operation represents a multi-dimensional constant, so we + // will need to generate a store for each of the elements. 
The following + // functor recursively walks the dimensions of the constant shape, + // generating a store when the recursion hits the base case. + SmallVector indices; + auto valueIt = constantValue.value_begin(); + std::function storeElements = [&](uint64_t dimension) { + // The last dimension is the base case of the recursion, at this point + // we store the element at the given index. + if (dimension == valueShape.size()) { + rewriter.create( + loc, rewriter.create(loc, *valueIt++), alloc, + llvm::ArrayRef(indices)); + return; + } + + // Otherwise, iterate over the current dimension and add the indices to + // the list. + for (uint64_t i = 0, e = valueShape[dimension]; i != e; ++i) { + indices.push_back(constantIndices[i]); + storeElements(dimension + 1); + indices.pop_back(); + } + }; + + // Start the element storing recursion from the first dimension. + storeElements(/*dimension=*/0); + + // Replace this operation with the generated alloc. + rewriter.replaceOp(op, alloc); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Func operations +//===----------------------------------------------------------------------===// + +struct FuncOpLowering : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(dsp::FuncOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + // We only lower the main function as we expect that all other functions + // have been inlined. + if (op.getName() != "main") + return failure(); + + // Verify that the given main has no inputs and results. + if (op.getNumArguments() || op.getFunctionType().getNumResults()) { + return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) { + diag << "expected 'main' to have 0 inputs and 0 results"; + }); + } + + // Create a new non-dsp function, with the same region. 
+ auto func = rewriter.create(op.getLoc(), op.getName(), + op.getFunctionType()); + rewriter.inlineRegionBefore(op.getRegion(), func.getBody(), func.end()); + rewriter.eraseOp(op); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Print operations +//===----------------------------------------------------------------------===// + +struct PrintOpLowering : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + // We don't lower "dsp.print" in this pass, but we need to update its + // operands. + rewriter.modifyOpInPlace(op, + [&] { op->setOperands(adaptor.getOperands()); }); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Return operations +//===----------------------------------------------------------------------===// + +struct ReturnOpLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(dsp::ReturnOp op, + PatternRewriter &rewriter) const final { + // During this lowering, we expect that all function calls have been + // inlined. + if (op.hasOperand()) + return failure(); + + // We lower "dsp.return" directly to "func.return". 
+ rewriter.replaceOpWithNewOp(op); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Transpose operations +//===----------------------------------------------------------------------===// + +struct TransposeOpLowering : public ConversionPattern { + TransposeOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::TransposeOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + lowerOpToLoops(op, operands, rewriter, + [loc](OpBuilder &builder, ValueRange memRefOperands, + ValueRange loopIvs) { + // Generate an adaptor for the remapped operands of the + // TransposeOp. This allows for using the nice named + // accessors that are generated by the ODS. + dsp::TransposeOpAdaptor transposeAdaptor(memRefOperands); + Value input = transposeAdaptor.getInput(); + + // Transpose the elements by generating a load from the + // reverse indices. 
+ SmallVector reverseIvs(llvm::reverse(loopIvs)); + return builder.create(loc, input, + reverseIvs); + }); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Transpose operations +//===----------------------------------------------------------------------===// + +struct Conv2DOpLowering : public ConversionPattern { + Conv2DOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::Conv2DOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + // output mem alloc and dealloc + auto output = llvm::dyn_cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + Conv2DOpAdaptor conv2dAdaptor(operands); + Value input = conv2dAdaptor.getInput(); + Value kernel = conv2dAdaptor.getKernel(); + + // ranked tensor type + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + auto kernelType = + llvm::dyn_cast(op->getOperand(1).getType()); + + ArrayRef inputShape = inputType.getShape(); + ArrayRef kernelShape = kernelType.getShape(); + + // input layout + int64_t IH = inputShape[0]; + int64_t IW = inputShape[1]; + + // kernel layout + int64_t KH = kernelShape[0]; + int64_t KW = kernelShape[1]; + + // output layout + ArrayRef outputShape = output.getShape(); + int64_t OH = outputShape[0]; + int64_t OW = outputShape[1]; + + AffineExpr d0, d1, d2, d3; // declare affine expression: i, j, p, q + bindDims( + rewriter.getContext(), d0, d1, d2, + d3); // bind affine expr d0, d1 to current input dimension i, j, p, q + + // input affine map + AffineMap inputMap = AffineMap::get( + 4, 0, ArrayRef{d0 + d2, d1 + d3}, rewriter.getContext()); + // kernel affine map + AffineMap kernelMap = AffineMap::get(4, 0, ArrayRef{d2, d3}, + rewriter.getContext()); + + // loops + 
int64_t lb = 0, step = 1; + /* looping i*/ + AffineForOp forOpI = rewriter.create(loc, lb, OH, step); + rewriter.setInsertionPointToStart(forOpI.getBody()); + auto ivI = forOpI.getInductionVar(); + + /* looping j*/ + AffineForOp forOpJ = rewriter.create(loc, lb, OW, step); + rewriter.setInsertionPointToStart(forOpJ.getBody()); + auto ivJ = forOpJ.getInductionVar(); + + // initilize output val + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + rewriter.create(loc, zeroVal, alloc, ValueRange{ivI, ivJ}); + + /* looping p*/ + AffineForOp forOpP = rewriter.create(loc, lb, KH, step); + rewriter.setInsertionPointToStart(forOpP.getBody()); + auto ivP = forOpP.getInductionVar(); + + /* looping q*/ + AffineForOp forOpQ = rewriter.create(loc, lb, KW, step); + rewriter.setInsertionPointToStart(forOpQ.getBody()); + auto ivQ = forOpQ.getInductionVar(); + + // input bound check + Value inputRow = rewriter.create( + loc, inputMap.getSubMap(0), ValueRange{ivI, ivJ, ivP, ivQ}); + Value inputCol = rewriter.create( + loc, inputMap.getSubMap(1), ValueRange{ivI, ivJ, ivP, ivQ}); + Value rowUB = rewriter.create( + loc, arith::CmpIPredicate::slt, inputRow, + rewriter.create(loc, IH)); + Value colUB = rewriter.create( + loc, arith::CmpIPredicate::slt, inputCol, + rewriter.create(loc, IW)); + Value bound = rewriter.create(loc, rowUB, colUB); + + // bound condition + rewriter.create( + loc, bound, [&](OpBuilder &builder, Location loc) { + // load input + Value inputVal = builder.create( + loc, input, inputMap, ValueRange{ivI, ivJ, ivP, ivQ}); + Value kernelVal = builder.create( + loc, kernel, kernelMap, ValueRange{ivI, ivJ, ivP, ivQ}); + // mul + Value prod = builder.create(loc, inputVal, kernelVal); + Value outputVal = + builder.create(loc, alloc, ValueRange{ivI, ivJ}); + Value sum = builder.create(loc, prod, outputVal); + + // store the computed output + builder.create(loc, sum, alloc, ValueRange{ivI, ivJ}); + + builder.create(loc); + }); + + 
rewriter.replaceOp(op, alloc); + + return success(); + } +}; // conv2d + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: ThresholdUpOpLowering operations +//===----------------------------------------------------------------------===// + +struct ThresholdUpOpLowering : public ConversionPattern { + ThresholdUpOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::ThresholdUpOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // Pseudo-code: + // y[n] = 1 , if a[i] >= threshld + // = 0 , else + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + Value constant1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + // y[n] = a[n] , if a[i] >= threshld + // loop from 0 to len + + // load from X, + ThresholdUpOpAdaptor thresholdUpAdaptor(operands); + auto input = thresholdUpAdaptor.getInput(); + auto thresholdMemRef = thresholdUpAdaptor.getThreshold(); + auto returnOriginalMemRef = thresholdUpAdaptor.getReturnoriginal(); + + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + // for loop from 0 to len(Output) + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOpY.getBody()); + auto ivY = forOpY.getInductionVar(); + + Value inputX = + rewriter.create(loc, input, ValueRange{ivY}); + + // Load the threshold value from the memref + auto threshold = + rewriter.create(loc, thresholdMemRef, ValueRange{}); + auto returnOriginal = + 
rewriter.create(loc, returnOriginalMemRef, ValueRange{}); + + // Compare a[i] >= threshold + auto cmp1 = rewriter.create(loc, arith::CmpFPredicate::OGE, + inputX, threshold); + // Compare if return original is true or false and return 1 or original + // value + auto cmpro = rewriter.create(loc, arith::CmpFPredicate::OEQ, + constant1, returnOriginal); + + // Use select to choose between inputX and 1 + auto selectreturn = + rewriter.create(loc, cmpro, inputX, constant1); + + // Use select to choose between 0 and selectreturn + auto selectOp = + rewriter.create(loc, cmp1, selectreturn, constant0); + + // Store the result + rewriter.create(loc, selectOp, alloc, ValueRange{ivY}); + + rewriter.setInsertionPointAfter(forOpY); + // debug + // forOpY->dump(); + // affine.store %cst, %alloc_10[] : memref + // %0 = affine.load %alloc_11[4] : memref<10xf64> + // affine.store %0, %alloc[0] : memref<1xf64> + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: GenerateDTMFOpLowering operations +//===----------------------------------------------------------------------===// + +struct GenerateDTMFOpLowering : public ConversionPattern { + GenerateDTMFOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::GenerateDTMFOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + GenerateDTMFOpAdaptor generatedtmfAdaptor(operands); + std::vector> freqPairs = { + {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477}, {770, 1209}, + {770, 1336}, {770, 1477}, {852, 1209}, {852, 1336}, {852, 1477}}; + + auto GetDigitInput = op->getOperand(0); + auto inputvl = 
GetDigitInput.getDefiningOp(); + auto inputvalue = inputvl.getValue(); + auto elements1 = inputvalue.getValues(); + float input = elements1[0].getValueAsDouble(); + + auto GetDurationOp = op->getOperand(1); + auto constantOp2ndArg = GetDurationOp.getDefiningOp(); + auto constant2ndValue = constantOp2ndArg.getValue(); + auto elements2 = constant2ndValue.getValues(); + float duration = elements2[0].getValueAsDouble(); + + auto GetFreqOp = op->getOperand(2); + auto constantOp3rdArg = GetFreqOp.getDefiningOp(); + auto constant3rdValue = constantOp3rdArg.getValue(); + auto elements3 = constant3rdValue.getValues(); + float freq = elements3[0].getValueAsDouble(); + + const std::vector &pair = freqPairs[input]; + auto f1 = pair[0]; + auto f2 = pair[1]; + auto ub = tensorType.getShape()[0]; + auto step = 1; + + // Create constants + auto const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + auto const10 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(10)); + auto constFs = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(freq)); + auto constF1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f1)); + auto constF2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f2)); + + // Create a loop to generate the DTMF tone + auto forOp = rewriter.create( + loc, rewriter.create(loc, 0), + rewriter.create(loc, ub), + rewriter.create(loc, 1)); + + rewriter.setInsertionPointToStart(forOp.getBody()); + + // Get the loop induction variable + auto iv = forOp.getInductionVar(); + + // Convert loop index to time + auto indexToI64 = + rewriter.create(loc, rewriter.getI64Type(), iv); + auto indexToFloat = rewriter.create( + loc, rewriter.getF64Type(), indexToI64); + auto time = rewriter.create(loc, indexToFloat, constFs); + + // Generate sine wave for f1 + auto mulFreqTime1 = rewriter.create(loc, constF1, time); + auto mul2Pi1 = rewriter.create(loc, 
const2pi, mulFreqTime1); + auto sine1 = rewriter.create(loc, mul2Pi1); + + // Generate sine wave for f2 + auto mulFreqTime2 = rewriter.create(loc, constF2, time); + auto mul2Pi2 = rewriter.create(loc, const2pi, mulFreqTime2); + auto sine2 = rewriter.create(loc, mul2Pi2); + + // Combine the two sine waves + auto sumSines = rewriter.create(loc, sine1, sine2); + auto scaledSum = rewriter.create(loc, const10, sumSines); + + // Store the result in the allocated memref + rewriter.create(loc, scaledSum, alloc, iv); + + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FFTFreqOpLowering operations +//===----------------------------------------------------------------------===// + +struct FFTFreqOpLowering : public ConversionPattern { + FFTFreqOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTFreqOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // Get the result type of the operation + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // Extract the operands + auto n = op->getOperand(0); + auto nArg = n.getDefiningOp(); + auto nValue = nArg.getValue(); + auto elements0 = nValue.getValues(); + float nDouble = elements0[0].getValueAsDouble(); + + auto d = op->getOperand(1); + auto dArg = d.getDefiningOp(); + auto dValue = dArg.getValue(); + auto elements1 = dValue.getValues(); + float dDouble = elements1[0].getValueAsDouble(); + + // Create constants + auto constN = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(nDouble)); + auto constD = rewriter.create( + loc, rewriter.getF64Type(), 
rewriter.getF64FloatAttr(dDouble)); + + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + auto NtimesD = rewriter.create(loc, constN, constD); + auto half = rewriter.create(loc, llvm::APFloat(0.5), + rewriter.getF64Type()); + auto one = rewriter.create(loc, llvm::APFloat(1.0), + rewriter.getF64Type()); + auto nPlusOne = rewriter.create(loc, constN, one); + auto nPlusOneByTwo = rewriter.create(loc, nPlusOne, half); + + auto forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + auto ivInt = + rewriter.create(loc, rewriter.getI64Type(), iv); + auto ivFloat = + rewriter.create(loc, rewriter.getF64Type(), ivInt); + + auto ifCondition = rewriter.create( + loc, arith::CmpFPredicate::OLE, ivFloat, nPlusOneByTwo); + auto ifOp = rewriter.create( + loc, TypeRange{rewriter.getF64Type()}, ifCondition, true); + + rewriter.setInsertionPointToStart(ifOp.thenBlock()); + auto freq = rewriter.create(loc, ivFloat, NtimesD); + rewriter.create(loc, freq, alloc, ValueRange{iv}); + rewriter.create(loc, ValueRange{freq}); + + rewriter.setInsertionPointToStart(ifOp.elseBlock()); + auto ivminusN = rewriter.create(loc, ivFloat, constN); + auto negfreq = rewriter.create(loc, ivminusN, NtimesD); + rewriter.create(loc, negfreq, alloc, ValueRange{iv}); + rewriter.create(loc, ValueRange{negfreq}); + + rewriter.setInsertionPointAfter(ifOp); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FindDominantPeaksOpLowering operations +//===----------------------------------------------------------------------===// + +struct FindDominantPeaksOpLowering : public ConversionPattern { + FindDominantPeaksOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FindDominantPeaksOp::getOperationName(), 1, + ctx) 
{} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto frequencyOperand = op->getOperand(0); + auto frequenciesType = + llvm::dyn_cast(frequencyOperand.getType()); + auto frequenciesLength = frequenciesType.getNumElements(); + + auto frequenciesLengthIndex = rewriter.create(loc, frequenciesLength); + auto frequenciesLengthI64 = rewriter.create(loc, rewriter.getI64Type(), frequenciesLengthIndex); + + auto frequenciesLengthF64 = rewriter.create(loc, + rewriter.getF64Type(), // frequenciesLength); + frequenciesLengthI64 + ); + + auto two = rewriter.create(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(2.0)); + + auto frequenciesHalfLength = rewriter.create(loc, frequenciesLengthF64, two); + + auto frequenciesHalfLengthI32 = rewriter.create(loc, rewriter.getIntegerType(32), frequenciesHalfLength); + auto frequenciesHalfLengthIndex = rewriter.create(loc, rewriter.getIndexType(), frequenciesHalfLengthI32); + // Value length_ui = rewriter.create( + // loc, rewriter.getIntegerType(32), loadedLength); + // Value length_index = rewriter.create( + // loc, rewriter.getIndexType(), length_ui); + + FindDominantPeaksOpAdaptor findDominantPeaksOpAdaptor(operands); + auto frequencies = findDominantPeaksOpAdaptor.getFrequencies(); + auto magnitudes = findDominantPeaksOpAdaptor.getMagnitudes(); + + // Initialize variables to track the two highest magnitudes and their + // corresponding frequencies + auto max1 = rewriter.create(loc, llvm::APFloat(0.0), + rewriter.getF64Type()); + auto max2 = rewriter.create(loc, llvm::APFloat(0.0), + rewriter.getF64Type()); + auto freq1 = rewriter.create( + loc, llvm::APFloat(0.0), rewriter.getF64Type()); + auto freq2 = rewriter.create( + loc, llvm::APFloat(0.0), 
rewriter.getF64Type()); + + auto lb = rewriter.create(loc, 0); + auto ub = frequenciesHalfLengthIndex; // rewriter.create(loc, frequenciesLength); + auto step = rewriter.create(loc, 1); + + auto forOp = rewriter.create( + loc, lb, ub, step, ValueRange{max1, max2, freq1, freq2}); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + // Load current frequency and magnitude + auto currentFreq = + rewriter.create(loc, frequencies, ValueRange{iv}); + auto currentMag = + rewriter.create(loc, magnitudes, ValueRange{iv}); + + // Check if frequency is positive + auto zero = rewriter.create(loc, llvm::APFloat(0.0), + rewriter.getF64Type()); + auto isPositive = rewriter.create( + loc, arith::CmpFPredicate::OGE, currentFreq, zero); + + // Create if operation for positive frequency check + auto ifOp = rewriter.create(loc, forOp.getResultTypes(), + isPositive, true); + rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front()); + // Compare current magnitude with max1 + auto cmpMax1 = rewriter.create( + loc, arith::CmpFPredicate::OGT, currentMag, + forOp.getRegionIterArgs()[0]); + auto ifMax1 = + rewriter.create(loc, forOp.getResultTypes(), cmpMax1, true); + + rewriter.setInsertionPointToStart(&ifMax1.getThenRegion().front()); + // Update max2 and freq2 with previous max1 and freq1 + auto newMax2 = forOp.getRegionIterArgs()[0]; + auto newFreq2 = forOp.getRegionIterArgs()[2]; + // Update max1 and freq1 with current values + auto newMax1 = currentMag; + auto newFreq1 = currentFreq; + rewriter.create( + loc, ValueRange({newMax1, newMax2, newFreq1, newFreq2})); + + rewriter.setInsertionPointToStart(&ifMax1.getElseRegion().front()); + // Compare current magnitude with max2 + auto cmpMax2 = rewriter.create( + loc, arith::CmpFPredicate::OGT, currentMag, + forOp.getRegionIterArgs()[1]); + auto ifMax2 = + rewriter.create(loc, forOp.getResultTypes(), cmpMax2, true); + + rewriter.setInsertionPointToStart(&ifMax2.getThenRegion().front()); + // 
Update max2 and freq2 with current values + rewriter.create( + loc, ValueRange{forOp.getRegionIterArgs()[0], currentMag, + forOp.getRegionIterArgs()[2], currentFreq}); + + rewriter.setInsertionPointToStart(&ifMax2.getElseRegion().front()); + // No update, yield original values + rewriter.create(loc, forOp.getRegionIterArgs()); + + rewriter.setInsertionPointAfter(ifMax2); + rewriter.create(loc, ifMax2.getResults()); + + rewriter.setInsertionPointAfter(ifMax1); + rewriter.create(loc, ifMax1.getResults()); + + rewriter.setInsertionPointToStart(&ifOp.getElseRegion().front()); + // No update for negative frequencies, yield original values + rewriter.create(loc, forOp.getRegionIterArgs()); + + rewriter.setInsertionPointAfter(ifOp); + rewriter.create(loc, ifOp.getResults()); + + rewriter.setInsertionPointAfter(forOp); + + // Compare freq1 and freq2 to determine the order + auto cmpFreq = rewriter.create( + loc, arith::CmpFPredicate::OLT, forOp.getResult(2), forOp.getResult(3)); + + auto ifFreq = rewriter.create( + loc, TypeRange{rewriter.getF64Type(), rewriter.getF64Type()}, cmpFreq, + true); + + rewriter.setInsertionPointToStart(&ifFreq.getThenRegion().front()); + // freq1 < freq2, so keep the order + rewriter.create( + loc, ValueRange{forOp.getResult(2), forOp.getResult(3)}); + + rewriter.setInsertionPointToStart(&ifFreq.getElseRegion().front()); + // freq1 >= freq2, so swap the order + rewriter.create( + loc, ValueRange{forOp.getResult(3), forOp.getResult(2)}); + + rewriter.setInsertionPointAfter(ifFreq); + + // Store the two highest peak frequencies in the result memref, now in the + // correct order + auto storeFreq1 = rewriter.create( + loc, ifFreq.getResult(0), alloc, + ValueRange{rewriter.create(loc, 0)}); + auto storeFreq2 = rewriter.create( + loc, ifFreq.getResult(1), alloc, + ValueRange{rewriter.create(loc, 1)}); + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// 
ToyToAffine RewritePatterns: RecoverDTMFDigitOpLowering operations +//===----------------------------------------------------------------------===// + +struct RecoverDTMFDigitOpLowering : public ConversionPattern { + RecoverDTMFDigitOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::RecoverDTMFDigitOp::getOperationName(), 1, ctx) { + } + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto indexMemRefType = MemRefType::get({}, rewriter.getIndexType()); + auto finalMatchIndex_alloc = + insertAllocAndDealloc(indexMemRefType, loc, rewriter); + + RecoverDTMFDigitOpAdaptor recoverDTMFDigitOpAdaptor(operands); + + auto frequencies = recoverDTMFDigitOpAdaptor.getFrequencies(); + auto freqPairs = recoverDTMFDigitOpAdaptor.getFreqPairs(); + + auto highFreqIndex = rewriter.create(loc, 0); + auto lowFreqIndex = rewriter.create(loc, 1); + + auto highFreq = rewriter.create(loc, frequencies, + ValueRange{highFreqIndex}); + auto lowFreq = rewriter.create(loc, frequencies, + ValueRange{lowFreqIndex}); + + auto initialMatchIndex = rewriter.create(loc, -1); + rewriter.create(loc, initialMatchIndex, + finalMatchIndex_alloc, ValueRange{}); + + auto tolerance = rewriter.create( + loc, llvm::APFloat(3.0), rewriter.getF64Type()); + + auto lb = rewriter.create(loc, 0); + auto ub = rewriter.create(loc, 10); + auto step = rewriter.create(loc, 1); + + auto forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + + auto matchIndex = rewriter.create( + loc, finalMatchIndex_alloc, ValueRange{}); + + auto highFreqOg = rewriter.create( + loc, freqPairs, ValueRange{iv, highFreqIndex}); + auto lowFreqOg = rewriter.create( + loc, 
freqPairs, ValueRange{iv, lowFreqIndex}); + + auto highFreqDiff = + rewriter.create(loc, highFreqOg, highFreq); + auto lowFreqDiff = rewriter.create(loc, lowFreqOg, lowFreq); + + auto absHighFreqDiff = rewriter.create(loc, highFreqDiff); + auto absLowFreqDiff = rewriter.create(loc, lowFreqDiff); + + auto highFreqMatch = rewriter.create( + loc, arith::CmpFPredicate::OLE, absHighFreqDiff, tolerance); + auto lowFreqMatch = rewriter.create( + loc, arith::CmpFPredicate::OLE, absLowFreqDiff, tolerance); + auto bothMatch = + rewriter.create(loc, highFreqMatch, lowFreqMatch); + + auto newMatchIndex = + rewriter.create(loc, bothMatch, iv, matchIndex); + + rewriter.create(loc, newMatchIndex, finalMatchIndex_alloc, + ValueRange{}); + + rewriter.setInsertionPointAfter(forOp); + + auto finalMatchIndex = rewriter.create( + loc, finalMatchIndex_alloc, ValueRange{}); + + auto finalMatchIndexI64 = rewriter.create( + loc, rewriter.getI64Type(), finalMatchIndex); + auto finalMatchIndexF64 = rewriter.create( + loc, rewriter.getF64Type(), finalMatchIndexI64); + + auto zero = rewriter.create(loc, 0); + rewriter.create(loc, finalMatchIndexF64, alloc, + ValueRange{zero}); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +// Store finalMatchIndexF64 into alloc +// auto zero = rewriter.create(loc, 0); +// rewriter.create(loc, finalMatchIndexF64, alloc, +// ValueRange{zero}); + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: GenerateVoiceSignatureOpLowering operations +//===----------------------------------------------------------------------===// + +struct GenerateVoiceSignatureOpLowering : public ConversionPattern { + GenerateVoiceSignatureOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::GenerateVoiceSignatureOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + 
auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + auto GetF1Op = op->getOperand(0); + auto constantOp0thArg = GetF1Op.getDefiningOp(); + auto constant0thValue = constantOp0thArg.getValue(); + auto elements0 = constant0thValue.getValues(); + float f1 = elements0[0].getValueAsDouble(); + + auto GetF2Op = op->getOperand(1); + auto constantOp1stArg = GetF2Op.getDefiningOp(); + auto constant1stValue = constantOp1stArg.getValue(); + auto elements1 = constant1stValue.getValues(); + float f2 = elements1[0].getValueAsDouble(); + + auto GetDurationOp = op->getOperand(2); + auto constantOp2ndArg = GetDurationOp.getDefiningOp(); + auto constant2ndValue = constantOp2ndArg.getValue(); + auto elements2 = constant2ndValue.getValues(); + float duration = elements2[0].getValueAsDouble(); + + auto GetFreqOp = op->getOperand(3); + auto constantOp3rdArg = GetFreqOp.getDefiningOp(); + auto constant3rdValue = constantOp3rdArg.getValue(); + auto elements3 = constant3rdValue.getValues(); + float freq = elements3[0].getValueAsDouble(); + + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + // Create constants + auto const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + auto const05 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.5)); + auto constFs = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(freq)); + auto constF1 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f1)); + auto constF2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f2)); + + // Create a loop to generate the DTMF tone + auto forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + // Get the loop induction 
variable + auto iv = forOp.getInductionVar(); + + // Convert loop index to time + auto indexToI64 = + rewriter.create(loc, rewriter.getI64Type(), iv); + auto indexToFloat = rewriter.create( + loc, rewriter.getF64Type(), indexToI64); + auto time = rewriter.create(loc, indexToFloat, constFs); + + // Generate sine wave for f1 + auto mulFreqTime1 = rewriter.create(loc, constF1, time); + auto mul2Pi1 = rewriter.create(loc, const2pi, mulFreqTime1); + auto sine1 = rewriter.create(loc, mul2Pi1); + + // Generate sine wave for f2 + auto mulFreqTime2 = rewriter.create(loc, constF2, time); + auto mul2Pi2 = rewriter.create(loc, const2pi, mulFreqTime2); + auto sine2 = rewriter.create(loc, mul2Pi2); + + // Combine the two sine waves + auto sumSines = rewriter.create(loc, sine1, sine2); + // auto scaledSum = rewriter.create(loc, const05, sumSines); + + // Store the result in the allocated memref + rewriter.create(loc, sumSines, alloc, iv); + + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FFTCombineOpLowering operations +//===----------------------------------------------------------------------===// + +struct FFTCombineOpLowering : public ConversionPattern { + FFTCombineOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTCombineOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + FFTCombineOpAdaptor fftCombineOpAdaptor(operands); + + auto real = fftCombineOpAdaptor.getReal(); + auto imag = fftCombineOpAdaptor.getImag(); + + auto lb = rewriter.create(loc, 0); + auto ub = + 
rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + auto forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + + auto realInput = rewriter.create(loc, real, ValueRange{iv}); + auto imagInput = rewriter.create(loc, imag, ValueRange{iv}); + auto realInputSquared = + rewriter.create(loc, realInput, realInput); + auto imagInputSquared = + rewriter.create(loc, imagInput, imagInput); + auto sum = + rewriter.create(loc, realInputSquared, imagInputSquared); + auto root = rewriter.create(loc, sum); + + rewriter.create(loc, root, alloc, ValueRange{iv}); + + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +// Store finalMatchIndexF64 into alloc +// auto zero = rewriter.create(loc, 0); +// rewriter.create(loc, finalMatchIndexF64, alloc, +// ValueRange{zero}); + +struct QamModulateRealOpLowering : public ConversionPattern { + QamModulateRealOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::QamModulateRealOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto output = llvm::cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + QamModulateRealOpAdaptor adaptor(operands); + Value signal = adaptor.getSignal(); + + llvm::ArrayRef outputShape = output.getShape(); + + // constant vals; + Value negOneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + // get i*2 from input signal + AffineExpr realExpr = rewriter.getAffineDimExpr(0) * 
rewriter.getAffineConstantExpr(2); + + // real affine map + AffineMap signalMap = AffineMap::get(1, 0, realExpr); + + // loops + int64_t lb = 0, step = 1, ub = outputShape[0]; + /* looping i*/ + AffineForOp forOpI = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOpI.getBody()); + auto ivI = forOpI.getInductionVar(); + + // input bound check + Value signalNum = + rewriter.create(loc, signal, signalMap, ValueRange{ivI}); + + Value zeroReal = rewriter.create( + loc, arith::CmpFPredicate::OEQ, signalNum, zeroVal); + + Value out = + rewriter.create(loc, zeroReal, negOneVal, oneVal); + + rewriter.create(loc, out, alloc, ValueRange{ivI}); + + rewriter.setInsertionPointAfter(forOpI); + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + +struct QamModulateImgOpLowering : public ConversionPattern { + QamModulateImgOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::QamModulateImgOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto output = llvm::cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + QamModulateImgOpAdaptor adaptor(operands); + Value signal = adaptor.getSignal(); + + llvm::ArrayRef outputShape = output.getShape(); + + // constant vals; + Value negOneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + AffineExpr imgExpr = rewriter.getAffineDimExpr(0) * rewriter.getAffineConstantExpr(2) + rewriter.getAffineConstantExpr(1); + + // real affine map + AffineMap signalMap = AffineMap::get(1, 0, imgExpr); + // loops + int64_t lb = 0, step = 1, ub = outputShape[0]; + /* 
looping i*/ + AffineForOp forOpI = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOpI.getBody()); + auto ivI = forOpI.getInductionVar(); + + // input bound check + Value signalNum = + rewriter.create(loc, signal, signalMap, ValueRange{ivI}); + + Value zeroReal = rewriter.create( + loc, arith::CmpFPredicate::OEQ, signalNum, zeroVal); + + Value out = + rewriter.create(loc, zeroReal, negOneVal, oneVal); + + rewriter.create(loc, out, alloc, ValueRange{ivI}); + + rewriter.setInsertionPointAfter(forOpI); + rewriter.replaceOp(op, alloc); + + return success(); + } +}; +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: QAM demodulate operations +//===----------------------------------------------------------------------===// +// #define DUMP(x) llvm::errs() << x << "\n"; + +struct QamDemodulateOpLowering : public ConversionPattern { + QamDemodulateOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::QamDemodulateOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + // output mem alloc and dealloc + auto output = llvm::cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + QamDemodulateOpAdaptor qamDemodualteAdaptor(operands); + Value realVal = qamDemodualteAdaptor.getReal(); + Value imgVal = qamDemodualteAdaptor.getImagine(); + + // ranked tensor type + auto realType = + llvm::cast(op->getOperand(0).getType()); + + llvm::ArrayRef realShape = realType.getShape(); + + // constant vals; + Value negOneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), 
rewriter.getF64FloatAttr(1)); + + AffineExpr signalExpr = rewriter.getAffineDimExpr(0).floorDiv(2); + AffineExpr outputExpr = rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + + // output affine map + AffineMap signalMap = AffineMap::get(1, 0, signalExpr); + AffineMap outputMap = AffineMap::get(1, 0, outputExpr); + + // loops + int64_t lb = 0, step = 2, ub = output.getShape()[0]; + /* looping i*/ + AffineForOp forOpI = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOpI.getBody()); + auto ivI = forOpI.getInductionVar(); + + // input bound check + Value realNum = + rewriter.create(loc, realVal, signalMap, ValueRange{ivI}); + Value imgNum = + rewriter.create(loc, imgVal, signalMap, ValueRange{ivI}); + + Value negReal = rewriter.create( + loc, arith::CmpFPredicate::OEQ, realNum, negOneVal); + Value negImagine = rewriter.create( + loc, arith::CmpFPredicate::OEQ, imgNum, negOneVal); + + Value out1 = + rewriter.create(loc, negReal, zeroVal, oneVal); + Value out2 = + rewriter.create(loc, negImagine, zeroVal, oneVal); + + rewriter.create(loc, out1, alloc, ValueRange{ivI}); + rewriter.create(loc, out2, alloc, outputMap, ValueRange{ivI}); + + rewriter.setInsertionPointAfter(forOpI); + rewriter.replaceOp(op, alloc); + + return success(); + } +}; // qam_demodulate op + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: BeamForm operations +//===----------------------------------------------------------------------===// + +struct BeamFormOpLowering : public ConversionPattern { + BeamFormOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::BeamFormOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto beamFormOp = llvm::cast(op); + + // allocating space for output + auto output = llvm::cast((*op->result_type_begin())); + auto 
outputMemRefType = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMemRefType, loc, rewriter); + + BeamFormOpAdaptor beamFormAdaptor(operands); + auto time = beamFormAdaptor.getTime(); + auto weights = beamFormAdaptor.getWeights(); + + // allocating space for internal generated signals + int64_t timeDim = output.getShape()[0]; // dry run: 9 + int64_t antennas = beamFormOp.getAntennas(); + int64_t frequency = beamFormOp.getFreq(); + + llvm::SmallVector signalShapeVec{antennas, timeDim}; + llvm::ArrayRef signalShape(signalShapeVec); + + auto signalType = output.clone(signalShape, output.getElementType()); + auto signalMemRefType = convertTensorToMemRef(signalType); + auto allocSignal = insertAllocAndDealloc(signalMemRefType, loc, rewriter); + + AffineExpr d0, d1; // i, j for generated signal dimension + bindDims(rewriter.getContext(), d0, d1); + + // generated input map + AffineMap genInputMap = + AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef{d1, d0}, + rewriter.getContext()); + // time affine map + AffineMap timeMap = + AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef{d1}, + rewriter.getContext()); + + // // output map + // AffineMap outputMap = + // AffineMap::get(2, 0, ArrayRef{d0}, rewriter.getContext()); + + auto pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.1415926)); + auto zero = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(0)); + auto one = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(1)); + auto two = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(2)); + auto four = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(4)); + auto two_pi = rewriter.create(loc, pi, two); // 2 * pi + auto freq_val = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(frequency)); + auto phase_var = + rewriter.create(loc, two_pi, freq_val); // 2*pi*freq + + // for loop from 0 to phase + int64_t lb = 0, 
ub = antennas, step = 1; + affine::AffineForOp forOpI = + rewriter.create(loc, lb, ub, step, ValueRange{zero}); + auto ivI = forOpI.getInductionVar(); // i : phase + rewriter.setInsertionPointToStart(forOpI.getBody()); + + // get the induction var to phase variable + auto floatI = forOpI.getBody()->getArgument(1); + + auto iter_tmp = rewriter.create(loc, floatI, pi); // i * pi + auto iter_args = + rewriter.create(loc, iter_tmp, four); // i*pi/4 + + // for loop from 0 to timeDim + ub = timeDim; + affine::AffineForOp forOpJ = + rewriter.create(loc, lb, ub, step); + auto ivJ = forOpJ.getInductionVar(); // i : phase + rewriter.setInsertionPointToStart(forOpJ.getBody()); + + // loop body + auto time_var = + rewriter.create(loc, time, timeMap, ValueRange{ivI, ivJ}); + auto mul_var = rewriter.create(loc, time_var, phase_var); + auto sin_body = rewriter.create(loc, mul_var, iter_args); + auto result = rewriter.create(loc, sin_body); + rewriter.create(loc, result, allocSignal, + ValueRange{ivI, ivJ}); + + rewriter.setInsertionPointAfter(forOpJ); // end for loop: j + + auto increFloatI = rewriter.create(loc, floatI, one); + rewriter.create(loc, ValueRange{increFloatI}); + + rewriter.setInsertionPointAfter(forOpI); // end for loop: i + + ub = timeDim; + affine::AffineForOp forOpIOut = + rewriter.create(loc, lb, ub, step); + auto ivIoutput = forOpIOut.getInductionVar(); + rewriter.setInsertionPointToStart(forOpIOut.getBody()); + + ub = antennas; + affine::AffineForOp forOpJOut = + rewriter.create(loc, lb, ub, step, ValueRange{zero}); + auto ivJoutput = forOpJOut.getInductionVar(); + rewriter.setInsertionPointToStart(forOpJOut.getBody()); + + // load from signal input + auto signalInput = rewriter.create( + loc, allocSignal, genInputMap, ValueRange{ivIoutput, ivJoutput}); + auto weight = rewriter.create( + loc, weights, timeMap, ValueRange{ivIoutput, ivJoutput}); + auto intermediateVal = + rewriter.create(loc, signalInput, weight); + + // iterargs + auto sumVal = 
forOpJOut.getBody()->getArgument(1); + auto beamOut = rewriter.create(loc, intermediateVal, sumVal); + + rewriter.create(loc, beamOut, alloc, ValueRange{ivIoutput}); + rewriter.create(loc, ValueRange{beamOut}); + + rewriter.setInsertionPointAfter(forOpJOut); + rewriter.setInsertionPointAfter(forOpIOut); + + rewriter.replaceOp(op, alloc); + + return mlir::success(); + } +}; + +struct SpaceModulateOpLowering : public ConversionPattern { + SpaceModulateOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::SpaceModulateOp::getOperationName(), 1, ctx) {} + + mlir::LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // output + auto output = llvm::dyn_cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + SpaceModulateOpAdaptor spaceModAdaptor(operands); + Value signal = spaceModAdaptor.getSignal(); + auto signalType = + llvm::dyn_cast(op->getOperand(0).getType()); + llvm::ArrayRef signalShape = signalType.getShape(); + + Value negOneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + // Value zeroVal = rewriter.create( + // loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + // one dim loop + int64_t lb = 0, ub = signalShape[0], step = 1; + AffineForOp forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + + Value bit = rewriter.create(loc, signal, ValueRange{iv}); + + Value isOne = rewriter.create(loc, arith::CmpFPredicate::OEQ, + bit, oneVal); + + auto out = rewriter.create(loc, isOne, oneVal, negOneVal); + + rewriter.create(loc, out, alloc, ValueRange{iv}); + rewriter.setInsertionPointAfter(forOp); + + rewriter.replaceOp(op, alloc); + return mlir::success(); 
+ } +}; // space modulate + +struct SpaceDemodulateOpLowering : public ConversionPattern { + SpaceDemodulateOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::SpaceDemodulateOp::getOperationName(), 1, ctx) {} + + mlir::LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // output + auto output = llvm::dyn_cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + SpaceDemodulateOpAdaptor spaceDemodAdaptor(operands); + Value binary = spaceDemodAdaptor.getBinary(); + auto binaryType = + llvm::dyn_cast(op->getOperand(0).getType()); + llvm::ArrayRef binaryShape = binaryType.getShape(); + + // Value negOneVal = rewriter.create( + // loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1)); + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + // one dim loop + int64_t lb = 0, ub = binaryShape[0], step = 1; + AffineForOp forOp = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp.getBody()); + auto iv = forOp.getInductionVar(); + + Value bit = rewriter.create(loc, binary, ValueRange{iv}); + + Value isOne = rewriter.create(loc, arith::CmpFPredicate::OGE, + bit, oneVal); + + auto out = rewriter.create(loc, isOne, oneVal, zeroVal); + + rewriter.create(loc, out, alloc, ValueRange{iv}); + + rewriter.setInsertionPointAfter(forOp); + rewriter.replaceOp(op, alloc); + return mlir::success(); + } +}; // soace demodulate + +struct SpaceErrCorrectionOpLowering : public ConversionPattern { + SpaceErrCorrectionOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::SpaceErrCorrectionOp::getOperationName(), 1, + ctx) {} + + mlir::LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter 
&rewriter) const final { + auto loc = op->getLoc(); + + // output + auto output = llvm::dyn_cast((*op->result_type_begin())); + auto outputMem = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter); + + SpaceErrCorrectionOpAdaptor adaptor(operands); + Value signal = adaptor.getSignal(); + auto signalType = + llvm::dyn_cast(op->getOperand(0).getType()); + llvm::ArrayRef signalShape = signalType.getShape(); + + Value zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + Value twoVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(2)); + + AffineExpr d0, d1; + bindDims(rewriter.getContext(), d0, d1); + AffineMap first = + AffineMap::get(2, 0, ArrayRef{d0}, rewriter.getContext()); + AffineMap index = AffineMap::get(2, 0, ArrayRef{d0 + d1}, + rewriter.getContext()); + + int64_t lb = 0, ub = signalShape[0], step = 8; + AffineForOp forOpI = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOpI.getBody()); + auto ivI = forOpI.getInductionVar(); + + auto firstVal = rewriter.create( + loc, signal, ValueRange{ivI}); // signal [0] + rewriter.create( + loc, firstVal, alloc, ValueRange{ivI}); // store signal[0] to alloc[0] + + int64_t inner_lb = 1, inner_ub = 8, inner_step = 1; + AffineForOp forOpJ = + rewriter.create(loc, inner_lb, inner_ub, inner_step); + rewriter.setInsertionPointToStart(forOpJ.getBody()); + auto ivJ = forOpJ.getInductionVar(); + + auto stored = rewriter.create( + loc, alloc, first, ValueRange{ivI, ivJ}); // load alloc[0] + auto loaded = rewriter.create( + loc, signal, index, ValueRange{ivI, ivJ}); // load signal[1...7] + + auto added = rewriter.create(loc, stored, loaded); // add + rewriter.create(loc, added, alloc, + ValueRange{ivI}); // store val to alloc[0] + rewriter.create( + loc, loaded, alloc, index, + ValueRange{ivI, ivJ}); // store 
val to alloc[1...7] + + rewriter.setInsertionPointAfter(forOpJ); + + auto initVal = rewriter.create( + loc, signal, ValueRange{ivI}); // load signal[0] + auto oneCount = rewriter.create( + loc, alloc, ValueRange{ivI}); // load alloc[0] + auto parityCheck = rewriter.create( + loc, oneCount, + twoVal); // get remainder from oneCount / 2 -> either 1 or 0 + + auto oddParity = + rewriter.create(loc, arith::CmpFPredicate::OEQ, oneVal, + parityCheck); // if paritycheck == 1 + auto valToAlloc = rewriter.create( + loc, oddParity, zeroVal, initVal); // if true: valToAlloc = 0 else NC + + rewriter.create( + loc, valToAlloc, alloc, ValueRange{ivI}); // store the value to alloc[0] + + rewriter.setInsertionPointAfter(forOpI); + + rewriter.replaceOp(op, alloc); + return mlir::success(); + } +}; + +struct ArgMaxOpLowering : public ConversionPattern { + ArgMaxOpLowering(MLIRContext *context) + : ConversionPattern(dsp::ArgMaxOp::getOperationName(), 1, context) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto zeroVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + auto oneVal = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + + // argmax adaptor + ArgMaxOpAdaptor adaptor(operands); + auto input = adaptor.getInput(); + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + + // get operation + auto argmaxOp = llvm::dyn_cast(op); + + // get attribute + int64_t axis = argmaxOp.getAxis(); + + // output allocation + auto output = llvm::dyn_cast((*op->result_type_begin())); + auto outputMemRef = convertTensorToMemRef(output); + auto alloc = insertAllocAndDealloc(outputMemRef, loc, + rewriter); // stroing max ele index + + auto allocEle = + insertAllocAndDealloc(outputMemRef, loc, rewriter); // stroing max ele + + auto outputShape = output.getShape(); + auto outputSizeOp = rewriter.create( + loc, 
rewriter.getF64Type(), + rewriter.getF64FloatAttr(outputShape.size())); + + auto sizeSwitch = rewriter.create( + loc, arith::CmpFPredicate::OEQ, outputSizeOp, + oneVal); // if outputsize > 1 + AffineExpr d0; + bindDims(rewriter.getContext(), d0); + AffineMap zeroIdx = AffineMap::get(1, 0, ArrayRef{d0 - d0}, + rewriter.getContext()); + + auto ifOp = rewriter.create( + loc, sizeSwitch, + true); // FIXME: else condition for 2 dimensional tensor input + rewriter.setInsertionPointToStart(ifOp.thenBlock()); + // output size == 1 + /* -> one loop through tensor, recording max val and its index + */ + Value iv0 = rewriter.create(loc, 0); + rewriter.create(loc, zeroVal, allocEle, ValueRange{iv0}); + + auto zero = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(0)); + auto one = rewriter.create(loc, rewriter.getF64Type(), + rewriter.getF64FloatAttr(1)); + + int lb = 0, ub = inputType.getShape()[0], step = 1; + auto forOp = + rewriter.create(loc, lb, ub, step, ValueRange{zero}); + auto ivI = forOp.getInductionVar(); + rewriter.setInsertionPointToStart(forOp.getBody()); + + auto floatI = forOp.getBody()->getArgument(1); + + auto curMax = + rewriter.create(loc, allocEle, zeroIdx, ValueRange{ivI}); + auto curMaxIdx = + rewriter.create(loc, alloc, zeroIdx, ValueRange{ivI}); + auto curEle = rewriter.create(loc, input, ivI); + auto cmpOp = rewriter.create(loc, arith::CmpFPredicate::OGT, + curEle, curMax); + // if ele > max: update val + auto maxOp = rewriter.create(loc, cmpOp, curEle, curMax); + + // store the idx based on cmp output + auto idxOp = + rewriter.create(loc, cmpOp, floatI, curMaxIdx); + + rewriter.create(loc, maxOp, allocEle, zeroIdx, + ValueRange{ivI}); + rewriter.create(loc, idxOp, alloc, zeroIdx, ValueRange{ivI}); + + auto increFloatI = rewriter.create(loc, floatI, one); + rewriter.create(loc, ValueRange{increFloatI}); + + rewriter.setInsertionPointAfter(forOp); + rewriter.setInsertionPointAfter(ifOp); + + rewriter.replaceOp(op, alloc); + 
return mlir::success(); + } +}; +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Power operations +//===----------------------------------------------------------------------===// + +struct PowOpLowering : public ConversionPattern { + PowOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::PowOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + dsp::PowOpAdaptor powerAdaptor(operands); + Value lhs = powerAdaptor.getLhs(); + Value rhs = powerAdaptor.getRhs(); + + auto inputType = llvm::cast(lhs.getType()); + auto resultType = llvm::cast((*op->result_type_begin())); + + // allocate space for result + auto memRefType = convertTensorToMemRef(resultType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // affine loops for input + int64_t lb = 0; + int64_t ub = inputType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp = rewriter.create(loc, lb, ub, step); + auto iv = forOp.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp.getBody()); + + Value loadLHS = rewriter.create(loc, lhs, ValueRange{iv}); + Value loadRHS = rewriter.create(loc, rhs, ValueRange{}); + + Value power = rewriter.create(loc, loadLHS, loadRHS); + + // store result + rewriter.create(loc, power, alloc, ValueRange{iv}); + rewriter.setInsertionPointAfter(forOp); + + // replace op + rewriter.replaceOp(op, alloc); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: Normalize operations +//===----------------------------------------------------------------------===// + +struct NormalizeOpLowering : public ConversionPattern { + NormalizeOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::NormalizeOp::getOperationName(), 1, ctx) {} + + LogicalResult + 
matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto tensorType = + llvm::dyn_cast(*op->result_type_begin()); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + auto shape = tensorType.getShape()[0]; + + dsp::NormalizeOpAdaptor adaptor(operands); + Value signal = adaptor.getSignal(); + + Value min = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(INT64_MAX)); + Value max = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(INT64_MIN)); + + int64_t lb = 0, ub = shape, step = 1; + // finding min and max; + affine::AffineForOp forOp = + rewriter.create(loc, lb, ub, step, ValueRange{min, max}); + auto iv = forOp.getInductionVar(); + rewriter.setInsertionPointToStart(forOp.getBody()); + + auto minVal = forOp.getBody()->getArgument(1); + auto maxVal = forOp.getBody()->getArgument(2); + + auto cmpVal = rewriter.create(loc, signal, ValueRange{iv}); + Value isMin = rewriter.create(loc, arith::CmpFPredicate::OLT, + cmpVal, minVal); + Value isMax = rewriter.create(loc, arith::CmpFPredicate::OGT, + cmpVal, maxVal); + + auto minOut = rewriter.create(loc, isMin, cmpVal, minVal); + auto maxOut = rewriter.create(loc, isMax, cmpVal, maxVal); + + rewriter.create( + loc, ValueRange{minOut.getResult(), maxOut.getResult()}); + rewriter.setInsertionPointAfter(forOp); + + auto minSignal = forOp.getResults()[0]; + auto maxSignal = forOp.getResults()[1]; + + auto divisor = rewriter.create(loc, maxSignal, minSignal); + // ele-wise normalize + affine::AffineForOp forOpI = + rewriter.create(loc, lb, ub, step); + auto ivI = forOpI.getInductionVar(); + rewriter.setInsertionPointToStart(forOpI.getBody()); + + auto loadedVal = + rewriter.create(loc, signal, ValueRange{ivI}); + auto subVal = rewriter.create(loc, loadedVal, minSignal); + auto resultVal = rewriter.create(loc, subVal, divisor); + + 
rewriter.create(loc, resultVal, alloc, ValueRange{ivI}); + rewriter.setInsertionPointAfter(forOpI); + + rewriter.replaceOp(op, alloc); + return mlir::success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: NormLMSFilterResponseOptimizeOp operations +//===----------------------------------------------------------------------===// + +struct NormLMSFilterResponseOptimizeOpLowering : public ConversionPattern { + NormLMSFilterResponseOptimizeOpLowering(MLIRContext *ctx) + : ConversionPattern( + dsp::NormLMSFilterResponseOptimizeOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + LMSFilterOpAdaptor lmsFilterAdaptor(operands); + + Value zeroval = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value mu = rewriter.create(loc, lmsFilterAdaptor.getMu()); + + // For loop -- iterate from 0 to last + int64_t lb = 0; + int64_t numSamples = tensorType.getShape()[0]; + int64_t step = 1; + + Value GetFilterLOp = op->getOperand(3); + dsp::ConstantOp constantOp3rdArg = + GetFilterLOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + + auto elements1 = constant3rdValue.getValues(); + float filterlenval = elements1[0].getValueAsDouble(); + auto FilterLength = (uint64_t)filterlenval; + + auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type()); + auto wAlloc = rewriter.create(loc, 
yMemRefType); + + Value min = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(INT64_MAX)); + Value max = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(INT64_MIN)); + + affine::AffineForOp forOp1 = rewriter.create( + loc, lb, numSamples, step, ValueRange{min, max}); + auto iv = forOp1.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + + AffineExpr d0, d1, s0; + bindDims(rewriter.getContext(), d0, d1); + AffineExpr ExprForXSlice = d0 - d1; + AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice); + IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false}); + + rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); + auto iv2 = forOp2.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + Value inputX = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType + + auto wmulx = rewriter.create(loc, inputX, w); + auto ybefore = rewriter.create(loc, alloc, ValueRange{iv}); + auto sumNext = rewriter.create(loc, wmulx, ybefore); + rewriter.create(loc, sumNext, alloc, ValueRange{iv}); + rewriter.setInsertionPointAfter(ifOp); + rewriter.setInsertionPointAfter(forOp2); + + auto cmpVal = rewriter.create(loc, alloc, ValueRange{iv}); + Value minVal = forOp1.getBody()->getArgument(1); + Value maxVal = forOp1.getBody()->getArgument(2); + + auto minOut = rewriter.create(loc, cmpVal, minVal); + auto maxOut = rewriter.create(loc, cmpVal, maxVal); + // get e[n] = d[n] - y[n] + + Value desiredX = rewriter.create( + loc, lmsFilterAdaptor.getRhs(), ValueRange{iv}); + Value ynew = rewriter.create(loc, alloc, ValueRange{iv}); + + Value err 
= rewriter.create(loc, desiredX, ynew); + + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{iv, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = + rewriter.create(loc, lmsFilterAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv3}); + + Value Prevw2 = rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + + rewriter.create( + loc, ValueRange{minOut.getResult(), maxOut.getResult()}); + rewriter.setInsertionPointAfter(forOp1); + + Value minSignal = forOp1.getResults()[0]; + Value maxSignal = forOp1.getResults()[1]; + + Value divisor = rewriter.create(loc, maxSignal, minSignal); + + // ele-wise normalize + affine::AffineForOp forOpI = + rewriter.create(loc, lb, numSamples, step); + auto ivI = forOpI.getInductionVar(); + rewriter.setInsertionPointToStart(forOpI.getBody()); + + auto loadedVal = rewriter.create(loc, alloc, ValueRange{ivI}); + auto subVal = rewriter.create(loc, loadedVal, minSignal); + auto resultVal = rewriter.create(loc, subVal, divisor); + + rewriter.create(loc, resultVal, alloc, ValueRange{ivI}); + rewriter.setInsertionPointAfter(forOpI); + + rewriter.replaceOp(op, alloc); + return success(); + } +}; + +struct Median2SlidingOptimizedOpLowering : public ConversionPattern { + Median2SlidingOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::Median2SlidingOptimizedOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef 
operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + SmallVector lowerBounds(tensorType.getRank(), 0); + SmallVector steps(tensorType.getRank(), 1); + + // For loop + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + Value constant_three = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3)); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + auto iv = forOp1.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + typename dsp::Median2SlidingOptimizedOp::Adaptor + median2SlidingOptimizedOpAdaptor(operands); + + Value elem1 = rewriter.create( + loc, median2SlidingOptimizedOpAdaptor.getInput(), iv); + AffineExpr ExprForElem2 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1); + AffineExpr ExprForElem3 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2); + AffineExpr ExprForElem4 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(3); + AffineExpr ExprForElem5 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(4); + + AffineMap addMapForElem2 = AffineMap::get(1, 0, ExprForElem2); + AffineMap addMapForElem3 = AffineMap::get(1, 0, ExprForElem3); + AffineMap addMapForElem4 = AffineMap::get(1, 0, ExprForElem4); + AffineMap addMapForElem5 = AffineMap::get(1, 0, ExprForElem5); + + Value elem2 = rewriter.create( + loc, median2SlidingOptimizedOpAdaptor.getInput(), addMapForElem2, + ValueRange{iv}); + Value elem3 = rewriter.create( + loc, median2SlidingOptimizedOpAdaptor.getInput(), addMapForElem3, + ValueRange{iv}); + Value elem4 = rewriter.create( + loc, median2SlidingOptimizedOpAdaptor.getInput(), addMapForElem4, + ValueRange{iv}); + Value elem5 = rewriter.create( + loc, 
median2SlidingOptimizedOpAdaptor.getInput(), addMapForElem5, + ValueRange{iv}); + + // sums + Value sum23 = rewriter.create(loc, elem2, elem3); + Value sum34 = rewriter.create(loc, elem3, elem4); + + Value sum123 = rewriter.create(loc, elem1, sum23); + Value sum234 = rewriter.create(loc, sum23, elem4); + Value sum345 = rewriter.create(loc, sum34, elem5); + + // min + Value min23 = rewriter.create(loc, elem2, elem3); + Value min34 = rewriter.create(loc, elem3, elem4); + + Value min123 = rewriter.create(loc, elem1, min23); + Value min234 = rewriter.create(loc, min23, elem4); + Value min345 = rewriter.create(loc, min34, elem5); + + // max + Value max23 = rewriter.create(loc, elem2, elem3); + Value max34 = rewriter.create(loc, elem3, elem4); + + Value max123 = rewriter.create(loc, elem1, max23); + Value max234 = rewriter.create(loc, max23, elem4); + Value max345 = rewriter.create(loc, max34, elem5); + + // median + Value min_plus_max_123 = + rewriter.create(loc, min123, max123); + Value min_plus_max_234 = + rewriter.create(loc, min234, max234); + Value min_plus_max_345 = + rewriter.create(loc, min345, max345); + + Value median123 = + rewriter.create(loc, sum123, min_plus_max_123); + Value median234 = + rewriter.create(loc, sum234, min_plus_max_234); + Value median345 = + rewriter.create(loc, sum345, min_plus_max_345); + + // mean of three medians + Value two_medians = + rewriter.create(loc, median123, median234); + Value three_medians = + rewriter.create(loc, two_medians, median345); + Value median_mean = + rewriter.create(loc, three_medians, constant_three); + + // store in alloc + rewriter.create(loc, median_mean, alloc, iv); + rewriter.setInsertionPointAfter(forOp1); + rewriter.replaceOp(op, alloc); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FIRFilterResSymmThresholdUpOptimizedOp +// operations 
+//===----------------------------------------------------------------------===// +struct FIRFilterResSymmThresholdUpOptimizedOpLowering + : public ConversionPattern { + FIRFilterResSymmThresholdUpOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern( + dsp::FIRFilterResSymmThresholdUpOptimizedOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + // dsp.FIRFilterResSymmThresholdUpOptimizedOp has 2 operands -- both of type + // tensor f64 + + // Get the location of FIRFilterResSymmThresholdUpOptimizedOp + auto loc = op->getLoc(); + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + // Pseudo-Code + // y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 + // to L-1/2 + // N = lenY , M = lenX , L = lenH + // for n=0 to N + // sum = 0, temp =0 + // for k = 0 to L-1/2 + // if 0 <= n-k < M + // val1 = x[n-k] else, val1 = 0 + // if 0 <= n+k - (L-1) < M + // val2 = x[n+k-(L-1)] else, val2 = 0 + // temp = val1 + val2 + // sum = sum + h[k] . temp + + // middle-one + // if 0 <= n - (L-1)/2 < M + // sum2 = sum + h[L-1/2] . 
x[n-(n - (L-1)/2)] + // y[n] = sum2 + + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + DEBUG_PRINT_NO_ARGS(); + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(forOp1.getBody()); + auto iv = forOp1.getInductionVar(); + + // for n=0 to N + // sum = 0, temp =0 + // get filter len + // auto tensorTypeFilter = + // llvm::cast((*op->getOperand(1))); //operand_type_end + // auto tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); + auto operandIt = op->operand_type_begin(); + auto tensorTypeInput = llvm::cast(*operandIt); + int64_t ubForInput = tensorTypeInput.getShape()[0]; + // get second operand + operandIt = operandIt + 1; + + // auto tensorTypeFilter = + // llvm::cast((*op->operand_type_begin())); //operandIt + auto tensorTypeFilter = llvm::cast(*operandIt); + int64_t ubForFilter = tensorTypeFilter.getShape()[0]; + DEBUG_PRINT_NO_ARGS(); + // llvm::errs() << "ubForFilter= " << ubForFilter << "\n"; + // create a constant for sum + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + affine::AffineForOp forOp2 = rewriter.create( + loc, lb, ubForFilter / 2, step, ValueRange{constant0}); + rewriter.setInsertionPointToStart(forOp2.getBody()); + auto iv2 = forOp2.getInductionVar(); + + auto getIterArg = + forOp2.getBody()->getArgument(1); // forOp1.getIterOperands(); + DEBUG_PRINT_NO_ARGS(); + FIRFilterResSymmThresholdUpOptimizedOpAdaptor + firFilterResSymmThresholdUpOpAdaptor(operands); + + // if 0 <= n-k < M + // val1 = x[n-k] else, val1 = 0 + // For n-k + // if 0 <= n-k < M or, 0 <= n-k <= M -1 + AffineExpr d0, d1, s0, s1; + bindDims(rewriter.getContext(), d0, d1); + AffineExpr ExprNMinusK = d0 - d1; + AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK); + // n-k <= M -1 or, n-k-(M-1) <= 0 + bindSymbols(rewriter.getContext(), s0, s1); + Value constantMMinus1Indx = + rewriter.create(loc, ubForInput - 1); + + AffineExpr 
ExprNMinusKMinusMPlus1 = s0 - d0 + d1; + IntegerSet setForIf = IntegerSet::get( + 2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false}); + DEBUG_PRINT_NO_ARGS(); + + // if 0 <= n-k <= M -1 + // use typeRange too: + Type floatType = rewriter.getF64Type(); + // if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0 + auto ifOp = rewriter.create( + loc, TypeRange{floatType}, setForIf, + ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + // val1 = x[n-k] else, val1 = 0 + // load x[n-k] + DEBUG_PRINT_NO_ARGS(); + Value loadInput = rewriter.create( + loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), mapNMinusK, + ValueRange{iv, iv2}); + rewriter.create(loc, ValueRange{loadInput}); + // else block + rewriter.setInsertionPointToStart(ifOp.getElseBlock()); + Value const0ForElse = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + rewriter.create(loc, ValueRange{const0ForElse}); + rewriter.setInsertionPointAfter(ifOp); + + // if 0 <= n+k - (L-1) < M + // val2 = x[n+k-(L-1)] else, val2 = 0 + // val2 lower bound + // AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 + // AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1 + // Val2 LowerBound: n+k - (L-1) >= 0 + AffineExpr ExprLowerBoundVal2 = + rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) - + rewriter.getAffineConstantExpr(ubForFilter - 1); + // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie, + // (M+L-2) - k -n >= 0 + // AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 + + // M -1 + AffineExpr ExprUpperBoundVal2 = + rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) - + rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0); + // s0 = L -1 + // Value s0LMin1Indx = rewriter.create(loc, + // ubForFilter - 1); s1 = M + L -2 for val2 upperBound Value + // s1MPlusLPlus2Indx = rewriter.create(loc, + // ubForInput + 
ubForFilter - 2); Value s1MMin1Indx = + // rewriter.create(loc, ubForInput - 1); + + IntegerSet setForIf2 = IntegerSet::get( + 2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false}); + + auto ifOp2 = rewriter.create( + loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2}, + true /*else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + // val2 = x[n+k-(L-1)] else, val2 = 0 + AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2); + // load x[n+k-(L-1)] + DEBUG_PRINT_NO_ARGS(); + Value loadInputForVal2 = rewriter.create( + loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap2, + ValueRange{iv, iv2}); + rewriter.create(loc, ValueRange{loadInputForVal2}); + // else block + rewriter.setInsertionPointToStart(ifOp2.getElseBlock()); + Value const0ForElse2 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + rewriter.create(loc, ValueRange{const0ForElse2}); + rewriter.setInsertionPointAfter(ifOp2); + + // temp = val1 + val2 + // sum = sum + h[k] . temp + + Value Val1Plus2 = rewriter.create(loc, ifOp.getResult(0), + ifOp2.getResult(0)); + + // load filter and then mult and then sum + Value loadFilter = rewriter.create( + loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), iv2); + + Value filterMulInput = + rewriter.create(loc, Val1Plus2, loadFilter); + Value sumNext = + rewriter.create(loc, filterMulInput, getIterArg); + rewriter.create(loc, ValueRange{sumNext}); + // rewriter.setInsertionPointToEnd(forOp2->getBlock()); + rewriter.setInsertionPointAfter(forOp2); + DEBUG_PRINT_NO_ARGS(); + // Middle - point + // if 0 <= n - (L-1)/2 < M + // sum2 = sum + h[L-1/2] . 
x[n-(L-1)/2)] + // y[n] = sum2 + + // if 0 <= n - (L-1)/2 < M + // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2 + // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2 + int64_t midFilterLen = (ubForFilter - 1) / 2; + AffineExpr ExprLowerBoundVal3 = + rewriter.getAffineDimExpr(0) - + rewriter.getAffineConstantExpr(midFilterLen); + // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n + AffineExpr ExprUpperBoundVal3 = + rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) - + rewriter.getAffineDimExpr(0); + + AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3); + + IntegerSet setForIf3 = IntegerSet::get( + 1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false}); + + auto ifOp3 = rewriter.create( + loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/); + rewriter.setInsertionPointToStart(ifOp3.getThenBlock()); + + // val3 = x[n-(L-1)/2)] else, val3 = 0 + // load x[n-(L-1)/2)] + DEBUG_PRINT_NO_ARGS(); + Value loadInputForVal3 = rewriter.create( + loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap3, + ValueRange{iv}); + rewriter.create(loc, ValueRange{loadInputForVal3}); + // else block + rewriter.setInsertionPointToStart(ifOp3.getElseBlock()); + Value const0ForElse3 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + rewriter.create(loc, ValueRange{const0ForElse3}); + rewriter.setInsertionPointAfter(ifOp3); + + // sum2 = sum + h[L-1/2] . 
x[n-(L-1)/2)] + // y[n] = sum2 + // load filter and then mult and then sum + Value midFilterLenIndx = + rewriter.create(loc, midFilterLen); + + Value loadFilterMid = rewriter.create( + loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), midFilterLenIndx); + Value filterMulInput2 = + rewriter.create(loc, ifOp3.getResult(0), loadFilterMid); + Value sum2 = rewriter.create(loc, filterMulInput2, + forOp2.getResult(0)); + // rewriter.create(loc, forOp2.getResult(0) , alloc, iv); + + // Optimize here, compare with threshold, then if returnoriginal then store + // same value else 1 + + auto thresholdMemRef = firFilterResSymmThresholdUpOpAdaptor.getThreshold(); + auto returnOriginalMemRef = + firFilterResSymmThresholdUpOpAdaptor.getReturnoriginal(); + + auto threshold = + rewriter.create(loc, thresholdMemRef, ValueRange{}); + auto returnOriginal = + rewriter.create(loc, returnOriginalMemRef, ValueRange{}); + Value constant00 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + Value constant11 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + // Compare a[i] >= threshold + auto cmp1 = rewriter.create(loc, arith::CmpFPredicate::OGE, + sum2, threshold); + // Compare if return original is true or false and return 1 or original + // value + auto cmpro = rewriter.create(loc, arith::CmpFPredicate::OEQ, + constant11, returnOriginal); + + // Use select to choose between inputX and 1 + auto selectreturn = + rewriter.create(loc, cmpro, sum2, constant11); + + // Use select to choose between 0 and selectreturn + auto selectOp = + rewriter.create(loc, cmp1, selectreturn, constant00); + + // Store the result + rewriter.create(loc, selectOp, alloc, iv); + + // rewriter.create(loc, sum2, alloc, iv); + rewriter.setInsertionPointAfter(forOp1); + DEBUG_PRINT_NO_ARGS(); + // ifOp->dump(); + rewriter.replaceOp(op, alloc); + return success(); + } +}; + +//===----------------------------------------------------------------------===// 
+// ToyToAffine RewritePatterns: FFTOp operations +//===----------------------------------------------------------------------===// + +struct FFTOpLowering : public ConversionPattern { + FFTOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memrefType = convertTensorToMemRef(tensorType); + + auto alloc_temp_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_temp_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + FFTRealOpAdaptor fftRealOpAdaptor(operands); + + auto input = fftRealOpAdaptor.getLhs(); + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + // alloc memory for reversed and dealloc when not required + auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + + // bits needed for bit reversal + auto ubInt = + rewriter.create(loc, rewriter.getI64Type(), ub); + auto ubFloat = + rewriter.create(loc, rewriter.getF64Type(), ubInt); + auto bitsNeededFloat = rewriter.create(loc, ubFloat); + auto bitsNeededInt = rewriter.create( + loc, rewriter.getI64Type(), bitsNeededFloat); + auto bitsNeeded = rewriter.create( + loc, rewriter.getIndexType(), bitsNeededInt); + + // bit reversal + auto bitReversalLoop = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(bitReversalLoop.getBody()); + auto i = bitReversalLoop.getInductionVar(); + auto iInt = rewriter.create(loc, rewriter.getI64Type(), + i); // check here + + // Calculate reversed index + // auto zero = rewriter.create(loc, 0); + auto initialRevIndex = rewriter.create(loc, 0, 64); + + auto innerLoop = rewriter.create(loc, lb, 
bitsNeeded, step, + ValueRange{initialRevIndex}); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + auto j = innerLoop.getInductionVar(); + auto jInt = + rewriter.create(loc, rewriter.getI64Type(), j); + auto carriedRevIndex = innerLoop.getRegionIterArgs()[0]; + + auto bitMask = rewriter.create( + loc, rewriter.create(loc, 1, 64), jInt); + auto iAndMask = rewriter.create(loc, iInt, bitMask); + auto isNonZero = rewriter.create( + loc, arith::CmpIPredicate::ne, iAndMask, + rewriter.create(loc, 0, 64)); + auto shiftAmount = rewriter.create( + loc, rewriter.create(loc, bitsNeeded, j), + rewriter.create(loc, 1)); + auto shiftAmountI64 = rewriter.create( + loc, rewriter.getI64Type(), shiftAmount); + auto bitToSet = rewriter.create( + loc, rewriter.create(loc, 1, 64), shiftAmountI64); + + // Update newRevIndex using a select operation + auto updatedRevIndex = rewriter.create( + loc, carriedRevIndex, + rewriter.create( + loc, isNonZero, bitToSet, + rewriter.create(loc, 0, 64))); + + // Yield the updated value to carry it forward + rewriter.create(loc, ValueRange{updatedRevIndex}); + + // auto revIndex = rewriter.create(loc, + // rewriter.getIndexType(), newRevIndex); + + rewriter.setInsertionPointAfter(innerLoop); + + auto finalRevIndex = innerLoop.getResult(0); + auto revIndex = rewriter.create( + loc, rewriter.getIndexType(), finalRevIndex); + + // Load from alloc_temp and store in alloc_reversed + auto realValue = rewriter.create(loc, input, ValueRange{i}); + auto imagValue = rewriter.create( + loc, llvm::APFloat(0.0), rewriter.getF64Type()); + rewriter.create(loc, realValue, alloc_reversed_real, + ValueRange{revIndex}); + rewriter.create(loc, imagValue, alloc_reversed_imag, + ValueRange{revIndex}); + + rewriter.setInsertionPointAfter(bitReversalLoop); + + // Cooley-Tukey FFT implementation + auto N = tensorType.getShape()[0]; + auto stages = static_cast(std::log2(N)); + auto stagesValue = rewriter.create(loc, stages); + + // Constants for complex arithmetic + 
auto pi = rewriter.create(loc, llvm::APFloat(M_PI), + rewriter.getF64Type()); + auto neg2 = rewriter.create( + loc, llvm::APFloat(-2.0), rewriter.getF64Type()); + + auto fftLoop = rewriter.create(loc, lb, stagesValue, step); + rewriter.setInsertionPointToStart(fftLoop.getBody()); + auto stage = fftLoop.getInductionVar(); + auto half_size = rewriter.create( + loc, rewriter.create(loc, 1), stage); + auto full_size = rewriter.create( + loc, half_size, rewriter.create(loc, 1)); + + auto outerLoop = rewriter.create(loc, lb, ub, full_size); + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto start = outerLoop.getInductionVar(); + + auto butterflyLoop = rewriter.create(loc, lb, half_size, step); + rewriter.setInsertionPointToStart(butterflyLoop.getBody()); + auto k = butterflyLoop.getInductionVar(); + + // Calculate indices for even and odd elements + auto even_index = rewriter.create(loc, start, k); + auto odd_index = rewriter.create(loc, even_index, half_size); + + // Calculate twiddle factor + auto k_i64 = + rewriter.create(loc, rewriter.getI64Type(), k); + auto k_f64 = + rewriter.create(loc, rewriter.getF64Type(), k_i64); + auto full_size_i64 = rewriter.create( + loc, rewriter.getI64Type(), full_size); + auto full_size_f64 = rewriter.create( + loc, rewriter.getF64Type(), full_size_i64); + auto angle_div = rewriter.create(loc, k_f64, full_size_f64); + auto angle_mul = rewriter.create(loc, neg2, angle_div); + auto angle_final = rewriter.create(loc, pi, angle_mul); + auto cos = rewriter.create(loc, angle_final); + auto sin = rewriter.create(loc, angle_final); + + // Load odd value + auto odd_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{odd_index}); + auto odd_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{odd_index}); + + // Multiply by twiddle factor + auto odd_real_cos = rewriter.create(loc, odd_real, cos); + auto odd_imag_sin = rewriter.create(loc, odd_imag, sin); + auto t_real = + rewriter.create(loc, odd_real_cos, 
odd_imag_sin); + + auto odd_real_sin = rewriter.create(loc, odd_real, sin); + auto odd_imag_cos = rewriter.create(loc, odd_imag, cos); + auto t_imag = + rewriter.create(loc, odd_real_sin, odd_imag_cos); + + // Load even value + auto even_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{even_index}); + auto even_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{even_index}); + // Butterfly operation + auto new_even_real = rewriter.create(loc, even_real, t_real); + auto new_even_imag = rewriter.create(loc, even_imag, t_imag); + auto new_odd_real = rewriter.create(loc, even_real, t_real); + auto new_odd_imag = rewriter.create(loc, even_imag, t_imag); + + // Store results + rewriter.create(loc, new_even_real, alloc_reversed_real, + ValueRange{even_index}); + rewriter.create(loc, new_even_imag, alloc_reversed_imag, + ValueRange{even_index}); + rewriter.create(loc, new_odd_real, alloc_reversed_real, + ValueRange{odd_index}); + rewriter.create(loc, new_odd_imag, alloc_reversed_imag, + ValueRange{odd_index}); + + // replace the operation with the final value + rewriter.replaceOp(op, + ValueRange{alloc_reversed_real, alloc_reversed_imag}); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: FFTAbsOp operations +//===----------------------------------------------------------------------===// + +struct FFTAbsOpLowering : public ConversionPattern { + FFTAbsOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::FFTAbsOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memrefType = convertTensorToMemRef(tensorType); + + auto alloc_temp_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_temp_imag = insertAllocAndDealloc(memrefType, loc, rewriter); 
+ + FFTAbsOpAdaptor fftAbsOpAdaptor(operands); + + auto input = fftAbsOpAdaptor.getInput(); + auto lb = rewriter.create(loc, 0); + auto ub = + rewriter.create(loc, tensorType.getShape()[0]); + auto step = rewriter.create(loc, 1); + + // alloc memory for reversed and dealloc when not required + auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter); + auto alloc_amplitude = insertAllocAndDealloc(memrefType, loc, rewriter); + + // bits needed for bit reversal + auto ubInt = + rewriter.create(loc, rewriter.getI64Type(), ub); + auto ubFloat = + rewriter.create(loc, rewriter.getF64Type(), ubInt); + auto bitsNeededFloat = rewriter.create(loc, ubFloat); + auto bitsNeededInt = rewriter.create( + loc, rewriter.getI64Type(), bitsNeededFloat); + auto bitsNeeded = rewriter.create( + loc, rewriter.getIndexType(), bitsNeededInt); + + // bit reversal + auto bitReversalLoop = rewriter.create(loc, lb, ub, step); + rewriter.setInsertionPointToStart(bitReversalLoop.getBody()); + auto i = bitReversalLoop.getInductionVar(); + auto iInt = rewriter.create(loc, rewriter.getI64Type(), + i); // check here + + // Calculate reversed index + // auto zero = rewriter.create(loc, 0); + auto initialRevIndex = rewriter.create(loc, 0, 64); + + auto innerLoop = rewriter.create(loc, lb, bitsNeeded, step, + ValueRange{initialRevIndex}); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + auto j = innerLoop.getInductionVar(); + auto jInt = + rewriter.create(loc, rewriter.getI64Type(), j); + auto carriedRevIndex = innerLoop.getRegionIterArgs()[0]; + + auto bitMask = rewriter.create( + loc, rewriter.create(loc, 1, 64), jInt); + auto iAndMask = rewriter.create(loc, iInt, bitMask); + auto isNonZero = rewriter.create( + loc, arith::CmpIPredicate::ne, iAndMask, + rewriter.create(loc, 0, 64)); + auto shiftAmount = rewriter.create( + loc, rewriter.create(loc, bitsNeeded, j), + rewriter.create(loc, 1)); + 
auto shiftAmountI64 = rewriter.create( + loc, rewriter.getI64Type(), shiftAmount); + auto bitToSet = rewriter.create( + loc, rewriter.create(loc, 1, 64), shiftAmountI64); + + // Update newRevIndex using a select operation + auto updatedRevIndex = rewriter.create( + loc, carriedRevIndex, + rewriter.create( + loc, isNonZero, bitToSet, + rewriter.create(loc, 0, 64))); + + // Yield the updated value to carry it forward + rewriter.create(loc, ValueRange{updatedRevIndex}); + + // auto revIndex = rewriter.create(loc, + // rewriter.getIndexType(), newRevIndex); + + rewriter.setInsertionPointAfter(innerLoop); + + auto finalRevIndex = innerLoop.getResult(0); + auto revIndex = rewriter.create( + loc, rewriter.getIndexType(), finalRevIndex); + + // Load from alloc_temp and store in alloc_reversed + auto realValue = rewriter.create(loc, input, ValueRange{i}); + auto imagValue = rewriter.create( + loc, llvm::APFloat(0.0), rewriter.getF64Type()); + rewriter.create(loc, realValue, alloc_reversed_real, + ValueRange{revIndex}); + rewriter.create(loc, imagValue, alloc_reversed_imag, + ValueRange{revIndex}); + + rewriter.setInsertionPointAfter(bitReversalLoop); + + // Cooley-Tukey FFT implementation + auto N = tensorType.getShape()[0]; + auto stages = static_cast(std::log2(N)); + auto stagesValue = rewriter.create(loc, stages); + + // Constants for complex arithmetic + auto pi = rewriter.create(loc, llvm::APFloat(M_PI), + rewriter.getF64Type()); + auto neg2 = rewriter.create( + loc, llvm::APFloat(-2.0), rewriter.getF64Type()); + + auto fftLoop = rewriter.create(loc, lb, stagesValue, step); + rewriter.setInsertionPointToStart(fftLoop.getBody()); + auto stage = fftLoop.getInductionVar(); + auto half_size = rewriter.create( + loc, rewriter.create(loc, 1), stage); + auto full_size = rewriter.create( + loc, half_size, rewriter.create(loc, 1)); + + auto outerLoop = rewriter.create(loc, lb, ub, full_size); + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto start = 
outerLoop.getInductionVar(); + + auto butterflyLoop = rewriter.create(loc, lb, half_size, step); + rewriter.setInsertionPointToStart(butterflyLoop.getBody()); + auto k = butterflyLoop.getInductionVar(); + + // Calculate indices for even and odd elements + auto even_index = rewriter.create(loc, start, k); + auto odd_index = rewriter.create(loc, even_index, half_size); + + // Calculate twiddle factor + auto k_i64 = + rewriter.create(loc, rewriter.getI64Type(), k); + auto k_f64 = + rewriter.create(loc, rewriter.getF64Type(), k_i64); + auto full_size_i64 = rewriter.create( + loc, rewriter.getI64Type(), full_size); + auto full_size_f64 = rewriter.create( + loc, rewriter.getF64Type(), full_size_i64); + auto angle_div = rewriter.create(loc, k_f64, full_size_f64); + auto angle_mul = rewriter.create(loc, neg2, angle_div); + auto angle_final = rewriter.create(loc, pi, angle_mul); + auto cos = rewriter.create(loc, angle_final); + auto sin = rewriter.create(loc, angle_final); + + // Load odd value + auto odd_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{odd_index}); + auto odd_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{odd_index}); + + // Multiply by twiddle factor + auto odd_real_cos = rewriter.create(loc, odd_real, cos); + auto odd_imag_sin = rewriter.create(loc, odd_imag, sin); + auto t_real = + rewriter.create(loc, odd_real_cos, odd_imag_sin); + + auto odd_real_sin = rewriter.create(loc, odd_real, sin); + auto odd_imag_cos = rewriter.create(loc, odd_imag, cos); + auto t_imag = + rewriter.create(loc, odd_real_sin, odd_imag_cos); + + // Load even value + auto even_real = rewriter.create(loc, alloc_reversed_real, + ValueRange{even_index}); + auto even_imag = rewriter.create(loc, alloc_reversed_imag, + ValueRange{even_index}); + // Butterfly operation + auto new_even_real = rewriter.create(loc, even_real, t_real); + auto new_even_imag = rewriter.create(loc, even_imag, t_imag); + auto new_odd_real = rewriter.create(loc, even_real, t_real); + 
auto new_odd_imag = rewriter.create(loc, even_imag, t_imag); + + // Calculate amplitude for even index + auto new_even_real_squared = + rewriter.create(loc, new_even_real, new_even_real); + auto new_even_imag_squared = + rewriter.create(loc, new_even_imag, new_even_imag); + auto sum_even = rewriter.create(loc, new_even_real_squared, + new_even_imag_squared); + auto sqrt_even = rewriter.create(loc, sum_even); + + // Calculate amplitude for odd index + auto new_odd_real_squared = + rewriter.create(loc, new_odd_real, new_odd_real); + auto new_odd_imag_squared = + rewriter.create(loc, new_odd_imag, new_odd_imag); + auto sum_odd = rewriter.create(loc, new_odd_real_squared, + new_odd_imag_squared); + auto sqrt_odd = rewriter.create(loc, sum_odd); + + // Store results + rewriter.create(loc, new_even_real, alloc_reversed_real, + ValueRange{even_index}); + rewriter.create(loc, new_even_imag, alloc_reversed_imag, + ValueRange{even_index}); + rewriter.create(loc, new_odd_real, alloc_reversed_real, + ValueRange{odd_index}); + rewriter.create(loc, new_odd_imag, alloc_reversed_imag, + ValueRange{odd_index}); + rewriter.create(loc, sqrt_even, alloc_amplitude, + ValueRange{even_index}); + rewriter.create(loc, sqrt_odd, alloc_amplitude, + ValueRange{odd_index}); + + // replace the operation with the final value + rewriter.replaceOp(op, alloc_amplitude); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: DFTAbsOp operations +//===----------------------------------------------------------------------===// + +struct DFTAbsOpLowering : public ConversionPattern { + DFTAbsOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::DFTAbsOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + + // Pseudo-code: + // y[k] = y_real[k] + j *y_img[k] + // y_real = 
sumOver_n(x[n]*cos[2*pi * k *n/N ] + // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 + // init output mem for y_real & y_img as 0 + // iterate for output from k=0 to last + // iterate for all x from n=0 to last + // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and + // store them at y[k] + // + // replace this upsampling op with the output_mem_allocation op + + // DEBUG_PRINT_NO_ARGS() ; + + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + // iterate to result1 --not needed for now but for future reference + // auto tensorType1 = + // llvm::cast(*std::next(op->result_type_begin(), 1)); + + // DEBUG_PRINT_NO_ARGS() ; + // tensorType.getShape()[0] + // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] + // << " func= " << __func__ << "\n"; + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + // auto memRefType2 = convertTensorToMemRef(tensorType1); + auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); + auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter); + auto alloc_mag = insertAllocAndDealloc(memRefType, loc, rewriter); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + // affine.for %y = 0 to 4 { + // affine.store %cst_3, %alloc_real[%y] : memref<4xf64> + // affine.store %cst_3, %alloc_img[%y] : memref<4xf64> + // } + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + // For loop -- iterate from 1 to last + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + auto iv = forOp1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp1.getBody()); + rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); + 
rewriter.create(loc, constant0, alloc_img, ValueRange{iv}); + rewriter.create(loc, constant0, alloc_mag, ValueRange{iv}); + rewriter.setInsertionPointAfter(forOp1); + + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); + auto ivY = forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); + + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); + auto ivX = forOpX.getInductionVar(); + rewriter.setInsertionPointToStart(forOpX.getBody()); + + // load from X, & y1 & y2 + DFTAbsOpAdaptor fft1DAdaptor(operands); + Value inputX = rewriter.create(loc, fft1DAdaptor.getInput(), + ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value loadYImg = + rewriter.create(loc, alloc_img, ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); + + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = rewriter.create(loc, const2pi, muli_k); + + // getOperand().getType() + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); + // Value N = inputTensorType.getShape()[0]; + + Value divIndxByN = rewriter.create(loc, mul2piKI, N); + + // Real part = Sum(x[i] * cos(div) ) + Value GetCos = rewriter.create(loc, divIndxByN); + Value xMulCos = rewriter.create(loc, inputX, GetCos); + Value realSum = rewriter.create(loc, loadYReal, xMulCos); + rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); + + 
// Img part = -1 * Sum(x[i] * sin(div) ) + Value GetSin = rewriter.create(loc, divIndxByN); + Value xMulSin = rewriter.create(loc, inputX, GetSin); + Value imgSum = rewriter.create(loc, loadYImg, xMulSin); + + rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpX); + Value final_real = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value final_img = + rewriter.create(loc, alloc_img, ValueRange{ivY}); + + // Calculate amplitude + auto real_squared = + rewriter.create(loc, final_real, final_real); + auto img_squared = + rewriter.create(loc, final_img, final_img); + auto sum_odd = + rewriter.create(loc, real_squared, img_squared); + auto amplitude = rewriter.create(loc, sum_odd); + + // replace the operation with the final value + rewriter.create(loc, amplitude, alloc_mag, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpY); + rewriter.replaceOp(op, alloc_mag); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: DFTAbsThresholdUpOp operations +//===----------------------------------------------------------------------===// + +struct DFTAbsThresholdUpOpLowering : public ConversionPattern { + DFTAbsThresholdUpOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::DFTAbsThresholdUpOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + // output for result type + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + // auto memRefType2 = convertTensorToMemRef(tensorType1); + auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter); + auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter); + auto alloc_mag = insertAllocAndDealloc(memRefType, 
loc, rewriter); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + Value constant0 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + // For loop -- iterate from 1 to last + int64_t lb = 0; + int64_t ub = tensorType.getShape()[0]; + int64_t step = 1; + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, ub, step); + auto iv = forOp1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp1.getBody()); + rewriter.create(loc, constant0, alloc_real, ValueRange{iv}); + rewriter.create(loc, constant0, alloc_img, ValueRange{iv}); + rewriter.create(loc, constant0, alloc_mag, ValueRange{iv}); + rewriter.setInsertionPointAfter(forOp1); + + // loop for Y + affine::AffineForOp forOpY = + rewriter.create(loc, lb, ub, step); + auto ivY = forOpY.getInductionVar(); + rewriter.setInsertionPointToStart(forOpY.getBody()); + + // loop for X + affine::AffineForOp forOpX = + rewriter.create(loc, lb, ub, step); + auto ivX = forOpX.getInductionVar(); + rewriter.setInsertionPointToStart(forOpX.getBody()); + + // load from X, & y1 & y2 + DFTAbsThresholdUpOpAdaptor dftAbsThresholdUpOp(operands); + Value inputX = rewriter.create( + loc, dftAbsThresholdUpOp.getInput(), ValueRange{ivX}); + Value loadYReal = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value loadYImg = + rewriter.create(loc, alloc_img, ValueRange{ivY}); + + // convert index to f64 + Value IndxY = rewriter.create( + loc, rewriter.getIntegerType(32), ivY); + Value k = + rewriter.create(loc, rewriter.getF64Type(), IndxY); + + Value IndxX = rewriter.create( + loc, rewriter.getIntegerType(32), ivX); + Value i = + rewriter.create(loc, rewriter.getF64Type(), IndxX); + + // get 2*pi * k * i / N + Value muli_k = rewriter.create(loc, k, i); + + Value const2pi = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718)); + Value mul2piKI = 
rewriter.create(loc, const2pi, muli_k); + + // getOperand().getType() + // auto inputTensorType = + // llvm::cast(op->getOperand(0).getType()); + float LengthOfInput = (float)ub; + Value N = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput)); + // Value N = inputTensorType.getShape()[0]; + + Value divIndxByN = rewriter.create(loc, mul2piKI, N); + + // Real part = Sum(x[i] * cos(div) ) + Value GetCos = rewriter.create(loc, divIndxByN); + Value xMulCos = rewriter.create(loc, inputX, GetCos); + Value realSum = rewriter.create(loc, loadYReal, xMulCos); + rewriter.create(loc, realSum, alloc_real, ValueRange{ivY}); + + // Img part = -1 * Sum(x[i] * sin(div) ) + Value GetSin = rewriter.create(loc, divIndxByN); + Value xMulSin = rewriter.create(loc, inputX, GetSin); + Value imgSum = rewriter.create(loc, loadYImg, xMulSin); + + rewriter.create(loc, imgSum, alloc_img, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpX); + Value final_real = + rewriter.create(loc, alloc_real, ValueRange{ivY}); + Value final_img = + rewriter.create(loc, alloc_img, ValueRange{ivY}); + + // Calculate amplitude + auto real_squared = + rewriter.create(loc, final_real, final_real); + auto img_squared = + rewriter.create(loc, final_img, final_img); + auto sum_odd = + rewriter.create(loc, real_squared, img_squared); + auto amplitude = rewriter.create(loc, sum_odd); + + auto thresholdMemRef = dftAbsThresholdUpOp.getThreshold(); + auto returnOriginalMemRef = dftAbsThresholdUpOp.getReturnoriginal(); + + auto threshold = + rewriter.create(loc, thresholdMemRef, ValueRange{}); + auto returnOriginal = + rewriter.create(loc, returnOriginalMemRef, ValueRange{}); + Value constant00 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + Value constant11 = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1)); + // Compare a[i] >= threshold + auto cmp1 = rewriter.create(loc, arith::CmpFPredicate::OGE, + amplitude, 
threshold); + // Compare if return original is true or false and return 1 or original + // value + auto cmpro = rewriter.create(loc, arith::CmpFPredicate::OEQ, + constant11, returnOriginal); + + // Use select to choose between inputX and 1 + auto selectreturn = + rewriter.create(loc, cmpro, amplitude, constant11); + + // Use select to choose between 0 and selectreturn + auto selectOp = + rewriter.create(loc, cmp1, selectreturn, constant00); + + // replace the operation with the final value + rewriter.create(loc, selectOp, alloc_mag, ValueRange{ivY}); + rewriter.setInsertionPointAfter(forOpY); + rewriter.replaceOp(op, alloc_mag); + return success(); + } +}; + + +struct CorrelateOpLowering : public ConversionPattern { + CorrelateOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::CorrelateOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::CorrelateOp::Adaptor correlateOpAdaptor(operands); + + Value cst_idx_zero = rewriter.create(loc, 0); + Value cst_idx_one = rewriter.create(loc, 1); + + // ranked tensor type + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + + ArrayRef inputShape = inputType.getShape(); + + int64_t N = inputShape[0]; + + // First outer loop for k in range (0, N) + auto lb1 = rewriter.create(loc, 0); + auto ub1 = rewriter.create(loc, N); + auto step = rewriter.create(loc, 1); + + Value constant_N_minus_one = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(N-1)); + + auto floatMemRefType = MemRefType::get({}, rewriter.getF64Type()); + auto alloc_iter_sum = + insertAllocAndDealloc(floatMemRefType, loc, rewriter); + + Value constant_zero = rewriter.create( + loc, 
rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + auto forOp1 = rewriter.create(loc, lb1, ub1, step); + auto k1 = forOp1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp1.getBody()); + + rewriter.create(loc, constant_zero, alloc_iter_sum, ValueRange{}); + + Value lb1_inner = rewriter.create(loc, constant_N_minus_one, k1); + + auto forOp1_1 = rewriter.create(loc, lb1_inner, ub1, step); + auto iy1 = forOp1_1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp1_1.getBody()); + + Value ix1 = rewriter.create(loc, iy1, lb1_inner); + Value loadedLhs = rewriter.create(loc, + correlateOpAdaptor.getLhs(), ValueRange{ix1}); + Value loadedRhs = rewriter.create(loc, + correlateOpAdaptor.getRhs(), ValueRange{iy1}); + Value mul1 = rewriter.create(loc, loadedLhs, loadedRhs); + + Value loaded_sum1 = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + + Value inter_sum1 = rewriter.create(loc, loaded_sum1, mul1); + + rewriter.create(loc, inter_sum1, alloc_iter_sum, ValueRange{}); + + rewriter.setInsertionPointAfter(forOp1_1); + + auto loaded_sum1_outer = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + rewriter.create(loc, loaded_sum1_outer, alloc_output, ValueRange{k1}); + + rewriter.setInsertionPointAfter(forOp1); + + // Second outer loop for k in range (N, 2*N-1) + auto ub2 = rewriter.create(loc, 2*N-1); + + //lb2 = ub1 + auto forOp2 = rewriter.create(loc, ub1, ub2, step); + auto k2 = forOp2.getInductionVar(); + rewriter.setInsertionPointToStart(forOp2.getBody()); + + rewriter.create(loc, constant_zero, alloc_iter_sum, ValueRange{}); + + Value lb2_inner = rewriter.create(loc, k2, constant_N_minus_one); + + //NOTE: ub = ub1 (N) + auto forOp2_1 = rewriter.create(loc, lb2_inner, ub1, step); + auto ix2 = forOp2_1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp2_1.getBody()); + + Value iy2 = rewriter.create(loc, ix2, lb2_inner); + Value loadedLhs2 = rewriter.create(loc, + correlateOpAdaptor.getLhs(), ValueRange{ix2}); + Value 
loadedRhs2 = rewriter.create(loc, + correlateOpAdaptor.getRhs(), ValueRange{iy2}); + Value mul2 = rewriter.create(loc, loadedLhs2, loadedRhs2); + + Value loaded_sum2 = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + + Value inter_sum2 = rewriter.create(loc, loaded_sum2, mul2); + + rewriter.create(loc, inter_sum2, alloc_iter_sum, ValueRange{}); + + rewriter.setInsertionPointAfter(forOp2_1); + + auto loaded_sum2_outer = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + rewriter.create(loc, loaded_sum2_outer, alloc_output, ValueRange{k2}); + + rewriter.setInsertionPointAfter(forOp2); + + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + + +struct SetSingleElemAtIdxOpLowering : public ConversionPattern { + SetSingleElemAtIdxOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::SetSingleElemAtIdxOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // output for result type + SetSingleElemAtIdxOpAdaptor setSingleElemAtIdxAdaptor(operands); + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + + auto indxArgType = + llvm::dyn_cast(op->getOperand(1).getType()); + + int indxArgShape = indxArgType.getShape().size(); + + ValueRange indexValueRange; + + Value cst_idx_zero = rewriter.create(loc, 0); + + if (indxArgShape == 0) + indexValueRange = ValueRange{}; + else + indexValueRange = ValueRange{cst_idx_zero}; + + Value loadedIndx = rewriter.create( + loc, setSingleElemAtIdxAdaptor.getIndx(), indexValueRange); + + // f64 to index + Value indx_ui = rewriter.create( + loc, rewriter.getIntegerType(32), loadedIndx); + Value indx_index = rewriter.create( + loc, rewriter.getIndexType(), indx_ui); + + ValueRange 
valValueRange; + + if (indxArgShape == 0) + valValueRange = ValueRange{}; + else + valValueRange = ValueRange{cst_idx_zero}; + + Value loadedVal = rewriter.create( + loc, setSingleElemAtIdxAdaptor.getVal(), valValueRange); + + rewriter.create(loc, loadedVal, + setSingleElemAtIdxAdaptor.getInput(), + ValueRange{indx_index}); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + + + +struct Correl2MaxOptimizedOpLowering : public ConversionPattern { + Correl2MaxOptimizedOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::Correl2MaxOptimizedOp::getOperationName(), 1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + + auto loc = op->getLoc(); + + auto tensorType = llvm::cast((*op->result_type_begin())); + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter); + + typename dsp::Correl2MaxOptimizedOp::Adaptor correl2MaxOpAdaptor(operands); + + Value cst_idx_zero = rewriter.create(loc, 0); + Value cst_idx_one = rewriter.create(loc, 1); + + // ranked tensor type + auto inputType = + llvm::dyn_cast(op->getOperand(0).getType()); + + ArrayRef inputShape = inputType.getShape(); + + int64_t N = inputShape[0]; + + // First outer loop for k in range (0, N) + auto lb1 = rewriter.create(loc, 0); + auto ub1 = rewriter.create(loc, N); + auto step = rewriter.create(loc, 1); + + Value constant_N_minus_one = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(N-1)); + + auto floatMemRefType = MemRefType::get({}, rewriter.getF64Type()); + auto alloc_iter_sum = + insertAllocAndDealloc(floatMemRefType, loc, rewriter); + + Value constant_zero = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + rewriter.create(loc, constant_zero, alloc_output, ValueRange{}); + + auto forOp1 = rewriter.create(loc, lb1, ub1, step); + auto k1 = forOp1.getInductionVar(); + 
rewriter.setInsertionPointToStart(forOp1.getBody()); + + rewriter.create(loc, constant_zero, alloc_iter_sum, ValueRange{}); + + Value lb1_inner = rewriter.create(loc, constant_N_minus_one, k1); + + auto forOp1_1 = rewriter.create(loc, lb1_inner, ub1, step); + auto iy1 = forOp1_1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp1_1.getBody()); + + Value ix1 = rewriter.create(loc, iy1, lb1_inner); + Value loadedLhs = rewriter.create(loc, + correl2MaxOpAdaptor.getLhs(), ValueRange{ix1}); + Value loadedRhs = rewriter.create(loc, + correl2MaxOpAdaptor.getRhs(), ValueRange{iy1}); + Value mul1 = rewriter.create(loc, loadedLhs, loadedRhs); + + Value loaded_sum1 = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + + Value inter_sum1 = rewriter.create(loc, loaded_sum1, mul1); + + rewriter.create(loc, inter_sum1, alloc_iter_sum, ValueRange{}); + + rewriter.setInsertionPointAfter(forOp1_1); + + auto loaded_sum1_outer = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + auto loaded_output1 = rewriter.create(loc, + alloc_output, ValueRange{}); + + // If this is larger than current max, we need to change max + auto compare_sum1_output1 = rewriter.create( + loc, arith::CmpFPredicate::OGT, loaded_sum1_outer, loaded_output1); + + auto ifOp1 = rewriter.create(loc, compare_sum1_output1, false); + + rewriter.setInsertionPointToStart(ifOp1.thenBlock()); + + rewriter.create(loc, loaded_sum1_outer, alloc_output, ValueRange{}); + + rewriter.setInsertionPointAfter(forOp1); + + // Second outer loop for k in range (N, 2*N-1) + auto ub2 = rewriter.create(loc, 2*N-1); + + //lb2 = ub1 + auto forOp2 = rewriter.create(loc, ub1, ub2, step); + auto k2 = forOp2.getInductionVar(); + rewriter.setInsertionPointToStart(forOp2.getBody()); + + rewriter.create(loc, constant_zero, alloc_iter_sum, ValueRange{}); + + Value lb2_inner = rewriter.create(loc, k2, constant_N_minus_one); + + //NOTE: ub = ub1 (N) + auto forOp2_1 = rewriter.create(loc, lb2_inner, ub1, step); + auto ix2 = 
forOp2_1.getInductionVar(); + rewriter.setInsertionPointToStart(forOp2_1.getBody()); + + Value iy2 = rewriter.create(loc, ix2, lb2_inner); + Value loadedLhs2 = rewriter.create(loc, + correl2MaxOpAdaptor.getLhs(), ValueRange{ix2}); + Value loadedRhs2 = rewriter.create(loc, + correl2MaxOpAdaptor.getRhs(), ValueRange{iy2}); + Value mul2 = rewriter.create(loc, loadedLhs2, loadedRhs2); + + Value loaded_sum2 = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + + Value inter_sum2 = rewriter.create(loc, loaded_sum2, mul2); + + rewriter.create(loc, inter_sum2, alloc_iter_sum, ValueRange{}); + + rewriter.setInsertionPointAfter(forOp2_1); + + auto loaded_sum2_outer = rewriter.create(loc, + alloc_iter_sum, ValueRange{}); + auto loaded_output2 = rewriter.create(loc, + alloc_output, ValueRange{}); + + // If this is larger than current max, we need to change max + auto compare_sum2_output2 = rewriter.create( + loc, arith::CmpFPredicate::OGT, loaded_sum2_outer, loaded_output2); + + auto ifOp2 = rewriter.create(loc, compare_sum2_output2, false); + + rewriter.setInsertionPointToStart(ifOp2.thenBlock()); + + rewriter.create(loc, loaded_sum2_outer, alloc_output, ValueRange{}); + + + rewriter.setInsertionPointAfter(forOp2); + + + rewriter.replaceOp(op, alloc_output); + + return success(); + } +}; + + + + +//===----------------------------------------------------------------------===// +// ToyToAffine RewritePatterns: lmsFilterResponse operations +//===----------------------------------------------------------------------===// + +struct LMSFilterResponse2GainOpLowering : public ConversionPattern { + LMSFilterResponse2GainOpLowering(MLIRContext *ctx) + : ConversionPattern(dsp::LMSFilterResponse2GainOp::getOperationName(), 1, + ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); + + // Pseudo-code: + // for (int n = 0; n < NUM_SAMPLES; n++) { + // // we also need to initialize w 
+ // // w[n] = 0; + // // Calculate the filter output y[n] + // y[n] = 0; + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { // affine if + // y[n] = y[n] + (w[i] * x[n - i]); + // } + // } + // // Calculate the error e[n] + // e[n] = d[n] - y[n]; + // y[n] = y[n] * gain; + // // Update the filter weights w[i] + // for (int i = 0; i < FILTER_LENGTH; i++) { + // if (n - i >= 0) { + // w[i] += MU * e[n] * x[n - i]; + // } + // } + // } + + auto tensorType = llvm::cast((*op->result_type_begin())); + + // allocation & deallocation for the result of this operation + auto memRefType = convertTensorToMemRef(tensorType); + auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + + // construct affine loops for the input + SmallVector lowerBounds(tensorType.getRank(), /*Value*/ 0); + SmallVector steps(tensorType.getRank(), /*Value=*/1); + + LMSFilterResponse2GainOpAdaptor lmsFilterResponse2GainAdaptor(operands); + // Value alpha = rewriter.create(loc, + // rewriter.getF64Type(), + // rewriter.getF64FloatAttr(1)); + Value zeroval = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + Value mu = rewriter.create(loc, lmsFilterResponse2GainAdaptor.getMu()); + + // Before for loop, load the gain value + Value gain = rewriter.create(loc, lmsFilterResponse2GainAdaptor.getGain()); + + // For loop -- iterate from 0 to last + int64_t lb = 0; + int64_t numSamples = tensorType.getShape()[0]; + int64_t step = 1; + + Value GetFilterLOp = op->getOperand(3); + dsp::ConstantOp constantOp3rdArg = + GetFilterLOp.getDefiningOp(); + DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue(); + ; + auto elements1 = constant3rdValue.getValues(); + float filterlenval = elements1[0].getValueAsDouble(); + auto FilterLength = (uint64_t)filterlenval; + + auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type()); + auto wAlloc = rewriter.create(loc, yMemRefType); + + affine::AffineForOp forOp1 = + rewriter.create(loc, lb, 
numSamples, step); + auto iv = forOp1.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp1.getBody()); + + // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1) + AffineExpr d0, d1, s0; + bindDims(rewriter.getContext(), d0, d1); + // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - + // rewriter.getAffineDimExpr(1); //d0 - d1; + AffineExpr ExprForXSlice = d0 - d1; + AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice); + IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false}); + + // w[n] = 0; + // y[n] = 0; + // rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + // Allocate and initialize array for y + // Value constantIndx0 = rewriter.create(loc, 0); + + rewriter.create(loc, zeroval, wAlloc, ValueRange{iv}); + rewriter.create(loc, zeroval, alloc, ValueRange{iv}); + + affine::AffineForOp forOp2 = + rewriter.create(loc, lb, FilterLength, step); + auto iv2 = forOp2.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp2.getBody()); + + auto ifOp = rewriter.create( + loc, set1, ValueRange{iv, iv2}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp.getThenBlock()); + + Value inputX = + rewriter.create(loc, lmsFilterResponse2GainAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv2}); + Value w = rewriter.create(loc, wAlloc, + ValueRange{iv2}); // memRefType + + Value wmulx = rewriter.create(loc, inputX, w); + Value ybefore = rewriter.create(loc, alloc, ValueRange{iv}); + Value sumNext = rewriter.create(loc, wmulx, ybefore); + rewriter.create(loc, sumNext, alloc, ValueRange{iv}); + rewriter.setInsertionPointAfter(ifOp); + rewriter.setInsertionPointAfter(forOp2); + + // get e[n] = d[n] - y[n] + + Value desiredX = rewriter.create( + loc, lmsFilterResponse2GainAdaptor.getRhs(), ValueRange{iv}); + Value ynew = rewriter.create(loc, alloc, ValueRange{iv}); + + Value err = rewriter.create(loc, desiredX, ynew); + + // y[n] = y[n] * gain for fusion + Value ynewGain = rewriter.create(loc, 
ynew, gain); + rewriter.create(loc, ynewGain, alloc, ValueRange{iv}); + + + affine::AffineForOp forOp3 = + rewriter.create(loc, lb, FilterLength, step); + auto iv3 = forOp3.getInductionVar(); + + rewriter.setInsertionPointToStart(forOp3.getBody()); + + auto ifOp2 = rewriter.create( + loc, set1, ValueRange{iv, iv3}, false /*no else*/); + rewriter.setInsertionPointToStart(ifOp2.getThenBlock()); + + Value inputX2 = + rewriter.create(loc, lmsFilterResponse2GainAdaptor.getLhs(), + addMapForLMSFilter, ValueRange{iv, iv3}); + + Value Prevw2 = rewriter.create(loc, wAlloc, ValueRange{iv3}); + + // f(u(n),e(n),μ)=μe(n)u∗(n) + Value mul1 = rewriter.create(loc, err, inputX2); + Value mul2 = rewriter.create(loc, mu, mul1); + + // FInal w[n] + Value answer = rewriter.create(loc, Prevw2, mul2); + + rewriter.create(loc, answer, wAlloc, ValueRange{iv3}); + rewriter.setInsertionPointAfter(ifOp2); + rewriter.setInsertionPointAfter(forOp3); + + rewriter.setInsertionPointAfter(forOp1); + // debug + // forOp1->dump(); + + rewriter.replaceOp(op, alloc); + + return success(); + } +}; + + +// namespace //===----------------------------------------------------------------------===// // ToyToAffineLoweringPass @@ -6235,9 +12082,9 @@ struct ToyToAffineLoweringPass MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ToyToAffineLoweringPass) void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry + .insert(); } void runOnOperation() final; }; @@ -6271,23 +12118,42 @@ void ToyToAffineLoweringPass::runOnOperation() { // Now that the conversion target has been defined, we just need to provide // the set of patterns that will lower the Toy operations. 
RewritePatternSet patterns(&getContext()); - patterns.add( - &getContext()); + patterns.add< + AddOpLowering, ModuloOpLowering, ConstantOpLowering, FuncOpLowering, + MulOpLowering, PrintOpLowering, ReturnOpLowering, TransposeOpLowering, + DelayOpLowering, GainOpLowering, SubOpLowering, + FIRFilterResponseOpLowering, SlidingWindowAvgOpLowering, + DownSamplingOpLowering, UpSamplingOpLowering, + LowPassFilter1stOrderOpLowering, HighPassFilterOpLowering, + FFT1DOpLowering, IFFT1DOpLowering, HammingWindowOpLowering, DCTOpLowering, + filterOpLowering, DivOpLowering, BitwiseAndOpLowering, PowOpLowering, + zeroCrossCountOpLowering, SumOpLowering, SinOpLowering, CosOpLowering, + SquareOpLowering, FFT1DRealOpLowering, FFT1DImgOpLowering, SincOpLowering, + GetElemAtIndxOpLowering, SetElemAtIndxOpLowering, + LowPassFIRFilterOpLowering, HighPassFIRFilterOpLowering, + GetRangeOfVectorOpLowering, FIRFilterHammingOptimizedOpLowering, + HighPassFIRHammingOptimizedOpLowering, LMSFilterOpLowering, + ThresholdOpLowering, QuantizationOpLowering, LMSFilterResponseOpLowering, + RunLenEncodingOpLowering, FIRFilterResSymmOptimizedOpLowering, + LengthOpLowering, ReverseInputOpLowering, PaddingOpLowering, + FIRFilterYSymmOptimizedOpLowering, FFT1DRealSymmOpLowering, + FFT1DImgConjSymmOpLowering, FFTRealOpLowering, FFTImagOpLowering, + Conv2DOpLowering, ShiftRightOpLowering, MatmulOpLowering, + ThresholdUpOpLowering, QamModulateRealOpLowering, + QamModulateImgOpLowering, QamDemodulateOpLowering, FindPeaksOpLowering, + BeamFormOpLowering, SpaceModulateOpLowering, SpaceDemodulateOpLowering, + SpaceErrCorrectionOpLowering, FindPeaksOpLowering, MaxOpLowering, + MeanOpLowering, DiffOpLowering, GetSingleElemAtIdxOpLowering, + Diff2MeanOptimizedOpLowering, Median2SlidingOptimizedOpLowering, + NormalizeOpLowering, AbsOpLowering, MedianFilterOpLowering, + LMS2FindPeaksOptimizedOpLowering, FindPeaks2Diff2MeanOptimizedOpLowering, + NormLMSFilterResponseOptimizeOpLowering, + 
FIRFilterResSymmThresholdUpOptimizedOpLowering, FFTCombineOpLowering, + GenerateDTMFOpLowering, GenerateVoiceSignatureOpLowering, SqrtOpLowering, + FFTFreqOpLowering, FindDominantPeaksOpLowering, + RecoverDTMFDigitOpLowering, FFTOpLowering, FFTAbsOpLowering, + DFTAbsOpLowering, DFTAbsThresholdUpOpLowering, ArgMaxOpLowering, CorrelateOpLowering, + SetSingleElemAtIdxOpLowering, Correl2MaxOptimizedOpLowering, LMSFilterResponse2GainOpLowering>(&getContext()); // With the target and rewrite patterns defined, we can now attempt the // conversion. The conversion will signal failure if any of our `illegal` diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp index 24017a99f81e..98a43ca2cb1b 100644 --- a/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp +++ b/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp @@ -15,25 +15,25 @@ #include "toy/AST.h" #include "toy/Dialect.h" - -#include "mlir/IR/Block.h" -#include "mlir/IR/Diagnostics.h" -#include "mlir/IR/Value.h" -#include "mlir/Support/LogicalResult.h" #include "mlir/IR/Attributes.h" +#include "mlir/IR/Block.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Value.h" #include "mlir/IR/Verifier.h" +#include "mlir/Support/LogicalResult.h" #include "toy/Lexer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -218,6 +218,8 @@ class MLIRGenImpl { return builder.create(location, lhs, rhs); case '-': return builder.create(location, lhs, rhs); + case '^': + return builder.create(location, lhs, rhs); } emitError(location, "invalid binary operator '") << binop.getOp() << "'"; @@ -332,6 +334,16 @@ class MLIRGenImpl { 
// Builtin calls have their custom operation, meaning this is a // straightforward emission. + + if (callee == "bitwiseand") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.bitwiseand " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + if (callee == "transpose") { if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.transpose " @@ -342,99 +354,263 @@ class MLIRGenImpl { } // - if(callee == "delay"){ - if(call.getArgs().size() != 2){ + if (callee == "delay") { + if (call.getArgs().size() != 2) { emitError(location, "MLIR codegen encountered an error: dsp.delay " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "gain"){ - if(call.getArgs().size() != 2){ + if (callee == "gain") { + if (call.getArgs().size() != 2) { emitError(location, "MLIR codegen encountered an error: dsp.gain " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } // Sub Op - if(callee == "sub"){ + if (callee == "sub") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.sub " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + if(callee == "pow"){ if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.sub " + emitError(location, "MLIR codegen encountered an error: dsp.pow " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0], operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "zeroCrossCount"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR 
codegen encountered an error: dsp.zeroCrossCount " - "accepts only 1 arguments"); + + // Modulo Op + if (callee == "modulo") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.modulo " + "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "FIRFilterResponse"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterResponse " - "accepts only 2 arguments"); + if (callee == "fftReal") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.zeroCrossCount " + "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0]); } - if(callee == "slidingWindowAvg"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.slidingWindowAvg " - "accepts only 1 arguments"); + if (callee == "fftImag") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.zeroCrossCount " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // FindPeaks Op + if (callee == "find_peaks") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.find_peaks " + "accepts only 3 arguments: signal, height, and distance"); + return nullptr; + } + return builder.create(location, operands[0], operands[1], + operands[2]); + } + + // Max Op + if (callee == "max") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.max " + "accepts only 1 argument."); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // Mean Op + if (callee == "mean") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR 
codegen encountered an error: dsp.mean " + "accepts only 2 arguments: input tensor, length"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + // Diff Op + if (callee == "diff") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.diff " + "accepts only 2 arguments: input tensor, legnth"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + // Abs Op + if(callee == "abs") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.abs " + "accepts only 1 arguments: input tensor."); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // ArgMax Op + if(callee == "argmax") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.argmax " + "accepts only 2 arguments: input tensor, axis."); return nullptr; } - return builder.create(location, operands[0] ); + + auto axisOp = operands[1].getDefiningOp(); + auto axisVal = axisOp.getValue().getValues(); + double axis = axisVal[0].getValueAsDouble(); + + return builder.create(location, operands[0], axis); } - if(callee == "downsampling"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.downsampling " + // Normalize Op + if (callee == "normalize") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.normalize " + "accepts only 1 arguments: input tensor"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // Normalize LMS filter Op + if (callee == "norm_LMSFilterResponse_opt") { + if (call.getArgs().size() != 4) { + emitError(location, "MLIR codegen encountered an error: dsp.norm_LMSFilterResponse_opt " + "accepts 4 arguments "); + return nullptr; + } + return builder.create(location, operands[0], operands[1], operands[2], operands[3]); + } + + // Shift right Op + if 
(callee == "shiftRight") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.shiftRight " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "upsampling"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.upsampling " + // Matmul Op + if (callee == "matmul") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.matmul " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "lowPassFilter"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.lowPassFilter " + if (callee == "zeroCrossCount") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.zeroCrossCount " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + if (callee == "FIRFilterResponse") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.FIRFilterResponse " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + + if (callee == "medianFilter") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.medianFilter " + "accepts only 1 argument"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + if (callee == "slidingWindowAvg") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.slidingWindowAvg " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + if (callee == 
"downsampling") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.downsampling " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + if (callee == "upsampling") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.upsampling " "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "highPassFilter"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.highPassFilter " - "accepts only 1 arguments"); + if (callee == "lowPassFilter") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.lowPassFilter " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + + if (callee == "highPassFilter") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.highPassFilter " + "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - if(callee == "fft1d"){ - if(call.getArgs().size() != 1){ + if (callee == "fft1d") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.fft1d " "accepts only 1 arguments"); return nullptr; @@ -442,26 +618,26 @@ class MLIRGenImpl { // return builder.create(location, operands[0] ); } - if(callee == "fft1dreal"){ - if(call.getArgs().size() != 1){ + if (callee == "fft1dreal") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.fft1dreal " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - 
if(callee == "fft1dimg"){ - if(call.getArgs().size() != 1){ + if (callee == "fft1dimg") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.fft1dimg " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - if(callee == "ifft1d"){ - if(call.getArgs().size() != 2){ + if (callee == "ifft1d") { + if (call.getArgs().size() != 2) { emitError(location, "MLIR codegen encountered an error: dsp.ifft1d " "accepts only 1 arguments"); return nullptr; @@ -469,264 +645,578 @@ class MLIRGenImpl { return builder.create(location, operands[0], operands[1]); } - if(callee == "hamming"){ - if(call.getArgs().size() != 1){ + if (callee == "hamming") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.hamming " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - if(callee == "dct"){ - if(call.getArgs().size() != 1){ + if (callee == "dct") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.dct " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - if(callee == "filter"){ - if(call.getArgs().size() != 3){ + if (callee == "filter") { + if (call.getArgs().size() != 3) { emitError(location, "MLIR codegen encountered an error: dsp.filter " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0],operands[1], operands[2] ); + return builder.create(location, operands[0], operands[1], + operands[2]); } - if(callee == "div"){ - if(call.getArgs().size() != 2){ + if (callee == "div") { + if (call.getArgs().size() != 2) { emitError(location, "MLIR codegen encountered an error: dsp.div " "accepts only 2 arguments"); return nullptr; } - return 
builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1]); } - if(callee == "sum"){ - if(call.getArgs().size() != 1){ + if (callee == "sum") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.sum " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } - if(callee == "sin"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.sin " - "accepts only 1 arguments"); - return nullptr; - } - return builder.create(location, operands[0] ); - } + if (callee == "sin") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.sin " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } - if(callee == "cos"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.cos " - "accepts only 1 arguments"); - return nullptr; - } - return builder.create(location, operands[0] ); - } + if (callee == "cos") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.cos " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } - if(callee == "square"){ - if(call.getArgs().size() != 1){ + if (callee == "square") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.square " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } // Sinc Op - if(callee == "sinc"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.sinc " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "sinc") { + if 
(call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.sinc " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); } // Get Elem At Op - if(callee == "getElemAtIndx"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.getElemAtIndx " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "getElemAtIndx") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.getElemAtIndx " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + + // Get Single Element At Op + if (callee == "getSingleElemAtIndx") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.getSingleElemAtIndx " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + + // Diff2MeanOptimized Op + if (callee == "diff2meanOpt") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.diff2meanOpt " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + + // FindPeaksDiff2MeanOptimized Op + if (callee == "findpeaks2diff2meanOpt") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.findpeaks2diff2meanOpt " + "accepts only 3 arguments."); + return nullptr; + } + return builder.create(location, operands[0], operands[1], operands[2]); + } + + // LMS2FindPeaksOptimizedOp Op + if (callee == "lms2findPeaks") { + if (call.getArgs().size() != 6) { + emitError(location, + "MLIR codegen encountered an error: dsp.lmsFilterResponse2findPeaks " + "accepts only 6 arguments"); + return nullptr; + } + return 
builder.create(location, operands[0], + operands[1], operands[2], operands[3], operands[4], operands[5]); } + // Median2SlidingOptimized Op + if (callee == "median2slidingOp") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.median2slidingOp" + "accepts only 1 argument."); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // Set Elem At Indx - if(callee == "setElemAtIndx"){ - if(call.getArgs().size() != 3){ - emitError(location, "MLIR codegen encountered an error: dsp.setElemAtIndx " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1], operands[2]); + if (callee == "setElemAtIndx") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.setElemAtIndx " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1], + operands[2]); } // lowPassFilter Op - if(callee == "lowPassFIRFilter"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.lowPassFilter " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "lowPassFIRFilter") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.lowPassFilter " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); } // highPassFilter Op - if(callee == "highPassFIRFilter"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.highPassFilter " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "highPassFIRFilter") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.highPassFilter " + "accepts only 2 
arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); } - if(callee == "getRangeOfVector"){ - if(call.getArgs().size() != 3){ - emitError(location, "MLIR codegen encountered an error: dsp.getRangeOfVector " - "accepts only 3 arguments"); + if (callee == "getRangeOfVector") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.getRangeOfVector " + "accepts only 3 arguments"); return nullptr; } - return builder.create(location, operands[0],operands[1], operands[2] ); + return builder.create(location, operands[0], + operands[1], operands[2]); } // FIRHammingOptimizedOp - if(callee == "FIRFilterHammingOptimized"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterHammingOptimized " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "FIRFilterHammingOptimized") { + if (call.getArgs().size() != 2) { + emitError( + location, + "MLIR codegen encountered an error: dsp.FIRFilterHammingOptimized " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); } // HighPassFIRHammingOptimizedOp - if(callee == "highPassFIRHammingOptimized"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.HighPassFIRHammingOptimizedOp " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "highPassFIRHammingOptimized") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: " + "dsp.HighPassFIRHammingOptimizedOp " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create( + location, operands[0], operands[1]); } // LMS FILTER - if(callee == "lmsFilter"){ - if(call.getArgs().size() != 5){ + if (callee == "lmsFilter") { + if 
(call.getArgs().size() != 5) { emitError(location, "MLIR codegen encountered an error: dsp.lmsFilter" "accepts only 5 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1], operands[2], operands[3],operands[4] ); + return builder.create(location, operands[0], operands[1], + operands[2], operands[3], operands[4]); } - if(callee == "threshold"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.ThresholdOp " - "accepts only 2 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1]); + if (callee == "threshold") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.ThresholdOp " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); } - if(callee == "quantization"){ - if(call.getArgs().size() != 4){ - emitError(location, "MLIR codegen encountered an error: dsp.quantization " - "accepts only 4 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1],operands[2], operands[3]); + if (callee == "quantization") { + if (call.getArgs().size() != 4) { + emitError(location, + "MLIR codegen encountered an error: dsp.quantization " + "accepts only 4 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1], + operands[2], operands[3]); } - if(callee == "lmsFilterResponse"){ - if(call.getArgs().size() != 4){ + if (callee == "lmsFilterResponse") { + if (call.getArgs().size() != 4) { emitError(location, "MLIR codegen encountered an error: dsp.lmsFilter" "accepts only 4 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1], operands[2], operands[3]); + return builder.create( + location, operands[0], operands[1], operands[2], operands[3]); } - if(callee == "runLenEncoding"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen 
encountered an error: dsp.runLenEncoding " - "accepts only 1 arguments"); - return nullptr; - } - return builder.create(location, operands[0]); + if (callee == "runLenEncoding") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.runLenEncoding " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); } - if(callee == "FIRFilterResSymmOptimized"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterResSymmOptimized " - "accepts only 2 arguments"); + if (callee == "FIRFilterResSymmOptimized") { + if (call.getArgs().size() != 2) { + emitError( + location, + "MLIR codegen encountered an error: dsp.FIRFilterResSymmOptimized " + "accepts only 2 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], + operands[1]); } - if(callee == "len"){ - if(call.getArgs().size() != 1){ + if (callee == "len") { + if (call.getArgs().size() != 1) { emitError(location, "MLIR codegen encountered an error: dsp.len " "accepts only 1 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0]); } + if (callee == "reverseInput") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.reverseInput " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } - if(callee == "reverseInput"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.reverseInput " - "accepts only 1 arguments"); + if (callee == "padding") { + if (call.getArgs().size() != 3) { + emitError(location, "MLIR codegen encountered an error: dsp.padding " + "accepts only 3 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0], 
operands[1], + operands[2]); } - if(callee == "padding"){ - if(call.getArgs().size() != 3){ - emitError(location, "MLIR codegen encountered an error: dsp.padding " - "accepts only 3 arguments"); - return nullptr; - } - return builder.create(location, operands[0], operands[1], operands[2]); + if (callee == "FIRFilterYSymmOptimized") { + if (call.getArgs().size() != 2) { + emitError( + location, + "MLIR codegen encountered an error: dsp.FIRFilterYSymmOptimizedOp " + "accepts only 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + if (callee == "fft1DRealSymm") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.FFT1DRealSymmOp " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } // FFT1DImgConjSymmOpLowering + if (callee == "fft1DimgConjSymm") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.FFT1DImgConjSymmOp " + "accepts only 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); } - if(callee == "FIRFilterYSymmOptimized"){ - if(call.getArgs().size() != 2){ - emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterYSymmOptimizedOp " - "accepts only 2 arguments"); + if (callee == "conv2d") { + if (call.getArgs().size() != 3) { + emitError(location, "MLIR codegen encountered an error: dsp.Conv2DOp " + "accepts 3 arguments"); return nullptr; } - return builder.create(location, operands[0] , operands[1]); + return builder.create(location, operands[0], operands[1], + operands[2]); } - if(callee == "fft1DRealSymm"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.FFT1DRealSymmOp " - "accepts only 1 arguments"); + + if (callee == "thresholdUp") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.thresholdUp " + "accepts 3 
arguments"); return nullptr; } - return builder.create(location, operands[0] ); - } //FFT1DImgConjSymmOpLowering - if(callee == "fft1DimgConjSymm"){ - if(call.getArgs().size() != 1){ - emitError(location, "MLIR codegen encountered an error: dsp.FFT1DImgConjSymmOp " - "accepts only 1 arguments"); + return builder.create(location, operands[0], operands[1], + operands[2]); + } + + if (callee == "generateDtmf") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.GenerateDTMFOp " + "accepts 3 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1], + operands[2]); + } + + if (callee == "fftfreq") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.GenerateDTMFOp " + "accepts 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + if (callee == "findDominantPeaks") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.FindDominantPeaksOp " + "accepts 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + if (callee == "recoverDtmfDigit") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.RecoverDTMFDigitOp " + "accepts 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + if (callee == "fftCombine") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.FFTCombinOp " + "accepts 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + if (callee == "sqrt") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.GenerateDTMFOp " + "accepts only 1 argument."); + return nullptr; + } + return builder.create(location, operands[0]); + } + + if (callee == 
"generateVoiceSignature") { + if (call.getArgs().size() != 4) { + emitError(location, + "MLIR codegen encountered an error: dsp.GenerateVoiceSignatureOp " + "accepts 4 arguments"); return nullptr; } - return builder.create(location, operands[0] ); + return builder.create(location, operands[0], operands[1], + operands[2], operands[3]); } + + // beam form + if (callee == "beam_form") { + if (call.getArgs().size() != 4) { + emitError(location, + "MLIR codegen encountered an error: dsp.GenerateDTMFOp " + "accepts 4 argument"); + return nullptr; + } + auto antennaConst = operands[0].getDefiningOp(); + auto freqConst = operands[1].getDefiningOp(); + auto antennaVal = antennaConst.getValue().getValues(); + auto freqVal = freqConst.getValue().getValues(); + + double antenna = antennaVal[0].getValueAsDouble(); + double freq = freqVal[0].getValueAsDouble(); + + return builder.create(location, antenna, freq, operands[2], + operands[3]); + } + // qam modulate op + if (callee == "qam_modulate_real") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.QamModulateRealOp " + "accepts 1 arguments"); + return nullptr; + } + + return builder.create(location, operands[0]); + } + + if (callee == "qam_modulate_imagine") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.QamModualteImgOp " + "accepts 1 arguments"); + return nullptr; + } + + return builder.create(location, operands[0]); + } + // qam_demodulate + if (callee == "qam_demodulate") { + if (call.getArgs().size() != 2) { + emitError(location, + "MLIR codegen encountered an error: dsp.QamDemodulateOp" + "accepts 2 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1]); + } + // space_demodulate + if (callee == "space_demodulate") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.SpaceDemodulateOp" + "accepts 1 arguments"); + return 
nullptr; + } + return builder.create(location, operands[0]); + } + // space_modulate + if (callee == "space_modulate") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.SpaceModulateOp" + "accepts 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } + // space_err_correction + if (callee == "space_err_correction") { + if (call.getArgs().size() != 1) { + emitError(location, + "MLIR codegen encountered an error: dsp.SpaceErrCorrectionOp" + "accepts 1 arguments"); + return nullptr; + } + return builder.create(location, operands[0]); + } + + // Correlate Op + if (callee == "correlate") { + if (call.getArgs().size() != 2) { + emitError(location, "MLIR codegen encountered an error: dsp.correlate " + "accepts only 2 arguments: lhs rhs"); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + // Set Single Element At Op + if (callee == "setSingleElemAtIndx") { + if (call.getArgs().size() != 3) { + emitError(location, + "MLIR codegen encountered an error: dsp.setSingleElemAtIndx " + "accepts only 3 arguments"); + return nullptr; + } + return builder.create(location, operands[0], + operands[1], operands[2]); + } + + // Correl2MaxOptimizedOp Op + if (callee == "correl2max") { + if (call.getArgs().size() != 1) { + emitError(location, "MLIR codegen encountered an error: dsp.correl2max " + "accepts only 2 argument."); + return nullptr; + } + return builder.create(location, operands[0], operands[1]); + } + + + //LMSFilterResponse2GainOp + if (callee == "lmsFilterResponse2gain") { + if (call.getArgs().size() != 5) { + emitError(location, "MLIR codegen encountered an error: dsp.lmsFilterResponse2gain " + "accepts only 5 argument."); + return nullptr; + } + return builder.create(location, operands[0], operands[1], operands[2], operands[3], operands[4]); + } + + + + + // Builtin calls have their custom operation, meaning this is a // straightforward emission. 
// if(callee == "delay"){ @@ -760,6 +1250,31 @@ class MLIRGenImpl { return builder.create(loc(num.loc()), num.getValue()); } + /// Emit a string exression + mlir::Value mlirGen(StringExprAST &expr) { + auto string_val = expr.getStringVal(); + + std::vector signals; + for (char ch : string_val) { + std::bitset<8> bits(static_cast(ch)), reversed; + int n = 8; + for (int i = 0; i < n; ++i) + reversed[i] = bits[n - i - 1]; + for (int i = 0; i < n; ++i) + signals.push_back(reversed[i]); + } + + mlir::Type eleType = builder.getF64Type(); + auto dataType = mlir::RankedTensorType::get(signals.size(), eleType); + + auto dataAttr = + mlir::DenseElementsAttr::get(dataType, llvm::ArrayRef(signals)); + + auto type = getType(signals.size()); + + return builder.create(loc(expr.loc()), type, dataAttr); + } + /// Dispatch codegen for the right expression subclass using RTTI. mlir::Value mlirGen(ExprAST &expr) { switch (expr.getKind()) { @@ -773,6 +1288,8 @@ class MLIRGenImpl { return mlirGen(cast(expr)); case dsp::ExprAST::Expr_Num: return mlirGen(cast(expr)); + case dsp::ExprAST::Expr_String: + return mlirGen(cast(expr)); default: emitError(loc(expr.loc())) << "MLIR codegen encountered an unhandled expr kind '" @@ -793,7 +1310,9 @@ class MLIRGenImpl { return nullptr; } - mlir::Value value = mlirGen(*init); + mlir::Value value; + // Register the value in the symbol table. + value = mlirGen(*init); if (!value) return nullptr; @@ -804,8 +1323,6 @@ class MLIRGenImpl { value = builder.create(loc(vardecl.loc()), getType(vardecl.getType()), value); } - - // Register the value in the symbol table. 
if (failed(declare(vardecl.getName(), value))) return nullptr; return value; diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp index e2c461afa434..ea0ab6abbc9b 100644 --- a/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp +++ b/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp @@ -11,15 +11,20 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" + #include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Value.h" #include "mlir/Support/LogicalResult.h" -#include "toy/Dialect.h" #include "toy/DebugConfig.h" +#include "toy/Dialect.h" #include using namespace mlir; using namespace dsp; +using namespace std; namespace { /// Include the patterns defined in the Declarative Rewrite framework. @@ -58,16 +63,15 @@ struct SimplifyRedundantTranspose : public mlir::OpRewritePattern { } }; - - -//Pseudo-Code -//Find back to back gain operation - // result1 = upsampling(input1, rate1) - // result2 = downsampling(result1, rate2) +// Pseudo-Code +// Find back to back gain operation +// result1 = upsampling(input1, rate1) +// result2 = downsampling(result1, rate2) // if rate1 == rate2 then result2 = input1 - // result2 will be now delay(input1, gain1 + gain2) - // replaceOp -struct SimplifyUpsamplingDownsampling : public mlir::OpRewritePattern { +// result2 will be now delay(input1, gain1 + gain2) +// replaceOp +struct SimplifyUpsamplingDownsampling + : public mlir::OpRewritePattern { /// We register this pattern to match every dsp.downsampling in the IR. /// The "benefit" is used by the framework to order the patterns and process /// them in order of profitability. 
@@ -83,152 +87,300 @@ struct SimplifyUpsamplingDownsampling : public mlir::OpRewritePattern(); + dsp::UpsamplingOp prev_UpSamplingOp = + downsamplingOperand0_input.getDefiningOp(); // Input defined by another downsampling? If not, no match. if (!prev_UpSamplingOp) return failure(); - //Get operands for UpSamplingOp + // Get operands for UpSamplingOp mlir::Value UpsamplingOperand1_Rate = prev_UpSamplingOp.getOperand(1); mlir::Value UpsamplingOperand0_input = prev_UpSamplingOp.getOperand(0); - //get constant value from the downsamplingOp -- operand1 - dsp::ConstantOp constant_Op1_downsamplingOp = downsamplingOperand1_Rate.getDefiningOp(); - // DEBUG_PRINT_NO_ARGS(); - DenseElementsAttr DenseValueFrmDownsampling = constant_Op1_downsamplingOp.getValue(); - // DEBUG_PRINT_NO_ARGS(); + // get constant value from the downsamplingOp -- operand1 + dsp::ConstantOp constant_Op1_downsamplingOp = + downsamplingOperand1_Rate.getDefiningOp(); + // DEBUG_PRINT_NO_ARGS(); + DenseElementsAttr DenseValueFrmDownsampling = + constant_Op1_downsamplingOp.getValue(); + // DEBUG_PRINT_NO_ARGS(); auto elements = DenseValueFrmDownsampling.getValues(); float FirstValue = elements[0].getValueAsDouble(); - int64_t DownsamplingRate = (int64_t) FirstValue; + int64_t DownsamplingRate = (int64_t)FirstValue; - //Get constant value from upsampling: -- operand1 - dsp::ConstantOp constant_Op1_upSamplingOp = UpsamplingOperand1_Rate.getDefiningOp(); - // DEBUG_PRINT_NO_ARGS(); - DenseElementsAttr DenseValueFrmUpsampling = constant_Op1_upSamplingOp.getValue(); - // DEBUG_PRINT_NO_ARGS(); + // Get constant value from upsampling: -- operand1 + dsp::ConstantOp constant_Op1_upSamplingOp = + UpsamplingOperand1_Rate.getDefiningOp(); + // DEBUG_PRINT_NO_ARGS(); + DenseElementsAttr DenseValueFrmUpsampling = + constant_Op1_upSamplingOp.getValue(); + // DEBUG_PRINT_NO_ARGS(); elements = DenseValueFrmUpsampling.getValues(); FirstValue = elements[0].getValueAsDouble(); - int64_t UpsamplingRate = (int64_t) FirstValue; 
- - llvm::errs() << "DownsamplingRate = " << DownsamplingRate << " UpsamplingRate" << UpsamplingRate << "\n"; - if(DownsamplingRate == UpsamplingRate) - { - // Otherwise, we have a redundant downsampling. Use the rewriter. - // rewriter.replaceOp(op, {downsamplingInputOp.getOperand()}); //downsamplingOperand0_input + int64_t UpsamplingRate = (int64_t)FirstValue; + + llvm::errs() << "DownsamplingRate = " << DownsamplingRate + << " UpsamplingRate" << UpsamplingRate << "\n"; + if (DownsamplingRate == UpsamplingRate) { + // Otherwise, we have a redundant downsampling. Use the rewriter. + // rewriter.replaceOp(op, {downsamplingInputOp.getOperand()}); + // //downsamplingOperand0_input llvm::errs() << "Going for Downsampling pass\n"; rewriter.replaceOp(op, UpsamplingOperand0_input); - return success(); + return success(); - } - else if(UpsamplingRate > DownsamplingRate) - { - //check if UpSamplingRate is a multiple of DownsamplingRate - //if yes, final result should be UpSampling with SamplingRate as division - if(UpsamplingRate % DownsamplingRate != 0) - { + } else if (UpsamplingRate > DownsamplingRate) { + // check if UpSamplingRate is a multiple of DownsamplingRate + // if yes, final result should be UpSampling with SamplingRate as division + if (UpsamplingRate % DownsamplingRate != 0) { return failure(); } // - if(DownsamplingRate == 0) - { - llvm::errs() << "DownSamplingRate= 0 Not allowed" << "\n"; + if (DownsamplingRate == 0) { + llvm::errs() << "DownSamplingRate= 0 Not allowed" << "\n"; return failure(); } - double finalUpSamplingRate = (double) UpsamplingRate / DownsamplingRate; + double finalUpSamplingRate = (double)UpsamplingRate / DownsamplingRate; - auto constOp_finalSamplingRate = rewriter.create(op.getLoc(), finalUpSamplingRate); + auto constOp_finalSamplingRate = + rewriter.create(op.getLoc(), finalUpSamplingRate); - auto finalUpSamplingOp = rewriter.create(op.getLoc(), - UpsamplingOperand0_input , constOp_finalSamplingRate); + auto finalUpSamplingOp = 
rewriter.create( + op.getLoc(), UpsamplingOperand0_input, constOp_finalSamplingRate); llvm::errs() << "Going for Downsampling pass\n"; rewriter.replaceOp(op, finalUpSamplingOp); - } return failure(); - } }; -//Pseudo-Code -//Find back to back gain operation - // result1 = gain(input1, gain1) - // result2 = gain(result1, gain2) +// Pseudo-Code +// Find back to back gain operation +// result1 = gain(input1, gain1) +// result2 = gain(result1, gain2) // if result1 is coming from another delay operation - // result2 will be now delay(input1, gain1 + gain2) - // replaceOp -struct SimplifyBack2BackGain: public mlir::OpRewritePattern{ +// result2 will be now delay(input1, gain1 + gain2) +// replaceOp +struct SimplifyBack2BackGain : public mlir::OpRewritePattern { // - SimplifyBack2BackGain(mlir::MLIRContext *context) - : OpRewritePattern(context, 1) {} - - mlir::LogicalResult matchAndRewrite(GainOp op, - mlir::PatternRewriter &rewriter) const override { - - // - mlir::Value gainOp_operand0 = op.getOperand(0); - - //check if this is coming from another gain operation - GainOp prev_gainOp = gainOp_operand0.getDefiningOp(); - - if(!prev_gainOp) - return failure(); + SimplifyBack2BackGain(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} - mlir::Value gainOp_operand1 = op.getOperand(1); - mlir::Value prev_gainOp_operand0 = prev_gainOp.getOperand(0); - mlir::Value prev_gainOp_operand1 = prev_gainOp.getOperand(1); + mlir::LogicalResult + matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override { + + // + mlir::Value gainOp_operand0 = op.getOperand(0); + + // check if this is coming from another gain operation + GainOp prev_gainOp = gainOp_operand0.getDefiningOp(); + + if (!prev_gainOp) + return failure(); + + mlir::Value gainOp_operand1 = op.getOperand(1); + mlir::Value prev_gainOp_operand0 = prev_gainOp.getOperand(0); + mlir::Value prev_gainOp_operand1 = prev_gainOp.getOperand(1); - //create add op - auto addOp = rewriter.create(op.getLoc(), 
prev_gainOp_operand1, gainOp_operand1); - auto newGainOp = rewriter.create(op.getLoc(), - prev_gainOp_operand0 , addOp.getResult()); - - //Repalce the use of original gain operation with this newGainOp + // create add op + auto addOp = rewriter.create(op.getLoc(), prev_gainOp_operand1, + gainOp_operand1); + auto newGainOp = rewriter.create(op.getLoc(), prev_gainOp_operand0, + addOp.getResult()); + + // Repalce the use of original gain operation with this newGainOp rewriter.replaceOp(op, newGainOp.getResult()); return mlir::success(); + } +}; - } +// Pseudo-Code +// Mean of diff is equal to (input[-1] - input[0])/len(input). +// For example, for array (a, b, c, d, e) +// diff(array) = (b-a, c-b, d-c, e-d) +// mean(diff(array)) = ((b-a) + (c-b) + (d-c) + (e-d))/4 = (e-a)/4 +// result1 = diff(input1, diff_length) //NOTE: len(result1) == diff_length-1 +// virtually (tensor size is fixed as len(input)-1). result2 = mean(result1, +// mean_length) +// if mean_length <= (diff_length-1), +// result2 will be now (input1[mean_length] - input[0])/mean_length +// replaceOp +struct SimplifyDiff2Mean : public mlir::OpRewritePattern { + // + SimplifyDiff2Mean(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override { + + // + mlir::Value meanOp_operand0 = op.getOperand(0); + + // check if this is coming from diff operation. 
+ DiffOp prev_diffOp = meanOp_operand0.getDefiningOp(); + + if (!prev_diffOp) + return failure(); + + mlir::Value meanOp_operand1 = op.getOperand(1); + mlir::Value prev_diffOp_operand0 = prev_diffOp.getOperand(0); + + auto optimizedOp = rewriter.create( + op.getLoc(), prev_diffOp_operand0, meanOp_operand1); + + // Replace the matched mean op (`op`) with the fused op built from the diff input and the mean's operand 1 + rewriter.replaceOp(op, optimizedOp.getResult()); + return mlir::success(); + } }; +struct SimplifyLMS2FindPeaks : public mlir::OpRewritePattern { + // Registers the pattern with benefit 1. + SimplifyLMS2FindPeaks(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(FindPeaksOp op, + mlir::PatternRewriter &rewriter) const override { + // Operand 0 of the findPeaks op is the candidate producer to fuse with. + mlir::Value findPeaksOp_operand0 = op.getOperand(0); + + // Check whether operand 0 is produced by an LMSFilterResponseOp; if not, no match. + LMSFilterResponseOp prev_lmsFilterResponseOp = + findPeaksOp_operand0.getDefiningOp(); + + if (!prev_lmsFilterResponseOp) + return failure(); -struct SimplifyBack2BackDelay: public mlir::OpRewritePattern{ + mlir::Value findPeaksOp_operand1 = op.getOperand(1); + mlir::Value findPeaksOp_operand2 = op.getOperand(2); + mlir::Value prev_lmsFilterResponseOp_operand0 = + prev_lmsFilterResponseOp.getOperand(0); + mlir::Value prev_lmsFilterResponseOp_operand1 = + prev_lmsFilterResponseOp.getOperand(1); + mlir::Value prev_lmsFilterResponseOp_operand2 = + prev_lmsFilterResponseOp.getOperand(2); + mlir::Value prev_lmsFilterResponseOp_operand3 = + prev_lmsFilterResponseOp.getOperand(3); + + auto optimizedOp = rewriter.create( + op.getLoc(), prev_lmsFilterResponseOp_operand0, + prev_lmsFilterResponseOp_operand1, prev_lmsFilterResponseOp_operand2, + prev_lmsFilterResponseOp_operand3, findPeaksOp_operand1, + findPeaksOp_operand2); + + // Replace the matched findPeaks op (`op`) with the fused op (four LMS filter operands followed by findPeaks operands 1 and 2) + rewriter.replaceOp(op, optimizedOp.getResult()); + return mlir::success(); + } +}; + +struct SimplifyFindPeaks2Diff2Mean : public mlir::OpRewritePattern {
// - SimplifyBack2BackDelay(mlir::MLIRContext *context) - : OpRewritePattern(context, 1) {} - - mlir::LogicalResult matchAndRewrite(DelayOp op, - mlir::PatternRewriter &rewriter) const override { - - // - mlir::Value delayOp_operand0 = op.getOperand(0); - - //check if this is coming from another delay operation - DelayOp prev_delayOp = delayOp_operand0.getDefiningOp(); - - if(!prev_delayOp) - return failure(); + SimplifyFindPeaks2Diff2Mean(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override { - mlir::Value delayOp_operand1 = op.getOperand(1); - mlir::Value prev_delayOp_operand0 = prev_delayOp.getOperand(0); - mlir::Value prev_delayOp_operand1 = prev_delayOp.getOperand(1); + // + mlir::Value meanOp_operand0 = op.getOperand(0); - //create add op - auto addOp = rewriter.create(op.getLoc(), prev_delayOp_operand1, delayOp_operand1); - auto newDelayOp = rewriter.create(op.getLoc(), - prev_delayOp_operand0 , addOp.getResult()); - - //Repalce the use of original delay operation with this newDelayOp - rewriter.replaceOp(op, newDelayOp.getResult()); + // check if this is coming from diff operation. 
+ DiffOp prev_diffOp = meanOp_operand0.getDefiningOp(); + + if (!prev_diffOp) + return failure(); + + mlir::Value prev_diffOp_operand0 = prev_diffOp.getOperand(0); + FindPeaksOp prev_findPeaksOp = + prev_diffOp_operand0.getDefiningOp(); + + if (!prev_findPeaksOp) + return failure(); + + mlir::Value prev_findPeaksOp_operand0 = prev_findPeaksOp.getOperand(0); + mlir::Value prev_findPeaksOp_operand1 = prev_findPeaksOp.getOperand(1); + mlir::Value prev_findPeaksOp_operand2 = prev_findPeaksOp.getOperand(2); + + auto optimizedOp = rewriter.create( + op.getLoc(), prev_findPeaksOp_operand0, prev_findPeaksOp_operand1, + prev_findPeaksOp_operand2); + + // Repalce the use of original diff operation with this operation + rewriter.replaceOp(op, optimizedOp.getResult()); return mlir::success(); + } +}; - } +struct SimplifyMedian2Sliding + : public mlir::OpRewritePattern { + // + SimplifyMedian2Sliding(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(SlidingWindowAvgOp op, + mlir::PatternRewriter &rewriter) const override { + + mlir::Value slidingOp_operand0 = op.getOperand(); + + // check if this is coming from medianFilter operation. 
+ MedianFilterOp prev_medianFilterOp = + slidingOp_operand0.getDefiningOp(); + + if (!prev_medianFilterOp) + return failure(); + + mlir::Value prev_medianFilterOp_operand0 = prev_medianFilterOp.getOperand(); + + auto optimizedOp = rewriter.create( + op.getLoc(), prev_medianFilterOp_operand0); + + rewriter.replaceOp(op, optimizedOp.getResult()); + return mlir::success(); + } +}; + +struct SimplifyBack2BackDelay : public mlir::OpRewritePattern { + // + SimplifyBack2BackDelay(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(DelayOp op, mlir::PatternRewriter &rewriter) const override { + + // + mlir::Value delayOp_operand0 = op.getOperand(0); + + // check if this is coming from another delay operation + DelayOp prev_delayOp = delayOp_operand0.getDefiningOp(); + + if (!prev_delayOp) + return failure(); + + mlir::Value delayOp_operand1 = op.getOperand(1); + mlir::Value prev_delayOp_operand0 = prev_delayOp.getOperand(0); + mlir::Value prev_delayOp_operand1 = prev_delayOp.getOperand(1); + + // create add op + auto addOp = rewriter.create(op.getLoc(), prev_delayOp_operand1, + delayOp_operand1); + auto newDelayOp = rewriter.create( + op.getLoc(), prev_delayOp_operand0, addOp.getResult()); + + // Repalce the use of original delay operation with this newDelayOp + rewriter.replaceOp(op, newDelayOp.getResult()); + return mlir::success(); + } }; // Pseudo-code -// if operand of square is coming from real part of fft1d -// replace fft1d with fft1dreal +// if operand of square is coming from real part of fft1d +// replace fft1d with fft1dreal // still squareOp will remain same struct SimplifyFFTSquare : public mlir::OpRewritePattern { /// We register this pattern to match every dsp.downsampling in the IR. @@ -241,8 +393,7 @@ struct SimplifyFFTSquare : public mlir::OpRewritePattern { /// argument is the orchestrator of the sequence of rewrites. 
The pattern is /// expected to interact with it to perform any changes to the IR from here. mlir::LogicalResult - matchAndRewrite(SquareOp op, - mlir::PatternRewriter &rewriter) const override { + matchAndRewrite(SquareOp op, mlir::PatternRewriter &rewriter) const override { // Look through the input of the current downsampling. // mlir::Value squareOperand1_Rate = op.getOperand(1); mlir::Value squareOperand0_input = op.getInput(); @@ -252,63 +403,64 @@ struct SimplifyFFTSquare : public mlir::OpRewritePattern { if (!prev_FFT1DOp) return failure(); - //Replace fft1d with fft1dreal - DEBUG_PRINT_WITH_ARGS( squareOperand0_input) ; - DEBUG_PRINT_WITH_ARGS( "Going fr some") ; - DEBUG_PRINT_NO_ARGS() ; - mlir::Value prev_FFT1DOp_Operand = prev_FFT1DOp.getInput(); - auto fft1drealOp1 = rewriter.create(op.getLoc(), - prev_FFT1DOp_Operand ); + // Replace fft1d with fft1dreal + DEBUG_PRINT_WITH_ARGS(squareOperand0_input); + DEBUG_PRINT_WITH_ARGS("Going fr some"); + DEBUG_PRINT_NO_ARGS(); + mlir::Value prev_FFT1DOp_Operand = prev_FFT1DOp.getInput(); + auto fft1drealOp1 = + rewriter.create(op.getLoc(), prev_FFT1DOp_Operand); // DEBUG_PRINT_NO_ARGS(); - auto SquareOp1 = rewriter.create(op.getLoc(), fft1drealOp1); + auto SquareOp1 = rewriter.create(op.getLoc(), fft1drealOp1); rewriter.replaceOp(op, SquareOp1); return mlir::success(); } }; -struct SimplifyGainwZero: public mlir::OpRewritePattern{ - SimplifyGainwZero(mlir::MLIRContext *context) - : OpRewritePattern(context, 1) {} - - mlir::LogicalResult matchAndRewrite(GainOp op, - mlir::PatternRewriter &rewriter) const override { - - // - mlir::Value gainOp_operand1 = op.getOperand(1); - - //check if the value is zero - DEBUG_PRINT_NO_ARGS(); - dsp::ConstantOp constant_Op1 = gainOp_operand1.getDefiningOp(); +struct SimplifyGainwZero : public mlir::OpRewritePattern { + SimplifyGainwZero(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(GainOp op, mlir::PatternRewriter 
&rewriter) const override { + + // + mlir::Value gainOp_operand1 = op.getOperand(1); + + // check if the value is zero + DEBUG_PRINT_NO_ARGS(); + dsp::ConstantOp constant_Op1 = + gainOp_operand1.getDefiningOp(); DenseElementsAttr DenseValueFrmgainOp = constant_Op1.getValue(); auto elements = DenseValueFrmgainOp.getValues(); float FirstValue = elements[0].getValueAsDouble(); - int64_t GainRate = (int64_t) FirstValue; + int64_t GainRate = (int64_t)FirstValue; - if(!GainRate==0) - return failure(); + if (!GainRate == 0) + return failure(); mlir::Value gainOp_operand0 = op.getOperand(0); - dsp::ConstantOp constant_Op0 = gainOp_operand0.getDefiningOp(); + dsp::ConstantOp constant_Op0 = + gainOp_operand0.getDefiningOp(); DenseElementsAttr InputValueFrmgainOp = constant_Op0.getValue(); int64_t inputSize = InputValueFrmgainOp.size(); - // Define the type of the tensor (tensor). - RankedTensorType tensorType = RankedTensorType::get({inputSize}, rewriter.getF64Type()); - - // Create a constant operation with the specified value and type. - DenseElementsAttr zerovalue = DenseElementsAttr::get(tensorType, 0.0); - Operation* constantOp = rewriter.create(op.getLoc(), zerovalue); + // Define the type of the tensor (tensor). + RankedTensorType tensorType = + RankedTensorType::get({inputSize}, rewriter.getF64Type()); + // Create a constant operation with the specified value and type. + DenseElementsAttr zerovalue = DenseElementsAttr::get(tensorType, 0.0); + Operation *constantOp = rewriter.create(op.getLoc(), zerovalue); rewriter.replaceOp(op, constantOp); return mlir::success(); - - } + } }; // Pseudo-code -// if operands of MulOp are coming from lowPassFIRFilter & hamming +// if operands of MulOp are coming from lowPassFIRFilter & hamming // then replace the MulOp with the symmetrical operation struct SimplifyFilterMulHamming : public mlir::OpRewritePattern { /// We register this pattern to match every dsp.downsampling in the IR. 
@@ -321,8 +473,7 @@ struct SimplifyFilterMulHamming : public mlir::OpRewritePattern { /// argument is the orchestrator of the sequence of rewrites. The pattern is /// expected to interact with it to perform any changes to the IR from here. mlir::LogicalResult - matchAndRewrite(MulOp op, - mlir::PatternRewriter &rewriter) const override { + matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override { // Get the operands operation from MulFOp // check if op0 is Low/HighPassFIRFilterOp & op1 is HammingWindowOp // if this true then get the operands of op0 ie, Low/HighPassFIRFilterOp @@ -330,32 +481,35 @@ struct SimplifyFilterMulHamming : public mlir::OpRewritePattern { // mlir::Value squareOperand1_Rate = op.getOperand(1); mlir::Value mulOperand0_Lhs = op.getLhs(); mlir::Value mulOperand1_Rhs = op.getRhs(); - dsp::LowPassFIRFilterOp op_LowPassFIRFilterOp = mulOperand0_Lhs.getDefiningOp(); - dsp::HammingWindowOp op_HammingWindowOp = mulOperand1_Rhs.getDefiningOp(); + dsp::LowPassFIRFilterOp op_LowPassFIRFilterOp = + mulOperand0_Lhs.getDefiningOp(); + dsp::HammingWindowOp op_HammingWindowOp = + mulOperand1_Rhs.getDefiningOp(); DEBUG_PRINT_NO_ARGS(); // Inputs are LowPassFIRFilterOp && HammingWindowOp => If not, no match. 
if (!op_LowPassFIRFilterOp || !op_HammingWindowOp) return failure(); - //Replace fft1d with fft1dreal - DEBUG_PRINT_WITH_ARGS( mulOperand0_Lhs) ; - DEBUG_PRINT_WITH_ARGS( "SimplifyFilterMulHamming - ConditionMet") ; - DEBUG_PRINT_NO_ARGS() ; + // Replace fft1d with fft1dreal + DEBUG_PRINT_WITH_ARGS(mulOperand0_Lhs); + DEBUG_PRINT_WITH_ARGS("SimplifyFilterMulHamming - ConditionMet"); + DEBUG_PRINT_NO_ARGS(); mlir::Value LowPassFIRFilterOperand_wc = op_LowPassFIRFilterOp.getWc(); mlir::Value LowPassFIRFilterOperand_N = op_LowPassFIRFilterOp.getN(); - auto firFilterHammingOptimized = rewriter.create(op.getLoc(), - LowPassFIRFilterOperand_wc, LowPassFIRFilterOperand_N ); + auto firFilterHammingOptimized = + rewriter.create( + op.getLoc(), LowPassFIRFilterOperand_wc, LowPassFIRFilterOperand_N); DEBUG_PRINT_NO_ARGS(); - + rewriter.replaceOp(op, firFilterHammingOptimized); return mlir::success(); } }; // Pseudo-code -// if operands of MulOp are coming from highPassFIRFilter & hamming +// if operands of MulOp are coming from highPassFIRFilter & hamming // then replace the MulOp with the symmetrical operation struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern { /// We register this pattern to match every dsp.downsampling in the IR. @@ -368,8 +522,7 @@ struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern { /// argument is the orchestrator of the sequence of rewrites. The pattern is /// expected to interact with it to perform any changes to the IR from here. 
mlir::LogicalResult - matchAndRewrite(MulOp op, - mlir::PatternRewriter &rewriter) const override { + matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override { // Get the operands operation from MulFOp // check if op0 is Low/HighPassFIRFilterOp & op1 is HammingWindowOp // if this true then get the operands of op0 ie, Low/HighPassFIRFilterOp @@ -377,38 +530,43 @@ struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern { // mlir::Value squareOperand1_Rate = op.getOperand(1); mlir::Value mulOperand0_Lhs = op.getLhs(); mlir::Value mulOperand1_Rhs = op.getRhs(); - dsp::HighPassFIRFilterOp op_HighPassFIRFilterOp = mulOperand0_Lhs.getDefiningOp(); - dsp::HammingWindowOp op_HammingWindowOp = mulOperand1_Rhs.getDefiningOp(); + dsp::HighPassFIRFilterOp op_HighPassFIRFilterOp = + mulOperand0_Lhs.getDefiningOp(); + dsp::HammingWindowOp op_HammingWindowOp = + mulOperand1_Rhs.getDefiningOp(); DEBUG_PRINT_NO_ARGS(); // Inputs are HighPassFIRFilterOp && HammingWindowOp => If not, no match. 
if (!op_HighPassFIRFilterOp || !op_HammingWindowOp) return failure(); - //Replace fft1d with fft1dreal - DEBUG_PRINT_WITH_ARGS( mulOperand0_Lhs) ; - DEBUG_PRINT_WITH_ARGS( "SimplifyHighPassFIRHamming - ConditionMet") ; - DEBUG_PRINT_NO_ARGS() ; + // Replace fft1d with fft1dreal + DEBUG_PRINT_WITH_ARGS(mulOperand0_Lhs); + DEBUG_PRINT_WITH_ARGS("SimplifyHighPassFIRHamming - ConditionMet"); + DEBUG_PRINT_NO_ARGS(); mlir::Value HighPassFIRFilterOperand_wc = op_HighPassFIRFilterOp.getWc(); mlir::Value HighPassFIRFilterOperand_N = op_HighPassFIRFilterOp.getN(); - auto highPassFIRHammingOptimized = rewriter.create(op.getLoc(), - HighPassFIRFilterOperand_wc, HighPassFIRFilterOperand_N ); + auto highPassFIRHammingOptimized = + rewriter.create( + op.getLoc(), HighPassFIRFilterOperand_wc, + HighPassFIRFilterOperand_N); DEBUG_PRINT_NO_ARGS(); - + rewriter.replaceOp(op, highPassFIRHammingOptimized); return mlir::success(); } }; -//Pseudo-Code -//Find FIRFilterResponse & FIRFilterHammingOptimized & operation - // result1 = dsp.FIRFilterHammingOptimized(input1, rate1) //filter and hamming - // result2 = dsp.FIRFilterResponse(result1, rate2) //FilterResponse -// For above pattern , replace dsp.FIRFilterResponse with FIRFilterResSymmOptimized - // result1 = dsp.FIRFilterHammingOptimized(input1, rate1) - // result2 = dsp.FIRFilterResSymmOptimized(result1, rate2) -struct SimplifyFIRFilterRespnseWithSymmFilter : public mlir::OpRewritePattern { +// Pseudo-Code +// Find FIRFilterResponse & FIRFilterHammingOptimized & operation +// result1 = dsp.FIRFilterHammingOptimized(input1, rate1) //filter and hamming +// result2 = dsp.FIRFilterResponse(result1, rate2) //FilterResponse +// For above pattern , replace dsp.FIRFilterResponse with +// FIRFilterResSymmOptimized result1 = dsp.FIRFilterHammingOptimized(input1, +// rate1) result2 = dsp.FIRFilterResSymmOptimized(result1, rate2) +struct SimplifyFIRFilterRespnseWithSymmFilter + : public mlir::OpRewritePattern { /// We register this pattern to 
match every dsp.downsampling in the IR. /// The "benefit" is used by the framework to order the patterns and process /// them in order of profitability. @@ -422,37 +580,42 @@ struct SimplifyFIRFilterRespnseWithSymmFilter : public mlir::OpRewritePattern(); + dsp::FIRFilterHammingOptimizedOp prev_FIRFilterSymmOp = + Operand1_forFIRFilterResp.getDefiningOp(); // Input defined by another downsampling? If not, no match. - if (!prev_FIRFilterSymmOp){ + if (!prev_FIRFilterSymmOp) { return failure(); } // create FIRFilterHammingOptimizedOp with current operands - DEBUG_PRINT_WITH_ARGS("Going for FIRFilterresponse Opt when the operand1 is a symmetric filter"); - - auto firFilterResSymmOptimizedOp = rewriter.create(op.getLoc(), - Operand0_forFIRFilterResp , Operand1_forFIRFilterResp); + DEBUG_PRINT_WITH_ARGS("Going for FIRFilterresponse Opt when the operand1 " + "is a symmetric filter"); - DEBUG_PRINT_NO_ARGS() ; + auto firFilterResSymmOptimizedOp = + rewriter.create( + op.getLoc(), Operand0_forFIRFilterResp, Operand1_forFIRFilterResp); + + DEBUG_PRINT_NO_ARGS(); rewriter.replaceOp(op, firFilterResSymmOptimizedOp); return mlir::success(); } }; -//Pseudo code: -// if the FFT1DRealOp & FFT1DImgOp has same input then replace them with single -// %4 = "dsp.fft1dreal"(%3) : (tensor<10xf64>) -> tensor<10xf64> -// %5 = "dsp.fft1dimg"(%3) : (tensor<10xf64>) -> tensor<10xf64> -// replace with %4, %5 = "dsp.fft1d"(%3) : (tensor<10xf64>) -> (tensor<10xf64 , tensor<10xf64)> +// label: pass 1st +// Pseudo code: +// if the FFT1DRealOp & FFT1DImgOp has same input then replace them with single +// %4 = "dsp.fft1dreal"(%3) : (tensor<10xf64>) -> tensor<10xf64> +// %5 = "dsp.fft1dimg"(%3) : (tensor<10xf64>) -> tensor<10xf64> +// replace with %4, %5 = "dsp.fft1d"(%3) : (tensor<10xf64>) -> (tensor<10xf64 , +// tensor<10xf64)> // -// Define the canonicalization pattern. +// Define the canonicalization pattern. 
struct SimplifyFFTRealAndImg : public OpRewritePattern { SimplifyFFTRealAndImg(MLIRContext *context) : OpRewritePattern(context, /*benefit=*/1) {} @@ -464,14 +627,15 @@ struct SimplifyFFTRealAndImg : public OpRewritePattern { if (!nextOp || !isa(nextOp)) return failure(); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); auto imgOp = cast(nextOp); if (realOp.getInput() != imgOp.getInput()) return failure(); // Replace the two operations with the combined FFT1D operation. - DEBUG_PRINT_NO_ARGS() ; - auto combinedOp = rewriter.create(realOp.getLoc(), realOp.getInput()); + DEBUG_PRINT_NO_ARGS(); + auto combinedOp = + rewriter.create(realOp.getLoc(), realOp.getInput()); rewriter.replaceOp(realOp, combinedOp.getResult(0)); rewriter.replaceOp(imgOp, combinedOp.getResult(1)); @@ -479,15 +643,16 @@ struct SimplifyFFTRealAndImg : public OpRewritePattern { } }; - -//Pseudo-Code -//Find FIRFilterResponse & reverseInput - // %1 = "dsp.reverseInput"(%0) : (tensor<4xf64>) -> tensor<*xf64> - // %2 = "dsp.FIRFilterResponse"(%0, %1) : (tensor<4xf64>, tensor<*xf64>) -> tensor<*xf64> -// For above pattern , replace dsp.FIRFilterResponse with FIRFilterYSymmOptimized - // %1 = "dsp.reverseInput"(%0) - // result2 = dsp.FIRFilterYSymmOptimized(result1, rate2) -struct SimplifyFilterRespX_ReverseXYSymmFilter : public mlir::OpRewritePattern { +// Pseudo-Code +// Find FIRFilterResponse & reverseInput +// %1 = "dsp.reverseInput"(%0) : (tensor<4xf64>) -> tensor<*xf64> +// %2 = "dsp.FIRFilterResponse"(%0, %1) : (tensor<4xf64>, tensor<*xf64>) -> +// tensor<*xf64> +// For above pattern , replace dsp.FIRFilterResponse with +// FIRFilterYSymmOptimized %1 = "dsp.reverseInput"(%0) result2 = +// dsp.FIRFilterYSymmOptimized(result1, rate2) +struct SimplifyFilterRespX_ReverseXYSymmFilter + : public mlir::OpRewritePattern { /// We register this pattern to match every dsp.downsampling in the IR. 
/// The "benefit" is used by the framework to order the patterns and process /// them in order of profitability. @@ -501,33 +666,36 @@ struct SimplifyFilterRespX_ReverseXYSymmFilter : public mlir::OpRewritePattern(); + dsp::ReverseInputOp prev_ReverseOp = + Operand1_forFIRFilterResp.getDefiningOp(); // Operand1 defined by another ReverseOp? If not, no match. - if (!prev_ReverseOp){ + if (!prev_ReverseOp) { return failure(); } // create FIRFilterYSymmOptimizedOp with current operands - DEBUG_PRINT_WITH_ARGS("Going for FIRFilterResponse Opt when the operand1 is a ReverseInputOp"); - - auto firFilterResYSymmOptimizedOp = rewriter.create(op.getLoc(), - Operand0_forFIRFilterResp , Operand1_forFIRFilterResp); + DEBUG_PRINT_WITH_ARGS("Going for FIRFilterResponse Opt when the operand1 " + "is a ReverseInputOp"); - DEBUG_PRINT_NO_ARGS() ; + auto firFilterResYSymmOptimizedOp = + rewriter.create( + op.getLoc(), Operand0_forFIRFilterResp, Operand1_forFIRFilterResp); + + DEBUG_PRINT_NO_ARGS(); rewriter.replaceOp(op, firFilterResYSymmOptimizedOp); return mlir::success(); } }; -//Pseudo code: -// if the input of FFT1DRealOp = FIRFilterYSymmOptimizedOp then replace it with FFT1DRealSymmOp -// Define the canonicalization pattern. +// Pseudo code: +// if the input of FFT1DRealOp = FIRFilterYSymmOptimizedOp then replace it +// with FFT1DRealSymmOp Define the canonicalization pattern. struct SimplifyFFTRealAtInputRealSymm : public OpRewritePattern { SimplifyFFTRealAtInputRealSymm(MLIRContext *context) : OpRewritePattern(context, /*benefit=*/1) {} @@ -536,27 +704,28 @@ struct SimplifyFFTRealAtInputRealSymm : public OpRewritePattern { PatternRewriter &rewriter) const override { // Check if there is a corresponding FFT1DImgOp with the same input. 
mlir::Value fftOperand_input = Op.getInput(); - dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = fftOperand_input.getDefiningOp(); - + dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = + fftOperand_input.getDefiningOp(); + if (!op_FIRFilterYSymmOptimizedOp) return failure(); - DEBUG_PRINT_NO_ARGS() ; - + DEBUG_PRINT_NO_ARGS(); // Replace the two operations with the combined FFT1D operation. - auto fft1dRealSymmOp = rewriter.create(Op.getLoc(), Op.getInput()); - DEBUG_PRINT_NO_ARGS() ; - rewriter.replaceOp(Op, fft1dRealSymmOp.getResult()); - // rewriter.replaceOp(Op, fft1dRealSymmOp); - DEBUG_PRINT_NO_ARGS() ; + auto fft1dRealSymmOp = + rewriter.create(Op.getLoc(), Op.getInput()); + DEBUG_PRINT_NO_ARGS(); + // rewriter.replaceOp(Op, fft1dRealSymmOp.getResult()); + rewriter.replaceOp(Op, fft1dRealSymmOp); + DEBUG_PRINT_NO_ARGS(); return success(); } }; -//Pseudo code: -// if the input of FFT1DImgOp = FIRFilterYSymmOptimizedOp then replace it with FFT1DImgConjSymmOp -// Define the canonicalization pattern. +// Pseudo code: +// if the input of FFT1DImgOp = FIRFilterYSymmOptimizedOp then replace it with +// FFT1DImgConjSymmOp Define the canonicalization pattern. struct SimplifyFFTImgAtInputRealSymm : public OpRewritePattern { SimplifyFFTImgAtInputRealSymm(MLIRContext *context) : OpRewritePattern(context, /*benefit=*/1) {} @@ -565,64 +734,696 @@ struct SimplifyFFTImgAtInputRealSymm : public OpRewritePattern { PatternRewriter &rewriter) const override { // Check if there is a corresponding FFT1DImgOp with the same input. mlir::Value fftOperand_input = Op.getInput(); - dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = fftOperand_input.getDefiningOp(); - + dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = + fftOperand_input.getDefiningOp(); + if (!op_FIRFilterYSymmOptimizedOp) return failure(); - DEBUG_PRINT_NO_ARGS() ; - + DEBUG_PRINT_NO_ARGS(); // Replace the two operations with the combined FFT1D operation. 
- - auto fft1dImgConjSymmOp = rewriter.create(Op.getLoc(), Op.getInput()); - DEBUG_PRINT_NO_ARGS() ; + + auto fft1dImgConjSymmOp = + rewriter.create(Op.getLoc(), Op.getInput()); + DEBUG_PRINT_NO_ARGS(); // rewriter.replaceOp(Op, fft1dImgConjSymmOp.getResult()); rewriter.replaceOp(Op, fft1dImgConjSymmOp); - DEBUG_PRINT_NO_ARGS() ; + DEBUG_PRINT_NO_ARGS(); return success(); } }; +// Pseudo-Code +// Find lmsFIlter with gain operation +// result1 = lmsFilter(noisy_sig, clean_sig, mu, filterSize, iter); +// result2 = gain(result1, G1) +// result2 will be now lmsFilter(noisy_sig, clean_sig, mu*g1, +// filterSize, iter); replaceOp +struct SimplifyLMSFilterwithGain + : public mlir::OpRewritePattern { + SimplifyLMSFilterwithGain(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override { + + mlir::Value gainOp_operand0 = op.getOperand(0); + + LMSFilterOp prev_LMSFilterOp = + gainOp_operand0.getDefiningOp(); + + if (!prev_LMSFilterOp) + return failure(); + + mlir::Value gainOp_operand1 = op.getOperand(1); + mlir::Value prev_LMSFilterOp_0 = + prev_LMSFilterOp.getOperand(0); + mlir::Value prev_LMSFilterOp_1 = + prev_LMSFilterOp.getOperand(1); + mlir::Value prev_LMSFilterOp_mu = + prev_LMSFilterOp.getOperand(2); + mlir::Value prev_LMSFilterOp_3 = + prev_LMSFilterOp.getOperand(3); + mlir::Value prev_LMSFilterOp_4 = + prev_LMSFilterOp.getOperand(4); + + // create mul op + auto mulOp = rewriter.create( + op.getLoc(), prev_LMSFilterOp_mu, gainOp_operand1); + auto newLMSFilterOp = rewriter.create( + op.getLoc(), prev_LMSFilterOp_0, prev_LMSFilterOp_1, + mulOp.getResult(), prev_LMSFilterOp_3, prev_LMSFilterOp_4); + + // Repalce the use of original gain operation with this newGainOp + rewriter.replaceOp(op, newLMSFilterOp.getResult()); + return mlir::success(); + } +}; + + + + + + + + +// Pseudo-Code +// Find lmsFIlterResponse with gain operation +// result1 = 
lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); +// result2 = gain(result1, G1) +// result2 will be now lmsFilterResponse(noisy_sig, clean_sig, mu*g1, +// filterSize); replaceOp +struct SimplifyLMSFilterResponsewithGain + : public mlir::OpRewritePattern { + SimplifyLMSFilterResponsewithGain(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override { + + mlir::Value gainOp_operand0 = op.getOperand(0); + + LMSFilterResponseOp prev_LMSFilterResponseOp = + gainOp_operand0.getDefiningOp(); + + if (!prev_LMSFilterResponseOp) + return failure(); + + mlir::Value gainOp_operand1 = op.getOperand(1); + mlir::Value prev_LMSFilterResponseOp_0 = + prev_LMSFilterResponseOp.getOperand(0); + mlir::Value prev_LMSFilterResponseOp_1 = + prev_LMSFilterResponseOp.getOperand(1); + mlir::Value prev_LMSFilterResponseOp_2 = + prev_LMSFilterResponseOp.getOperand(2); + mlir::Value prev_LMSFilterResponseOp_3 = + prev_LMSFilterResponseOp.getOperand(3); + + auto OptimizedOp = rewriter.create( + op.getLoc(), prev_LMSFilterResponseOp_0, prev_LMSFilterResponseOp_1, + prev_LMSFilterResponseOp_2, prev_LMSFilterResponseOp_3, gainOp_operand1); + + // Repalce the use of original gain operation with this newGainOp + rewriter.replaceOp(op, OptimizedOp.getResult()); + return mlir::success(); + } +}; -//Pseudo-Code -//Find lmsFIlter with gain operation - // result1 = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize); - // result2 = gain(result1, G1) - // result2 will be now lmsFilterResponse(noisy_sig, clean_sig, mu*g1, filterSize); - // replaceOp -struct SimplifyLMSFilterResponsewithGain: public mlir::OpRewritePattern{ - SimplifyLMSFilterResponsewithGain(mlir::MLIRContext *context) - : OpRewritePattern(context, 1) {} - - mlir::LogicalResult matchAndRewrite(GainOp op, - mlir::PatternRewriter &rewriter) const override { - - mlir::Value gainOp_operand0 = op.getOperand(0); - - 
LMSFilterResponseOp prev_LMSFilterResponseOp = gainOp_operand0.getDefiningOp(); - - if(!prev_LMSFilterResponseOp) - return failure(); - mlir::Value gainOp_operand1 = op.getOperand(1); - mlir::Value prev_LMSFilterResponseOp_0 = prev_LMSFilterResponseOp.getOperand(0); - mlir::Value prev_LMSFilterResponseOp_1 = prev_LMSFilterResponseOp.getOperand(1); - mlir::Value prev_LMSFilterResponseOp_mu = prev_LMSFilterResponseOp.getOperand(2); - mlir::Value prev_LMSFilterResponseOp_3 = prev_LMSFilterResponseOp.getOperand(3); - - //create mul op - auto mulOp = rewriter.create(op.getLoc(), prev_LMSFilterResponseOp_mu, gainOp_operand1); - auto newLMSFilterResponseOp = rewriter.create(op.getLoc(), - prev_LMSFilterResponseOp_0, prev_LMSFilterResponseOp_1, mulOp.getResult(), prev_LMSFilterResponseOp_3); - - //Repalce the use of original gain operation with this newGainOp - rewriter.replaceOp(op, newLMSFilterResponseOp.getResult()); + + + + + + + + +struct SimplifySpaceModDemodulate + : public mlir::OpRewritePattern { + SimplifySpaceModDemodulate(mlir::MLIRContext *context) + : OpRewritePattern(context, 1) {} + + mlir::LogicalResult + matchAndRewrite(SpaceDemodulateOp op, + mlir::PatternRewriter &rewriter) const override { + + // a flag checking if the define operation chain of demod op contains mod op + bool opt = false; + SpaceModulateOp prev_mod; + auto iter = op.getOperand(); + while (iter.getDefiningOp()) { + auto pred = iter.getDefiningOp(); + // llvm::errs() << pred->getName().getStringRef() << "\n"; + if (llvm::dyn_cast(*pred)) { + opt = true; + prev_mod = llvm::dyn_cast(*pred); + break; + } + iter = (*pred).getOperand(0); + } + + if (!opt) + return failure(); + + auto constVal = prev_mod.getOperand().getDefiningOp(); + rewriter.replaceOp(op, constVal); return mlir::success(); + } +}; + +struct SimplifyNormLMSFilterResponse + : public mlir::OpRewritePattern { + SimplifyNormLMSFilterResponse(mlir::MLIRContext *ctx) + : OpRewritePattern(ctx, 1) {} + + mlir::LogicalResult + 
matchAndRewrite(NormalizeOp op, + mlir::PatternRewriter &rewriter) const override { + + Value signal = op.getOperand(); + Operation *filterOp = signal.getDefiningOp(); + + if (!filterOp) + return failure(); + + Value filterOp_operand0 = filterOp->getOperand(0); + Value filterOp_operand1 = filterOp->getOperand(1); + Value filterOp_operand2 = filterOp->getOperand(2); + Value filterOp_operand3 = filterOp->getOperand(3); + auto normLMSfilterOpt = rewriter.create( + op.getLoc(), filterOp_operand0, filterOp_operand1, filterOp_operand2, + filterOp_operand3); + + rewriter.replaceOp(op, normLMSfilterOpt); + if (filterOp->use_empty()) { + rewriter.eraseOp(filterOp); } + + return mlir::success(); + } +}; + +struct SimplifyDSSDPass : public mlir::OpRewritePattern { + SimplifyDSSDPass(mlir::MLIRContext *ctx) : OpRewritePattern(ctx, 1) {} + + mlir::LogicalResult + matchAndRewrite(DivOp op, mlir::PatternRewriter &rewriter) const override { + +#define CHECK(x) \ + if (!x) \ + return failure(); +#define REMOVE(x) \ + if (x->use_empty()) \ + rewriter.eraseOp(x); +#define DEBUG(x) \ + { llvm::errs() << "check for " << x << "\n"; } +#define PASS llvm::errs() << "pass\n"; + + auto loc = op.getLoc(); + + // pattern -> CHECK() + Operation *sumOp = op.getOperand(0).getDefiningOp(); + CHECK(sumOp); + + Operation *addOp = sumOp->getOperand(0).getDefiningOp(); + CHECK(addOp); + + Operation *sqrtOp0 = addOp->getOperand(0).getDefiningOp(); + CHECK(sqrtOp0); + + Operation *sqrtOp1 = addOp->getOperand(1).getDefiningOp(); + CHECK(sqrtOp1); + + Operation *fftRealOp = sqrtOp0->getOperand(0).getDefiningOp(); + CHECK(fftRealOp); + + // See defining op: suppose to be fftImg, but modified beforhand by