diff --git a/.gitignore b/.gitignore
index a68b971cdb5e..ba8dc4a2f44e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,7 @@
 # Explicit files to ignore (only matches one).
 #==============================================================================#
 # Various tag programs
+tags
 /tags
 /TAGS
 /GPATH
@@ -73,3 +74,13 @@ pythonenv*
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
 mlir_opt_helper.txt
+mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/Output/*
+mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/Output/*
+mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/Output/*
+mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Output/*
+# csv files
+*.csv
+/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/LClanglogs
+/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HClanglogs
+/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ServerExeLogs
+/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ServerExeLogs
diff --git a/LICENSE.TXT b/LICENSE.TXT
index fa6ac5400070..3901fa02e64f 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -1,3 +1,219 @@
+==============================================================================
+Dual License
+==============================================================================
+
+The DSP-MLIR project is dual-licensed under:
+
+1. The DSP-MLIR Apache License, Version 2.0  
+2. The standard LLVM Project License: Apache License v2.0 with LLVM Exceptions
+==============================================================================
+The DSP-MLIR Apache License, Version 2.0:
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
 ==============================================================================
 The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
 ==============================================================================
diff --git a/README.md b/README.md
index a9b29ecbc1a3..bba0b2efcf8c 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,47 @@
-# The LLVM Compiler Infrastructure
+# DSP-MLIR Compiler
 
-[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/llvm/llvm-project/badge)](https://securityscorecards.dev/viewer/?uri=github.com/llvm/llvm-project)
-[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8273/badge)](https://www.bestpractices.dev/projects/8273)
-[![libc++](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml/badge.svg?branch=main&event=schedule)](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml?query=event%3Aschedule)
+This repository contains the source code for **DSP-MLIR**, a compiler tailored for Digital Signal Processing (DSP) applications. It provides highly optimized tools and environments for building, optimizing, and running DSP operations like Fast Fourier Transforms (FFT), Finite Impulse Response (FIR) filters, and more.
 
-Welcome to the LLVM project!
+The project is built on top of the **LLVM** infrastructure and leverages the **MLIR** (Multi-Level Intermediate Representation) framework for implementing DSP-specific operations and transformations.
 
-This repository contains the source code for LLVM, a toolkit for the
-construction of highly optimized compilers, optimizers, and run-time
-environments.
 
-The LLVM project has multiple components. The core of the project is
-itself called "LLVM". This contains all of the tools, libraries, and header
-files needed to process intermediate representations and convert them into
-object files. Tools include an assembler, disassembler, bitcode analyzer, and
-bitcode optimizer.
 
-C-like languages use the [Clang](https://clang.llvm.org/) frontend. This
-component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode
--- and from there into object files, using LLVM.
 
-Other components include:
-the [libc++ C++ standard library](https://libcxx.llvm.org),
-the [LLD linker](https://lld.llvm.org), and more.
+## Build Instructions
 
-## Getting the Source Code and Building LLVM
+To build the DSP-MLIR compiler, follow these steps:
 
-Consult the
-[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm)
-page for information on building and running LLVM.
+### Step 1: Clone this repository and cd into the DSP-MLIR folder.
 
-For information on how to contribute to the LLVM project, please take a look at
-the [Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide.
 
-## Getting in touch
+### Step 2: Create the build directory and change into it:
 
-Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord
-chat](https://discord.gg/xS7Z362),
-[LLVM Office Hours](https://llvm.org/docs/GettingInvolved.html#office-hours) or
-[Regular sync-ups](https://llvm.org/docs/GettingInvolved.html#online-sync-ups).
+```bash
+mkdir build
+cd build
+
+```
+### Step 3: Configure the build with CMake:
+```bash
+cmake -G Ninja ../llvm \
+   -DLLVM_ENABLE_PROJECTS=mlir \
+   -DLLVM_BUILD_EXAMPLES=ON \
+   -DLLVM_TARGETS_TO_BUILD="Native" \
+   -DCMAKE_BUILD_TYPE=Release \
+   -DLLVM_ENABLE_ASSERTIONS=ON
+```
+
+### Step 4: After configuring the build, compile the project by running:
+```bash
+ninja
+```
+
+## Running an Example
+
+After the build completes, you can run an example to test the DSP operations. From the build directory:
+
+```bash
+ninja && ./bin/dsp1 ../mlir/test/Examples/DspExample/dsp_gain_op.py -emit=mlir-affine
+ninja && ./bin/dsp1 ../mlir/test/Examples/DspExample/dsp_gain_op.py -emit=jit
+```
 
-The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for
-participants to all modes of communication within the project.
diff --git a/matmul_test/dsp_matmul.py b/matmul_test/dsp_matmul.py
new file mode 100644
index 000000000000..0c866fd2dc4c
--- /dev/null
+++ b/matmul_test/dsp_matmul.py
@@ -0,0 +1,12 @@
+def main() {
+  var x = [[1.0, 2.0], [4.0, 5.0]];
+  var y = [[1.0, 2.0], [4.0, 5.0]];
+  var z = matmul(x, y);
+  print(z);
+
+  # Non-square case: a 2x3 matrix multiplied by a 3x2 matrix.
+  var x2 = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
+  var y2 = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];  
+  var z2 = matmul(x2, y2);
+  print(z2);  
+}
diff --git a/mlir/.gitignore b/mlir/.gitignore
new file mode 100644
index 000000000000..d61db156e85f
--- /dev/null
+++ b/mlir/.gitignore
@@ -0,0 +1,2 @@
+/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/logs
+/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab_Functions
diff --git a/mlir/examples/dsp/CMakeLists.txt b/mlir/examples/dsp/CMakeLists.txt
index 9d2fc3fb3b85..10092b7c3658 100644
--- a/mlir/examples/dsp/CMakeLists.txt
+++ b/mlir/examples/dsp/CMakeLists.txt
@@ -1,11 +1,11 @@
-add_custom_target(Dsp)
-set_target_properties(Dsp PROPERTIES FOLDER Examples)
+# add_custom_target(Dsp)
+# set_target_properties(Dsp PROPERTIES FOLDER Examples)
 
-macro(add_dsp_chapter name)
-  add_dependencies(Dsp ${name})
-  add_llvm_example(${name} ${ARGN})
-endmacro(add_dsp_chapter name)
+# macro(add_dsp_chapter name)
+#   add_dependencies(Dsp ${name})
+#   add_llvm_example(${name} ${ARGN})
+# endmacro(add_dsp_chapter name)
 
 
-add_subdirectory(SimpleBlocks)
+# add_subdirectory(SimpleBlocks)
 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c
new file mode 100644
index 000000000000..e2361143850d
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/FIRFilterDesign.c
@@ -0,0 +1,134 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Number of filter taps. ResultScript.py rewrites this line for every benchmark size. */
+#define INPUT_LENGTH 101
+/* M_PI is not part of ISO C; supply it when <math.h> does not. */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#define PI M_PI
+#define FS 8000
+#define FC1 500
+#define FC2 600
+#define FC3 1000
+#define FC4 1200
+
+/* main() reads taps 6, 7 and 8, so anything shorter would read out of bounds. */
+#if INPUT_LENGTH < 9
+#error "INPUT_LENGTH must be at least 9"
+#endif
+
+/* Allocates `length` doubles; on failure reports `message` via perror and exits. */
+static double *allocDoubles(int length, const char *message) {
+    double *buffer = malloc((size_t)length * sizeof(double));
+    if (!buffer) {
+        perror(message);
+        exit(EXIT_FAILURE);
+    }
+    return buffer;
+}
+
+/*
+ * Returns a newly allocated Hamming window of `length` samples:
+ *   w[i] = 0.54 - 0.46 * cos(2*pi*i / (length - 1)).
+ * The caller owns the buffer and must free() it.
+ */
+double *hamming(int length) {
+    double *window = allocDoubles(length, "Memory allocation failed in hamming");
+    if (length == 1) {
+        /* The formula divides by (length - 1); a one-sample window is just 1. */
+        window[0] = 1.0;
+        return window;
+    }
+    for (int i = 0; i < length; i++) {
+        window[i] = 0.54 - 0.46 * cos(2 * PI * i / (length - 1));
+    }
+    return window;
+}
+
+/*
+ * Returns a newly allocated `length`-tap high-pass impulse response for the
+ * cutoff `wc` (radians/sample): 1 - wc/pi at the centre tap and
+ * -sin(wc*k) / (pi*k) at offset k from it. The caller must free() the result.
+ * NOTE(review): the centre index is (length - 1) / 2 in integer arithmetic, so
+ * for even lengths the taps are not symmetric -- confirm this is intended.
+ */
+double *highPassFIRFilter(double wc, int length) {
+    double *filter = allocDoubles(length, "Memory allocation failed in highPassFIRFilter");
+    int mid = (length - 1) / 2;
+    for (int n = 0; n < length; n++) {
+        if (n == mid) {
+            filter[n] = 1 - (wc / PI);
+        } else {
+            filter[n] = -sin(wc * (n - mid)) / (PI * (n - mid));
+        }
+    }
+    return filter;
+}
+
+/* Writes array1[i] * array2[i] into output[i] for the first `length` elements. */
+void elementWiseMultiplication(double *output, const double *array1, const double *array2, int length) {
+    for (int i = 0; i < length; i++) {
+        output[i] = array1[i] * array2[i];
+    }
+}
+
+/* Returns array[index]; no bounds checking is performed. */
+double getElemAtIndx(const double *array, int index) {
+    return array[index];
+}
+
+/* Returns a newly allocated element-wise product of `filter` and `window`. */
+static double *applyWindow(const double *filter, const double *window, int length) {
+    double *windowed = allocDoubles(length, "Memory allocation failed in applyWindow");
+    elementWiseMultiplication(windowed, filter, window, length);
+    return windowed;
+}
+
+/*
+ * Builds four Hamming-windowed high-pass filters (cutoffs FC1..FC4 at sample
+ * rate FS) and prints one tap from each of the first three.
+ */
+int main(void) {
+    const double wc1 = 2 * PI * FC1 / FS;
+    const double wc2 = 2 * PI * FC2 / FS;
+    const double wc3 = 2 * PI * FC3 / FS;
+    const double wc4 = 2 * PI * FC4 / FS;
+
+    double *hamming_window = hamming(INPUT_LENGTH);
+
+    double *hpf1 = highPassFIRFilter(wc1, INPUT_LENGTH);
+    double *hpf_w1 = applyWindow(hpf1, hamming_window, INPUT_LENGTH);
+
+    double *hpf2 = highPassFIRFilter(wc2, INPUT_LENGTH);
+    double *hpf_w2 = applyWindow(hpf2, hamming_window, INPUT_LENGTH);
+
+    double *hpf3 = highPassFIRFilter(wc3, INPUT_LENGTH);
+    double *hpf_w3 = applyWindow(hpf3, hamming_window, INPUT_LENGTH);
+
+    double *hpf4 = highPassFIRFilter(wc4, INPUT_LENGTH);
+    double *hpf_w4 = applyWindow(hpf4, hamming_window, INPUT_LENGTH);
+
+    double final1 = getElemAtIndx(hpf_w1, 6);
+    double final2 = getElemAtIndx(hpf_w2, 7);
+    double final3 = getElemAtIndx(hpf_w3, 8);
+    /* NOTE(review): hpf_w4 is computed but never read -- confirm whether a fourth output is expected. */
+
+    printf("%f\n", final1);
+    printf("%f\n", final2);
+    printf("%f\n", final3);
+
+    free(hamming_window);
+    free(hpf1);
+    free(hpf2);
+    free(hpf3);
+    free(hpf4);
+    free(hpf_w1);
+    free(hpf_w2);
+    free(hpf_w3);
+    free(hpf_w4);
+
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py
new file mode 100644
index 000000000000..9cd5fc3e02a7
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/ResultScript.py
@@ -0,0 +1,216 @@
+"""Benchmark driver for one C application in this directory.
+
+Usage (validated with Python 3.11), from any directory:
+    python ResultScript.py <application>.c [output-subdirectory]
+
+For every input size configured for the application the script rewrites the
+`#define INPUT_LENGTH` line of the source, builds it with gcc -O3 and with the
+in-tree clang-19 -O3, runs each executable NoOfIterations times pinned to CPU 0
+(dropping the page cache before every run) and prints a tab-separated table of
+average wall-clock times in seconds. The source file is restored afterwards.
+"""
+import math
+import os
+import subprocess
+import sys
+import time
+
+if len(sys.argv) < 2:
+    sys.exit(f"usage: {sys.argv[0]} <application>.c [output-subdirectory]")
+
+input_file_name = sys.argv[1]
+# Optional: name of the sub-directory of Output/ that receives the executables.
+output_subdir = sys.argv[2] if len(sys.argv) > 2 else ""
+
+# Location of this directory relative to the checkout root.
+OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/"
+# The checkout root is taken from DSP_MLIR_ROOT when set; otherwise it is found
+# by walking up from this file, one level per component of OutputScriptPath.
+_levels_up = [os.pardir] * len(OutputScriptPath.strip("/").split("/"))
+BasePathForLLVM = os.environ.get("DSP_MLIR_ROOT") or os.path.abspath(
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), *_levels_up)
+)
+input_file_path = os.path.join(BasePathForLLVM, OutputScriptPath, input_file_name)
+
+print(f"Running Application {input_file_path}")
+
+OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+if output_subdir:
+    OutputPath = os.path.join(OutputPath, output_subdir)
+os.makedirs(OutputPath, exist_ok=True)
+
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+
+# Label -> element count for every size used by any benchmark.
+SIZE_OF = {
+    "10": 10,
+    "100": 100,
+    "500": 500,
+    "1K": 1000,
+    "2K": 2000,
+    "5K": 5000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+    "10M": 10000000,
+    "20M": 20000000,
+    "30M": 30000000,
+    "40M": 40000000,
+    "50M": 50000000,
+    "100M": 100000000,
+}
+
+# Sizes, in run order, for applications without an entry in SIZE_RANGE.
+DEFAULT_LABELS = [
+    "10", "100", "500", "1K", "2K", "5K", "10K", "20K", "30K", "40K", "50K", "100K", "1M",
+]
+
+# Ascending ladder that every per-application range below is cut from.
+LADDER = [
+    "10", "100", "1K", "10K", "20K", "30K", "40K", "50K", "100K", "1M",
+    "10M", "20M", "30M", "40M", "50M", "100M",
+]
+
+# Application -> (smallest, largest) label on LADDER, both inclusive.
+SIZE_RANGE = {
+    "noiseCancellation.c": ("10", "100M"),
+    "echoCancellation.c": ("10", "100M"),
+    "periodogram.c": ("10", "20K"),
+    "lowPassFiltering.c": ("10", "100M"),
+    "hearingAid.c": ("10", "100M"),
+    "FIRFilterDesign.c": ("100", "100M"),
+    "spectralAnalysis.c": ("10", "30K"),
+    "audioEqualization.c": ("10", "100M"),
+    "audioCompression.c": ("10", "40K"),
+    "vibrationAnalysis.c": ("10", "50K"),
+    "underWaterCommunication.c": ("100", "100M"),
+    "voiceActivityDetection.c": ("10", "100M"),
+    "signalSmoothing.c": ("100", "100M"),
+    "targetDetection.c": ("10", "100M"),
+    "biomedicalSignalProcessing.c": ("10", "100M"),
+    "digitalModulation.c": ("100", "100M"),
+    "spaceCommunication.c": ("100", "100M"),
+    "radarSignalProcessing.c": ("10", "100M"),
+    "dtmfDetection.c": ("100", "50K"),
+    "speakerIdentification.c": ("100", "1M"),
+}
+
+
+def select_input_values(app):
+    """Return the ordered {label: element count} mapping benchmarked for `app`."""
+    labels = DEFAULT_LABELS
+    if app in SIZE_RANGE:
+        first, last = SIZE_RANGE[app]
+        labels = LADDER[LADDER.index(first) : LADDER.index(last) + 1]
+    return {label: SIZE_OF[label] for label in labels}
+
+
+def input_length_for(app, value):
+    """Return the INPUT_LENGTH written into the source for a requested size."""
+    # speakerIdentification.c is configured with the size divided by 8.192.
+    if app == "speakerIdentification.c":
+        return math.floor(value / 8.192)
+    return value
+
+
+def write_source(lines, length):
+    """Rewrite the source file with its `#define INPUT_LENGTH` line set to `length`."""
+    with open(input_file_path, "w") as file:
+        for line in lines:
+            if line.strip().startswith("#define INPUT_LENGTH"):
+                file.write(f"#define INPUT_LENGTH {length}\n")
+            else:
+                file.write(line)
+
+
+def compile_case(case, exe_path):
+    """Build the source with the case's compiler; return True on success."""
+    command = [case["compiler"], "-O3", input_file_path, "-o", exe_path, "-lm"]
+    try:
+        build = subprocess.run(command, capture_output=True, text=True)
+    except OSError as exc:
+        print(exc, file=sys.stderr)
+        return False
+    if build.returncode != 0:
+        print(build.stderr, file=sys.stderr)
+    return build.returncode == 0
+
+
+def drop_page_cache():
+    """Flush and drop the kernel page cache (needs sudo); failures are only reported."""
+    try:
+        subprocess.run(
+            ["sudo", "sh", "-c", "sync; echo 3 > /proc/sys/vm/drop_caches"],
+            check=True,
+        )
+    except (subprocess.CalledProcessError, OSError) as exc:
+        print(exc)
+
+
+def timed_run(exe_path):
+    """Run the executable pinned to CPU 0 and return the elapsed seconds."""
+    command = ["taskset", "-c", "0", exe_path]
+    start_time = time.perf_counter()
+    try:
+        subprocess.run(
+            command,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            check=True,
+        )
+    except subprocess.CalledProcessError as exc:
+        print(
+            f"Process failed because did not return a successful return code. "
+            f"Returned {exc.returncode}\n{exc}"
+        )
+    except OSError as exc:
+        print(exc)
+    return time.perf_counter() - start_time
+
+
+inputValues = select_input_values(input_file_name)
+NoOfIterations = 3
+
+# Compilers to compare; "exe" is the name of the executable each one produces.
+cases = [
+    {"exe": "fileGCCOptExe", "compiler": "gcc"},
+    {
+        "exe": "fileClangOptExe",
+        "compiler": os.path.join(BasePathForLLVM, "build", "bin", "clang-19"),
+    },
+]
+
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+
+# Header row: an empty first cell followed by one column per compiler.
+print("", end="\t")
+for case in cases:
+    print(f"{case['exe']}", end="\t")
+
+try:
+    for key, value in inputValues.items():
+        print("\n{}".format(key), end="\t")
+        write_source(lines, input_length_for(input_file_name, value))
+
+        for case in cases:
+            exe_path = os.path.join(OutputPath, case["exe"])
+            if not compile_case(case, exe_path):
+                # Never time a stale or missing executable.
+                print("compile-failed", end="\t")
+                continue
+
+            sum_exe_time = 0
+            for _ in range(NoOfIterations):
+                drop_page_cache()
+                sum_exe_time += timed_run(exe_path)
+            print("{}".format(sum_exe_time / NoOfIterations), end="\t")
+finally:
+    # Put the original source back so a benchmark run leaves the checkout unchanged.
+    with open(input_file_path, "w") as file:
+        file.writelines(lines)
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py
new file mode 100644
index 000000000000..d9a30f95a8b5
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/RunResults.py
@@ -0,0 +1,49 @@
+import subprocess
+import os
+
+# Ensure the log directory exists
+log_dir = "ServerExeLogs"
+os.makedirs(log_dir, exist_ok=True)
+
+# Corrected list of application names (without non-application entries)
+app_names = [
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection",
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication"
+]
+
+# Run ResultScript.py once per application (arguments: C source file name and
+# application name); echo its output and record it in <log_dir>/<app_name>.log.
+for app_name in app_names:
+    app_script = f"{app_name}.c"
+    log_file = os.path.join(log_dir, f"{app_name}.log")
+    with open(log_file, "w") as log:
+        command = ["python", "ResultScript.py", app_script, app_name]
+        print(f"Running command: {' '.join(command)}")
+        # stderr is merged into stdout: draining two separate pipes one after the
+        # other can deadlock once the child fills the pipe that is not being read.
+        process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        )
+        for line in process.stdout:
+            print(line, end="")
+            log.write(line)
+        process.wait()
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioCompression.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioCompression.c
new file mode 100644
index 000000000000..f8239d55d35b
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioCompression.c
@@ -0,0 +1,143 @@
+#include <complex.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define INPUT_LENGTH 10
+#define NLEVELS 16
+#define MIN 0.0
+#define MAX 8.0
+#define THRESHOLD_VAL 4.0
+
+/* Allocate noOfSamples doubles holding start, start + increment, ...; exits on
+ * allocation failure. The caller owns (and must free) the returned buffer. */
+double *getRangeOfVector(double start, int noOfSamples, double increment) {
+  double *ramp = malloc(noOfSamples * sizeof(double));
+  if (ramp == NULL) {
+    perror("Memory allocation failed in getRangeOfVector");
+    exit(EXIT_FAILURE);
+  }
+
+  for (int idx = 0; idx < noOfSamples; ++idx)
+    ramp[idx] = start + idx * increment;
+  return ramp;
+}
+
+/* Naive O(length^2) DFT of a real signal:
+ * output[k] = sum over n of input[n] * exp(-i * 2*pi*k*n / length). */
+void dft(double complex *output, const double *input, int length) {
+  for (int bin = 0; bin < length; ++bin) {
+    output[bin] = 0;
+    for (int s = 0; s < length; ++s)
+      output[bin] += input[s] * cexp(-I * (2 * M_PI * bin * s / length));
+  }
+}
+
+/* Copy input to output, zeroing samples whose magnitude is not >= thresh. */
+void threshold(double *output, const double *input, double thresh, int length) {
+  for (int idx = 0; idx < length; ++idx)
+    output[idx] = !(fabs(input[idx]) >= thresh) ? 0 : input[idx];
+}
+
+/* Snap each sample to min + j * stepSize, j truncated toward zero; no clamping. */
+void quantization(double *output, const double *input, int nlevels, double max,
+                  double min, int length) {
+  double stepSize = (max - min) / nlevels;
+  for (int i = 0; i < length; ++i) {
+    // Signed cast: converting a negative double to unsigned int is undefined.
+    int roundedLevel = (int)((input[i] - min) / stepSize);
+    output[i] = roundedLevel * stepSize + min;
+  }
+}
+
+/* Run-length encodes the first length / 2 samples of input: run values are
+ * written to output[0..], and the count of run r to output[length / 2 + r].
+ * Slots that no run reaches are left unwritten. */
+void runLenEncoding(double *output, const double *input, int length) {
+  const int countBase = length / 2; // offset at which run counts are stored
+  int run = 0;                      // index of the run being counted
+  int runLength = 1;
+  output[run] = input[0];
+  for (int pos = 1; pos < countBase; ++pos) {
+    if (input[pos] != input[pos - 1]) {
+      output[run + countBase] = runLength; // close the finished run
+      output[++run] = input[pos];          // open the next one
+      runLength = 1;
+    } else {
+      ++runLength;
+    }
+  }
+  output[run + countBase] = runLength; // count of the final run
+}
+/* Returns rle[indx]; the index is not range-checked. */
+double getElemAtIndx(const double *rle, int indx) { return *(rle + indx); }
+
+/* Audio-compression benchmark: DFT of the ramp 0..INPUT_LENGTH-1, thresholding
+ * (in place) and quantization of the real and imaginary parts, then
+ * run-length encoding. Prints rLEOutReal[0] and rLEOutImg[1]. */
+int main() {
+  int status = EXIT_FAILURE;
+  double complex *fft = NULL;
+  double *GetThresholdReal = NULL, *GetThresholdImg = NULL;
+  double *QuantOutReal = NULL, *QuantOutImg = NULL;
+  double *rLEOutReal = NULL, *rLEOutImg = NULL;
+
+  // getRangeOfVector() exits on allocation failure, so input is never NULL.
+  double *input = getRangeOfVector(0, INPUT_LENGTH, 1);
+
+  fft = malloc(INPUT_LENGTH * sizeof(double complex));
+  if (!fft) {
+    perror("Memory allocation failed");
+    goto cleanup;
+  }
+  dft(fft, input, INPUT_LENGTH);
+
+  GetThresholdReal = malloc(INPUT_LENGTH * sizeof(double));
+  GetThresholdImg = malloc(INPUT_LENGTH * sizeof(double));
+  if (!GetThresholdReal || !GetThresholdImg) {
+    perror("Memory allocation failed");
+    goto cleanup;
+  }
+  for (int i = 0; i < INPUT_LENGTH; i++) {
+    GetThresholdReal[i] = creal(fft[i]);
+    GetThresholdImg[i] = cimag(fft[i]);
+  }
+
+  // Thresholding is done in place (output aliases input).
+  threshold(GetThresholdReal, GetThresholdReal, THRESHOLD_VAL, INPUT_LENGTH);
+  threshold(GetThresholdImg, GetThresholdImg, THRESHOLD_VAL, INPUT_LENGTH);
+
+  QuantOutReal = malloc(INPUT_LENGTH * sizeof(double));
+  QuantOutImg = malloc(INPUT_LENGTH * sizeof(double));
+  if (!QuantOutReal || !QuantOutImg) {
+    perror("Memory allocation failed");
+    goto cleanup;
+  }
+  quantization(QuantOutReal, GetThresholdReal, NLEVELS, MAX, MIN, INPUT_LENGTH);
+  quantization(QuantOutImg, GetThresholdImg, NLEVELS, MAX, MIN, INPUT_LENGTH);
+
+  // Check these like the other buffers before runLenEncoding() writes to them.
+  rLEOutReal = malloc(2 * INPUT_LENGTH * sizeof(double));
+  rLEOutImg = malloc(2 * INPUT_LENGTH * sizeof(double));
+  if (!rLEOutReal || !rLEOutImg) {
+    perror("Memory allocation failed");
+    goto cleanup;
+  }
+  runLenEncoding(rLEOutReal, QuantOutReal, INPUT_LENGTH);
+  runLenEncoding(rLEOutImg, QuantOutImg, INPUT_LENGTH);
+
+  printf("%f\t", getElemAtIndx(rLEOutReal, 0));
+  printf("%f", getElemAtIndx(rLEOutImg, 1));
+  status = 0;
+
+cleanup: // free(NULL) is a no-op, so every failure path can share this exit
+  free(input);
+  free(fft);
+  free(GetThresholdReal);
+  free(GetThresholdImg);
+  free(QuantOutReal);
+  free(QuantOutImg);
+  free(rLEOutReal);
+  free(rLEOutImg);
+  return status;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c
new file mode 100644
index 000000000000..df13d82a8bd5
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/audioEqualization.c
@@ -0,0 +1,229 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 10
+#define FILTER_LENGTH 101
+#define OUTPUT_LENGTH (INPUT_LENGTH + FILTER_LENGTH - 1)
+
+// Function prototypes
+double* getRangeOfVector(double start, int length, double increment);
+double* lowPassFIRFilter(double wc, int length);
+double* highPassFIRFilter(double wc, int length);
+double* hamming(int length);
+void elementWiseMultiplication(double* output, const double* array1, const double* array2, int length);
+void FIRFilterResponse(double* output, const double* input, const double* filter, int inputLength);
+void gain(double* output, const double* input, double gainFactor, int length);
+void add(double* output, const double* input1, const double* input2, int length);
+void sub(double* output, const double* input1, const double* input2, int length);
+void printArray(const double* array, int length);
+double getElementAtIndex(const double* array, int index);
+
+// Returns a malloc'd ramp of `length` values: start, start + increment, ...
+// Exits on allocation failure; the caller frees the result.
+double* getRangeOfVector(double start, int length, double increment) {
+    double* ramp = malloc(length * sizeof(double));
+    if (ramp == NULL) {
+        perror("Memory allocation failed in getRangeOfVector");
+        exit(EXIT_FAILURE);
+    }
+    for (int idx = 0; idx < length; ++idx)
+        ramp[idx] = start + idx * increment;
+    return ramp;
+}
+
+// Returns a malloc'd Hamming window w[i] = 0.54 - 0.46 * cos(2*PI*i / (length - 1));
+// a one-point window is 1.0. Exits on allocation failure; the caller frees it.
+double* hamming(int length) {
+    double* window = malloc(length * sizeof(double));
+    if (!window) {
+        perror("Memory allocation failed in hamming");
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++)
+        window[i] = (length > 1) ? 0.54 - 0.46 * cos(2 * PI * i / (length - 1)) : 1.0;
+    return window;
+}
+
+// Returns a malloc'd, `length`-tap ideal low-pass impulse response centred on
+// tap (length - 1) / 2: sin(wc * m) / (PI * m) at offset m, and wc / PI at the
+// centre tap. Exits on allocation failure; the caller frees the result.
+double* lowPassFIRFilter(double wc, int length) {
+    double* taps = malloc(length * sizeof(double));
+    if (taps == NULL) {
+        perror("Memory allocation failed in lowPassFIRFilter");
+        exit(EXIT_FAILURE);
+    }
+
+    const int centre = (length - 1) / 2;
+    for (int tap = 0; tap < length; ++tap) {
+        const int offset = tap - centre;
+        // The centre tap is the limit of sin(wc * m) / (PI * m) as m -> 0.
+        taps[tap] = (offset != 0) ? sin(wc * offset) / (PI * offset) : wc / PI;
+    }
+    return taps;
+}
+
+// Returns a malloc'd high-pass response obtained by spectral inversion: every
+// low-pass tap is negated and 1.0 is added at the centre tap (length - 1) / 2.
+// Exits on allocation failure; the caller frees the result.
+double* highPassFIRFilter(double wc, int length) {
+    double* lowPass = lowPassFIRFilter(wc, length);
+    double* highPass = malloc(length * sizeof(double));
+    if (highPass == NULL) {
+        perror("Memory allocation failed in highPassFIRFilter");
+        exit(EXIT_FAILURE);
+    }
+    for (int tap = 0; tap < length; ++tap)
+        highPass[tap] = -lowPass[tap];
+    highPass[(length - 1) / 2] += 1.0;
+    free(lowPass);
+    return highPass;
+}
+
+// output[i] = array1[i] * array2[i] for the first `length` elements.
+void elementWiseMultiplication(double* output, const double* array1, const double* array2, int length) {
+    for (int idx = 0; idx < length; ++idx) {
+        output[idx] = array1[idx] * array2[idx];
+    }
+}
+
+// Full linear convolution of `input` (inputLength samples) with `filter`
+// (FILTER_LENGTH taps); writes inputLength + FILTER_LENGTH - 1 samples to output.
+void FIRFilterResponse(double* output, const double* input, const double* filter, int inputLength) {
+    const int outLength = inputLength + FILTER_LENGTH - 1;
+
+    // Clear every accumulator first.
+    for (int n = 0; n < outLength; ++n)
+        output[n] = 0;
+
+    for (int n = 0; n < outLength; ++n) {
+        // Only taps k with 0 <= n - k < inputLength overlap the input.
+        const int firstTap = (n >= inputLength) ? n - inputLength + 1 : 0;
+        const int lastTap = (n < FILTER_LENGTH) ? n : FILTER_LENGTH - 1;
+        for (int k = firstTap; k <= lastTap; ++k)
+            output[n] += input[n - k] * filter[k];
+    }
+}
+
+// Scales the first `length` samples of input by gainFactor into output.
+void gain(double* output, const double* input, double gainFactor, int length) {
+    for (int idx = 0; idx < length; ++idx) {
+        output[idx] = input[idx] * gainFactor;
+    }
+}
+
+// output[i] = input1[i] + input2[i] for the first `length` elements.
+void add(double* output, const double* input1, const double* input2, int length) {
+    for (int idx = 0; idx < length; ++idx) {
+        output[idx] = input1[idx] + input2[idx];
+    }
+}
+
+// output[i] = input1[i] - input2[i] for the first `length` elements.
+void sub(double* output, const double* input1, const double* input2, int length) {
+    for (int idx = 0; idx < length; ++idx) {
+        output[idx] = input1[idx] - input2[idx];
+    }
+}
+
+// Prints `length` values as "%f " items on one line, followed by a newline.
+void printArray(const double* array, int length) {
+    for (int idx = 0; idx < length; ++idx) {
+        printf("%f ", array[idx]);
+    }
+    printf("\n");
+}
+
+// Returns array[index]; the index is not range-checked.
+double getElementAtIndex(const double* array, int index) {
+    return *(array + index);
+}
+
+// Three-band equalizer benchmark: filters a 0..INPUT_LENGTH-1 ramp with
+// Hamming-windowed low-, high- and band-pass FIR filters, scales each band by
+// its gain, sums the bands and prints sample 3 of the result.
+int main() {
+    // Step 1: Generate Input Signal
+    double* input = getRangeOfVector(0, INPUT_LENGTH, 1);
+
+    // Step 2: Define constants
+    double pi = PI;
+    double Fs = 8000;
+    double gainForBass = 2;
+    double gainForMid = 1.5;
+    double gainForTreble = 0.8;
+
+    // Step 3: Low-pass filter
+    double fc = 300;
+    double wc = 2 * pi * fc / Fs;
+    double* lpf = lowPassFIRFilter(wc, FILTER_LENGTH);
+    double* hamming_window = hamming(FILTER_LENGTH);
+    double* lpf_w = malloc(FILTER_LENGTH * sizeof(double));
+    elementWiseMultiplication(lpf_w, lpf, hamming_window, FILTER_LENGTH);
+
+    double* FIRfilterResponseForLpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    FIRFilterResponse(FIRfilterResponseForLpf, input, lpf_w, INPUT_LENGTH);
+
+    double* gainWithLpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    gain(gainWithLpf, FIRfilterResponseForLpf, gainForBass, OUTPUT_LENGTH);
+
+    // Step 4: High-pass filter
+    double fc2 = 1500;
+    double wc2 = 2 * pi * fc2 / Fs;
+    double* hpf = highPassFIRFilter(wc2, FILTER_LENGTH);
+    double* hpf_w = malloc(FILTER_LENGTH * sizeof(double));
+    elementWiseMultiplication(hpf_w, hpf, hamming_window, FILTER_LENGTH);
+
+    double* FIRfilterResponseForHpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    FIRFilterResponse(FIRfilterResponseForHpf, input, hpf_w, INPUT_LENGTH);
+
+    double* gainWithHpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    gain(gainWithHpf, FIRfilterResponseForHpf, gainForTreble, OUTPUT_LENGTH);
+
+    // Step 5: Band-pass filter
+    double* lpf2 = lowPassFIRFilter(wc2, FILTER_LENGTH);
+    double* lpf2_w = malloc(FILTER_LENGTH * sizeof(double));
+    elementWiseMultiplication(lpf2_w, lpf2, hamming_window, FILTER_LENGTH);
+
+    double* bpf_w = malloc(FILTER_LENGTH * sizeof(double));
+    sub(bpf_w, lpf2_w, lpf_w, FILTER_LENGTH);
+
+    double* FIRfilterResponseForBpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    FIRFilterResponse(FIRfilterResponseForBpf, input, bpf_w, INPUT_LENGTH);
+
+    // Band-pass (mid) band: scaled by gainForMid, not the treble gain.
+    double* gainWithBpf = malloc(OUTPUT_LENGTH * sizeof(double));
+    gain(gainWithBpf, FIRfilterResponseForBpf, gainForMid, OUTPUT_LENGTH);
+
+    // Compute final audio by summing all filter responses
+    double* final_audio = malloc(OUTPUT_LENGTH * sizeof(double));
+    add(final_audio, gainWithLpf, gainWithHpf, OUTPUT_LENGTH);
+    add(final_audio, final_audio, gainWithBpf, OUTPUT_LENGTH);
+
+    // Extract and print the element at index 3
+    double final1 = getElementAtIndex(final_audio, 3);
+    printf("%f\n", final1);
+
+    // Free allocated memory
+    free(input);
+    free(lpf);
+    free(hamming_window);
+    free(lpf_w);
+    free(FIRfilterResponseForLpf);
+    free(gainWithLpf);
+    free(hpf);
+    free(hpf_w);
+    free(FIRfilterResponseForHpf);
+    free(gainWithHpf);
+    free(lpf2);
+    free(lpf2_w);
+    free(bpf_w);
+    free(FIRfilterResponseForBpf);
+    free(gainWithBpf);
+    free(final_audio);
+
+    return 0;
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c
new file mode 100644
index 000000000000..7606d2796e61
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/biomedicalSignalProcessing.c
@@ -0,0 +1,250 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define FS 8000
+#define INPUT_LENGTH 2000
+#define FILTER_SIZE 101
+#define MAX_PEAKS 950
+
+// Function prototypes
+void getRangeOfVector(double* vector, double start, int length, double increment);
+void gain(double* output, double* input, double multiplier, int length);
+void sine(double* output, double* input, int length);
+void add(double* output, double* input1, double* input2, int length);
+void sub(double* output, double* input1, double* input2, int length);
+void lowPassFIRFilter(double* lpf, double wc, int N);
+void hamming(double* window, int length);
+void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length);
+double max_signal(double* signal, int length);
+void find_peaks(double* peaks, double* input, int length, double height, int distance);
+void diff(double* output, double* input, int length);
+double mean(double* input, int length);
+
+// Heart-rate benchmark: builds a 500 Hz sine plus 3 kHz noise, band-pass
+// filters it with Hamming-windowed FIR taps, detects peaks and prints
+// 60 * FS divided by the mean distance between consecutive peaks.
+int main() {
+    double fc1 = 1000, fc2 = 7500;
+    int N = FILTER_SIZE, distance = 950;
+
+    double* input = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125);
+
+    double f_sig = 500;
+    double getMultiplier = 2 * PI * f_sig;
+    double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    gain(getSinDuration, input, getMultiplier, INPUT_LENGTH);
+
+    double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    sine(clean_sig, getSinDuration, INPUT_LENGTH);
+
+    double f_noise = 3000;
+    double* getNoiseSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH);
+
+    double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    sine(noise, getNoiseSinDuration, INPUT_LENGTH);
+
+    double* noise1 = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    gain(noise1, noise, 0.5, INPUT_LENGTH);
+
+    double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    add(noisy_sig, clean_sig, noise1, INPUT_LENGTH);
+
+    // FIR Bandpass Filter
+    double wc1 = 2 * PI * fc1 / FS;
+    double wc2 = 2 * PI * fc2 / FS;
+
+    double* lpf1 = (double*)malloc(N * sizeof(double));
+    double* lpf2 = (double*)malloc(N * sizeof(double));
+    lowPassFIRFilter(lpf1, wc1, N);
+    lowPassFIRFilter(lpf2, wc2, N);
+
+    double hamming_window[FILTER_SIZE];
+    hamming(hamming_window, FILTER_SIZE);
+
+    double* lpf1_w = (double*)malloc(N * sizeof(double));
+    double* lpf2_w = (double*)malloc(N * sizeof(double));
+
+    for (int i = 0; i < N; i++) {
+        lpf1_w[i] = lpf1[i] * hamming_window[i];
+        lpf2_w[i] = lpf2[i] * hamming_window[i];
+    }
+
+    double* bpf_w = (double*)malloc(N * sizeof(double));
+    sub(bpf_w, lpf2_w, lpf1_w, N);
+
+    int conv_length = INPUT_LENGTH + N - 1;
+    double* FIRfilterResponseForBpf = (double*)malloc(conv_length * sizeof(double));
+    FIRFilterResponse(FIRfilterResponseForBpf, noisy_sig, bpf_w, INPUT_LENGTH, N);
+
+    double max_val = max_signal(FIRfilterResponseForBpf, conv_length);
+    double height = 0.3 * max_val;
+
+    double* r_peaks = (double*)malloc(MAX_PEAKS * sizeof(double));
+    find_peaks(r_peaks, FIRfilterResponseForBpf, conv_length, height, distance);
+    // find_peaks() stores the number of detected peaks in the last slot.
+    int peak_count = (int)r_peaks[MAX_PEAKS - 1];
+
+    // One peak-to-peak interval needs two peaks; fewer would divide by zero.
+    int status = EXIT_FAILURE;
+    double* diff_val = NULL;
+    if (peak_count >= 2) {
+        diff_val = (double*)malloc(peak_count * sizeof(double));
+        diff(diff_val, r_peaks, peak_count);
+        // Mean distance between consecutive peaks, then the heart rate
+        double diff_mean = mean(diff_val, peak_count - 1);
+        double avg_hr = (60.0 * FS) / diff_mean;
+        printf("%f", avg_hr);
+        status = 0;
+    } else {
+        fprintf(stderr, "Fewer than two peaks detected; cannot compute heart rate\n");
+    }
+
+    // Free memory
+    free(diff_val);
+    free(input);
+    free(getSinDuration);
+    free(clean_sig);
+    free(getNoiseSinDuration);
+    free(noise);
+    free(noise1);
+    free(noisy_sig);
+    free(lpf1);
+    free(lpf2);
+    free(lpf1_w);
+    free(lpf2_w);
+    free(bpf_w);
+    free(FIRfilterResponseForBpf);
+    free(r_peaks);
+    return status;
+}
+
+// Function implementations
+// Fills vector[0..length) with start, start + increment, start + 2 * increment, ...
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int idx = 0; idx < length; ++idx)
+        vector[idx] = start + idx * increment;
+}
+
+// output[i] = input[i] * multiplier for the first `length` samples.
+void gain(double* output, double* input, double multiplier, int length) {
+    for (int idx = 0; idx < length; ++idx)
+        output[idx] = input[idx] * multiplier;
+}
+
+// output[i] = sin(input[i]) for the first `length` samples.
+void sine(double* output, double* input, int length) {
+    for (int idx = 0; idx < length; ++idx)
+        output[idx] = sin(input[idx]);
+}
+
+// output[i] = input1[i] + input2[i] for the first `length` samples.
+void add(double* output, double* input1, double* input2, int length) {
+    for (int idx = 0; idx < length; ++idx)
+        output[idx] = input1[idx] + input2[idx];
+}
+
+// output[i] = input1[i] - input2[i] for the first `length` samples.
+void sub(double* output, double* input1, double* input2, int length) {
+    for (int idx = 0; idx < length; ++idx)
+        output[idx] = input1[idx] - input2[idx];
+}
+
+// Hamming window w[i] = 0.54 - 0.46*cos(2*PI*i/(length-1)); a 1-point window is 1.0.
+void hamming(double* window, int length) {
+    for (int i = 0; i < length; i++)
+        window[i] = (length > 1) ? 0.54 - 0.46 * cos(2 * PI * i / (length - 1)) : 1.0;
+}
+
+// Fills lpf with N taps of a sinc low-pass response centred on tap (N - 1) / 2:
+// (wc / PI) * sin(x) / x with x = wc * (n - centre), and wc / PI at the centre
+// tap itself.
+void lowPassFIRFilter(double* lpf, double wc, int N) {
+    const int centre = (N - 1) / 2;
+    for (int tap = 0; tap < N; ++tap) {
+        const double x = wc * (tap - centre);
+        // The centre tap takes the x -> 0 limit instead of evaluating 0 / 0.
+        lpf[tap] = (tap == centre) ? wc / PI : (wc / PI) * (sin(x) / x);
+    }
+}
+
+// Full linear convolution: writes the input_length + filter_length - 1 samples
+// of input convolved with filter into output, which must be able to hold that
+// many values.
+void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length) {
+    const int out_length = input_length + filter_length - 1;
+
+    // Clear every accumulator first.
+    for (int n = 0; n < out_length; ++n)
+        output[n] = 0;
+
+    for (int n = 0; n < out_length; ++n) {
+        // Only taps k with 0 <= n - k < input_length overlap the input.
+        const int first_tap = (n >= input_length) ? n - input_length + 1 : 0;
+        const int last_tap = (n < filter_length) ? n : filter_length - 1;
+        for (int k = first_tap; k <= last_tap; ++k)
+            output[n] += input[n - k] * filter[k];
+    }
+}
+
+
+// Returns the largest of the first `length` samples; reads signal[0]
+// unconditionally, so the array must hold at least one sample.
+double max_signal(double* signal, int length) {
+    double best = signal[0];
+    for (int idx = 1; idx < length; ++idx) {
+        best = (signal[idx] > best) ? signal[idx] : best;
+    }
+    return best;
+}
+
+// Records the indices of local maxima of input that are at least `height` and
+// at least `distance` samples after the previously recorded peak. Indices go to
+// peaks[0..], unused slots hold -1, and peaks[MAX_PEAKS - 1] holds the count.
+void find_peaks(double* peaks, double* input, int length, double height, int distance) {
+    int found = 0;
+
+    // -1 marks a slot that holds no peak.
+    for (int slot = 0; slot < MAX_PEAKS; ++slot) {
+        peaks[slot] = -1;
+    }
+
+    for (int pos = 1; pos < length - 1; ++pos) {
+        const int is_peak = input[pos] > input[pos - 1] && input[pos] > input[pos + 1] &&
+                            input[pos] >= height;
+        if (!is_peak) {
+            continue;
+        }
+
+        // The first peak is always kept; later ones must respect the spacing.
+        if (found == 0 || pos - (int)peaks[found - 1] >= distance) {
+            peaks[found++] = pos;
+        }
+
+        // Leave the final slot free for the peak count.
+        if (found >= MAX_PEAKS - 1) {
+            break;
+        }
+    }
+
+    peaks[MAX_PEAKS - 1] = found;
+}
+
+
+
+// output[i] = input[i + 1] - input[i] for i in [0, length - 1): length - 1 values.
+void diff(double* output, double* input, int length) {
+    for (int idx = 0; idx + 1 < length; ++idx)
+        output[idx] = input[idx + 1] - input[idx];
+}
+
+// Returns sum(input[0..length)) / length; there is no guard for length == 0.
+double mean(double* input, int length) {
+    double total = 0;
+    for (int idx = 0; idx < length; ++idx)
+        total += input[idx];
+    return total / length;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c
new file mode 100644
index 000000000000..a464f139cac2
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/digitalModulation.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <complex.h>
+#include <time.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 100
+
+// Fills vector[0..length) with start, start + increment, start + 2 * increment, ...
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int idx = 0; idx < length; ++idx)
+        vector[idx] = start + idx * increment;
+}
+
+// output[i] = input[i] * gainFactor for the first `length` samples.
+void gain(double* output, const double* input, double gainFactor, int length) {
+    for (int idx = 0; idx < length; ++idx)
+        output[idx] = input[idx] * gainFactor;
+}
+
+// For each of the INPUT_LENGTH samples: 1.0 if input >= threshold, else low_value.
+void thresholdUp(double* output, const double* input, double threshold, double low_value) {
+    for (int idx = 0; idx < INPUT_LENGTH; ++idx)
+        output[idx] = !(input[idx] >= threshold) ? low_value : 1.0;
+}
+
+// Maps each bit pair (binary_sig[i], binary_sig[i + 1]) to the real part of its
+// symbol: -1.0 when the first bit is 0.0, 1.0 when it is 1.0. Pairs containing a
+// value other than 0.0 / 1.0 leave the corresponding symbol unwritten.
+void qam_modulate_real(double* symbols_real, double* binary_sig) {
+    for (int sym = 0; 2 * sym < INPUT_LENGTH; ++sym) {
+        const double first = binary_sig[2 * sym];
+        const double second = binary_sig[2 * sym + 1];
+        if (second != 0.0 && second != 1.0)
+            continue; // not a valid bit pair
+        if (first == 0.0)
+            symbols_real[sym] = -1.0;
+        else if (first == 1.0)
+            symbols_real[sym] = 1.0;
+    }
+}
+
+// Maps each bit pair (binary_sig[i], binary_sig[i + 1]) to the imaginary part of
+// its symbol: -1.0 when the second bit is 0.0, 1.0 when it is 1.0. Pairs
+// containing a value other than 0.0 / 1.0 leave the symbol unwritten.
+void qam_modulate_imag(double* symbols_imag, double* binary_sig) {
+    for (int sym = 0; 2 * sym < INPUT_LENGTH; ++sym) {
+        const double first = binary_sig[2 * sym];
+        const double second = binary_sig[2 * sym + 1];
+        if (first != 0.0 && first != 1.0)
+            continue; // not a valid bit pair
+        if (second == 0.0)
+            symbols_imag[sym] = -1.0;
+        else if (second == 1.0)
+            symbols_imag[sym] = 1.0;
+    }
+}
+
+// Inverse of the modulators: for each of the INPUT_LENGTH / 2 symbols, a real
+// part of -1.0 / 1.0 decodes to first bit 0.0 / 1.0 and an imaginary part of
+// -1.0 / 1.0 to second bit 0.0 / 1.0. Symbols with any other component leave
+// their two output bits unwritten.
+void qam_demodulate(double* decoded_data, double* symbols_real, double* symbols_imag) {
+    for (int sym = 0; sym < INPUT_LENGTH / 2; ++sym) {
+        const double re = symbols_real[sym];
+        const double im = symbols_imag[sym];
+
+        // Both components must be exactly -1.0 or 1.0 to decode.
+        const int re_valid = (re == -1.0 || re == 1.0);
+        const int im_valid = (im == -1.0 || im == 1.0);
+        if (!re_valid || !im_valid) {
+            continue;
+        }
+
+        decoded_data[2 * sym] = (re == 1.0) ? 1.0 : 0.0;
+        decoded_data[2 * sym + 1] = (im == 1.0) ? 1.0 : 0.0;
+    }
+}
+
+// QAM round-trip benchmark: thresholds a 500 Hz sine into bits, modulates the
+// bit pairs into symbols, demodulates them and prints decoded bit 2.
+int main() {
+    srand(time(NULL)); // Seed random number generator
+
+    // Allocate every buffer up front so a single check covers all of them.
+    double* input = (double*)malloc(sizeof(double) * INPUT_LENGTH);
+    double* getSinDuration = (double*)malloc(sizeof(double) * INPUT_LENGTH);
+    double* clean_sig = (double*)malloc(sizeof(double) * INPUT_LENGTH);
+    double* binary_sig = (double*)malloc(sizeof(double) * INPUT_LENGTH);
+    double* modulate_symbol_real = (double*)malloc(sizeof(double) * (INPUT_LENGTH / 2));
+    double* modulate_symbol_imag = (double*)malloc(sizeof(double) * (INPUT_LENGTH / 2));
+    double* decode_data = (double*)malloc(sizeof(double) * INPUT_LENGTH);
+    const int allocated = input && getSinDuration && clean_sig && binary_sig &&
+                          modulate_symbol_real && modulate_symbol_imag && decode_data;
+    if (!allocated) {
+        perror("Memory allocation failed");
+    } else {
+        // Step 1: Generate Input Signal
+        getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125);
+        // Step 2: Generate clean signal
+        double f_sig = 500;
+        double getMultiplier = 2 * PI * f_sig;
+        gain(getSinDuration, input, getMultiplier, INPUT_LENGTH);
+        for (int i = 0; i < INPUT_LENGTH; i++) {
+            clean_sig[i] = sin(getSinDuration[i]);
+        }
+
+        // Step 3: Apply thresholdUp to get binary signal (stored in double array)
+        thresholdUp(binary_sig, clean_sig, 0.4, 0.0);
+
+        // Steps 4-5: QAM modulation, then demodulation
+        qam_modulate_real(modulate_symbol_real, binary_sig);
+        qam_modulate_imag(modulate_symbol_imag, binary_sig);
+        qam_demodulate(decode_data, modulate_symbol_real, modulate_symbol_imag);
+
+        printf("%f ", decode_data[2]);
+    }
+
+    // Free allocated memory (free(NULL) is a no-op)
+    free(input);
+    free(getSinDuration);
+    free(clean_sig);
+    free(binary_sig);
+    free(modulate_symbol_real);
+    free(modulate_symbol_imag);
+    free(decode_data);
+    return allocated ? 0 : EXIT_FAILURE;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c
new file mode 100644
index 000000000000..a700990ffe56
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/dtmfDetection.c
@@ -0,0 +1,148 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdbool.h>
+
+#define M_PI 3.14159265358979323846
+#define INPUT_LENGTH 1000
+
+// Real part of the naive O(length^2) DFT:
+// real[k] = sum over n of input[n] * cos(2*pi*k*n / length).
+void dftReal(double* real, double* input, int length) {
+    for (int bin = 0; bin < length; ++bin) {
+        real[bin] = 0;
+        for (int s = 0; s < length; ++s)
+            real[bin] += input[s] * cos(2 * M_PI * bin * s / length);
+    }
+}
+
+// Imaginary part of the naive O(length^2) DFT:
+// imag[k] = -(sum over n of input[n] * sin(2*pi*k*n / length)).
+void dftImag(double* imag, double* input, int length) {
+    for (int bin = 0; bin < length; ++bin) {
+        imag[bin] = 0;
+        for (int s = 0; s < length; ++s)
+            imag[bin] -= input[s] * sin(2 * M_PI * bin * s / length);
+    }
+}
+
+// Writes fs * duration samples of the two-tone signal for `digit` (0-9) into
+// dtmf_tone: 10 * sin(2*pi*f1*t) + sin(2*pi*f2*t), with t = i / fs.
+void generateDtmf(double* dtmf_tone, int digit, double duration, int fs) {
+    static const double freqPairs[10][2] = {
+        {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477},
+        {770, 1209}, {770, 1336}, {770, 1477}, {852, 1209},
+        {852, 1336}, {852, 1477}
+    };
+    if (digit < 0 || digit > 9) return; // no table row: leave dtmf_tone untouched
+    const double f1 = freqPairs[digit][0], f2 = freqPairs[digit][1];
+    const int N = fs * duration;
+    for (int i = 0; i < N; i++) {
+        double t = (double)i / fs;
+        dtmf_tone[i] = 10* sin(2 * M_PI * f1 * t) + sin(2 * M_PI * f2 * t);
+    }
+}
+
+// Scans the bins with non-negative frequency, tracking the largest and second
+// largest magnitude (both start at 0.0 with frequency 0.0), and writes the two
+// associated frequencies to peaks[0] and peaks[1] in ascending order.
+void findDominantPeaks(double* frequencies, double* magnitudes, int fft_size, double* peaks) {
+    double topMag = 0.0;     // strongest magnitude seen so far
+    double topFreq = 0.0;    // frequency of that bin
+    double secondMag = 0.0;  // runner-up magnitude
+    double secondFreq = 0.0; // frequency of the runner-up
+
+    for (int bin = 0; bin < fft_size; ++bin) {
+        const double freq = frequencies[bin];
+        const double mag = magnitudes[bin];
+
+        // Negative-frequency bins are ignored.
+        if (!(freq >= 0.0)) {
+            continue;
+        }
+
+        if (mag > topMag) {
+            // New strongest bin: the previous one becomes the runner-up.
+            secondMag = topMag;
+            secondFreq = topFreq;
+            topMag = mag;
+            topFreq = freq;
+        } else if (mag > secondMag) {
+            // Not the strongest, but it beats the current runner-up.
+            secondMag = mag;
+            secondFreq = freq;
+        }
+    }
+
+    // Report the pair lowest frequency first.
+    const int topIsLower = topFreq < secondFreq;
+    peaks[0] = topIsLower ? topFreq : secondFreq;
+    peaks[1] = topIsLower ? secondFreq : topFreq;
+}
+
+// Returns the first row index (0-9) whose two table frequencies both lie within
+// 10 of the two peaks, in either order, or -1 if none does. peak_count is unused.
+int recoverDTMFDigit(double* peaks, const double freqPairs[10][2], int peak_count) {
+    for (int digit = 0; digit < 10; ++digit) {
+        const double low = freqPairs[digit][0];
+        const double high = freqPairs[digit][1];
+        const int straight = fabs(peaks[0] - low) < 10 && fabs(peaks[1] - high) < 10;
+        const int swapped = fabs(peaks[0] - high) < 10 && fabs(peaks[1] - low) < 10;
+        if (straight || swapped)
+            return digit;
+    }
+    return -1;
+}
+
+
+// DTMF benchmark: synthesizes the tone for digit 8, takes its DFT, finds the two
+// dominant non-negative frequencies and prints them and the recovered digit.
+int main() {
+    int digit = 8;
+    int fs = 8192;
+    double duration = (double)INPUT_LENGTH / fs;
+    int N = fs * duration;
+
+    double* dtmf_tone = (double*)malloc(N * sizeof(double));
+    double* fft_real = (double*)malloc(N * sizeof(double));
+    double* fft_imag = (double*)malloc(N * sizeof(double));
+    double* magnitudes = (double*)malloc(N * sizeof(double));
+    double* frequencies = (double*)malloc(N * sizeof(double));
+    int status = EXIT_FAILURE;
+    if (!dtmf_tone || !fft_real || !fft_imag || !magnitudes || !frequencies) {
+        perror("Memory allocation failed");
+    } else {
+        generateDtmf(dtmf_tone, digit, duration, fs);
+        dftReal(fft_real, dtmf_tone, N);
+        dftImag(fft_imag, dtmf_tone, N);
+
+        // Magnitude and signed frequency of every bin, computed in one pass;
+        // bins above N / 2 are assigned negative frequencies.
+        for (int i = 0; i < N; i++) {
+            magnitudes[i] = sqrt(fft_real[i] * fft_real[i] + fft_imag[i] * fft_imag[i]);
+            frequencies[i] = ((i <= N / 2) ? (double)i : (double)i - N) * fs / N;
+        }
+
+        double peaks[2];
+        findDominantPeaks(frequencies, magnitudes, N, peaks);
+        printf("%f %f\t", peaks[0], peaks[1]);
+
+        double freqPairs[10][2] = {
+            {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477},
+            {770, 1209}, {770, 1336}, {770, 1477}, {852, 1209},
+            {852, 1336}, {852, 1477}
+        };
+
+        // recoverDTMFDigit() returns an int (-1 when no row matches); printed as %f.
+        double recovered_digit = recoverDTMFDigit(peaks, freqPairs, 10);
+        printf("%f", recovered_digit);
+        status = 0;
+    }
+
+    free(dtmf_tone);
+    free(fft_real);
+    free(fft_imag);
+    free(magnitudes);
+    free(frequencies);
+    return status;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c
new file mode 100644
index 000000000000..5836e7d81b4c
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/echoCancellation.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <float.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 1000
+
+// Fill vector[0..length) with the arithmetic sequence start + k * increment.
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int k = length - 1; k >= 0; --k) {
+        vector[k] = start + k * increment;
+    }
+}
+
+// Scale a signal: output[k] = input[k] * multiplier for every k in [0, length).
+// Each element is read before the same index is written.
+void gain(double* output, double* input, double multiplier, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = multiplier * input[k];
+}
+
+// Element-wise sine: output[k] = sin(input[k]) for every k in [0, length).
+// Each element is read before the same index is written.
+void sine(double* output, double* input, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = sin(input[k]);
+}
+
+// Element-wise sum: output[k] = input1[k] + input2[k] for every k in [0, length).
+// Each pair is read before the same index is written.
+void add(double* output, double* input1, double* input2, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = input2[k] + input1[k];
+}
+
+// Delay a signal by delaySamples: output[k] = input[k - delaySamples], with the
+// first delaySamples outputs set to 0.
+// Note the argument order: input comes first here, output second.
+void delay(double* input, double* output, int delaySamples, int length) {
+    for (int k = 0; k < length; ++k) {
+        const int src = k - delaySamples;
+        // Samples that would come from before the start of input are zero.
+        output[k] = (src < 0) ? 0 : input[src];
+    }
+}
+
+// LMS adaptive filter: for each sample n, y = sum_i w[i] * noisy_sig[n - i] is written to output[n],
+// then every tap is updated with w[i] += mu * e * noisy_sig[n - i], where e = clean_sig[n] - y.
+void lmsFilterResponse(double* output, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) {
+    // Size the zeroed weights from filterSize; the former fixed w[32] overflowed for filterSize > 32.
+    double* w = (double*)calloc(filterSize > 0 ? filterSize : 1, sizeof(double));
+    if (w == NULL) { perror("Memory allocation failed"); exit(EXIT_FAILURE); }
+    for (int n = 0; n < length; n++) {
+        double y = 0;
+        for (int i = 0; i < filterSize && i <= n; i++) { // taps reaching before sample 0 contribute nothing
+            y += w[i] * noisy_sig[n - i];
+        }
+        double e = clean_sig[n] - y;
+        for (int i = 0; i < filterSize && i <= n; i++) {
+            w[i] += mu * e * noisy_sig[n - i];
+        }
+        output[n] = y;
+    }
+    free(w);
+}
+
+// Min-max normalize: output[i] = (input[i] - min) / (max - min); a constant input yields all zeros.
+void normalize(double *output, double *input, int length) {
+    double min_val = DBL_MAX;
+    double max_val = -DBL_MAX;
+
+    for (int i = 0; i < length; i++) {
+        if (input[i] < min_val) min_val = input[i];
+        if (input[i] > max_val) max_val = input[i];
+    }
+
+    // A flat signal has range 0; dividing by it would fill output with NaN.
+    double range = max_val - min_val;
+    for (int i = 0; i < length; i++) {
+        output[i] = (range != 0.0) ? (input[i] - min_val) / range : 0.0;
+    }
+}
+
+// Benchmark driver: a 500 Hz sine plus a copy of itself delayed by 2 samples, LMS-filtered against the clean sine; prints one normalized sample.
+int main() {
+    int fs = 8000;
+    double step = 1.0 / fs;
+
+    // Allocate memory for vectors (checked below: these can be large when INPUT_LENGTH is raised)
+    double* input = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* y = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double *normalized_sol = (double *)malloc(INPUT_LENGTH * sizeof(double));
+    if (!input || !getSinDuration || !clean_sig || !noise || !noisy_sig || !y || !normalized_sol) {
+        perror("Memory allocation failed");
+        return EXIT_FAILURE;
+    }
+
+    // Generate input range
+    getRangeOfVector(input, 0.0, INPUT_LENGTH, step);
+
+    // Generate clean signal
+    double f_sig = 500;
+    gain(getSinDuration, input, 2 * PI * f_sig, INPUT_LENGTH);
+    sine(clean_sig, getSinDuration, INPUT_LENGTH);
+
+    // Generate noise signal with a delay of 2 samples
+    delay(clean_sig, noise, 2, INPUT_LENGTH);
+
+    // Create noisy signal by adding noise to clean signal
+    add(noisy_sig, clean_sig, noise, INPUT_LENGTH);
+
+    // Apply LMS filter
+    double mu = 0.01;
+    int filterSize = 32;
+    lmsFilterResponse(y, noisy_sig, clean_sig, mu, filterSize, INPUT_LENGTH);
+    normalize(normalized_sol, y, INPUT_LENGTH);
+    printf("%f", normalized_sol[5]);
+
+    // Free allocated memory
+    free(input);
+    free(getSinDuration);
+    free(clean_sig);
+    free(noise);
+    free(noisy_sig);
+    free(y);
+    free(normalized_sol);
+    return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py
new file mode 100644
index 000000000000..8e22146721d0
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/getSize.py
@@ -0,0 +1,149 @@
+import os
+import subprocess
+import pandas as pd
+
+# The script does the following
+# Input : the C benchmark sources listed in input_files
+# Output : codesize.csv with the code size of each gcc/clang build
+# Steps to run:
+# Open a terminal at the path of the script --
+# Run: python getSize.py #3.11 validated
+
+# Pseudo-code:
+# Iterate over the input sizes & update the input value in the file
+# Update logic -- rewrite the line: #define INPUT_LENGTH <value>
+# Compile the file with each compiler and run `size` on the executable
+
+# Path to the input file
+# Apps = "lowPassFIRFilterDesign.c", "noisecancelling.c" , "echocancelling.c",  "hearingAid.c", "audioEqualizer.c", "vibrationAnalysis.c", "underWaterCommunication.c", "voiceActivityDetection.c", "signalSmoothing",  "targetDetection", "biomedicalSignalProcessing", "periodogram2Conv", "spaceCommunication", "dtmfDetection"
+input_files = ["audioCompression.c", "biomedicalSignalProcessing.c", "dtmfDetection.c", "lowPassFIRFilterDesign.c", "noisecancelling.c", \
+"radarSignalProcessing.c", "signalSmoothing.c", "speakerIdentification.c", "targetDetection.c", "vibrationAnalysis.c", "audioEqualizer.c", \
+"digitalModulation.c", "echocancelling.c", "hearingAid.c", "lowPassFull.c", "periodogram2Conv1.c", "spaceCommunication.c", "spectralAnalysis.c", \
+"underWaterCommunication.c", "voiceActivityDetection.c"]
+data = []
+
+for input_file_path in input_files:
+    BasePathForLLVM = "/home/local/ASURITE/megan/ForLLVM/"
+    OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/"
+    # For each benchmark source: pin INPUT_LENGTH, build it with gcc and clang, and record the executable's size.
+    print(f"Running Application {input_file_path}")
+    # Construct full output path
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+    # Check if the Output folder exists, create it if it doesn't
+    if not os.path.exists(OutputPath):
+        os.makedirs(OutputPath)
+
+    # Now OutputPath is ready for use
+    print("InputPath:{}".format(BasePathForLLVM))
+    print(f"OutputPath: {OutputPath}")
+    # exit()
+
+    # ************ Don't change unless u required
+    # Define the values dictionary
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+        # "1B": 1000000000
+    }
+    NoOfIterations = 3
+
+
+    # Define the cases
+    cases = [
+        {
+            "gcc": True,
+            "clang": False,
+            "exe": "fileGCCOptExe",
+        },
+        {
+            "clang": True,
+            "gcc": False,
+            "exe": "fileClangOptExe",
+        },
+    ]
+
+    try:
+        with open(input_file_path, "r") as file:
+            lines = file.readlines()
+    except OSError as exc:
+        print(f"Skipping {input_file_path}: {exc}")
+        continue
+    print("", end="\t")
+
+    for case in cases:
+        print(f"{case['exe']}", end="\t")
+
+    size_test = {"100M": 100000000}
+    for key, value in size_test.items():
+        # Update the specific line in the file
+        # print("Updating for {}".format(value))
+        print("\n{}".format(key), end="\t")
+        with open(input_file_path, "w") as file:
+            for line in lines:
+                if line.strip().startswith("#define INPUT_LENGTH"):
+                    updated_line = f"#define INPUT_LENGTH {value}\n"
+                    file.write(updated_line)
+                else:
+                    file.write(line)
+
+        for case in cases:
+            # test_size indexes the flag lists below: 0 = -O3, 1 = size-optimized (-Os / -Oz)
+            test_size = 0
+            gcc_flag = ["O3", "Os"]
+            clang_flag = ["O3", "Oz"]
+            if case["gcc"]:
+                command = f"gcc -{gcc_flag[test_size]} -o {OutputPath}/{case['exe']} {input_file_path} -lm" # -Os
+            if case["clang"]:
+                command = f"clang-17 -{clang_flag[test_size]} {input_file_path} -o {OutputPath}/{case['exe']} -lm" # -Oz
+            result = subprocess.run(command, shell=True, capture_output=True, text=True)
+            if result.returncode != 0:
+                print(f"Compilation failed for {input_file_path}:\n{result.stderr}")
+                continue  # don't measure a stale or missing executable
+            command2 = f"size {OutputPath}/{case['exe']}"
+            # Execute the command
+            try:
+                result = subprocess.run(
+                        command2,
+                        shell=True,
+                        capture_output=True, text=True, check=True
+                        )
+
+                output_parts = result.stdout.splitlines()
+                if len(output_parts) > 1:
+                    size_data = output_parts[1].split()
+
+                    data.append({
+                        "filename": input_file_path,
+                        # "input size" : key,
+                        "opt": case['exe'],
+                        # "text": size_data[0],
+                        # "data": size_data[1],
+                        # "bss": size_data[2],
+                        # "dec": size_data[3],
+                        # "hex": size_data[4],
+                        "total": sum(map(int, size_data[:3]))  # text + data + bss; adding the dec column too doubled it
+                    })
+            except subprocess.CalledProcessError as exc:
+                print(
+                        f"Process failed because did not return a successful return code. "
+                        f"Returned {exc.returncode}\n{exc}"
+                        )
+
+        df = pd.DataFrame(data)
+
+        df.to_csv("codesize.csv", index=False)
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c
new file mode 100644
index 000000000000..21657587aec4
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/hearingAid.c
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 10
+#define FILTER_LENGTH 32
+
+// Fill vector[0..length) with the arithmetic sequence start + k * increment.
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int k = length - 1; k >= 0; --k) {
+        vector[k] = start + k * increment;
+    }
+}
+
+// Scale a signal: output[k] = input[k] * multiplier for every k in [0, length).
+// Each element is read before the same index is written.
+void gain(double* output, double* input, double multiplier, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = multiplier * input[k];
+}
+
+// Element-wise sine: output[k] = sin(input[k]) for every k in [0, length).
+// Each element is read before the same index is written.
+void sine(double* output, double* input, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = sin(input[k]);
+}
+
+// Element-wise sum: output[k] = input1[k] + input2[k] for every k in [0, length).
+// Each pair is read before the same index is written.
+void add(double* output, double* input1, double* input2, int length) {
+    for (int k = 0; k < length; ++k)
+        output[k] = input2[k] + input1[k];
+}
+
+// LMS adaptive filter: y[n] = sum_i w[i] * noisy_sig[n - i], followed by the tap update
+// w[i] += mu * (clean_sig[n] - y[n]) * noisy_sig[n - i]; all weights start at zero.
+void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) {
+    // One zeroed weight per tap; the former fixed w[FILTER_LENGTH] overflowed when filterSize > FILTER_LENGTH.
+    double* w = (double*)calloc(filterSize > 0 ? filterSize : 1, sizeof(double));
+    if (w == NULL) { perror("Memory allocation failed"); exit(EXIT_FAILURE); }
+    for (int n = 0; n < length; n++) {
+        y[n] = 0;
+        for (int i = 0; i < filterSize && i <= n; i++) { // taps reaching before sample 0 contribute nothing
+            y[n] += w[i] * noisy_sig[n - i];
+        }
+        double e = clean_sig[n] - y[n];
+        for (int i = 0; i < filterSize && i <= n; i++) {
+            w[i] += mu * e * noisy_sig[n - i];
+        }
+    }
+
+    free(w);
+}
+
+// Benchmark driver: 500 Hz sine plus a half-amplitude 3000 Hz sine, LMS-filtered against the clean signal, then scaled by G1.
+int main() {
+    int fs = 8000;
+    double step = 1.0 / fs;
+
+    // Heap-allocate the vectors: nine INPUT_LENGTH-sized stack arrays overflow the stack for large INPUT_LENGTH.
+    double* input = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* getNoiseSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* noise1 = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* y = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    double* sol = (double*)malloc(INPUT_LENGTH * sizeof(double));
+    if (!input || !getSinDuration || !clean_sig || !getNoiseSinDuration || !noise ||
+        !noise1 || !noisy_sig || !y || !sol) {
+        perror("Memory allocation failed");
+        return EXIT_FAILURE;
+    }
+
+    // Generate input range
+    getRangeOfVector(input, 0.0, INPUT_LENGTH, step);
+
+    // Generate clean signal
+    double f_sig = 500;
+    gain(getSinDuration, input, 2 * PI * f_sig, INPUT_LENGTH);
+    sine(clean_sig, getSinDuration, INPUT_LENGTH);
+
+    // Generate noise signal with frequency of 3000 Hz, then halve its amplitude
+    double f_noise = 3000;
+    gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH);
+    sine(noise, getNoiseSinDuration, INPUT_LENGTH);
+    gain(noise1, noise, 0.5, INPUT_LENGTH);
+
+    // Create noisy signal by adding noise to clean signal
+    add(noisy_sig, clean_sig, noise1, INPUT_LENGTH);
+
+    double mu = 0.01;
+    lmsFilterResponse(y, noisy_sig, clean_sig, mu, FILTER_LENGTH, INPUT_LENGTH);
+    double G1 = 123;
+    gain(sol, y, G1, INPUT_LENGTH);
+    printf("%f\n", sol[3]);
+    free(input); free(getSinDuration); free(clean_sig);
+    free(getNoiseSinDuration); free(noise); free(noise1);
+    free(noisy_sig); free(y); free(sol);
+    return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c
new file mode 100644
index 000000000000..3903a26caf13
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/lowPassFiltering.c
@@ -0,0 +1,71 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+
+#define PI 3.14159265359
+#define FS 8000                              // divisor applied to sample indices and to the cut-off frequency
+#define N 101                                // number of FIR taps
+#define INPUT_LENGTH 100                     // samples in each generated signal
+#define FILTER_LENGTH (INPUT_LENGTH + N - 1) // full convolution length (200 here); derived so it tracks INPUT_LENGTH and N
+
+// signal[k] = sin(2 * PI * freq * k / FS) for every k in [0, length).
+void generate_signal(double *signal, double freq, int length) {
+    for (int k = length - 1; k >= 0; --k)
+        signal[k] = sin(2 * PI * freq * k / FS);
+}
+
+// Windowed-sinc low-pass FIR design: N taps centred on index N / 2 for cut-off cutoff_freq
+// (same units as FS), each scaled by the Hamming window 0.54 - 0.46 * cos(2 * PI * i / (N - 1)).
+void generate_lowpass_filter(double *filter, double cutoff_freq) {
+    const double wc = 2 * PI * cutoff_freq / FS;
+    const int centre = N / 2;
+    for (int tap = 0; tap < N; ++tap) {
+        const int offset = tap - centre;
+        // At offset 0 the sinc expression is replaced by its limit value wc / PI.
+        const double ideal = (offset == 0) ? wc / PI : sin(wc * offset) / (PI * offset);
+        const double window = 0.54 - 0.46 * cos(2 * PI * tap / (N - 1));
+        filter[tap] = ideal * window;
+    }
+}
+
+// Convolve input (INPUT_LENGTH samples) with the N-tap filter into FILTER_LENGTH outputs; samples outside the input count as zero.
+void apply_fir_filter(double *input, double *output, double *filter) {
+    for (int out = 0; out < FILTER_LENGTH; ++out) {
+        double acc = 0.0;
+        for (int tap = 0; tap < N; ++tap) {
+            const int src = out - tap;
+            if (src >= 0 && src < INPUT_LENGTH) acc += input[src] * filter[tap];
+        }
+        output[out] = acc;
+    }
+}
+
+// Benchmark driver: filters a 500 Hz + 0.5 * 3000 Hz test signal with the low-pass FIR and prints one output sample.
+int main() {
+    double clean_signal[INPUT_LENGTH];
+    double noise_signal[INPUT_LENGTH];
+    double noisy_signal[INPUT_LENGTH];
+    double fir_filter[N];
+    double filtered_signal[FILTER_LENGTH] = {0};
+
+    // Generate clean signal with frequency 500Hz
+    generate_signal(clean_signal, 500, INPUT_LENGTH);
+
+    // Generate noise signal with frequency 3000Hz and scale it
+    generate_signal(noise_signal, 3000, INPUT_LENGTH);
+    for (int i = 0; i < INPUT_LENGTH; i++) {
+        noise_signal[i] *= 0.5;
+        noisy_signal[i] = clean_signal[i] + noise_signal[i];
+    }
+
+    // Design low-pass filter with cutoff frequency 1000Hz
+    generate_lowpass_filter(fir_filter, 1000);
+
+    // Apply FIR filter
+    apply_fir_filter(noisy_signal, filtered_signal, fir_filter);
+
+    // Print a single filtered sample as the program's only output
+    printf("%f\n", filtered_signal[6]);
+
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c
new file mode 100644
index 000000000000..d9fdc28e1324
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/noiseCancellation.c
@@ -0,0 +1,140 @@
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define INPUT_LENGTH 1000
+
+// Fill vector[0..length) with the arithmetic sequence start + k * increment.
+void getRangeOfVector(double *vector, double start, int length,
+                      double increment) {
+  for (int k = length - 1; k >= 0; --k)
+    vector[k] = start + k * increment;
+}
+
+// Scale a signal: output[k] = input[k] * multiplier for every k in [0, length).
+void gain(double *output, double *input, double multiplier, int length) {
+  for (int k = 0; k < length; ++k)
+    output[k] = multiplier * input[k];
+}
+
+// Element-wise sine: output[k] = sin(input[k]) for every k in [0, length).
+void sine(double *output, double *input, int length) {
+  for (int k = 0; k < length; ++k)
+    output[k] = sin(input[k]);
+}
+
+// Element-wise sum: output[k] = input1[k] + input2[k] for every k in [0, length).
+void add(double *output, double *input1, double *input2, int length) {
+  for (int k = 0; k < length; ++k)
+    output[k] = input2[k] + input1[k];
+}
+
+// LMS adaptive filter: output[n] = sum_i w[i] * noisy_sig[n - i]; then w[i] += mu * e * noisy_sig[n - i]
+// with e = clean_sig[n] - output[n]. Weights are sized by filterSize (the former fixed w[32] overflowed beyond 32 taps).
+void lmsFilterResponse(double *output, double *noisy_sig, double *clean_sig,
+                       double mu, int filterSize, int length) {
+  double *w = (double *)calloc(filterSize > 0 ? filterSize : 1, sizeof(double));
+  if (w == NULL) { perror("Memory allocation failed"); exit(EXIT_FAILURE); }
+  for (int n = 0; n < length; n++) {
+    double y = 0;
+    for (int i = 0; i < filterSize && i <= n; i++) { // taps reaching before sample 0 contribute nothing
+      y += w[i] * noisy_sig[n - i];
+    }
+    double e = clean_sig[n] - y;
+    for (int i = 0; i < filterSize && i <= n; i++) {
+      w[i] += mu * e * noisy_sig[n - i];
+    }
+    output[n] = y;
+  }
+  free(w);
+}
+
+// Min-max normalize: output[i] = (input[i] - min) / (max - min); a constant input yields all zeros.
+void normalize(double *output, double *input, int length) {
+  double min_val = DBL_MAX;
+  double max_val = -DBL_MAX;
+  // Find min and max values
+  for (int i = 0; i < length; i++) {
+    if (input[i] < min_val)
+      min_val = input[i];
+    if (input[i] > max_val)
+      max_val = input[i];
+  }
+
+  // A flat signal has range 0; dividing by it would fill output with NaN.
+  double range = max_val - min_val;
+  for (int i = 0; i < length; i++) {
+    output[i] = (range != 0.0) ? (input[i] - min_val) / range : 0.0;
+  }
+}
+
+// Benchmark driver: 500 Hz sine plus a half-amplitude 3000 Hz sine, LMS-filtered against the clean signal; prints one normalized sample.
+int main() {
+  // Allocate memory dynamically
+  double *t = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *getSinDuration = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *clean_sig = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *getNoiseSinDuration = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *noise = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *noise1 = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *noisy_sig = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *y = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *sol = (double *)malloc(INPUT_LENGTH * sizeof(double));
+  double *normalized_sol = (double *)malloc(INPUT_LENGTH * sizeof(double));
+
+  // Check if memory allocation was successful
+  if (!t || !getSinDuration || !clean_sig || !getNoiseSinDuration || !noise ||
+      !noise1 || !noisy_sig || !y || !sol || !normalized_sol) {
+    perror("Memory allocation failed");
+    free(t);
+    free(getSinDuration);
+    free(clean_sig);
+    free(getNoiseSinDuration);
+    free(noise);
+    free(noise1);
+    free(noisy_sig);
+    free(y);
+    free(sol);
+    free(normalized_sol);
+    exit(EXIT_FAILURE);
+  }
+
+  // Signal processing steps: t holds INPUT_LENGTH values spaced 0.000125 apart, starting at 0
+  getRangeOfVector(t, 0, INPUT_LENGTH, 0.000125);
+  // Clean signal: sin(2 * pi * f_sig * t)
+  double f_sig = 500;
+  double pi = 3.14159265359;
+  gain(getSinDuration, t, 2 * pi * f_sig, INPUT_LENGTH);
+  sine(clean_sig, getSinDuration, INPUT_LENGTH);
+  // Noise: sin(2 * pi * f_noise * t), scaled by 0.5 into noise1
+  double f_noise = 3000;
+  gain(getNoiseSinDuration, t, 2 * pi * f_noise, INPUT_LENGTH);
+
+  sine(noise, getNoiseSinDuration, INPUT_LENGTH);
+
+  gain(noise1, noise, 0.5, INPUT_LENGTH);
+  // Noisy signal = clean signal + scaled noise
+  add(noisy_sig, clean_sig, noise1, INPUT_LENGTH);
+
+  // LMS filter response
+  lmsFilterResponse(y, noisy_sig, clean_sig, 0.01, 32, INPUT_LENGTH);
+
+  gain(sol, y, 10, INPUT_LENGTH);
+  normalize(normalized_sol, sol, INPUT_LENGTH);
+
+  printf("%f\n", normalized_sol[5]);
+
+  // Free allocated memory at the end
+  free(t);
+  free(getSinDuration);
+  free(clean_sig);
+  free(getNoiseSinDuration);
+  free(noise);
+  free(noise1);
+  free(noisy_sig);
+  free(y);
+  free(sol);
+  free(normalized_sol);
+  return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c
similarity index 52%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c
index f1b201ccf8d2..6303ddae7b7b 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/periodogram2Conv.c
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/periodogram.c
@@ -1,9 +1,14 @@
 #include <stdio.h>
 #include <math.h>
 
-void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) {
-    for (int i = 0; i < NoOfElements; i++) {
-        input[i] = start + i * Increment;
+// Define INPUT_LENGTH globally
+#define INPUT_LENGTH 500
+#define PI 3.14159265358
+
+
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int i = 0; i < length; i++) {
+        vector[i] = start + i * increment;
     }
 }
 
@@ -14,10 +19,11 @@ void reverseInput(double* output, double* input, int length) {
 }
 
 void FIRFilterResponse(double* output, double* input, double* filter, int length) {
-    for (int n = 0; n < length; n++) {
+    int conv_length = 2 * length - 1;
+    for (int n = 0; n < conv_length; n++) {
         output[n] = 0;
         for (int k = 0; k < length; k++) {
-            if (n - k >= 0) {
+            if (n - k >= 0 && n - k < length) {
                 output[n] += input[n - k] * filter[k];
             }
         }
@@ -28,7 +34,7 @@ void dftReal(double* real, double* input, int length) {
     for (int k = 0; k < length; k++) {
         real[k] = 0;
         for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
+            double angle = 2.0 * PI * k * n / length;
             real[k] += input[n] * cos(angle);
         }
     }
@@ -38,7 +44,7 @@ void dftImag(double* imag, double* input, int length) {
     for (int k = 0; k < length; k++) {
         imag[k] = 0;
         for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
+            double angle = 2.0 * PI * k * n / length;
             imag[k] -= input[n] * sin(angle);
         }
     }
@@ -51,27 +57,28 @@ void squareMagnitude(double* output, double* real, double* imag, int length) {
 }
 
 int main() {
-    int length = 10;
-    double input[10];
-    getRangeOfVector(input, 0, length, 1);
+    double input[INPUT_LENGTH];
+    getRangeOfVector(input, 0.0, INPUT_LENGTH, 1.0);
 
-    double reverse_input[10];
-    reverseInput(reverse_input, input, length);
+    double reverse_input[INPUT_LENGTH];
+    reverseInput(reverse_input, input, INPUT_LENGTH);
 
-    double conv1d[10];
-    FIRFilterResponse(conv1d, input, reverse_input, length);
+    int conv_length = 2 * INPUT_LENGTH - 1;
+    double conv1d[conv_length];
+    FIRFilterResponse(conv1d, input, reverse_input, INPUT_LENGTH);
 
-    double fft_real[10];
-    double fft_img[10];
-    dftReal(fft_real, conv1d, length);
-    dftImag(fft_img, conv1d, length);
+    double fft_real[conv_length];
+    double fft_img[conv_length];
+    dftReal(fft_real, conv1d, conv_length);
+    dftImag(fft_img, conv1d, conv_length);
 
-    double sq[10];
-    squareMagnitude(sq, fft_real, fft_img, length);
+    double sq[conv_length];
+    squareMagnitude(sq, fft_real, fft_img, conv_length);
+
+
+    printf("%f\n", sq[2]);
+    
 
-    for (int i = 0; i < length; i++) {
-        printf("%f\n", sq[i]);
-    }
 
     return 0;
 }
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c
new file mode 100644
index 000000000000..8b322a2084f8
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/radarSignalProcessing.c
@@ -0,0 +1,309 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <complex.h>
+
+#define PI 3.1415926
+#define INPUT_LENGTH 10
+
+// Function prototypes (definitions follow main). Each function returning double* allocates its result with malloc; the caller frees it.
+double* getrangeofvector(double first, int64_t N, double step);
+double* beamForm(int antennas, double frequency, double* time, double* weights, int timeDim);
+double* abs_array(double* arr, int size);
+double* power_profile(double* arr, int size);
+double* lowPassFIRFilter(double wc, int N);
+double* highPassFIRFilter(double wc, int N);
+double* hamming(int N);
+double* multiply_arrays(const double* arr1, const double* arr2, int size);
+double* subtract_arrays(const double* arr1, const double* arr2, int size);
+double* FirFilterResponse(const double *input, int inputLen, const double *filter, int filterLen);
+
+// Benchmark driver: beam-form 4 sines, square their magnitude, convolve with a Hamming-windowed N-tap FIR, print output sample 10.
+int main() {
+    // Parameters
+    int antennas = 4;
+    double input_fc = 5;
+    int N = 101;
+    int input_length = INPUT_LENGTH;
+    double fc1 = 1000;
+    double fc2 = 7500;
+    double Fs = 8000;
+
+    double* input = getrangeofvector(0, input_length, 0.000125);
+    double* weights = getrangeofvector(-90, 180, 1);
+    double* signal = beamForm(antennas, input_fc, input, weights, input_length);
+    double* b1 = abs_array(signal, input_length);
+    double* power = power_profile(b1, input_length);
+    double* window = hamming(N); // hamming() mallocs on every call: build once, reuse, free below (was leaked twice)
+    double wc1 = 2 * PI * fc1 / Fs;
+    double* filter1 = lowPassFIRFilter(wc1, N);
+    double* filter_hamming_1 = multiply_arrays(filter1, window, N);
+    double wc2 = 2 * PI * fc2 / Fs;
+    double* filter2 = highPassFIRFilter(wc2, N);
+    double* filter_hamming_2 = multiply_arrays(filter2, window, N);
+    double* bpf = subtract_arrays(filter_hamming_2, filter_hamming_1, N);
+    double* firFilterResponse = FirFilterResponse(power, input_length, bpf, N);
+    if (firFilterResponse == NULL) return 1; // FirFilterResponse logs and returns NULL when allocation fails
+    double final = firFilterResponse[10];
+    printf("%f", final);
+
+    // Free allocated memory
+    free(input);
+    free(weights);
+    free(signal);
+    free(b1);
+    free(power);
+    free(window);
+    free(filter1);
+    free(filter2);
+    free(filter_hamming_1);
+    free(filter_hamming_2);
+    free(bpf);
+    free(firFilterResponse);
+    return 0;
+}
+
+// Return a malloc'd array of N doubles: first, first + step, ... built by repeated addition. The caller frees it.
+double* getrangeofvector(double first, int64_t N, double step) {
+    // Allocate at least one element so N <= 0 yields a valid, untouched buffer instead of an out-of-bounds write.
+    double* result = (double*)malloc((N > 0 ? N : 1) * sizeof(double));
+    if (result == NULL) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    if (N > 0) result[0] = first;
+    // Each element is the previous one plus step (a running sum, not first + i * step).
+    for (int64_t i = 1; i < N; ++i) {
+        result[i] = result[i-1] + step;
+    }
+
+    return result;
+}
+
+// Weighted sum of `antennas` sine waves: wave a is sin(time[t] * 2 * PI * frequency + a * PI / 4),
+// scaled by weights[a]. Returns a malloc'd array of timeDim sums; exits with status 1 if allocation fails.
+double* beamForm(int antennas, double frequency, double* time, double* weights, int timeDim) {
+    double* output = (double*)malloc(timeDim * sizeof(double));
+    if (!output) {
+        fprintf(stderr, "Memory allocation failed for output\n");
+        exit(1);
+    }
+
+    // One row of timeDim samples per antenna.
+    double** signal = (double**)malloc(antennas * sizeof(double*));
+    if (!signal) {
+        fprintf(stderr, "Memory allocation failed for signal\n");
+        free(output);
+        exit(1);
+    }
+    for (int a = 0; a < antennas; ++a) {
+        signal[a] = (double*)malloc(timeDim * sizeof(double));
+        if (signal[a]) {
+            continue;
+        }
+        // Row allocation failed: release everything obtained so far, then abort.
+        fprintf(stderr, "Memory allocation failed for signal[%d]\n", a);
+        while (a > 0) {
+            free(signal[--a]);
+        }
+        free(signal);
+        free(output);
+        exit(1);
+    }
+
+    // Generate input signals: antenna a is phase-shifted by a * PI / 4.
+    const double omega = 2 * PI * frequency;
+    for (int a = 0; a < antennas; ++a) {
+        const double phase = (a * PI) / 4.0;
+        for (int t = 0; t < timeDim; ++t) {
+            signal[a][t] = sin(time[t] * omega + phase);
+        }
+    }
+
+    // Beam forming: weighted sum across antennas for every time step.
+    for (int t = 0; t < timeDim; ++t) {
+        double acc = 0.0;
+        for (int a = 0; a < antennas; ++a) {
+            acc += signal[a][t] * weights[a];
+        }
+        output[t] = acc;
+    }
+
+    // The per-antenna rows are scratch space only.
+    for (int a = 0; a < antennas; ++a) {
+        free(signal[a]);
+    }
+    free(signal);
+
+    return output;
+}
+
+// Return a newly malloc'd array holding fabs(arr[k]) for each of the size elements.
+// Exits with status 1 if the allocation fails; the caller frees the result.
+double* abs_array(double* arr, int size) {
+    double* magnitudes = (double*)malloc(size * sizeof(double));
+    if (!magnitudes) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    for (int k = 0; k < size; ++k) {
+        magnitudes[k] = fabs(arr[k]);
+    }
+
+    return magnitudes;
+}
+
+// Return a newly malloc'd array holding arr[k] * arr[k] for each of the size elements.
+// Exits with status 1 if the allocation fails; the caller frees the result.
+double* power_profile(double* arr, int size) {
+    double* squares = (double*)malloc(size * sizeof(double));
+    if (!squares) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    for (int k = 0; k < size; ++k) {
+        squares[k] = arr[k] * arr[k];
+    }
+
+    return squares;
+}
+
+// Ideal (sinc) low-pass FIR taps for cut-off wc. With mid = (N - 1) / 2,
+// tap i is sin(wc * (i - mid)) / (PI * (i - mid)), and the centre tap is wc / PI.
+// Returns a malloc'd array of N taps (caller frees); exits with status 1 if allocation fails.
+double* lowPassFIRFilter(double wc, int N) {
+    double* taps = (double*)malloc(N * sizeof(double));
+    if (!taps) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    // Integer division: for even N the centre index sits just left of the true midpoint.
+    const int mid = (N - 1) / 2;
+
+    // Centre tap: the sinc expression is replaced by its limit value here.
+    taps[mid] = wc / PI;
+
+    // Every other tap, on both sides of the centre.
+    for (int i = 0; i < N; ++i) {
+        if (i == mid) {
+            continue;
+        }
+
+        // offset is negative left of the centre and positive right of it.
+        const double offset = i - mid;
+        const double numerator = sin(wc * offset);
+        const double denominator = PI * offset;
+        taps[i] = numerator / denominator;
+    }
+
+    return taps;
+}
+
+
+// Ideal high-pass FIR taps for cut-off wc. With mid = (N - 1) / 2,
+// tap i is -sin(wc * (i - mid)) / (PI * (i - mid)), and the centre tap is 1 - wc / PI.
+// Returns a malloc'd array of N taps (caller frees); exits with status 1 if allocation fails.
+double* highPassFIRFilter(double wc, int N) {
+    double* taps = (double*)malloc(N * sizeof(double));
+    if (!taps) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    // Integer division: for even N the centre index sits just left of the true midpoint.
+    const int mid = (N - 1) / 2;
+
+    // Centre tap: one minus the low-pass centre value.
+    taps[mid] = 1.0 - wc / PI;
+
+    // Every other tap, on both sides of the centre.
+    for (int i = 0; i < N; ++i) {
+        if (i == mid) {
+            continue;
+        }
+
+        // offset is negative left of the centre and positive right of it.
+        const double offset = i - mid;
+        const double numerator = sin(wc * offset);
+        const double denominator = PI * offset;
+        taps[i] = -1.0 * numerator / denominator;
+    }
+
+    return taps;
+}
+
+// Return a malloc'd N-point Hamming window: w[k] = 0.54 - 0.46 * cos(2 * PI * k / (N - 1)). The caller frees it.
+double* hamming(int N) {
+    double* window = (double*)malloc(N * sizeof(double));
+    if (window == NULL) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    const double a0 = 0.54;
+    const double a1 = 0.46;
+    const double twoPi = 2.0 * PI;
+    for (int k = 0; k < N; k++) {
+        // N == 1 would evaluate 0 / 0 (NaN) in the angle; a one-point window is 1.
+        window[k] = (N > 1) ? a0 - a1 * cos(twoPi * k / (N - 1)) : 1.0;
+    }
+
+    return window;
+}
+
+// Return a malloc'd array with arr1[k] * arr2[k] for k in [0, size); exits with status 1 if allocation fails.
+double* multiply_arrays(const double* arr1, const double* arr2, int size) {
+    double* product = (double*)malloc(size * sizeof(double));
+    if (!product) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+    for (int k = 0; k < size; ++k) {
+        product[k] = arr1[k] * arr2[k];
+    }
+
+    return product;
+}
+
+// Return a malloc'd array with arr1[k] - arr2[k] for k in [0, size); exits with status 1 if allocation fails.
+double* subtract_arrays(const double* arr1, const double* arr2, int size) {
+    double* difference = (double*)malloc(size * sizeof(double));
+    if (!difference) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+    for (int k = 0; k < size; ++k) {
+        difference[k] = arr1[k] - arr2[k];
+    }
+
+    return difference;
+}
+
+// Full linear convolution of input (inputLen samples) with filter (filterLen taps).
+// Returns a malloc'd array of inputLen + filterLen - 1 samples, or NULL (after logging) if allocation fails.
+double* FirFilterResponse(const double *input, int inputLen, const double *filter, int filterLen) {
+    const int total = inputLen + filterLen - 1;
+    double* result = (double*)malloc(total * sizeof(double));
+    if (!result) {
+        fprintf(stderr, "Memory allocation failed\n");
+        return NULL;
+    }
+
+    for (int n = 0; n < total; ++n) {
+        double acc = 0.0;
+        for (int k = 0; k < filterLen; ++k) {
+            const int src = n - k;
+            // Only taps that land on a real input sample contribute.
+            if (src < 0 || src >= inputLen) {
+                continue;
+            }
+            acc += filter[k] * input[src];
+        }
+        result[n] = acc;
+    }
+    return result;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c
new file mode 100644
index 000000000000..9a68cc038b7e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/signalSmoothing.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 10
+#define SAMPLE_RATE 8000
+#define TIME_INCREMENT 0.000125
+#define WINDOW_SIZE 3
+
+// Function declarations
+void getRangeOfVector(double* vector, double start, int length, double increment);
+void gain(double* output, double* input, double multiplier, int length);
+void sine(double* output, double* input, int length);
+void sliding_median_filter(double* input, double* output, int length);
+void sliding_avg_filter(double* input, double* output, int length);
+double min_of_three(double a, double b, double c);
+double max_of_three(double a, double b, double c);
+
+int main() {  // Builds a sine plus a half-amplitude second sine, then median- and average-filters the sum.
+    double fs = SAMPLE_RATE;  // NOTE(review): fs is not read again in this function
+    double* input = (double*)malloc(INPUT_LENGTH * sizeof(double));  // time values, filled by getRangeOfVector below
+    if (input == NULL) {
+        fprintf(stderr, "Memory allocation failed for input\n");
+        return 1;
+    }
+
+    double f_sig = 500;
+    double getMultiplier = 2 * PI * f_sig;  // scale applied to each time value before sin()
+    
+    getRangeOfVector(input, 0, INPUT_LENGTH, TIME_INCREMENT);  // input[i] = 0 + i * TIME_INCREMENT
+    
+    double* getSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));  // sine argument of the first tone
+    if (getSinDuration == NULL) {  // from here on every failure path frees what was allocated before it
+        fprintf(stderr, "Memory allocation failed for getSinDuration\n");
+        free(input);
+        return 1;
+    }
+    gain(getSinDuration, input, getMultiplier, INPUT_LENGTH);
+    
+    double* clean_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));  // sin(2*PI*f_sig*t)
+    if (clean_sig == NULL) {
+        fprintf(stderr, "Memory allocation failed for clean_sig\n");
+        free(input);
+        free(getSinDuration);
+        return 1;
+    }
+    sine(clean_sig, getSinDuration, INPUT_LENGTH);
+    
+    double f_noise = 3000;
+    double* getNoiseSinDuration = (double*)malloc(INPUT_LENGTH * sizeof(double));  // sine argument of the second tone
+    if (getNoiseSinDuration == NULL) {
+        fprintf(stderr, "Memory allocation failed for getNoiseSinDuration\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        return 1;
+    }
+    gain(getNoiseSinDuration, input, 2 * PI * f_noise, INPUT_LENGTH);
+    
+    double* noise = (double*)malloc(INPUT_LENGTH * sizeof(double));  // sin(2*PI*f_noise*t)
+    if (noise == NULL) {
+        fprintf(stderr, "Memory allocation failed for noise\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        free(getNoiseSinDuration);
+        return 1;
+    }
+    sine(noise, getNoiseSinDuration, INPUT_LENGTH);
+    
+    double* noise1 = (double*)malloc(INPUT_LENGTH * sizeof(double));  // second tone scaled by 0.5
+    if (noise1 == NULL) {
+        fprintf(stderr, "Memory allocation failed for noise1\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        free(getNoiseSinDuration);
+        free(noise);
+        return 1;
+    }
+    gain(noise1, noise, 0.5, INPUT_LENGTH);
+    
+    double* noisy_sig = (double*)malloc(INPUT_LENGTH * sizeof(double));  // clean_sig + noise1
+    if (noisy_sig == NULL) {
+        fprintf(stderr, "Memory allocation failed for noisy_sig\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        free(getNoiseSinDuration);
+        free(noise);
+        free(noise1);
+        return 1;
+    }
+    for (int i = 0; i < INPUT_LENGTH; i++) {
+        noisy_sig[i] = clean_sig[i] + noise1[i];
+    }
+    
+    double* median = (double*)malloc((INPUT_LENGTH - WINDOW_SIZE + 1) * sizeof(double));  // one value per window position
+    if (median == NULL) {
+        fprintf(stderr, "Memory allocation failed for median\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        free(getNoiseSinDuration);
+        free(noise);
+        free(noise1);
+        free(noisy_sig);
+        return 1;
+    }
+    sliding_median_filter(noisy_sig, median, INPUT_LENGTH);
+    
+    double* average = (double*)malloc((INPUT_LENGTH - WINDOW_SIZE + 1) * sizeof(double));  // larger than the count the averaging pass writes
+    if (average == NULL) {
+        fprintf(stderr, "Memory allocation failed for average\n");
+        free(input);
+        free(getSinDuration);
+        free(clean_sig);
+        free(getNoiseSinDuration);
+        free(noise);
+        free(noise1);
+        free(noisy_sig);
+        free(median);
+        return 1;
+    }
+    sliding_avg_filter(median, average, INPUT_LENGTH - WINDOW_SIZE + 1);  // input length here is the median output length
+    
+    printf("%f\n", average[3]);  // single sample printed as the program's result
+    
+    
+    // Free allocated memory
+    free(input);
+    free(getSinDuration);
+    free(clean_sig);
+    free(getNoiseSinDuration);
+    free(noise);
+    free(noise1);
+    free(noisy_sig);
+    free(median);
+    free(average);
+    
+    return 0;
+}
+
+// Fills vector[0..length) with the arithmetic progression
+// start + i * increment (each element computed directly from its index).
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    int i = 0;
+    while (i < length) { vector[i] = start + i * increment; ++i; }
+}
+
+// Writes input[i] * multiplier to output[i] for each of the `length` samples,
+// front to back.
+void gain(double* output, double* input, double multiplier, int length) {
+    int i = 0;
+    while (i < length) { output[i] = input[i] * multiplier; ++i; }
+}
+
+// Element-wise sine: output[i] = sin(input[i]) for i in [0, length),
+// processed front to back.
+void sine(double* output, double* input, int length) {
+    double* dst = output;
+    for (int left = length; left > 0; --left) *dst++ = sin(*input++);
+}
+
+// Returns the smallest of a, b and c. Only strict `<` comparisons are used,
+// so on ties (or when a comparison involves NaN) the earlier value is kept.
+double min_of_three(double a, double b, double c) {
+    const double ab = (b < a) ? b : a;
+    const double abc = (c < ab) ? c : ab;
+    return abc;
+}
+
+// Returns the largest of a, b and c. Only strict `>` comparisons are used,
+// so on ties (or when a comparison involves NaN) the earlier value is kept.
+double max_of_three(double a, double b, double c) {
+    const double ab = (b > a) ? b : a;
+    const double abc = (c > ab) ? c : ab;
+    return abc;
+}
+
+// Sliding 3-point mean: output[i] = (input[i] + input[i+1] + input[i+2]) / 3
+// for each of the length - WINDOW_SIZE + 1 window positions.
+void sliding_avg_filter(double* input, double* output, int length) {
+    const int windows = length - WINDOW_SIZE + 1;
+    int pos = 0;
+    while (pos < windows) { output[pos] = (input[pos] + input[pos + 1] + input[pos + 2]) / 3.0; ++pos; }
+}
+
+// Sliding 3-point median over `input`, writing length - WINDOW_SIZE + 1
+// results to `output`. The middle value of each triple is recovered as
+// (sum of the three) - (largest) - (smallest).
+void sliding_median_filter(double* input, double* output, int length) {
+    const int windows = length - WINDOW_SIZE + 1;
+    for (int pos = 0; pos < windows; ++pos) {
+        const double* w = &input[pos];
+        const double total = w[0] + w[1] + w[2];
+        const double hi = max_of_three(w[0], w[1], w[2]);
+        const double lo = min_of_three(w[0], w[1], w[2]);
+        output[pos] = total - hi - lo;
+    }
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c
new file mode 100644
index 000000000000..bbf6f1d34b1d
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spaceCommunication.c
@@ -0,0 +1,195 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define INPUT_LENGTH 40000
+
+// Allocates `length` doubles filled with start + i * increment (caller frees).
+// Calls perror() and exits with EXIT_FAILURE if malloc fails.
+double *getRangeOfVector(double start, int length, double increment) {
+    double *ramp = (double *)malloc(length * sizeof(double));
+    if (ramp == NULL) {
+        perror("Memory allocation failed in getRangeOfVector");
+        exit(EXIT_FAILURE);
+    }
+    int idx = 0;
+    while (idx < length) { ramp[idx] = start + idx * increment; ++idx; }
+    return ramp;
+}
+
+// Returns a newly allocated array with vector[i] = input[i] * increment for
+// i in [0, length); the caller frees it. Reports allocation failure with
+// perror() and exits with EXIT_FAILURE.
+double *gain(const double *input, int length, double increment) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        perror("Memory allocation failed in gain");  // name this function so the failure is attributed correctly
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++) vector[i] = input[i] * increment;
+    return vector;
+}
+
+// Returns a newly allocated array with vector[i] = sin(input[i]) for i in
+// [0, length); the caller frees it. Reports allocation failure with perror()
+// and exits with EXIT_FAILURE.
+double *Sin(const double *input, int length) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        perror("Memory allocation failed in Sin");  // name this function so the failure is attributed correctly
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++) vector[i] = sin(input[i]);
+    return vector;
+}
+
+// Thresholds `input` element-wise into a newly allocated array of `length`
+// doubles (caller frees).
+//
+//   input[i] >= threshold : 1 when returnOrignal == 0, otherwise input[i]
+//   anything else         : 0
+//
+// A NaN sample compares false against the threshold and therefore maps to 0.
+// On allocation failure the function reports via perror() and exits with
+// EXIT_FAILURE.
+double *thresholdUp(const double *input, int length, double threshold, int returnOrignal) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        // The diagnostic names this function so an allocation failure can be
+        // traced to the right call site.
+        perror("Memory allocation failed in thresholdUp");
+        exit(EXIT_FAILURE);
+    }
+
+    // The mode is loop-invariant, so decide once which value a passing
+    // sample produces instead of duplicating the whole loop per mode.
+    const int keepOriginal = (returnOrignal != 0);
+    for (int i = 0; i < length; i++) {
+        if (input[i] >= threshold) {
+            vector[i] = keepOriginal ? input[i] : 1;
+        } else {
+            vector[i] = 0;
+        }
+    }
+
+    return vector;
+}
+
+// Maps each sample to +1 when it equals 1 exactly and to -1 otherwise,
+// returning a newly allocated array of `length` doubles (caller frees).
+// Allocation failure is reported with perror() and exits with EXIT_FAILURE.
+double *space_modulate(const double *input, int length) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        perror("Memory allocation failed in space_modulate");  // name this function so the failure is attributed correctly
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++) vector[i] = (input[i] == 1) ? 1 : -1;
+    return vector;
+}
+
+// Returns a newly allocated array with vector[i] = input[i] + sin(input[i])
+// (caller frees). The added term is a deterministic function of the sample,
+// not a random draw. Allocation failure is reported with perror() and exits
+// with EXIT_FAILURE.
+double *add_noise(const double *input, int length) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        perror("Memory allocation failed in add_noise");  // name this function so the failure is attributed correctly
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++) vector[i] = input[i] + sin(input[i]);
+    return vector;
+}
+
+// Hard decision per sample: 1 when input[i] > 0, otherwise 0. Returns a newly
+// allocated array of `length` doubles (caller frees). Allocation failure is
+// reported with perror() and exits with EXIT_FAILURE.
+double *space_demodulate(const double *input, int length) {
+    double *vector = (double *)malloc(length * sizeof(double));
+    if (!vector) {
+        perror("Memory allocation failed in space_demodulate");  // name this function so the failure is attributed correctly
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < length; i++) vector[i] = (input[i] > 0) ? 1 : 0;
+    return vector;
+}
+
+// Processes `data` in consecutive 8-element groups (the final group may be
+// shorter) and returns a newly allocated copy of `length` doubles that the
+// caller must free.
+//
+// For each group the elements equal to 1 are counted. Groups with an even
+// count are copied verbatim; groups with an odd count are copied with their
+// first element replaced by 0.
+//
+// Exits with EXIT_FAILURE (after perror) if the allocation fails.
+double *error_correction(const double *data, int length) {
+    double *fixed = (double *)malloc(length * sizeof(double));
+    if (fixed == NULL) {
+        perror("Memory allocation failed for corrected");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int base = 0; base < length; base += 8) {
+        // Clamp the group end so a trailing partial group stays in bounds.
+        const int end = (length - base < 8) ? length : base + 8;
+
+        int ones = 0;
+        for (int pos = base; pos < end; pos++) {
+            ones += (data[pos] == 1);
+            fixed[pos] = data[pos];
+        }
+
+        // Odd parity: overwrite the first element of the group.
+        if (ones % 2 != 0) {
+            fixed[base] = 0;
+        }
+    }
+    return fixed;
+}
+
+// Prints `length` values with six decimal places each, separated by ", ",
+// with no trailing separator and no newline.
+void print_array(const double *arr, int length) {
+    for (int i = 0; i < length; i++) {
+        if (i > 0) printf(", ");  // separator goes before every element except the first
+        printf("%.6f", arr[i]);
+    }
+}
+
+int main() {  // Chain: ramp -> gain -> Sin -> thresholdUp -> space_modulate -> add_noise -> space_demodulate -> error_correction.
+    double *input = getRangeOfVector(0, INPUT_LENGTH, 0.000125);  // input[i] = i * 0.000125
+    if (!input) {  // defensive only: getRangeOfVector exits on allocation failure instead of returning NULL
+      perror("Memory allocation failed for input");
+      return EXIT_FAILURE;
+    }
+  
+    double pi = 3.14159265359;
+    double f_sig = 500;
+    double getMultiplier = 2 * pi * f_sig;  // scale applied to each time value before Sin()
+    double threshold = 0.4;
+    int returnOrignal = 0;  // 0 selects the 0/1 output mode of thresholdUp
+  
+    double *getSinDuration = gain(input, INPUT_LENGTH, getMultiplier);
+    double *clean_sig      = Sin(getSinDuration, INPUT_LENGTH);
+    double *binary_sig     = thresholdUp(clean_sig, INPUT_LENGTH, threshold, returnOrignal);
+    double *a              = space_modulate(binary_sig, INPUT_LENGTH);  // 1 -> +1, everything else -> -1
+    double *noisy_signal   = add_noise(a, INPUT_LENGTH);
+    double *b              = space_demodulate(noisy_signal, INPUT_LENGTH);  // > 0 -> 1, else 0
+    double *e              = error_correction(b, INPUT_LENGTH);
+    printf("%f", e[8]);  // single sample printed as the program's result (no newline)
+
+    // Free allocated memory
+    free(input);
+    free(getSinDuration);
+    free(clean_sig);
+    free(binary_sig);
+    free(a);
+    free(noisy_signal);
+    free(b);
+    free(e);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c
new file mode 100644
index 000000000000..1fb7ff58fc5c
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/speakerIdentification.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define SAMPLE_RATE 1000
+#define INPUT_LENGTH 12207
+#define DURATION ((double)INPUT_LENGTH / SAMPLE_RATE)
+#define CORRELATION_LENGTH (2 * INPUT_LENGTH - 1)
+
+// Writes INPUT_LENGTH samples of sin(2*pi*freq1*t) + sin(2*pi*freq2*t) into `signal`, with t = i / SAMPLE_RATE.
+void generateVoiceSignature(double *signal, double freq1, double freq2) {
+    const double w1 = 2 * M_PI * freq1, w2 = 2 * M_PI * freq2;
+    for (int n = 0; n < INPUT_LENGTH; ++n)
+        signal[n] = sin(w1 * (n / (double)SAMPLE_RATE)) + sin(w2 * (n / (double)SAMPLE_RATE));
+}
+
+// result[lag] = sum over i of signal1[i] * signal2[i + lag - (INPUT_LENGTH - 1)], skipping out-of-range terms.
+void correlate(const double *signal1, const double *signal2, double *result) {
+    for (int lag = 0; lag < CORRELATION_LENGTH; lag++) {
+        const int shift = lag - INPUT_LENGTH + 1;
+        double acc = 0;
+        for (int i = 0; i < INPUT_LENGTH; i++) {
+            if (i + shift >= 0 && i + shift < INPUT_LENGTH) acc += signal1[i] * signal2[i + shift];
+        }
+        result[lag] = acc;
+    }
+}
+
+// Returns the largest value in arr[0..length). arr[0] is read
+// unconditionally, so the array must contain at least one element.
+double max(const double *arr, int length) {
+    double best = arr[0];
+    for (int i = 1; i < length; ++i) {
+        best = (arr[i] > best) ? arr[i] : best;
+    }
+    return best;
+}
+
+// Returns the index of the first maximum of arr[0..length): the strict `>`
+// keeps the earliest index on ties. Returns 0 when length <= 1 without
+// reading the array.
+int argmax(const double *arr, int length) {
+    int best = 0;
+    int i = 1;
+    while (i < length) { if (arr[i] > arr[best]) best = i; ++i; }
+    return best;
+}
+
+// Builds three reference signatures plus an unknown signal (same frequencies as person2), cross-correlates
+// the unknown against each reference and prints the argmax of the three correlation peaks.
+int main() {
+    double *person1 = (double *)malloc(INPUT_LENGTH * sizeof(double));
+    double *person2 = (double *)malloc(INPUT_LENGTH * sizeof(double));
+    double *person3 = (double *)malloc(INPUT_LENGTH * sizeof(double));
+    double *unknown_signal = (double *)malloc(INPUT_LENGTH * sizeof(double));
+    double *correlation1 = (double *)malloc(CORRELATION_LENGTH * sizeof(double));
+    double *correlation2 = (double *)malloc(CORRELATION_LENGTH * sizeof(double));
+    double *correlation3 = (double *)malloc(CORRELATION_LENGTH * sizeof(double));
+
+    // Every buffer is written below; free(NULL) is a no-op, so one cleanup list covers any failure.
+    if (!person1 || !person2 || !person3 || !unknown_signal ||
+        !correlation1 || !correlation2 || !correlation3) {
+        fprintf(stderr, "Memory allocation failed\n");
+        free(person1); free(person2); free(person3); free(unknown_signal);
+        free(correlation1); free(correlation2); free(correlation3);
+        return 1;
+    }
+
+    generateVoiceSignature(person1, 100, 200);
+    generateVoiceSignature(person2, 150, 250);
+    generateVoiceSignature(person3, 120, 180);
+    generateVoiceSignature(unknown_signal, 150, 250);
+
+    correlate(person1, unknown_signal, correlation1);
+    correlate(person2, unknown_signal, correlation2);
+    correlate(person3, unknown_signal, correlation3);
+
+    double total_maxes[3];
+    total_maxes[0] = max(correlation1, CORRELATION_LENGTH);
+    total_maxes[1] = max(correlation2, CORRELATION_LENGTH);
+    total_maxes[2] = max(correlation3, CORRELATION_LENGTH);
+    int max_index = argmax(total_maxes, 3);
+
+    // Output order: index, peak 0, best peak, peak 1, all three peaks, peak 2.
+    printf("%d\t", max_index);
+    printf("%f\t", total_maxes[0]);
+    printf("%f\t", total_maxes[max_index]);
+    printf("%f\t", total_maxes[1]);
+    printf("%f %f %f\t", total_maxes[0], total_maxes[1], total_maxes[2]);
+    printf("%f\t", total_maxes[2]);
+
+    free(person1); free(person2); free(person3); free(unknown_signal);
+    free(correlation1); free(correlation2); free(correlation3);
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c
new file mode 100644
index 000000000000..1371f761ceb4
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/spectralAnalysis.c
@@ -0,0 +1,81 @@
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <complex.h>
+
+#define INPUT_LENGTH 400
+#define M_PI 3.14159265358979323846
+
+// Returns a newly allocated array of noOfSamples doubles holding
+// start + i * increment; the caller frees it. Allocation failure is reported
+// with perror() and ends the program with EXIT_FAILURE.
+double* getRange(double start, int noOfSamples, double increment) {
+    double* ramp = (double*)malloc(noOfSamples * sizeof(double));
+    if (ramp == NULL) {
+        perror("Memory allocation failed in getRange");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int i = 0; i < noOfSamples; ++i) ramp[i] = start + i * increment;
+    return ramp;
+}
+
+// Direct O(length^2) DFT: output[k] = sum over n of input[n] * exp(-i * 2*pi*k*n / length).
+void dft(double complex* output, const double* input, int length) {
+    for (int bin = 0; bin < length; bin++) {
+        double complex acc = 0;
+        for (int n = 0; n < length; n++)
+            acc += input[n] * cexp(-I * (2 * M_PI * bin * n / length));
+        output[bin] = acc;
+    }
+}
+
+// output[i] = input[i] * input[i] for i in [0, length), front to back.
+void square(double* output, const double* input, int length) {
+    int i = 0;
+    while (i < length) { output[i] = input[i] * input[i]; ++i; }
+}
+
+// Returns the left-to-right sum of input[0..length); 0 when length <= 0.
+double sum(const double* input, int length) {
+    double acc = 0;
+    const double* p = input;
+    for (int left = length; left > 0; --left) acc += *p++;
+    return acc;
+}
+
+int main() {  // Prints the mean of the squared DFT magnitudes of the ramp 0, 1, ..., INPUT_LENGTH - 1.
+    double* input = getRange(0, INPUT_LENGTH, 1);  // input[i] = i
+
+    double complex* fft = malloc(INPUT_LENGTH * sizeof(double complex));  // filled by the direct dft() routine below
+    if (!fft) {
+        perror("Memory allocation failed");
+        free(input);
+        return EXIT_FAILURE;
+    }
+
+    dft(fft, input, INPUT_LENGTH);
+
+    double* sq_abs = malloc(INPUT_LENGTH * sizeof(double));  // per-bin squared magnitude
+    if (!sq_abs) {
+        perror("Memory allocation failed");
+        free(input);
+        free(fft);
+        return EXIT_FAILURE;
+    }
+
+    for (int i = 0; i < INPUT_LENGTH; i++) {
+        sq_abs[i] = creal(fft[i]) * creal(fft[i]) + cimag(fft[i]) * cimag(fft[i]);  // re^2 + im^2
+    }
+
+    double sum_result = sum(sq_abs, INPUT_LENGTH);
+    double res = sum_result / INPUT_LENGTH;  // average over all bins
+
+    printf("%f\n", res);
+
+    free(input);
+    free(fft);
+    free(sq_abs);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c
new file mode 100644
index 000000000000..0235c5c5933d
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/targetDetection.c
@@ -0,0 +1,151 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define FS 1000
+#define INPUT_LENGTH 1000  
+#define FILTER_SIZE 20
+#define MAX_PEAKS 50
+
+// Function prototypes
+void getRangeOfVector(double* vector, double start, int length, double increment);
+void gain(double* output, const double* input, double multiplier, int length);
+void sine(double* output, const double* input, int length);
+void delay(double* output, const double* input, int delaySamples, int length);
+void add(double* output, const double* input1, const double* input2, int length);
+void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length);
+void find_peaks(double* peaks, double* input, int length, double height, int distance);
+double getElemAtIndx(double* input, int index);
+
+// Function implementations
+// Fills vector[0..length) with start + i * increment.
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    int i = 0;
+    while (i < length) { vector[i] = start + i * increment; ++i; }
+}
+
+// output[i] = input[i] * multiplier for i in [0, length), front to back.
+void gain(double* output, const double* input, double multiplier, int length) {
+    for (int left = length; left > 0; --left)
+        *output++ = *input++ * multiplier;
+}
+
+// output[i] = sin(input[i]) for i in [0, length), front to back.
+void sine(double* output, const double* input, int length) {
+    int i = 0;
+    while (i < length) { output[i] = sin(input[i]); ++i; }
+}
+
+// output[i] = 0.0 for i < delaySamples, otherwise input[i - delaySamples].
+void delay(double* output, const double* input, int delaySamples, int length) {
+    int i = 0;
+    while (i < length) { output[i] = (i >= delaySamples) ? input[i - delaySamples] : 0.0; ++i; }
+}
+
+// output[i] = input1[i] + input2[i] for i in [0, length), front to back.
+void add(double* output, const double* input1, const double* input2, int length) {
+    int i = 0;
+    while (i < length) { output[i] = input1[i] + input2[i]; ++i; }
+}
+
+// LMS adaptive FIR filter. For each n: y[n] = sum_i w[i] * noisy_sig[n - i]
+// (only taps with n - i >= 0), e = clean_sig[n] - y[n], then
+// w[i] += mu * e * noisy_sig[n - i]. Weights start at zero. The weight buffer
+// is sized from filterSize, so sizes above FILTER_SIZE cannot overrun it.
+void lmsFilterResponse(double* y, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) {
+    double* w = (double*)malloc((filterSize > 0 ? filterSize : 1) * sizeof(double));
+    if (w == NULL) { fprintf(stderr, "Memory allocation failed\n"); exit(1); }
+    for (int i = 0; i < filterSize; i++) w[i] = 0.0;
+    for (int n = 0; n < length; n++) {
+        const int taps = (n + 1 < filterSize) ? n + 1 : filterSize;  // samples before index 0 do not exist
+        y[n] = 0;
+        for (int i = 0; i < taps; i++) y[n] += w[i] * noisy_sig[n - i];
+        const double e = clean_sig[n] - y[n];
+        for (int i = 0; i < taps; i++) w[i] += mu * e * noisy_sig[n - i];
+    }
+    free(w);
+}
+
+// Scans input[1..length-2] for strict local maxima that are >= `height` and
+// records their indices in `peaks`, which must have room for MAX_PEAKS doubles.
+//
+// Layout of `peaks` on return:
+//   peaks[0 .. count-1]  indices of accepted peaks, in increasing order
+//   unused slots         -1
+//   peaks[MAX_PEAKS - 1] count of accepted peaks
+//
+// After the first peak, a candidate is accepted only if it lies at least
+// `distance` samples after the previously accepted one. At most
+// MAX_PEAKS - 1 peaks are stored.
+void find_peaks(double* peaks, double* input, int length, double height, int distance) {
+    for (int slot = 0; slot < MAX_PEAKS; slot++) {
+        peaks[slot] = -1;
+    }
+
+    int found = 0;
+    for (int i = 1; i < length - 1; i++) {
+        const int isLocalMax = input[i] > input[i - 1] && input[i] > input[i + 1];
+        if (!isLocalMax || !(input[i] >= height)) {
+            continue;
+        }
+        if (found == 0 || i - peaks[found - 1] >= distance) {
+            peaks[found++] = i;
+        }
+        if (found >= MAX_PEAKS - 1) break;
+    }
+
+    peaks[MAX_PEAKS - 1] = found;
+}
+
+double getElemAtIndx(double* input, int index) {  // unchecked read of input[index]
+    return *(input + index);
+}
+
+int main() {  // Two-tone signal plus a delayed copy -> LMS filter -> peak search; prints two peak slots.
+    double pi = PI;
+    double input[INPUT_LENGTH];
+    getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125);  // input[i] = i * 0.000125
+
+    double getMultiplier = 2 * pi * 10;
+    double getSinDuration[INPUT_LENGTH];
+    gain(getSinDuration, input, getMultiplier, INPUT_LENGTH);
+
+    double sig1[INPUT_LENGTH];  // sin(2*pi*10*t)
+    sine(sig1, getSinDuration, INPUT_LENGTH);
+
+    double getMultiplier2 = 2 * pi * 20;
+    double getSinDuration2[INPUT_LENGTH];
+    gain(getSinDuration2, input, getMultiplier2, INPUT_LENGTH);
+
+    double sinsig2[INPUT_LENGTH];
+    sine(sinsig2, getSinDuration2, INPUT_LENGTH);
+
+    double sig2[INPUT_LENGTH];  // 0.5 * sin(2*pi*20*t)
+    gain(sig2, sinsig2, 0.5, INPUT_LENGTH);
+
+    double signal[INPUT_LENGTH];
+    add(signal, sig1, sig2, INPUT_LENGTH);
+
+    double noise[INPUT_LENGTH];  // `signal` delayed by 5 samples
+    delay(noise, signal, 5, INPUT_LENGTH);
+
+    double noisy_sig[INPUT_LENGTH];
+    add(noisy_sig, signal, noise, INPUT_LENGTH);
+
+    double mu = 0.01;  // LMS step size
+    double y[INPUT_LENGTH];
+    lmsFilterResponse(y, noisy_sig, signal, mu, FILTER_SIZE, INPUT_LENGTH);  // `signal` is the desired response
+
+    double peaks[MAX_PEAKS];
+    find_peaks(peaks, y, INPUT_LENGTH, 1.0, 50);  // height 1.0, minimum spacing 50 samples
+
+    double final1 = getElemAtIndx(peaks, 1);  // second peak slot (-1 if unused)
+    double final2 = getElemAtIndx(peaks, 0);  // first peak slot (-1 if unused)
+
+    printf("%f\t", final1);
+    printf("%f", final2);
+   
+
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c
new file mode 100644
index 000000000000..8aff517fa10e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/underWaterCommunication.c
@@ -0,0 +1,128 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define FS 1000
+#define INPUT_LENGTH 50 
+#define FILTER_ORDER 5
+
+// Function prototypes
+void getRangeOfVector(double* vector, double start, int length, double increment);
+void gain(double* output, const double* input, double multiplier, int length);
+void sine(double* output, const double* input, int length);
+void delay(double* output, const double* input, int delaySamples, int length);
+void add(double* output, const double* input1, const double* input2, int length);
+double lowPassFIRFilter(double wc, int length);
+void hamming(double* window, int length);
+void FIRFilterResponse(double* output, double* input, double filter, int input_length);
+void thresholdUp(double* output, const double* input, double threshold, double defaultValue, int length);
+double getElemAtIndx(double* input, int index);
+
+// Function implementations
+// Fills vector[0..length) with start + i * increment.
+void getRangeOfVector(double* vector, double start, int length, double increment) {
+    for (int i = 0; i != length && i < length; ++i)
+        vector[i] = start + i * increment;
+}
+
+// output[i] = input[i] * multiplier for i in [0, length), front to back.
+void gain(double* output, const double* input, double multiplier, int length) {
+    int i = 0;
+    while (i < length) { output[i] = input[i] * multiplier; ++i; }
+}
+
+// output[i] = sin(input[i]) for i in [0, length), front to back.
+void sine(double* output, const double* input, int length) {
+    for (int left = length; left > 0; --left)
+        *output++ = sin(*input++);
+}
+
+// output[i] = 0.0 for i < delaySamples, otherwise input[i - delaySamples].
+void delay(double* output, const double* input, int delaySamples, int length) {
+    int i = 0;
+    while (i < length) { output[i] = (i >= delaySamples) ? input[i - delaySamples] : 0.0; ++i; }
+}
+
+// output[i] = input1[i] + input2[i] for i in [0, length), front to back.
+void add(double* output, const double* input1, const double* input2, int length) {
+    for (int left = length; left > 0; --left)
+        *output++ = *input1++ + *input2++;
+}
+
+double sinc(double x) {  // sin(x) / x, defined as 1.0 when |x| < 1e-8 to avoid dividing by (near) zero
+    if (fabs(x) < 1e-8) return 1.0; else return sin(x) / x;
+}
+
+// Returns wc / PI when length == 1 and 0.0 for every other length.
+// NOTE(review): only the single-tap case produces a coefficient here;
+// confirm whether longer filters were meant to be supported.
+double lowPassFIRFilter(double wc, int length) {
+    return (length == 1) ? wc / PI : 0.0;
+}
+
+// Hamming window: window[i] = 0.54 - 0.46 * cos(2*PI*i / (length - 1)) for i in [0, length).
+void hamming(double* window, int length) {
+    if (length == 1) { window[0] = 1.0; return; }  // the formula would divide 0 by 0 (NaN); a one-point window is 1
+    for (int i = 0; i < length; i++) window[i] = 0.54 - 0.46 * cos(2 * PI * i / (length - 1));
+}
+
+// Scales every sample by the single coefficient `filter`: output[i] = input[i] * filter.
+void FIRFilterResponse(double* output, double* input, double filter, int input_length) {
+    int i = 0;
+    while (i < input_length) { output[i] = input[i] * filter; ++i; }
+}
+
+// output[i] = 1 where input[i] >= threshold, otherwise defaultValue.
+void thresholdUp(double* output, const double* input, double threshold, double defaultValue, int length) {
+    for (int left = length; left > 0; --left, ++input)
+        *output++ = (*input >= threshold) ? 1 : defaultValue;
+}
+
+double getElemAtIndx(double* input, int index) {  // unchecked read of input[index]
+    return *(input + index);
+}
+
+int main() {  // Sine plus a delayed copy -> single-coefficient scaling -> threshold; prints one thresholded sample.
+    double pi = PI;
+    double input[INPUT_LENGTH];
+    getRangeOfVector(input, 0, INPUT_LENGTH, 0.000125);  // input[i] = i * 0.000125
+
+    double getMultiplier = 2 * pi * 5;
+    double getSinDuration[INPUT_LENGTH];
+    gain(getSinDuration, input, getMultiplier, INPUT_LENGTH);
+
+    double signal[INPUT_LENGTH];  // sin(2*pi*5*t)
+    sine(signal, getSinDuration, INPUT_LENGTH);
+
+    double noise[INPUT_LENGTH];  // `signal` delayed by 5 samples
+    delay(noise, signal, 5, INPUT_LENGTH);
+
+    double noisy_sig[INPUT_LENGTH];
+    add(noisy_sig, signal, noise, INPUT_LENGTH);
+
+    // Low-pass filter design
+    double wc = 2 * pi * 1000 / 500;
+    int N = 5;  // window length
+
+    double lpf = lowPassFIRFilter(wc, 1);  // length 1 selects the wc / PI branch
+
+    double hamming_window[N];
+    hamming(hamming_window, N);
+
+ 
+    double lpf_w = lpf * hamming_window[0];  // only the first window sample is used
+
+    double FIRfilterResponseArray[INPUT_LENGTH];
+    FIRFilterResponse(FIRfilterResponseArray, noisy_sig, lpf_w, INPUT_LENGTH);  // scales every sample by lpf_w
+
+    double threshold = 0.05;
+    double GetThresholdReal[INPUT_LENGTH];
+    thresholdUp(GetThresholdReal, FIRfilterResponseArray, threshold, 0, INPUT_LENGTH);  // 1 at/above threshold, else 0
+
+    double final1 = getElemAtIndx(GetThresholdReal, 3);
+
+    printf("%f", final1);
+
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c
new file mode 100644
index 000000000000..a30a3c4f5a13
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/vibrationAnalysis.c
@@ -0,0 +1,179 @@
+#include <complex.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define PI 3.14159265359
+#define INPUT_LENGTH 10
+
+// Function prototypes
+double *getRangeOfVector(double start, int length, double increment);
+void gain(double *output, const double *input, double multiplier, int length);
+void sine(double *output, const double *input, int length);
+void add(double *output, const double *input1, const double *input2,
+         int length);
+void delay(double *output, const double *input, int delaySamples, int length);
+void dft(double complex *output, const double *input, int length);
+void square(double *output, const double *input, int length);
+double sum(const double *input, int length);
+void threshold(double *output, const double *input, double thresholdValue,
+               int length);
+void sqrt_array(double *output, const double *input, int length);
+
+// Function implementations
+// Allocates `length` doubles set to start + i * increment (caller frees); perror + exit on failure.
+double *getRangeOfVector(double start, int length, double increment) {
+  double *ramp = (double *)malloc(length * sizeof(double));
+  if (ramp == NULL) {
+    perror("Memory allocation failed in getRangeOfVector");
+    exit(EXIT_FAILURE);
+  }
+  for (int i = 0; i < length; ++i)
+    ramp[i] = start + i * increment;
+  return ramp;
+}
+
+// output[i] = input[i] * multiplier for i in [0, length), front to back.
+void gain(double *output, const double *input, double multiplier, int length) {
+  for (int left = length; left > 0; --left)
+    *output++ = *input++ * multiplier;
+}
+
+// output[i] = sin(input[i]) for i in [0, length), front to back.
+void sine(double *output, const double *input, int length) {
+  for (int left = length; left > 0; --left)
+    *output++ = sin(*input++);
+}
+
+// output[i] = input1[i] + input2[i] for i in [0, length), front to back.
+void add(double *output, const double *input1, const double *input2,
+         int length) {
+  for (int left = length; left > 0; --left)
+    *output++ = *input1++ + *input2++;
+}
+
+// Shifts `input` right by delaySamples: the first delaySamples outputs are 0
+// and output[i] = input[i - delaySamples] afterwards. Samples shifted past
+// `length` are dropped.
+void delay(double *output, const double *input, int delaySamples, int length) {
+  for (int i = 0; i < length; i++) {
+    const int src = i - delaySamples;
+    output[i] = (src < 0) ? 0 : input[src];
+  }
+}
+
+// Direct O(length^2) DFT: output[k] = sum over n of input[n] * exp(-i * 2*PI*k*n / length).
+void dft(double complex *output, const double *input, int length) {
+  for (int bin = 0; bin < length; bin++) {
+    double complex acc = 0;
+    for (int n = 0; n < length; n++)
+      acc += input[n] * cexp(-I * (2 * PI * bin * n / length));
+    output[bin] = acc;
+  }
+}
+
+// output[i] = input[i] * input[i] for i in [0, length), front to back.
+void square(double *output, const double *input, int length) {
+  for (int i = 0; i < length; ++i)
+    output[i] = input[i] * input[i];
+}
+
+// Returns the left-to-right sum of input[0..length); 0 when length <= 0.
+double sum(const double *input, int length) {
+  double acc = 0;
+  int i = 0;
+  while (i < length) acc += input[i++];
+  return acc;
+}
+
+// Passes input[i] through when input[i] >= thresholdValue and writes 0
+// otherwise. A NaN sample fails the comparison and therefore becomes 0.
+// Processes `length` samples front to back.
+void threshold(double *output, const double *input, double thresholdValue,
+               int length) {
+  for (int i = 0; i < length; i++) {
+    const double sample = input[i];
+    output[i] = (sample >= thresholdValue) ? sample : 0;
+  }
+}
+// output[i] = sqrt(input[i]) for i in [0, length), front to back.
+void sqrt_array(double *output, const double *input, int length) {
+  for (int left = length; left > 0; --left)
+    *output++ = sqrt(*input++);
+}
+
+// Allocates `count` doubles, or reports the failure and terminates.
+static double *allocSamples(int count) {
+  double *buffer = malloc(count * sizeof(double));
+  if (!buffer) {
+    perror("Memory allocation failed");
+    exit(EXIT_FAILURE);
+  }
+  return buffer;
+}
+
+// Builds sin(2*PI*50*t) + 0.5*sin(2*PI*120*t), adds a copy of it delayed by
+// five samples, takes the DFT magnitude, zeroes every magnitude below 2 and
+// prints the first bin.
+int main() {
+  double *input = getRangeOfVector(0, INPUT_LENGTH, 0.000125);
+
+  // First tone.
+  double *getSinDuration = allocSamples(INPUT_LENGTH);
+  gain(getSinDuration, input, 2 * PI * 50, INPUT_LENGTH);
+  double *sig1 = allocSamples(INPUT_LENGTH);
+  sine(sig1, getSinDuration, INPUT_LENGTH);
+
+  // Second tone at half amplitude.
+  double *getSinDuration2 = allocSamples(INPUT_LENGTH);
+  gain(getSinDuration2, input, 2 * PI * 120, INPUT_LENGTH);
+  double *sinsig2 = allocSamples(INPUT_LENGTH);
+  sine(sinsig2, getSinDuration2, INPUT_LENGTH);
+  double *sig2 = allocSamples(INPUT_LENGTH);
+  gain(sig2, sinsig2, 0.5, INPUT_LENGTH);
+
+  // Sum of both tones, plus the same sum delayed by five samples.
+  double *signal = allocSamples(INPUT_LENGTH);
+  add(signal, sig1, sig2, INPUT_LENGTH);
+  double *noise = allocSamples(INPUT_LENGTH);
+  delay(noise, signal, 5, INPUT_LENGTH);
+  double *noisy_sig = allocSamples(INPUT_LENGTH);
+  add(noisy_sig, signal, noise, INPUT_LENGTH);
+
+  double complex *dft_output = malloc(INPUT_LENGTH * sizeof(double complex));
+  if (!dft_output) {
+    perror("Memory allocation failed");
+    exit(EXIT_FAILURE);
+  }
+  dft(dft_output, noisy_sig, INPUT_LENGTH);
+
+  // Magnitude per bin: sqrt(re^2 + im^2).
+  double *fft_real = allocSamples(INPUT_LENGTH);
+  double *fft_img = allocSamples(INPUT_LENGTH);
+  for (int i = 0; i < INPUT_LENGTH; i++) {
+    fft_real[i] = creal(dft_output[i]);
+    fft_img[i] = cimag(dft_output[i]);
+  }
+  double *temp_real = allocSamples(INPUT_LENGTH);
+  double *temp_img = allocSamples(INPUT_LENGTH);
+  double *sq_abs = allocSamples(INPUT_LENGTH);
+  double *magnitude = allocSamples(INPUT_LENGTH);
+  square(temp_real, fft_real, INPUT_LENGTH);
+  square(temp_img, fft_img, INPUT_LENGTH);
+  add(sq_abs, temp_real, temp_img, INPUT_LENGTH);
+  sqrt_array(magnitude, sq_abs, INPUT_LENGTH);
+
+  double *GetThresholdReal = allocSamples(INPUT_LENGTH);
+  threshold(GetThresholdReal, magnitude, 2, INPUT_LENGTH);
+  printf("%f ", GetThresholdReal[0]);
+
+  // Release every buffer, including `magnitude`.
+  free(input); free(getSinDuration); free(sig1);
+  free(getSinDuration2); free(sinsig2); free(sig2);
+  free(signal); free(noise); free(noisy_sig);
+  free(dft_output); free(fft_real); free(fft_img);
+  free(temp_real); free(temp_img); free(sq_abs);
+  free(magnitude); free(GetThresholdReal);
+
+  return 0;
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c
new file mode 100644
index 000000000000..423c6f9586bc
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CCode/voiceActivityDetection.c
@@ -0,0 +1,134 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define PI 3.14159265359
+#define SAMPLE_RATE 1000
+#define INPUT_LENGTH 100
+#define THRESHOLD 0.01
+
+// Function prototypes
+double* getRangeOfVector(double start, int length, double increment);
+void gain(double* output, const double* input, double gainFactor, int length);
+void sine(double* output, const double* input, int length);
+void delay(double* output, const double* input, int length, int delay_steps);
+void add(double* output, const double* input1, const double* input2, int length);
+void threshold(double* output, const double* input, double threshold);
+double zeroCrossCount(const double* input, int length);
+void printArray(const double* array, int length);
+
+// Allocate `length` doubles holding start, start + increment, ...; exits on allocation failure.
+double* getRangeOfVector(double start, int length, double increment) {
+    double* ramp = malloc(length * sizeof(double));
+    if (ramp == NULL) {
+        perror("Memory allocation failed in getRangeOfVector");
+        exit(EXIT_FAILURE);
+    }
+    for (int k = 0; k < length; ++k) {
+        ramp[k] = start + k * increment;
+    }
+    return ramp;
+}
+
+// Scale the first `length` samples of input by gainFactor and store them in output.
+void gain(double* output, const double* input, double gainFactor, int length) {
+    for (int k = 0; k < length; ++k) {
+        output[k] = gainFactor * input[k];
+    }
+}
+
+// Element-wise sine: output[k] = sin(input[k]) for the first `length` samples.
+void sine(double* output, const double* input, int length) {
+    for (int k = 0; k < length; ++k) {
+        output[k] = sin(input[k]);
+    }
+}
+
+// Delay input by delay_steps samples: output[k] = input[k - delay_steps], zero-filled at the start.
+void delay(double* output, const double* input, int length, int delay_steps) {
+    for (int k = 0; k < length; ++k) {
+        if (k < delay_steps) {
+            output[k] = 0.0;
+            continue;
+        }
+        output[k] = input[k - delay_steps];
+    }
+}
+
+// Element-wise sum: output[k] = input1[k] + input2[k] for the first `length` samples.
+void add(double* output, const double* input1, const double* input2, int length) {
+    for (int k = 0; k < length; ++k) {
+        output[k] = input1[k] + input2[k];
+    }
+}
+
+// Zero every sample whose value lies in [-threshold, threshold]; copy the others unchanged.
+// Note: always processes INPUT_LENGTH samples (there is no length parameter).
+void threshold(double* output, const double* input, double threshold) {
+    for (int k = 0; k < INPUT_LENGTH; ++k) {
+        const double sample = input[k];
+        // A NaN sample fails both comparisons and is therefore copied through.
+        const int in_dead_band = (sample >= -threshold && sample <= threshold);
+        output[k] = in_dead_band ? 0.0 : sample;
+    }
+}
+
+// Count sign changes between adjacent samples, returned as a double.
+// Only strict +/- transitions count; a sample equal to 0 never forms a crossing.
+double zeroCrossCount(const double* input, int length) {
+    double crossings = 0.0;
+    for (int k = 1; k < length; ++k) {
+        const double prev = input[k - 1], curr = input[k];
+        if ((prev > 0 && curr < 0) || (prev < 0 && curr > 0)) crossings += 1.0;
+    }
+    return crossings;
+}
+
+// Print the first `length` values as "%f " fields on one line, followed by a newline.
+void printArray(const double* array, int length) {
+    for (int k = 0; k < length; ++k) {
+        printf("%f ", array[k]);
+    }
+    printf("\n");
+}
+
+int main() {
+    // Step 1: Generate Input Vector (getRangeOfVector exits on allocation failure)
+    double* input = getRangeOfVector(0, INPUT_LENGTH, 0.0125);
+    // Allocate every working buffer up front so a failed malloc is caught before any use.
+    double* getSinDuration = malloc(INPUT_LENGTH * sizeof(double));
+    double* signal = malloc(INPUT_LENGTH * sizeof(double));
+    double* noise = malloc(INPUT_LENGTH * sizeof(double));
+    double* noisy_sig = malloc(INPUT_LENGTH * sizeof(double));
+    double* GetThresholdReal = malloc(INPUT_LENGTH * sizeof(double));
+    if (!getSinDuration || !signal || !noise || !noisy_sig || !GetThresholdReal) {
+        perror("Memory allocation failed in main");
+        exit(EXIT_FAILURE);
+    }
+
+    // Step 2: Apply Gain (scales the ramp by 2 * PI * 5 before taking the sine)
+    gain(getSinDuration, input, 2 * PI * 5, INPUT_LENGTH);
+    // Step 3: Compute Sine Wave Signal
+    sine(signal, getSinDuration, INPUT_LENGTH);
+    // Step 4: Generate Delayed Noise Signal (the signal itself, delayed by 5 samples)
+    delay(noise, signal, INPUT_LENGTH, 5);
+    // Step 5: Compute Noisy Signal
+    add(noisy_sig, signal, noise, INPUT_LENGTH);
+    // Step 6: Apply Thresholding
+    threshold(GetThresholdReal, noisy_sig, THRESHOLD);
+    // Step 7: Compute Zero-Crossing Rate (as double)
+    double zcr = zeroCrossCount(GetThresholdReal, INPUT_LENGTH);
+
+    // Print final result
+    printf("%f\n", zcr);
+
+    // Free allocated memory
+    free(input);
+    free(getSinDuration);
+    free(signal);
+    free(noise);
+    free(noisy_sig);
+    free(GetThresholdReal);
+
+    return 0;
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py
new file mode 100644
index 000000000000..283069c90136
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/CountLinesFile.py
@@ -0,0 +1,145 @@
+import os
+import pandas as pd
+current_dir = os.path.dirname(os.path.abspath(__file__))
+folderC = os.path.join(current_dir, 'CCode')
+folderDSL = os.path.join(current_dir, 'DSP-DSL')  # DSL sources; files ending in .py are counted
+folderMatlab = os.path.join(current_dir, 'Matlab')
+# The CSV goes to ./Output (working directory); the per-language reports go to <script dir>/Output.
+os.makedirs('Output', exist_ok=True)
+os.makedirs(os.path.join(current_dir, 'Output'), exist_ok=True)
+output_fileC = os.path.join(current_dir, 'Output', 'NoOfLinesInC.txt')
+output_fileDSL = os.path.join(current_dir, 'Output', 'NoOfLinesInPython.txt')
+output_fileMatlab = os.path.join(current_dir, 'Output', 'NoOfLinesInMatlab.txt')
+
+def count_non_empty_linesInC(file_path):
+    """Count lines that still hold text after dropping blank lines, '//' lines and /* ... */ spans."""
+    code_line_total = 0
+    inside_block_comment = False
+    with open(file_path, 'r') as source:
+        for raw_line in source:
+            text = raw_line.strip()
+            if inside_block_comment:
+                if '*/' not in text:
+                    continue
+                inside_block_comment = False
+                text = text.partition('*/')[2]
+            if text.startswith('//'):
+                continue
+            if '/*' in text:
+                before_open = text.partition('/*')[0]
+                if '*/' in text:
+                    text = before_open + text.partition('*/')[2]
+                else:
+                    inside_block_comment = True
+                    text = before_open
+            if text:
+                code_line_total += 1
+    return code_line_total
+
+def count_valid_code_lines_in_dsl(file_path):
+    """Count the lines that, once stripped, are neither empty nor start with '#'."""
+    with open(file_path, 'r') as source:
+        stripped_lines = (raw_line.strip() for raw_line in source)
+        return sum(
+            1 for text in stripped_lines
+            if text and not text.startswith('#')
+        )
+
+def count_valid_code_lines_in_matlab(file_path):
+    """Count lines that are not blank, not '%' comments and not inside a %{ ... %} block."""
+    total = 0
+    inside_block = False
+    with open(file_path, 'r') as source:
+        for raw_line in source:
+            text = raw_line.strip()
+            if inside_block:
+                # A line ending in '%}' closes the block; it is itself never counted.
+                inside_block = not text.endswith('%}')
+            elif text.startswith('%{'):
+                # Opening marker: this line and the following block lines are skipped.
+                inside_block = True
+            elif text and not text.startswith('%'):
+                total += 1
+    return total
+
+def count_lines_across_languages():
+    """Count code lines per application in the C, DSL and MATLAB folders.
+
+    Returns a dict keyed by file name without its extension, so that foo.c,
+    foo.py and foo.m land in the same entry. Each value is a dict with
+    'lines_in_c', 'lines_in_dsl' and 'lines_in_matlab' (0 when that language
+    has no such file). Keying on the full file name, as before, could never
+    merge languages because the extensions always differ.
+    """
+    # (folder, extension, counting function, result key) per language.
+    languages = [
+        (folderC, '.c', count_non_empty_linesInC, 'lines_in_c'),
+        (folderDSL, '.py', count_valid_code_lines_in_dsl, 'lines_in_dsl'),
+        (folderMatlab, '.m', count_valid_code_lines_in_matlab, 'lines_in_matlab'),
+    ]
+
+    line_counts = {}
+    for folder, extension, count_function, column in languages:
+        if not os.path.exists(folder):
+            continue  # a missing folder contributes no entries
+        for filename in sorted(os.listdir(folder)):
+            file_path = os.path.join(folder, filename)
+            if not (os.path.isfile(file_path) and filename.endswith(extension)):
+                continue
+            # Group by base name so the three languages share one entry.
+            stem = os.path.splitext(filename)[0]
+            entry = line_counts.setdefault(
+                stem, {'lines_in_c': 0, 'lines_in_dsl': 0, 'lines_in_matlab': 0}
+            )
+            entry[column] = count_function(file_path)
+
+    return line_counts
+
+def create_consolidated_table():
+    """Return the line counts of all languages as one pandas DataFrame.
+
+    Columns, in order: 'filename', 'lines_in_dsl', 'lines_in_c' and
+    'lines_in_matlab'; the three count columns are cast to int.
+    """
+
+    count_columns = ['lines_in_dsl', 'lines_in_c', 'lines_in_matlab']
+
+    # One row per key of the counts dict; the key becomes the 'filename' column.
+    table = pd.DataFrame.from_dict(count_lines_across_languages(), orient='index')
+    table = table.reset_index().rename(columns={'index': 'filename'})
+
+    # Fix the column order and replace any missing value with 0.
+    table = table[['filename'] + count_columns].fillna(0)
+
+    # Store the counts as integers.
+    for column in count_columns:
+        table[column] = table[column].astype(int)
+
+    return table
+
+def list_files_and_write_line_counts(folder, output_path, count_function, extension):
+    """Write 'name: <TAB>count' for each regular file in folder whose name ends with extension."""
+    entries = [(entry, os.path.join(folder, entry)) for entry in sorted(os.listdir(folder))]
+    with open(output_path, 'w') as report:
+        for filename, file_path in entries:
+            if os.path.isfile(file_path) and filename.endswith(extension):
+                line_count = count_function(file_path)
+                report.write(f"{filename}: \t{line_count}\n")
+
+if __name__ == "__main__":
+    # Build the table, save it as CSV under ./Output (relative to the working directory), then show it.
+    output_file = os.path.join('Output', 'consolidated_lines_of_code.csv')
+    consolidated_table = create_consolidated_table()
+    consolidated_table.to_csv(output_file, index=False)
+
+    print(consolidated_table)
+    print(f"\nConsolidated table saved to: {output_file}")
+
+    # Also write one plain-text line-count report per language.
+    per_language_reports = (
+        (folderC, output_fileC, count_non_empty_linesInC, '.c'),
+        (folderDSL, output_fileDSL, count_valid_code_lines_in_dsl, '.py'),
+        (folderMatlab, output_fileMatlab, count_valid_code_lines_in_matlab, '.m'),
+    )
+    for report_arguments in per_language_reports:
+        list_files_and_write_line_counts(*report_arguments)
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py
similarity index 71%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py
index 3be61f2b89b7..40081eeefdf2 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFIRFilterDesign.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/FIRFilterDesign.py
@@ -8,7 +8,7 @@ def main() {
   # var a10 = getRangeOfVector(0, 400, 0.000125);
   # var orig = sin(a10);
 
-    var N = 20000001 ;
+	var N_input = 101;
 
   # for cut-off freq
   var pi = 3.14159265359;
@@ -17,6 +17,7 @@ def main() {
   var fc3 = 1000;
   var fc4 = 1200;
   var Fs = 8000;
+  
   var wc1 = 2 * pi * fc1 / Fs; #wc should vary from 0 to pi
   var wc2 = 2 * pi * fc2 / Fs;
   var wc3 = 2 * pi * fc3 / Fs;
@@ -26,17 +27,17 @@ def main() {
   # var lpf = lowPassFIRFilter(wc, N); #ideal low -pass filter
   # var lpf_w = lpf * hamming(N);
   # var lpf_w2 = FIRFilterHammingOptimized(wc, N);
-  var hpf = highPassFIRFilter(wc1, N); #ideal high-pass filter
-  var hpf_w = hpf * hamming(N);
+  var hpf = highPassFIRFilter(wc1, N_input); #ideal high-pass filter
+  var hpf_w = hpf * hamming(N_input);
 
-  var hpf2 = highPassFIRFilter(wc2, N); #ideal high-pass filter
-  var hpf_w2 = hpf2 * hamming(N);
+  var hpf2 = highPassFIRFilter(wc2, N_input); #ideal high-pass filter
+  var hpf_w2 = hpf2 * hamming(N_input);
 
-  var hpf3 = highPassFIRFilter(wc3, N); #ideal high-pass filter
-  var hpf_w3 = hpf3 * hamming(N);
+  var hpf3 = highPassFIRFilter(wc3, N_input); #ideal high-pass filter
+  var hpf_w3 = hpf3 * hamming(N_input);
 
-  var hpf4 = highPassFIRFilter(wc4, N); #ideal high-pass filter
-  var hpf_w4 = hpf4 * hamming(N);
+  var hpf4 = highPassFIRFilter(wc4, N_input); #ideal high-pass filter
+  var hpf_w4 = hpf4 * hamming(N_input);
   # var hpf_w2 = highPassFIRHammingOptimized(wc, N); 
   # print(lpf_w2);
   var final1 = getElemAtIndx(hpf_w , [6]);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py
new file mode 100644
index 000000000000..3f26f9ce3a79
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonClangResultScript.py
@@ -0,0 +1,566 @@
+import os
+import subprocess
+import time
+import sys
+
+
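+# What this script does:
+# Input : the DSL application file name (argv[1]) and an output sub-folder name (argv[2])
+# Output : Hexagon simulator Pcycles for different input sizes, one column per optimization case
+# Steps to run:
+# Open a terminal at the path of the script --
+# Run: python HexagonClangResultScript.py <application>.py <output_subfolder>  # validated with Python 3.11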
+
+# Pseudo-code:
+# Iterate for all the input-size & update the input value in file
+# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize)
+# Run the respective commands on the file
+
+# Path to the input file
+# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification"
+# input_file_name = "speakerIdentification.py"
+# argv[1]: DSL application file name inside OutputScriptPath (required).
+# argv[2]: sub-folder of Output/ for the generated files (optional).
+if len(sys.argv) < 2:
+    sys.exit(f"usage: {sys.argv[0]} <application>.py [output_subfolder]")
+input_file_name = sys.argv[1]
+
+# NOTE: machine-specific checkout location; adjust when running elsewhere.
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = (
+    "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/"
+)
+input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name
+
+print(f"Running Application {input_file_path}")
+
+# Construct full output path. The sub-folder argument may be omitted or empty;
+# indexing sys.argv[2] unconditionally raised IndexError when it was omitted.
+output_subfolder = sys.argv[2] if len(sys.argv) > 2 else ""
+if output_subfolder:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", output_subfolder)
+else:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+# Create the Output folder if it does not exist yet
+os.makedirs(OutputPath, exist_ok=True)
+
+# Now OutputPath is ready for use
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+
+# ************ Do not change unless required
+# Default input sizes: label printed in the results -> element count patched into the DSL file
+inputValues = {
+    "10": 10,
+    "100": 100,
+    "500": 500,
+    "1K": 1000,
+    "2K": 2000,
+    "5K": 5000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+
+    
+    #"10M": 10000000,
+    #"20M": 20000000,
+    #"30M": 30000000,
+    #"40M": 40000000,
+    #"50M": 50000000,
+    #"100M": 100000000,
+    # "1B": 1000000000
+}
+
+if sys.argv[1] == "noiseCancellation.py":  # per-application override of the size list; up to 40K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        }
+    
+elif sys.argv[1] == "echoCancellation.py":  # up to 30K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        }
+
+elif sys.argv[1] == "periodogram.py":  # up to 1K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        }
+
+elif sys.argv[1] == "lowPassFiltering.py":  # up to 50K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "hearingAid.py":  # up to 50K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+elif sys.argv[1] == "FIRFilterDesign.py":  # up to 50K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "spectralAnalysis.py":  # up to 2K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "audioEqualization.py":  # up to 50K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "audioCompression.py":  # up to 2K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "vibrationAnalysis.py":  # up to 2K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "underWaterCommunication.py":  # up to 1M
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+elif sys.argv[1] == "voiceActivityDetection.py":  # up to 100K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+
+
+elif sys.argv[1] == "signalSmoothing.py":  # up to 1M
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+
+elif sys.argv[1] == "targetDetection.py":  # up to 100K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+
+
+elif sys.argv[1] == "biomedicalSignalProcessing.py":  # up to 100K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+        
+
+elif sys.argv[1] == "digitalModulation.py":  # up to 1M
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+
+elif sys.argv[1] == "spaceCommunication.py":  # up to 1M
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+elif sys.argv[1] == "radarSignalProcessing.py":  # up to 100K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+        
+        
+elif sys.argv[1] == "speakerIdentification.py":  # up to 50K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "dtmfDetection.py":  # up to 2K
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+NoOfIterations = 1  # simulator runs per (input size, case); divisor for the averaged host time
+
+# Command templates. commands_base[0] runs dsp1 on the DSL file with -emit=llvm-hexagonv68.
+commands_base = [
+    # "./dsp1 lowPassFull.py -emit=mlir-affine",
+    # f"./dsp1 {input_file_path} -emit=llvm",
+    f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm-hexagonv68",
+    # "clang-17 -O0 file.ll -o fileexe -lm",
+]
+
+# clang = f"{BasePathForLLVM}/build/bin/clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link"
+# Compile command; LL_FILE_PATH and OUT_FILE_PATH are placeholders replaced per case in the main loop.
+clang = f"/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link"
+
+# Cases to benchmark. Per case: which flags (-affineOpt / -canonOpt) are appended to the dsp1 command, the .ll file name ("suffix") and the column label ("exe").
+cases = [
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": False,
+    #     "suffix": "fileNoOpt.ll",
+    #     "exe": "fileNoOptExe",
+    # },
+    
+    #HWISOO: Temporary, for debugging
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": False,
+    #     "suffix": "fileNoOpt.ll",
+    #     "exe": "fileNoOptExe",
+    # },
+
+    {
+        "affineOpt": True,
+        "canonOpt": False,
+        "suffix": "fileAffineOpt.ll",
+        "exe": "fileAffineOptExe",
+    },
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": True,
+    #     "suffix": "fileOnlyCanonOpt.ll",
+    #     "exe": "fileOnlyCanonOptExe",
+    # },
+    {
+        "affineOpt": True,
+        "canonOpt": True,
+        "suffix": "fileAffineCanonOpt.ll",
+        "exe": "fileAffineCanonOptExe",
+    },
+]
+
+
+
+print(input_file_path + " with hexagon clang")
+
+# Read the input file (the main loop reads it again before each rewrite)
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+# Header row: an empty first cell, then one tab-separated column label per case.
+print("", end="\t")
+for case in cases:
+    print(f"{case['exe']}", end="\t")
+    
+for key, value in inputValues.items():
+    # dur is written into "var duration"; 8192 is presumably the sample rate in Hz — TODO confirm
+    dur = value / 8192
+    print(f"\n{key}", end="\t")
+
+    with open(input_file_path, "r") as file:
+        lines = file.readlines()
+    # Rewrite the DSL source in place with the size-dependent lines patched for this input size.
+    with open(input_file_path, "w") as file:
+        for line in lines:
+            if line.strip().startswith("var input = getRangeOfVector("):
+                if input_file_name in ["audioCompression.py", "audioEqualization.py",  "periodogram.py", "spectralAnalysis.py"]:
+                    updated_line = (
+                        f"\tvar input = getRangeOfVector(0, {value}, 1);\n"
+                    )
+                elif input_file_name in ["voiceActivityDetection.py"]:
+                    updated_line = (
+                        f"\tvar input = getRangeOfVector(0, {value}, 0.125);\n"
+                    )
+                else:
+                    updated_line = (
+                        f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n"
+                    )
+                # Keep the branches above as one if/elif/else chain: as two separate ifs, the else overwrote the step-1 line.
+                file.write(updated_line)
+            elif line.strip().startswith("var duration ="):
+                updated_line = f"\tvar duration = {dur};\n"
+                file.write(updated_line)
+            elif line.strip().startswith("var N_input ="):
+                updated_line = f"\tvar N_input = {value+1};\n"
+                file.write(updated_line)
+            elif line.strip().startswith("var frequencies = fftfreq"):
+                updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n"
+                file.write(updated_line)
+            else:
+                file.write(line)
+
+
+
+    # Iterate through the cases and run the commands
+    for case in cases:
+        command_llvm = commands_base[0]
+        if case["affineOpt"]:
+            command_llvm += " -affineOpt"
+        if case["canonOpt"]:
+            command_llvm += " -canonOpt"
+        # command_llvm += f" 2> {case['suffix']}" #OutputPath
+
+
+
+        # stderr of dsp1 is redirected into the .ll file that the compile command reads next.
+        ll_file_path = f"{OutputPath}/{case['suffix']}"
+        command_llvm += f" 2> {OutputPath}/{case['suffix']}"  # OutputPath
+
+        out_file_path = ll_file_path.replace(".ll", ".out")
+        clang_command = clang.replace("LL_FILE_PATH", ll_file_path)
+        clang_command = clang_command.replace("OUT_FILE_PATH", out_file_path)
+
+        # Remove stale outputs first, then generate the IR and compile it.
+        commands = [
+            "rm "+ll_file_path,
+            "rm "+out_file_path,
+            command_llvm,
+            clang_command,
+
+        ]
+
+        # Run the commands in order; their return codes are not checked here.
+        for command in commands:
+            # Run the commands for the current case
+            result = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+
+
+        sum_exe_time = 0
+        for i in range(0, NoOfIterations):
+            #NOTE: for simulation environment, we don't need to take care of caching impact on host
+
+            # The command to be executed
+            # command2 = "./fileexe"
+            # Limit execution to a single core
+            # command2 = "taskset -c 0 ./fileexe"
+            # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath}
+            # command2 = f"taskset -c 0 ./Output/{case['exe']}"
+
+            command2 = "/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-sim --mv68 OUT_FILE_PATH".replace("OUT_FILE_PATH", out_file_path)
+
+
+
+            # Record the start time
+            start_time = time.time()
+
+
+            pcycle = "failed"
+            # Execute the command
+            try:
+                result = subprocess.run(
+                    command2,
+                    shell=True,
+                    #stdout=subprocess.DEVNULL,
+                    #stdout="test_stdout",
+                    #stderr="test_stderr",
+                    #stderr=subprocess.DEVNULL,
+                    capture_output = True,
+                    text = True,
+                    check=True,
+                )
+                # Parse the cycle count that follows "Pcycles=" on stderr. If the marker is absent,
+                # keep "failed" rather than aborting the whole sweep with an IndexError.
+                sim_report = result.stderr.split("Pcycles=")
+                if len(sim_report) > 1:
+                    pcycle = sim_report[1].replace("\n","").replace(" ","").replace("\t","")
+            except subprocess.CalledProcessError as exc:
+                print(
+                    f"Process failed because did not return a successful return code. "
+                    f"Returned {exc.returncode}\n{exc}"
+                )
+
+
+            # Record the end time
+            end_time = time.time()
+
+            # Calculate the elapsed time
+            execution_time = end_time - start_time
+            sum_exe_time = sum_exe_time + execution_time
+            # print("{}".format(execution_time), end="\t")
+        avg_exe_time = sum_exe_time / NoOfIterations
+        # print(pcycle + "/" + "{}".format(round(avg_exe_time, 4)), end="\t")
+        print(pcycle, end="\t")
+        
+        
+    # print(f"The command took {execution_time} seconds to execute.")
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py
new file mode 100644
index 000000000000..0de93a9a3ac3
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/HexagonResultScript.py
@@ -0,0 +1,566 @@
+import os
+import subprocess
+import time
+import sys
+
+
+# The script does the following
+# Input : filename.py (argv[1]) and an output sub-directory name (argv[2])
+# Output : Pcycles printed for each input size and optimization case :
+# Steps to run:
+# Open a terminal at the path of the script --
+# Run: python HexagonResultScript.py <app>.py <output-subdir> #3.11 validated
+
+# Pseudo-code:
+# Iterate for all the input-size & update the input value in file
+# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize)
+# Run the respective commands on the file
+
+# Path to the input file
+# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification"
+# input_file_name = "speakerIdentification.py"
+input_file_name = sys.argv[1]
+
+
+
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = (
+    "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/"
+)
+input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name
+
+print(f"Running Application {input_file_path}")
+# Construct the full output path: Output/<argv[2]> when a sub-directory name is
+# given, plain Output/ otherwise. The length check keeps a missing argv[2] from
+# raising IndexError, so the fallback branch is actually reachable.
+if len(sys.argv) > 2 and sys.argv[2]:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2])
+else:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+
+# Create the Output folder (and any missing parents). exist_ok=True makes this
+# a no-op when it already exists, without a separate os.path.exists() check.
+os.makedirs(OutputPath, exist_ok=True)
+
+
+
+# Now OutputPath is ready for use
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+
+# ************ Don't change unless required
+# Define the values dictionary
+inputValues = {
+    "10": 10,
+    "100": 100,
+    "500": 500,
+    "1K": 1000,
+    "2K": 2000,
+    "5K": 5000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+
+    
+    #"10M": 10000000,
+    #"20M": 20000000,
+    #"30M": 30000000,
+    #"40M": 40000000,
+    #"50M": 50000000,
+    #"100M": 100000000,
+    # "1B": 1000000000
+}
+
+if sys.argv[1] == "noiseCancellation.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        }
+    
+elif sys.argv[1] == "echoCancellation.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        }
+
+elif sys.argv[1] == "periodogram.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        }
+
+elif sys.argv[1] == "lowPassFiltering.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "hearingAid.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+elif sys.argv[1] == "FIRFilterDesign.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "spectralAnalysis.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "audioEqualization.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "audioCompression.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "vibrationAnalysis.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+
+
+elif sys.argv[1] == "underWaterCommunication.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+elif sys.argv[1] == "voiceActivityDetection.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+
+
+elif sys.argv[1] == "signalSmoothing.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+
+elif sys.argv[1] == "targetDetection.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+
+
+elif sys.argv[1] == "biomedicalSignalProcessing.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+        
+
+elif sys.argv[1] == "digitalModulation.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+
+elif sys.argv[1] == "spaceCommunication.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        }
+
+
+elif sys.argv[1] == "radarSignalProcessing.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        }
+        
+        
+elif sys.argv[1] == "speakerIdentification.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        "5K": 5000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        }
+
+
+elif sys.argv[1] == "dtmfDetection.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "500": 500,
+        "1K": 1000,
+        "2K": 2000,
+        }
+NoOfIterations = 1
+
+# --------------------------------------------------
+commands_base = [
+    # "./dsp1 lowPassFull.py -emit=mlir-affine",
+    # f"./dsp1 {input_file_path} -emit=llvm",
+    f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm-hexagonv68",
+    # "clang-17 -O0 file.ll -o fileexe -lm",
+]
+
+clang = f"{BasePathForLLVM}/build/bin/clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link"
+
+# clang = f"/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang LL_FILE_PATH -O3 -o OUT_FILE_PATH --target=hexagon -mcpu=hexagonv68 -fuse-ld=/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-link"
+
+# Define the cases
+cases = [
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": False,
+    #     "suffix": "fileNoOpt.ll",
+    #     "exe": "fileNoOptExe",
+    # },
+    
+    #HWISOO: Temporary, for debugging
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": False,
+    #     "suffix": "fileNoOpt.ll",
+    #     "exe": "fileNoOptExe",
+    # },
+
+    {
+        "affineOpt": True,
+        "canonOpt": False,
+        "suffix": "fileAffineOpt.ll",
+        "exe": "fileAffineOptExe",
+    },
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": True,
+    #     "suffix": "fileOnlyCanonOpt.ll",
+    #     "exe": "fileOnlyCanonOptExe",
+    # },
+    {
+        "affineOpt": True,
+        "canonOpt": True,
+        "suffix": "fileAffineCanonOpt.ll",
+        "exe": "fileAffineCanonOptExe",
+    },
+]
+
+
+
+print(input_file_path)
+
+# Read the input file
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+
+print("", end="\t")
+for case in cases:
+    print(f"{case['exe']}", end="\t")
+    
+for key, value in inputValues.items():
+    value2 = 1 / value
+    dur = value / 8192
+    print(f"\n{key}", end="\t")
+
+    # Re-read the application source so each size is patched into the current text.
+    with open(input_file_path, "r") as file:
+        lines = file.readlines()
+
+    # Rewrite the file in place, replacing only the size-dependent declarations.
+    with open(input_file_path, "w") as file:
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith("var input = getRangeOfVector("):
+                # The step size depends on the application. This must be a single
+                # if/elif/else chain: with two separate ifs the trailing else
+                # overwrites the step of 1 chosen for the first group.
+                if input_file_name in ["audioCompression.py", "audioEqualization.py",  "periodogram.py", "spectralAnalysis.py"]:
+                    step = "1"
+                elif input_file_name in ["voiceActivityDetection.py"]:
+                    step = "0.125"
+                else:
+                    step = "0.000125"
+                updated_line = f"\tvar input = getRangeOfVector(0, {value}, {step});\n"
+                file.write(updated_line)
+            elif stripped.startswith("var duration ="):
+                updated_line = f"\tvar duration = {dur};\n"
+                file.write(updated_line)
+            elif stripped.startswith("var N_input ="):
+                updated_line = f"\tvar N_input = {value+1};\n"
+                file.write(updated_line)
+            elif stripped.startswith("var frequencies = fftfreq"):
+                updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n"
+                file.write(updated_line)
+            else:
+                file.write(line)
+
+    
+
+    # Iterate through the cases and run the commands
+    for case in cases:
+        command_llvm = commands_base[0]
+        if case["affineOpt"]:
+            command_llvm += " -affineOpt"
+        if case["canonOpt"]:
+            command_llvm += " -canonOpt"
+        # command_llvm += f" 2> {case['suffix']}" #OutputPath
+        
+        
+
+        
+        ll_file_path = f"{OutputPath}/{case['suffix']}"
+        command_llvm += f" 2> {OutputPath}/{case['suffix']}"  # OutputPath
+        
+        out_file_path = ll_file_path.replace(".ll", ".out")
+        clang_command = clang.replace("LL_FILE_PATH", ll_file_path)
+        clang_command = clang_command.replace("OUT_FILE_PATH", out_file_path)
+        
+
+        commands = [
+            "rm "+ll_file_path,
+            "rm "+out_file_path,
+            command_llvm,
+            clang_command,
+            
+        ]
+
+        # Iterate over each value and perform the necessary operations
+        for command in commands:
+            # Run the commands for the current case
+            result = subprocess.run(command, shell=True, capture_output=True, text=True)
+            
+            
+
+        sum_exe_time = 0
+        for i in range(0, NoOfIterations):
+            #NOTE: in the simulation environment we don't need to account for caching effects on the host
+
+            # The command to be executed
+            # command2 = "./fileexe"
+            # Limit execution to a single core
+            # command2 = "taskset -c 0 ./fileexe"
+            # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath}
+            # command2 = f"taskset -c 0 ./Output/{case['exe']}"
+            
+            command2 = "/local/mnt/workspace/Qualcomm/Hexagon_SDK/6.2.0.1/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-sim --mv68 OUT_FILE_PATH".replace("OUT_FILE_PATH", out_file_path)
+            
+            
+
+            # Record the start time
+            start_time = time.time()
+
+
+            # Value shown in the results table when no cycle count is obtained.
+            pcycle = "failed"
+            # Execute the command; the cycle count is parsed from its stderr.
+            try:
+                result = subprocess.run(
+                    command2,
+                    shell=True,
+                    capture_output = True,
+                    text = True,
+                    check=True,
+                )
+                # Keep the text after the first "Pcycles=" marker (up to the next
+                # marker, if any) with newlines, spaces and tabs removed. If the
+                # marker is absent, pcycle stays "failed" instead of raising IndexError.
+                fields = result.stderr.split("Pcycles=")
+                if len(fields) > 1:
+                    pcycle = fields[1].replace("\n","").replace(" ","").replace("\t","")
+            except subprocess.CalledProcessError as exc:
+                print(
+                    f"Process failed because did not return a successful return code. "
+                    f"Returned {exc.returncode}\n{exc}"
+                )
+            
+            
+
+            
+            # Record the end time
+            end_time = time.time()
+
+            # Calculate the elapsed time
+            execution_time = end_time - start_time
+            sum_exe_time = sum_exe_time + execution_time
+            # print("{}".format(execution_time), end="\t")
+        avg_exe_time = sum_exe_time / NoOfIterations
+        # print(pcycle + "/" + "{}".format(round(avg_exe_time, 4)), end="\t")
+        print(pcycle, end="\t")
+        
+        
+    # print(f"The command took {execution_time} seconds to execute.")
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py
new file mode 100644
index 000000000000..2a5cdeb9e708
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/ResultScript.py
@@ -0,0 +1,574 @@
+import os
+import subprocess
+import time
+import sys
+
+
+# The script does the following
+# Input : filename.py (argv[1]) and an output sub-directory name (argv[2])
+# Output : average execution time for each input size and optimization case :
+# Steps to run:
+# Open a terminal at the path of the script --
+# Run: python ResultScript.py <app>.py <output-subdir> #3.11 validated
+
+# Pseudo-code:
+# Iterate for all the input-size & update the input value in file
+# Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize)
+# Run the respective commands on the file
+
+# Path to the input file
+# Apps = "hearingAid.py" , "lowPassFull.py" , " audioCompression.py", "lowPassFIRFilterDesign.py" , "EnergyOfSignal.py", "periodogram2Conv1.py", "audioEqualizer.py", "vibrationAnalysis.py", "signalSmoothing.py", "targetDetection.py", "biomedicalSignalProcessing.py", "spaceCommunication.py", "echocancelling", "noisecancelling.py", "digitalModulation", "underWaterCommunication", "voiceActivityDetection", "radarSignalProcessing", "speakerIdentification"
+# input_file_name = "speakerIdentification.py"
+input_file_name = sys.argv[1]
+
+
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = (
+    "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/"
+)
+input_file_path = BasePathForLLVM + OutputScriptPath + input_file_name
+
+print(f"Running Application {input_file_path}")
+# Construct the full output path: Output/<argv[2]> when a sub-directory name is
+# given, plain Output/ otherwise. The length check keeps a missing argv[2] from
+# raising IndexError, so the fallback branch is actually reachable.
+if len(sys.argv) > 2 and sys.argv[2]:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output", sys.argv[2])
+else:
+    OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+
+# Create the Output folder (and any missing parents). exist_ok=True makes this
+# a no-op when it already exists, without a separate os.path.exists() check.
+os.makedirs(OutputPath, exist_ok=True)
+
+
+# Now OutputPath is ready for use
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+
+# ************ Don't change unless required
+# Define the values dictionary
+
+inputValues = {
+    "10": 10,
+    "100": 100,
+    "500": 500,
+    "1K": 1000,
+    "2K": 2000,
+    "5K": 5000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+    # "10M": 10000000,
+    # "20M": 20000000,
+    # "30M": 30000000,
+    # "40M": 40000000,
+    # "50M": 50000000,
+    # "100M": 100000000,
+    # "1B": 1000000000
+}
+
+if sys.argv[1] == "noiseCancellation.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "echoCancellation.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "periodogram.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+    }
+
+elif sys.argv[1] == "lowPassFiltering.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "hearingAid.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "FIRFilterDesign.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "spectralAnalysis.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+    }
+
+elif sys.argv[1] == "audioEqualization.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "audioCompression.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+    }
+
+elif sys.argv[1] == "vibrationAnalysis.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+    }
+
+elif sys.argv[1] == "underWaterCommunication.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "voiceActivityDetection.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "signalSmoothing.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "targetDetection.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "biomedicalSignalProcessing.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "digitalModulation.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "spaceCommunication.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "radarSignalProcessing.py":
+    inputValues = {
+        "10": 10,
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+        "10M": 10000000,
+        "20M": 20000000,
+        "30M": 30000000,
+        "40M": 40000000,
+        "50M": 50000000,
+        "100M": 100000000,
+    }
+
+elif sys.argv[1] == "dtmfDetection.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+    }
+
+elif sys.argv[1] == "speakerIdentification.py":
+    inputValues = {
+        "100": 100,
+        "1K": 1000,
+        "10K": 10000,
+        "20K": 20000,
+        "30K": 30000,
+        "40K": 40000,
+        "50K": 50000,
+        "100K": 100000,
+        "1M": 1000000,
+    }
+
+NoOfIterations = 3
+
+# --------------------------------------------------
+commands_base = [
+    # "./dsp1 lowPassFull.py -emit=mlir-affine",
+    # f"./dsp1 {input_file_path} -emit=llvm",
+    f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm",
+    # "clang-17 -O0 file.ll -o fileexe -lm",
+]
+
+# Define the cases
+cases = [
+    # {
+    #     "affineOpt": False,
+    #     "canonOpt": False,
+    #     "suffix": "fileNoOpt.ll",
+    #     "exe": "fileNoOptExe",
+    # },
+    {
+        "affineOpt": True,
+        "canonOpt": False,
+        "suffix": "fileAffineOpt.ll",
+        "exe": "fileAffineOptExe",
+    },
+    {
+        "affineOpt": True,
+        "canonOpt": True,
+        "suffix": "fileAffineCanonOpt.ll",
+        "exe": "fileAffineCanonOptExe",
+    },
+]
+
+# Read the input file
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+
+print("", end="\t")
+for case in cases:
+    print(f"{case['exe']}", end="\t")
+
+for key, value in inputValues.items():
+    value2 = 1 / value
+    dur = value / 8192
+    print(f"\n{key}", end="\t")
+
+    with open(input_file_path, "r") as file:
+        lines = file.readlines()
+
+    # Rewrite the file in place, replacing only the size-dependent declarations.
+    with open(input_file_path, "w") as file:
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith("var input = getRangeOfVector("):
+                # Step size per application; one if/elif/else chain so the final
+                # else cannot overwrite the step of 1 chosen for the first group.
+                if input_file_name in [
+                    "audioCompression.py",
+                    "audioEqualization.py",
+                    "periodogram.py",
+                    "spectralAnalysis.py",
+                ]:
+                    step = "1"
+                elif input_file_name in ["voiceActivityDetection.py"]:
+                    step = "0.125"
+                else:
+                    step = "0.000125"
+                updated_line = f"\tvar input = getRangeOfVector(0, {value}, {step});\n"
+                file.write(updated_line)
+            elif stripped.startswith("var duration ="):
+                updated_line = f"\tvar duration = {dur};\n"
+                file.write(updated_line)
+            elif stripped.startswith("var N_input ="):
+                updated_line = f"\tvar N_input = {value+1};\n"
+                file.write(updated_line)
+            elif stripped.startswith("var frequencies = fftfreq"):
+                updated_line = f"\tvar frequencies = fftfreq({value}, 0.000122);\n"
+                file.write(updated_line)
+            else:
+                file.write(line)
+
+    # Iterate through the cases and run the commands
+    for case in cases:
+        command_llvm = commands_base[0]
+        if case["affineOpt"]:
+            command_llvm += " -affineOpt"
+        if case["canonOpt"]:
+            command_llvm += " -canonOpt"
+        # command_llvm += f" 2> {case['suffix']}" #OutputPath
+        command_llvm += f" 2> {OutputPath}/{case['suffix']}"  # OutputPath
+
+        commands = [
+            command_llvm,
+            # f"clang-17 -O0 {case['suffix']} -o fileexe -lm",
+            f"{BasePathForLLVM}/build/bin/clang-19 -O3 {OutputPath}/{case['suffix']} -o {OutputPath}/{case['exe']} -lm",
+        ]
+        # print(case,end="\n")
+        # print("\n")
+
+        # Iterate over each value and perform the necessary operations
+        for command in commands:
+            # Run the commands for the current case
+            result = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+        sum_exe_time = 0
+        for i in range(0, NoOfIterations):
+            # for command in commands:
+            #     # print("running command {}".format(command))
+            #     # os.system(command)
+            #     result = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+            # Clear the cache to minimize caching effects (sync, then write 3 to
+            # drop_caches). Best effort: a failure is printed and the run continues.
+            try:
+                subprocess.run(
+                    "sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'",
+                    shell=True,
+                    check=True,
+                )
+            except subprocess.CalledProcessError as exc:
+                # subprocess.run() has already waited for the child, and no
+                # result object is bound on failure, so there is nothing to terminate.
+                print(exc)
+            # The command to be executed: the binary that clang just wrote to
+            # {OutputPath}/<exe>, pinned to a single core with taskset.
+            # OutputPath is used rather than "./Output/<argv[2]>" so the run
+            # does not depend on the current working directory or on argv[2]
+            # having been supplied.
+            command2 = f"taskset -c 0 {OutputPath}/{case['exe']}"
+
+            # Record the start time
+            start_time = time.time()
+
+            # Execute the command
+            try:
+                subprocess.run(
+                    command2,
+                    shell=True,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    check=True,
+                )
+                # subprocess.run(command2, shell=True)
+            except subprocess.CalledProcessError as exc:
+                print(
+                    f"Process failed because did not return a successful return code. "
+                    f"Returned {exc.returncode}\n{exc}"
+                )
+
+            # Record the end time
+            end_time = time.time()
+
+            # Calculate the elapsed time
+            execution_time = end_time - start_time
+            sum_exe_time = sum_exe_time + execution_time
+            # print("{}".format(execution_time), end="\t")
+        avg_exe_time = sum_exe_time / NoOfIterations
+        print("{}".format(avg_exe_time), end="\t")
+    # print(f"The command took {execution_time} seconds to execute.")
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py
new file mode 100644
index 000000000000..f58f706418ce
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunHexagon.py
@@ -0,0 +1,57 @@
+import os
+import subprocess
+
+# Benchmark applications; each name maps to an "<app_name>.py" source file.
+app_names = [
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication",
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection"
+]
+
+
+def run_suite(driver_script, log_dir):
+    """Run driver_script once per application, teeing its output to a log.
+
+    For every entry in app_names this launches
+    ``python <driver_script> <app_name>.py <app_name>`` and copies each line
+    the child prints to both the console and ``<log_dir>/<app_name>.log``.
+    """
+    os.makedirs(log_dir, exist_ok=True)
+    for app_name in app_names:
+        app_script = f"{app_name}.py"
+        log_file = os.path.join(log_dir, f"{app_name}.log")
+        command = ["python", driver_script, app_script, app_name]
+        print(f"Running command: {' '.join(command)}")
+        # stderr is folded into stdout so a single pipe is drained. Reading
+        # two separate pipes one after the other can deadlock: the child
+        # blocks once the unread stderr pipe fills while we wait on stdout.
+        with open(log_file, "w") as log, subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        ) as process:
+            for line in process.stdout:
+                print(line, end="")
+                log.write(line)
+
+
+# Pass 1: HexagonResultScript.py, logs in LClanglogs.
+run_suite("HexagonResultScript.py", "LClanglogs")
+
+# Pass 2: HexagonClangResultScript.py, logs in HClanglogs.
+run_suite("HexagonClangResultScript.py", "HClanglogs")
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py
new file mode 100644
index 000000000000..82f82c941ae9
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/RunResults.py
@@ -0,0 +1,47 @@
+import os
+import subprocess
+
+# Directory that receives one "<app_name>.log" file per application.
+log_dir = "ServerExeLogs"
+os.makedirs(log_dir, exist_ok=True)
+
+# Benchmark applications; each name maps to an "<app_name>.py" source file.
+app_names = [
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication",
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection"
+]
+
+# Run ResultScript.py once per application, copying every line the child
+# prints to both the console and the application's log file.
+for app_name in app_names:
+    app_script = f"{app_name}.py"
+    log_file = os.path.join(log_dir, f"{app_name}.log")
+    command = ["python", "ResultScript.py", app_script, app_name]
+    print(f"Running command: {' '.join(command)}")
+    # stderr is folded into stdout so a single pipe is drained. Reading two
+    # separate pipes one after the other can deadlock: the child blocks once
+    # the unread stderr pipe fills while we wait on stdout.
+    with open(log_file, "w") as log, subprocess.Popen(
+        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    ) as process:
+        for line in process.stdout:
+            print(line, end="")
+            log.write(line)
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py
similarity index 84%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py
index 0b190a877c9a..2702463cc262 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioCompression.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioCompression.py
@@ -9,7 +9,7 @@ def main() {
     
     # var a10 = [ 3.2, 1.5,  0.8, 2.9,  4.5,10 , 0,5,5.5, 1.1];
     # var a10 = getRangeOfVector(3.2, 10, 1);
-    var input = getRangeOfVector(0, 20000, 10);
+	var input = getRangeOfVector(0, 10, 1);
     var nlevels = 16; #powerOf2
     var min = 0;
     var max = 8;
@@ -25,20 +25,20 @@ def main() {
     #Threshold
     var GetThresholdReal = threshold(fft10real , threshold);
     var GetThresholdImg = threshold(fft10img , threshold);
-    # print(GetThresholdReal);
     # print(GetThresholdImg);
     #Quant
     var QuantOutReal = quantization(GetThresholdReal , nlevels, max, min);
     var QuantOutImg = quantization(GetThresholdImg , nlevels, max, min);
-
-    print(QuantOutReal);
-    print(QuantOutImg);
+    # print(QuantOutImg);
     #RLE
     var rLEOutReal = runLenEncoding(QuantOutReal);
     var rLEOutImg = runLenEncoding(QuantOutImg);
-
     # print(rLEOutReal);
     # print(rLEOutImg);
+    var final1 = getElemAtIndx(rLEOutReal , [0]);
+    var final2 = getElemAtIndx(rLEOutImg , [1]); 
+    print(final1);
+    print(final2);
 
 }
 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py
similarity index 92%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py
index 0dcea71954f9..3d2312572b09 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/audioEqualizer.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/audioEqualization.py
@@ -5,7 +5,7 @@
 def main() {
 
   # var input = [1,2,3,4,5];
-	var input = getRangeOfVector(0, 5000000, 1);
+	var input = getRangeOfVector(0, 10, 1);
   var pi = 3.14159265359;
   var fc = 300;
   var Fs = 8000;
@@ -32,15 +32,12 @@ def main() {
   var lpf2 = lowPassFIRFilter(wc2, N);
   var lpf2_w = lpf2 * hamming(N);
   # var bpf = lpf2 - lpf;
-  var bpf_w = lpf2_w - lpf_w;
+  var bpf_w = sub(lpf2_w,lpf_w);
   var FIRfilterResponseForBpf = FIRFilterResponse(input, bpf_w);
   var gainWithBpf = gain(FIRfilterResponseForBpf , gainForTreble);
-
-  
   var final_audio = gainWithLpf + gainWithHpf + gainWithBpf ;
   var final1 = getElemAtIndx(final_audio , [3]); 
   print(final1);
-  # print(final_audio);
 }
 
   
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py
new file mode 100644
index 000000000000..b6e9106927e7
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/biomedicalSignalProcessing.py
@@ -0,0 +1,56 @@
+def main() { # Band-pass filters a synthetic two-tone signal, searches it for peaks and prints avg_hr.
+    var fc1 = 1000; # feeds wc1 (first lowPassFIRFilter)
+    var fc2 = 7500; # feeds wc2 (second lowPassFIRFilter)
+    var Fs = 8000; # divisor in wc1/wc2 and factor in avg_hr
+    var N = 101; # length argument of lowPassFIRFilter and hamming
+    var distance = 950; # third argument of find_peaks
+	var input = getRangeOfVector(0, 1000, 0.000125);
+    # print(input);
+    var pi = 3.14159265359;
+    var f_sig = 500;
+    var getMultiplier = 2 * pi * f_sig;
+    # print(getMultiplier);
+    var getSinDuration = gain(input, getMultiplier);
+    # print(getSinDuration);
+    var clean_sig = sin(getSinDuration );
+
+    var f_noise = 3000;
+    var getNoiseSinDuration = gain(input, 2 * pi * f_noise);
+    var noise = sin(getNoiseSinDuration);
+    var noise1 = gain(noise, 0.5);
+
+    var noisy_sig = clean_sig + noise1;
+    # Step 1: FIR Bandpass Filter
+    var wc1 = 2 * pi * fc1 / Fs; #wc should vary from 0 to pi
+    var lpf1 = lowPassFIRFilter(wc1, N); #ideal low -pass filter
+    var lpf1_w = lpf1 * hamming(N);
+
+    var wc2 = 2 * pi * fc2 / Fs;
+    var lpf2 = lowPassFIRFilter(wc2, N);
+    var lpf2_w = lpf2 * hamming(N);
+
+    # band-pass taps = wider windowed low-pass minus narrower windowed low-pass
+    var bpf_w = sub(lpf2_w,lpf1_w);
+    var FIRfilterResponseForBpf = FIRFilterResponse(noisy_sig, bpf_w);
+
+    # Step 2: Artifact Removal (R-peak detection)
+    var max_signal = max(FIRfilterResponseForBpf);
+
+    var height = 0.3 * max_signal;
+
+    var r_peaks = find_peaks(FIRfilterResponseForBpf, height, distance);
+
+    var len_r_peaks = len(r_peaks);
+    var last_peaks_index = sub(len_r_peaks, [1]); # NOTE(review): literal is [1] here but a bare 1 in the sub() call below - confirm both forms are intended
+    var peaks_count = getSingleElemAtIndx(r_peaks, last_peaks_index); # NOTE(review): presumably find_peaks stores the peak count in its last element - confirm
+
+    var diff_val = diff(r_peaks, peaks_count);
+    var peaks_count_minus_one = sub(peaks_count, 1);
+    var diff_mean = mean(diff_val, peaks_count_minus_one);
+
+    var avg_hr = (60 * Fs) / diff_mean; # presumably a per-minute rate if diff_mean is in samples - confirm units
+
+    print(avg_hr);
+
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py
new file mode 100644
index 000000000000..831b75b995e8
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/convolutionthm.py
@@ -0,0 +1,37 @@
+def main() { # Pads a and b, takes their FFTs, multiplies them as complex values and prints ifft1d of the product.
+    var a = getRangeOfVector(0, 100, 1);
+    var b = getRangeOfVector(0, 100, 2);
+    # var a = [1,2,3,4];
+    # var b = [2,3,4,5];
+
+    # print(a);
+
+    var ra = padding(a, 0, 99); # NOTE(review): presumably appends 99 zeros so the length is 100 + 100 - 1 - confirm padding() semantics
+    var rb = padding(b, 0, 99);
+    
+    # print(ra);
+
+    var x1 = fft1dreal(ra); # real part, first operand
+    var y1 = fft1dimg(ra); # imaginary part, first operand
+    var x2 = fft1dreal(rb); # real part, second operand
+    var y2 = fft1dimg(rb); # imaginary part, second operand
+
+    # # print(x1);
+    # # print(y1);
+    # # print(x2);
+    # # print(y2);
+
+    var tempreal = x1 * x2; # real * real term
+    var negreal = y1 * y2; # imag * imag term, subtracted below
+    var imag = x1 * y2 + x2 * y1; # the order matters!
+    var real = sub(tempreal, negreal); # x1*x2 - y1*y2
+
+    # print(real);
+    # print(imag);
+
+    var result = ifft1d(real, imag);
+    print(result);
+
+    # var t = FIRFilterResponse(b, a);
+    # print(t);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py
new file mode 100644
index 000000000000..f36dc87df15d
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/digitalModulation.py
@@ -0,0 +1,23 @@
+def main() { # Thresholds a sine, passes it through qam_modulate_* and qam_demodulate, prints one decoded element.
+        # var input = [1,0,1,1,0,1,0,0];
+	var input = getRangeOfVector(0, 100, 0.000125);
+        # print(input);
+        var pi = 3.14159265359;
+        var f_sig = 500;
+        var getMultiplier = 2 * pi * f_sig;
+        # print(getMultiplier);
+        var getSinDuration = gain(input, getMultiplier);
+        # print(getSinDuration);
+        var clean_sig = sin(getSinDuration );
+        var binary_sig = thresholdUp(clean_sig, 0.4,0); # NOTE(review): presumably maps samples to 0/1 around 0.4 - confirm thresholdUp semantics
+        # print(binary_sig);
+        var modulate_symbol_real = qam_modulate_real(binary_sig);
+        # print(modulate_symbol_real);
+        var modulate_symbol_imagine = qam_modulate_imagine(binary_sig);
+        # print(modulate_symbol_imagine);
+        var decode_data = qam_demodulate(modulate_symbol_real, modulate_symbol_imagine);
+        # print(decode_data);
+        var final1 = getElemAtIndx(decode_data , 2); # NOTE(review): index is a bare 2 here; other getElemAtIndx calls in these apps pass a list such as [5] - confirm
+        print(final1);
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py
new file mode 100644
index 000000000000..fa5364d6baee
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/dtmfDetection.py
@@ -0,0 +1,33 @@
+def main() { # Generates a DTMF tone for digit, finds its dominant FFT peaks and prints the recovered digit.
+     var digit = 8; 
+     var duration = 0.5;  
+     var fs = 8192; 
+     var d = 1/fs; # about 0.000122; not referenced again, fftfreq below hard-codes this value
+     var N = fs * duration; # 4096 with the values above; not referenced again, fftfreq below hard-codes this value
+     var dtmf_tone = generateDtmf(digit, duration, fs);
+     var fft_real = fft1dreal(dtmf_tone);
+     var fft_imag = fft1dimg(dtmf_tone); 
+     var squared_fft_real = square(fft_real);
+     var squared_fft_imag = square(fft_imag);
+     var sum = squared_fft_real + squared_fft_imag;
+     # print(sum);
+     var magnitudes = sqrt(sum); # sqrt(real^2 + imag^2)
+    #  print(magnitudes);
+     var frequencies = fftfreq(4096, 0.000122);
+    #  # print(frequencies);
+     var peaks = findDominantPeaks(frequencies, magnitudes);
+     print(peaks);
+     var freqPairs = [ # NOTE(review): presumably row i holds the frequency pair for digit i (row 0 = digit 0) - confirm against recoverDtmfDigit
+     [941, 1336],
+     [697, 1209],
+     [697, 1336],
+     [697, 1477], 
+     [770, 1209],
+     [770, 1336],
+     [770, 1477],
+     [852, 1209],
+     [852, 1336],
+     [852, 1477]];
+     var recovered_digit = recoverDtmfDigit(peaks, freqPairs);
+     print(recovered_digit);
+ }
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py
new file mode 100644
index 000000000000..91a8f56941ef
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/echoCancellation.py
@@ -0,0 +1,28 @@
+def main() { # Adds a delayed copy to a sine, runs lmsFilterResponse against the clean sine, prints one normalized element.
+  var fs = 8000; # not referenced again in this function
+  # var step = 1/8000;
+  # print(step);
+	var input = getRangeOfVector(0, 100, 0.000125);
+  var f_sig = 500;
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * f_sig;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  # print(getSinDuration);
+  var clean_sig = sin(getSinDuration );
+
+  # the "noise" here is the echo: clean_sig passed through delay(..., 2)
+  var noise = delay(clean_sig, 2);
+  # var noise1 = gain(noise, 0.5);
+
+  var noisy_sig = clean_sig + noise;
+  # print(noisy_sig);
+  # print(clean_sig);
+  var mu = 0.01; # third argument of lmsFilterResponse
+  var filterSize = 32; # fourth argument of lmsFilterResponse
+  var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+  var z = normalize(y);
+  var final = getElemAtIndx(z , [5]); 
+  print(final);
+
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py
new file mode 100644
index 000000000000..a443dde2aa6f
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getCompileTime.py
@@ -0,0 +1,170 @@
+import os
+import subprocess
+import sys
+import time
+
+import pandas as pd
+
+# Measures how long build/bin/dsp1 takes to emit LLVM IR for each application
+# at each input size, averaged over NoOfIterations runs, and records the
+# averages in compile_time.csv.
+#
+# NOTE: the "var input = getRangeOfVector(...)" line of every application
+# file is rewritten in place for each size and is not restored afterwards.
+
+# Applications to benchmark; "<name>.py" is opened relative to the current
+# working directory.
+input_files = [
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication",
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection"
+]
+
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = (
+    "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/"
+)
+# Directory that receives the emitted .ll files.
+OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+# Row label -> count substituted as the second argument of getRangeOfVector
+# in the application's "var input" line.
+inputValues = {
+    "10": 10,
+    "100": 100,
+    "1K": 1000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+    "1M": 1000000,
+    "10M": 10000000,
+    "20M": 20000000,
+    "30M": 30000000,
+    "40M": 40000000,
+    "50M": 50000000,
+    "100M": 100000000,
+}
+
+# Number of timed compiler runs averaged per (application, size, case).
+NoOfIterations = 3
+
+# Optimisation configurations to time. "suffix" names the emitted .ll file;
+# "exe" is used here only as the CSV column name.
+cases = [
+    {
+        "affineOpt": True,
+        "canonOpt": False,
+        "suffix": "fileAffineOpt.ll",
+        "exe": "fileAffineOptExe",
+    },
+    {
+        "affineOpt": True,
+        "canonOpt": True,
+        "suffix": "fileAffineCanonOpt.ll",
+        "exe": "fileAffineCanonOptExe",
+    },
+]
+
+data = []
+
+os.makedirs(OutputPath, exist_ok=True)
+
+for input_file_path in input_files:
+    input_file_path += ".py"
+
+    # Skip applications whose source file is missing.
+    try:
+        with open(input_file_path, "r"):
+            pass
+    except FileNotFoundError:
+        print(f"File {input_file_path} not found")
+        continue
+
+    print(f"Running Application: {input_file_path}")
+    print("InputPath: {}".format(BasePathForLLVM))
+    print(f"OutputPath: {OutputPath}")
+
+    command_base = f"{BasePathForLLVM}/build/bin/dsp1 {input_file_path} -emit=llvm"
+
+    # Header row of the tab-separated table printed to stdout.
+    print("", end="\t")
+    for case in cases:
+        print(f"{case['suffix']}", end="\t")
+
+    for key, value in inputValues.items():
+        print(f"\n{key}", end="\t")
+
+        # Point the application's input vector at the current size.
+        with open(input_file_path, "r") as file:
+            lines = file.readlines()
+
+        with open(input_file_path, "w") as file:
+            for line in lines:
+                if line.strip().startswith("var input = getRangeOfVector("):
+                    file.write(
+                        f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n"
+                    )
+                else:
+                    file.write(line)
+
+        for case in cases:
+            command_llvm = command_base
+            if case["affineOpt"]:
+                command_llvm += " -affineOpt"
+            if case["canonOpt"]:
+                command_llvm += " -canonOpt"
+            # The shell redirects the compiler's stderr into the .ll file.
+            command_llvm += f" 2> {OutputPath}/{case['suffix']}"
+
+            sum_compile_time = 0
+            for _ in range(NoOfIterations):
+                # perf_counter is monotonic, so the measurement cannot be
+                # skewed by wall-clock adjustments the way time.time() can.
+                start_time = time.perf_counter()
+                result = subprocess.run(
+                    command_llvm, shell=True, capture_output=True, text=True
+                )
+                sum_compile_time += time.perf_counter() - start_time
+
+                # A failed compile still contributes a time; flag it so the
+                # number is not mistaken for a valid measurement.
+                if result.returncode != 0:
+                    print(
+                        f"warning: {command_llvm!r} exited with {result.returncode}",
+                        file=sys.stderr,
+                    )
+
+            avg_compile_time = sum_compile_time / NoOfIterations
+            print(f"{avg_compile_time:.6f}", end="\t")
+            data.append(
+                {
+                    "filename": input_file_path,
+                    "input size": key,
+                    f"{case['exe']}": avg_compile_time,
+                }
+            )
+
+        # Rewritten after every input size, so the CSV on disk always holds
+        # the results gathered so far.
+        df = pd.DataFrame(data)
+        df.to_csv("compile_time.csv", index=False)
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py
new file mode 100644
index 000000000000..f40e208fb7b2
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/getSize.py
@@ -0,0 +1,173 @@
+import os
+import subprocess
+import sys
+
+import pandas as pd
+
+# Records the code size of every benchmark application in codesize.csv.
+#
+# For each application and each optimisation case the script:
+#   1. rewrites the size-dependent lines of "<name>.py" in place,
+#   2. compiles it with dsp1 (-emit=llvm) followed by clang-19 -O3,
+#   3. runs `size` on the resulting executable.
+#
+# The application files are opened relative to the current working directory
+# and are left modified after the run.
+
+input_files = [
+    "spectralAnalysis",
+    "audioCompression",
+    "audioEqualization",
+    "biomedicalSignalProcessing",
+    "digitalModulation",
+    "dtmfDetection",
+    "echoCancellation",
+    "FIRFilterDesign",
+    "hearingAid",
+    "lowPassFiltering",
+    "noiseCancellation",
+    "periodogram",
+    "vibrationAnalysis",
+    "radarSignalProcessing",
+    "signalSmoothing",
+    "spaceCommunication",
+    "speakerIdentification",
+    "targetDetection",
+    "underWaterCommunication",
+    "voiceActivityDetection"
+]
+
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = (
+    "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/"
+)
+# Directory that receives the .ll files and the executables.
+OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+# Row label -> element count written into the application before compiling.
+# Only the sizes listed here are measured.
+size_test = {"100M": 100000000}
+
+# Optimisation configurations; "suffix" and "exe" are file names inside
+# OutputPath.
+cases = [
+    {
+        "affineOpt": True,
+        "canonOpt": False,
+        "suffix": "fileAffineOpt.ll",
+        "exe": "fileAffineOptExe",
+    },
+    {
+        "affineOpt": True,
+        "canonOpt": True,
+        "suffix": "fileAffineCanonOpt.ll",
+        "exe": "fileAffineCanonOptExe",
+    },
+]
+
+data = []
+
+os.makedirs(OutputPath, exist_ok=True)
+
+for input_file_path in input_files:
+    source_path = input_file_path + ".py"
+
+    print(f"Running Application {input_file_path}")
+    print("InputPath:{}".format(BasePathForLLVM))
+    print(f"OutputPath: {OutputPath}")
+
+    command_base = f"{BasePathForLLVM}/build/bin/dsp1 {source_path} -emit=llvm"
+
+    # Header row of the tab-separated table printed to stdout.
+    print("", end="\t")
+    for case in cases:
+        print(f"{case['exe']}", end="\t")
+
+    for key, value in size_test.items():
+        dur = value / 8192
+        print(f"\n{key}", end="\t")
+
+        with open(source_path, "r") as file:
+            lines = file.readlines()
+
+        # Rewrite the three kinds of size-dependent line; copy the rest.
+        with open(source_path, "w") as file:
+            for line in lines:
+                stripped = line.strip()
+                if stripped.startswith("var input = getRangeOfVector("):
+                    file.write(
+                        f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n"
+                    )
+                elif stripped.startswith("var duration ="):
+                    file.write(f"\tvar duration = {dur};\n")
+                elif stripped.startswith("var frequencies = fftfreq"):
+                    file.write(f"\tvar frequencies = fftfreq({value}, 0.000122);\n")
+                else:
+                    file.write(line)
+
+        for case in cases:
+            ll_path = f"{OutputPath}/{case['suffix']}"
+            exe_path = f"{OutputPath}/{case['exe']}"
+
+            command_llvm = command_base
+            if case["affineOpt"]:
+                command_llvm += " -affineOpt"
+            if case["canonOpt"]:
+                command_llvm += " -canonOpt"
+            # The shell redirects dsp1's stderr into the .ll file.
+            command_llvm += f" 2> {ll_path}"
+
+            commands = [
+                command_llvm,
+                f"{BasePathForLLVM}/build/bin/clang-19 -O3 {ll_path} -o {exe_path} -lm",
+            ]
+
+            # Run both steps; build_ok ends up holding the outcome of the
+            # last one (clang), which is the step that writes exe_path.
+            build_ok = True
+            for command in commands:
+                result = subprocess.run(
+                    command, shell=True, capture_output=True, text=True
+                )
+                build_ok = result.returncode == 0
+                if not build_ok:
+                    print(
+                        f"warning: {command!r} exited with {result.returncode}",
+                        file=sys.stderr,
+                    )
+            if not build_ok:
+                # Every application reuses the same executable name, so after
+                # a failed clang step exe_path may still hold a previous
+                # application's binary. Do not measure it.
+                continue
+
+            # Measure the file clang just wrote (under OutputPath), not a
+            # path relative to the current working directory.
+            result = subprocess.run(
+                f"size {exe_path}", shell=True, capture_output=True, text=True
+            )
+            if result.returncode != 0:
+                print(
+                    "Process failed because did not return a successful return code. "
+                    f"Returned {result.returncode}\n{result.stderr}",
+                    file=sys.stderr,
+                )
+                continue
+
+            # Expected layout: a header line, then one row of
+            # "text data bss dec hex filename".
+            output_parts = result.stdout.splitlines()
+            if len(output_parts) > 1:
+                size_data = output_parts[1].split()
+                data.append(
+                    {
+                        "filename": input_file_path,
+                        "opt": case["exe"],
+                        # text + data + bss. The "dec" column is already this
+                        # sum, so adding it as well would double the total.
+                        "total": sum(map(int, size_data[:3])),
+                    }
+                )
+
+        df = pd.DataFrame(data)
+        df.to_csv("codesize.csv", index=False)
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py
similarity index 79%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py
index ed37be3a42d8..d81e05eb2b3a 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/noisecancelling.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/hearingAid.py
@@ -2,7 +2,7 @@ def main() {
 var fs = 8000;
   # var step = 1/8000; 
   # print(step);
-	var input = getRangeOfVector(0, 20000000, 0.000125);
+	var input = getRangeOfVector(0, 100, 0.000125);
   var f_sig = 500;
   var pi = 3.14159265359;
   var getMultiplier = 2 * pi * f_sig;
@@ -23,8 +23,12 @@ def main() {
   var mu = 0.01;
   var filterSize = 32;
   var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
-  var final1 = getElemAtIndx(y , [6]); 
-  print(final1);
+  var G1 = 123;
+  var sol = gain(y,G1);
+  # print(y);
+  var final = getElemAtIndx(sol, [3]); 
+  print(final);
+  # print(sol);
 
 }
 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py
similarity index 97%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py
index c0b8d7851621..e08a55791347 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/lowPassFull.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/lowPassFiltering.py
@@ -12,7 +12,7 @@ def main() {
   # var step = 1/8000; 
   # print(step);
   var duration = 0.05 ; # 50 milli-secs
-	var input = getRangeOfVector(0, 30000000, 0.000125);
+	var input = getRangeOfVector(0, 100, 0.000125);
   # print(c);
   # var c = getRangeOfVector(0,10, 0.000125);
   var f_sig = 500;
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py
new file mode 100644
index 000000000000..5a296d6347b3
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/noiseCancellation.py
@@ -0,0 +1,30 @@
+def main() { # Adds a half-amplitude 3000 Hz sine to a 500 Hz sine, runs lmsFilterResponse, prints one normalized element.
+var fs = 8000; # not referenced again in this function
+  # var step = 1/8000;
+  # print(step);
+	var input = getRangeOfVector(0, 1000, 0.000125);
+  var f_sig = 500;
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * f_sig;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  # print(getSinDuration);
+  var clean_sig = sin(getSinDuration );
+
+  #define a noise signal with freq = 3000
+  var f_noise = 3000;
+  var getNoiseSinDuration = gain(input, 2 * pi * f_noise);
+  var noise = sin(getNoiseSinDuration);
+  var noise1 = gain(noise, 0.5);
+
+  var noisy_sig = clean_sig + noise1;
+  # print(noisy_sig);
+  var mu = 0.01; # third argument of lmsFilterResponse
+  var filterSize = 32; # fourth argument of lmsFilterResponse
+  var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+  # var a = norm_LMSFilterResponse_opt(noisy_sig, clean_sig, mu, filterSize);
+  var z = normalize(y);
+  var final = getElemAtIndx(z , [5]); 
+  print(final);
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py
similarity index 62%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py
index 9ee480e6c033..af9e30275355 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv1.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/periodogram.py
@@ -14,7 +14,7 @@ def main() {
 
   #size 10
   # var a10 = [ 10,20,30,40,50,60,70,80,90,100];
-	var input = getRangeOfVector(0, 10, 1);
+	var input = getRangeOfVector(0, 500, 1);
   # var input = [1,2,3,4];
   # print(a10);
 
@@ -23,30 +23,14 @@ def main() {
   # y[n] = sum(h(k) . x(n-k)) k=0 to N-1 & 0<= n < N
   var conv1d = FIRFilterResponse(input, reverse_input);
   # var fft_real = fft1DRealSymm(conv1d); #fft1DRealSymm
-  var fft_real = fft1dreal(conv1d);
-  var fft_img = fft1dimg(conv1d);
-  var sq = fft_real * fft_real + fft_img * fft_img;
-  print(sq);
-  # var final1 = getElemAtIndx(fft_real , [6]); 
-  # var final2 = getElemAtIndx(fft_real , [7]);
-  # print(final1);
-  # print(final2);
   # print(conv1d);
+  var fft_real = fft1dreal(conv1d);
   # print(fft_real);
+  var fft_img = fft1dimg(conv1d);
   # print(fft_img);
-  #Pad the input , reverse_input for the size of conv o/p
-  #Calculate 
-    # var padLen = 9 ; #10 + 10 - 1 - 10
-    # var input_padded = padding(input , 0, padLen );
-
-    
-    # var fft10real = fft1dreal(input);
-    # var fft10img = fft1dimg(input);
-    
-    # #try input * -input
-    # var neg_input = gain(input , -1);
-  # var sq = fft10real * fft10real + fft10img * fft10img;
+  var sq = fft_real * fft_real + fft_img * fft_img;
   # print(sq);
-
+  var final1 = getElemAtIndx(sq , [2]); 
+  print(final1);
 }
 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py
new file mode 100644
index 000000000000..9a68946cc7e9
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/radarSignalProcessing.py
@@ -0,0 +1,32 @@
+def main() {
+
+        var antennas = 4;
+        var input_fc = 5;
+        var N = 101;
+	var input = getRangeOfVector(0, 10, 0.000125);
+        var weights = getRangeOfVector(-90, 180, 1);
+
+        var signal = beam_form(antennas, input_fc, input, weights);
+        var b1 = abs(signal);
+        var power_profile = b1 * b1;
+        var power_angle_max_idx = argmax(power_profile, 0);
+        var power_angle_max_ele = argmax(power_profile,0);
+        var pi = 3.1415926;
+        var fc1 = 1000;
+        var fc2 = 7500;
+        var Fs = 8000;
+
+        var wc1 = 2*pi*fc1 / Fs;
+        var filter1 = lowPassFIRFilter(wc1, N);
+        var filter_hamming_1 = filter1 * hamming(N);
+        var wc2 = 2*pi*fc2 / Fs;
+        var filter2 = highPassFIRFilter(wc2, N);
+        var filter_hamming_2 = filter2 * hamming(N);
+
+        var bpf = sub(filter_hamming_2, filter_hamming_1);
+        var firFilterResponse = FIRFilterResponse(power_profile, bpf);
+        var length = len(firFilterResponse);
+        var final = getElemAtIndx(firFilterResponse , 10);
+        print(final);
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py
new file mode 100644
index 000000000000..3ac2474a345f
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/signalSmoothing.py
@@ -0,0 +1,19 @@
+def main() {
+  var fs = 8000;
+	var input = getRangeOfVector(0, 10, 0.000125);
+  var f_sig = 500;
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * f_sig;
+  var getSinDuration = gain(input, getMultiplier);
+  var clean_sig = sin(getSinDuration );
+  var f_noise = 3000;
+  var getNoiseSinDuration = gain(input, 2 * pi * f_noise);
+  var noise = sin(getNoiseSinDuration);
+  var noise1 = gain(noise, 0.5);
+
+  var noisy_sig = clean_sig + noise1;
+  var median = medianFilter(noisy_sig);
+  var average = slidingWindowAvg(median);
+  var final1 = getElemAtIndx(average , 3); 
+  print(final1);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py
new file mode 100644
index 000000000000..c21a388fa630
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spaceCommunication.py
@@ -0,0 +1,20 @@
+def main() {
+        # var input = "HELLO FROM SPACE";
+	var input = getRangeOfVector(0, 10, 0.000125);
+        # print(c);
+        var pi = 3.14159265359;
+        var f_sig = 500;
+        var getMultiplier = 2 * pi * f_sig;
+        # print(getMultiplier);
+        var getSinDuration = gain(input, getMultiplier);
+        # print(getSinDuration);
+        var clean_sig = sin(getSinDuration );
+        var binary_sig = thresholdUp(clean_sig, 0.4,0);
+        var a = space_modulate(binary_sig);
+        var noise = sin(a);
+        var noisy_signal = a+noise;
+        var b = space_demodulate(noisy_signal);
+        var e = space_err_correction(b);
+        var final = getElemAtIndx(e, [8]);
+        print(final);
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py
new file mode 100644
index 000000000000..6501e5f474fb
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/speakerIdentification.py
@@ -0,0 +1,33 @@
+def main() {
+  var sample_rate = 1000;
+	var duration = 12.20703125;
+  var person1 = generateVoiceSignature(100, 200, duration, sample_rate); #Alice
+  var person2 = generateVoiceSignature(150, 250, duration, sample_rate); #Bob
+  var person3 = generateVoiceSignature(120, 180, duration, sample_rate); #Charlie
+  
+  # var unknown_signal = generateVoiceSignature(100, 200, duration, sample_rate);
+  var unknown_signal = generateVoiceSignature(150, 250, duration, sample_rate);
+  # var unknown_signal = generateVoiceSignature(120, 180, duration, sample_rate);
+  
+  var max1 = max(correlate(person1, unknown_signal));
+  var max2 = max(correlate(person2, unknown_signal));
+  var max3 = max(correlate(person3, unknown_signal));
+  
+  var total_maxes = [0, 0, 0];
+
+  var temp2 = setSingleElemAtIndx(total_maxes, 0, max1); #work
+  var temp3 = setSingleElemAtIndx(total_maxes, 1, max2); #work
+  var temp4 = setSingleElemAtIndx(total_maxes, 2, max3); #work
+  
+  var max_index = argmax(total_maxes,0);
+  
+  var max_value = getSingleElemAtIndx(total_maxes, max_index);
+  
+  print(max_index);
+  print(temp2);
+  print(max_value);
+  print(temp3);
+  print(total_maxes);
+  print(temp4);
+}
+ 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py
new file mode 100644
index 000000000000..9d32dec6fd4a
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/spectralAnalysis.py
@@ -0,0 +1,13 @@
+def main() {
+
+	var input = getRangeOfVector(0, 400, 1);
+        var fft_real = fft1dreal(input);
+        var fft_img = fft1dimg(input);
+        var sq_abs = square(fft_real) + square(fft_img)  ;
+        var sum1 = sum(sq_abs);
+        # print(sum1);
+        var len1 = len(input);
+        var res = sum1 / len1;
+        print(res);
+}
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py
new file mode 100644
index 000000000000..4ce8c4a32c6a
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/targetDetection.py
@@ -0,0 +1,27 @@
+def main() {
+  var fs = 1000;
+  # var step = 1/fs; 
+  # print(step);
+	var input = getRangeOfVector(0, 1000, 0.000125);
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * 10;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  var sig1 = sin(getSinDuration );
+  var getMultiplier2 = 2 * pi * 20;
+  var getSinDuration2 = gain(input, getMultiplier2);
+  var sinsig2 = sin(getSinDuration2);
+  var sig2 = gain(sinsig2, 0.5);
+  var signal = sig1 + sig2;
+  var noise = delay(signal, 5);
+  var noisy_sig = signal + noise;
+  
+  var mu = 0.01;
+  var filterSize = 20;
+  var y = lmsFilterResponse(noisy_sig, signal, mu, filterSize);
+  var peaks = find_peaks(y, 1, 50); 
+  var final1 = getElemAtIndx(peaks , [1]); 
+  var final2 = getElemAtIndx(peaks , [0]); 
+  print(final1);
+  print(final2);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py
new file mode 100644
index 000000000000..ac3a7f2131cd
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/underWaterCommunication.py
@@ -0,0 +1,34 @@
+def main() {
+  var fs = 1000;
+  # var step = 1/fs; 
+  # print(step);
+	var input = getRangeOfVector(0, 50, 0.000125);
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * 5;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  var signal = sin(getSinDuration );
+
+  # the "noise" is the signal itself delayed by 5 samples
+  var noise = delay(signal, 5);
+  var noisy_sig = signal + noise;
+
+ #design a low-pass filter : filterOrder = 5(odd) , cut-off freq=10
+  # get wc = 2 * pi * cutoff_freq / fs
+  # get the filter response using filter(b,a, noisy_sig)
+  var fc = 1000;
+  # var Fs = 8000;
+  var wc = 2 * pi * 1000 / 500; #wc should vary from 0 to pi -- NOTE(review): this evaluates to 4*pi, confirm
+  var N = 5;
+  # var hid = sinc(wc, N);
+  # the filter length must match the hamming(N) window it is multiplied with
+  var lpf = lowPassFIRFilter(wc, N); #ideal low -pass filter
+  var lpf_w = lpf * hamming(N);
+  var FIRfilterResponse = FIRFilterResponse(noisy_sig, lpf_w);
+ 
+  var threshold = 0.05;
+  var GetThresholdReal = thresholdUp(FIRfilterResponse, threshold, 0);
+  # print(GetThresholdReal);
+  var final1 = getElemAtIndx(GetThresholdReal , [3]); 
+  print(final1);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py
new file mode 100644
index 000000000000..945c7cb40fc6
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/vibrationAnalysis.py
@@ -0,0 +1,27 @@
+def main() {
+  var fs = 1000;
+  # var step = 1/fs; 
+  # print(step);
+	var input = getRangeOfVector(0, 10, 0.000125);
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * 50;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  var sig1 = sin(getSinDuration );
+  var getMultiplier2 = 2 * pi * 120;
+  var getSinDuration2 = gain(input, getMultiplier2);
+  var sinsig2 = sin(getSinDuration2);
+  var sig2 = gain(sinsig2, 0.5);
+  var signal = sig1 + sig2;
+  var noise = delay(signal, 5);
+  var noisy_sig = signal + noise;
+  var threshold = 2;
+  
+  var fft_real = fft1dreal(noisy_sig);
+  var fft_img = fft1dimg(noisy_sig);
+  var sq_abs = square(fft_real) + square(fft_img);
+  var magnitudes = sqrt(sq_abs);
+  var GetThresholdReal = thresholdUp( magnitudes , threshold,1);
+  var final1 = getElemAtIndx(GetThresholdReal , 0); 
+  print(final1);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py
new file mode 100644
index 000000000000..1cf37d8a34ad
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/DSP-DSL/voiceActivityDetection.py
@@ -0,0 +1,18 @@
+def main() {
+  var fs = 1000;
+  # var step = 1/fs; 
+  # print(step);
+	var input = getRangeOfVector(0, 100, 0.0125);
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * 5;
+  # print(getMultiplier);
+  var getSinDuration = gain(input, getMultiplier);
+  var signal = sin(getSinDuration );
+
+  var noise = delay(signal, 5);
+  var noisy_sig = signal + noise;
+  var threshold = 0.01;
+  var GetThresholdReal = threshold( noisy_sig , threshold);
+  var zcr = zeroCrossCount(GetThresholdReal);
+  print(zcr);
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m
new file mode 100644
index 000000000000..0b364f0d3e89
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/FIRFilterDesign.m
@@ -0,0 +1,48 @@
+% Constants
+INPUT_LENGTH = 100000000;
+FS = 8000;
+FC1 = 500;
+FC2 = 600;
+FC3 = 1000;
+
+% Calculate normalized frequencies
+wc1 = 2 * pi * FC1 / FS;
+wc2 = 2 * pi * FC2 / FS;
+wc3 = 2 * pi * FC3 / FS;
+
+% Create Hamming window
+hamming_window = hamming(INPUT_LENGTH);
+
+% Create high-pass filters
+hpf1 = highPassFIRFilter(wc1, INPUT_LENGTH);
+hpf2 = highPassFIRFilter(wc2, INPUT_LENGTH);
+hpf3 = highPassFIRFilter(wc3, INPUT_LENGTH);
+
+% Element-wise multiplication with Hamming window
+hpf_w1 = hpf1 .* hamming_window';
+hpf_w2 = hpf2 .* hamming_window';
+hpf_w3 = hpf3 .* hamming_window';
+
+% Get specific elements
+final1 = hpf_w1(7);  
+final2 = hpf_w2(8);
+final3 = hpf_w3(9);
+
+% Display results
+fprintf('%f\n', final1);
+fprintf('%f\n', final2);
+fprintf('%f\n', final3);
+
+% High-pass FIR filter: ideal (sinc-based) impulse response with filterLength taps
+function h = highPassFIRFilter(wc, filterLength)
+    n = 0:(filterLength-1);
+    mid = (filterLength-1) / 2;
+    h = zeros(1, filterLength);
+    
+    % Off-centre taps: -sin(wc*(n-mid)) / (pi*(n-mid))
+    offCentre = (n ~= mid);
+    h(offCentre) = -sin(wc * (n(offCentre) - mid)) ./ (pi * (n(offCentre) - mid));
+    % A centre tap exists only when mid is an integer (odd filterLength); for even
+    % lengths this mask is empty, so no already-computed tap gets overwritten.
+    h(n == mid) = 1 - (wc / pi);
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py
new file mode 100644
index 000000000000..8da6b86da1fa
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/ResultScript.py
@@ -0,0 +1,146 @@
+import os
+import re
+import shutil
+import subprocess
+import time
+# The script does the following
+# Input : <application>.m (the MATLAB benchmark selected by input_file below)
+# Output : average execution time for each input size
+# Steps to run:
+# Open a terminal at the path of the script --
+# Run: python ResultScript.py #3.11 validated
+
+# Pseudo-code:
+# Iterate over all the input sizes & update the input size in the .m file
+# Update logic -- rewrite the line that starts with "INPUT_LENGTH = "
+# Compile the file with mcc, then time the generated launcher script
+
+# Path to the input file
+# Apps = "noiseCancelling.m" , "echoCancelling.m", "periodogram.m", "lowPassFull.m", "hearingAid.m", "lowPassFIRFilterDesign", "energyOfSignal", "audioEqualizer", "audioCompression","vibrationAnalysis", "underWaterCommunication", "voiceActivityDetection", "signalSmoothing", "targetDetection", "biomedicalSignalProcessing", "digitalModulation", "spaceCommunication", "radarSignalProcessing"
+input_file = "speakerIdentification"
+input_file_path = input_file + ".m"
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
+OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/"
+mcc_path ="/home/local/ASURITE/apkhedka/Matlab_Installation/bin/mcc"
+mrt_path ="/home/local/ASURITE/apkhedka/Matlab_Runtime/R2024b/"
+# OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/"
+print(f"Running Application {input_file_path}")
+# Construct full output path
+OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
+
+# Check if the Output folder exists, create it if it doesn't
+if not os.path.exists(OutputPath):
+    os.makedirs(OutputPath)
+
+# Now OutputPath is ready for use
+print("InputPath:{}".format(BasePathForLLVM))
+print(f"OutputPath: {OutputPath}")
+# exit()
+
+# ************ Don't change unless u required
+# Define the values dictionary
+inputValues = {
+    # "10": 10,
+    "100": 100,
+    "1K": 1000,
+    "10K": 10000,
+    "20K": 20000,
+    "30K": 30000,
+    "40K": 40000,
+    "50K": 50000,
+    "100K": 100000,
+     "1M": 1000000,
+    "10M": 10000000,
+    "20M": 20000000,
+    "30M": 30000000,
+    "40M": 40000000,
+    "50M": 50000000,
+    "100M": 100000000,
+    # "1B": 1000000000
+}
+NoOfIterations = 3
+
+def delete_folder_contents(folder_path):
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception as e:
+            print(f'Failed to delete {file_path}. Reason: {e}')
+
+
+with open(input_file_path, "r") as file:
+    lines = file.readlines()
+
+print("", end="\t")
+
+
+for key, value in inputValues.items():
+    # Update the specific line in the file
+    # print("Updating for {}".format(value))
+    print("\n{}".format(key), end="\t")
+    with open(input_file_path, "w") as file:
+        for line in lines:
+            if line.strip().startswith("INPUT_LENGTH = "):
+                updated_line = f"INPUT_LENGTH = {value};\n"
+                file.write(updated_line)
+            else:
+                file.write(line)
+
+    command = f"{mcc_path} -m {input_file_path} -d 'Output/' -o {input_file}{key}"
+    result = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+    # Modify the generated shell script
+    script_path = f"./Output/run_{input_file}{key}.sh"
+    # Modify the generated shell script
+    script_path = f"./Output/run_{input_file}{key}.sh"
+    with open(script_path, 'r') as file:
+        script_content = file.readlines()
+
+    # Find the line with the eval command and modify it
+    for i, line in enumerate(script_content):
+        if line.strip().startswith('eval'):
+            script_content[i] = f"""  start_time=$(date +%s.%N)
+  {line.strip()}
+  end_time=$(date +%s.%N)
+  execution_time=$(echo "$end_time - $start_time" | bc)
+  echo "Execution time: $execution_time"
+"""
+            break
+
+    # Write the modified content back to the script
+    with open(script_path, 'w') as file:
+        file.writelines(script_content)
+
+
+    sum_exe_time = 0
+    for i in range(0, NoOfIterations):
+        try:
+            subprocess.run("sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'", shell=True, check=True)
+        except subprocess.CalledProcessError as exc:
+            print(exc)
+
+        command2 = f"taskset -c 0 ./Output/run_{input_file}{key}.sh {mrt_path}"
+
+        try:
+            result = subprocess.run(command2, shell=True, capture_output=True, text=True, check=True)
+            output = result.stdout
+            
+            # Extract execution time from the output
+            match = re.search(r"Execution time: (\d+\.\d+)", output)
+            if match:
+                execution_time = float(match.group(1))
+                sum_exe_time += execution_time
+            else:
+                print(f"Execution time not found in output: {output}")
+        except subprocess.CalledProcessError as exc:
+            print(f"Process failed. Returned {exc.returncode}\n{exc}")
+
+    avg_exe_time = sum_exe_time / NoOfIterations
+    print(f"{avg_exe_time}", end="\t")
+    # delete_folder_contents("./Output")
+
+
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m
new file mode 100644
index 000000000000..c388ce8f13fa
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioCompression.m
@@ -0,0 +1,71 @@
+% Constants
+INPUT_LENGTH = 100000000;
+NLEVELS = 16;
+MIN = 0.0;
+MAX = 8.0;
+THRESHOLD_VAL = 4.0;
+
+% Function to get range of vector
+function output = getRangeOfVector(start, noOfSamples, increment)
+    output = start + (0:noOfSamples-1) * increment;
+end
+
+% DFT function
+function output = dft(input)
+    N = length(input);
+    n = 0:N-1;
+    k = n';
+    M = exp(-1j * 2 * pi * k * n / N);
+    output = M * input(:);
+end
+
+% Threshold function
+function output = threshold(input, thresh)
+    output = input .* (abs(input) >= thresh);
+end
+
+% Quantization function
+function output = quantization(input, nlevels, max, min)
+    step = (max - min) / nlevels;
+    output = round((input - min) / step) * step + min;
+end
+
+% Run Length Encoding: returns the column [value1; count1; value2; count2; ...] and its length
+function [rle, rleLength] = runLenEncoding(input)
+    input = input(:);                 % accept row or column vectors alike
+    diffs = diff([input; NaN]);       % NaN sentinel makes the last run end with a non-zero diff
+    runs = find(diffs ~= 0);          % index of the last element of every run
+    lengths = diff([0; runs]);
+    values = input(runs);
+    rle = [values, lengths]';         % one (value, count) pair per column
+    rle = rle(:);                     % interleave the pairs into a single column
+    rleLength = length(rle);
+end
+
+% Get element at index function
+function elem = getElemAtIndx(rle, indx)
+    elem = rle(indx);
+end
+
+% Main script
+input = getRangeOfVector(0, INPUT_LENGTH, 1);
+
+fft_result = dft(input);
+
+GetThresholdReal = real(fft_result);
+GetThresholdImg = imag(fft_result);
+
+GetThresholdReal = threshold(GetThresholdReal, THRESHOLD_VAL);
+GetThresholdImg = threshold(GetThresholdImg, THRESHOLD_VAL);
+
+QuantOutReal = quantization(GetThresholdReal, NLEVELS, MAX, MIN);
+QuantOutImg = quantization(GetThresholdImg, NLEVELS, MAX, MIN);
+
+[rLEOutReal, rleLengthReal] = runLenEncoding(QuantOutReal);
+[rLEOutImg, rleLengthImg] = runLenEncoding(QuantOutImg);
+
+final1 = getElemAtIndx(rLEOutReal, 2);
+final2 = getElemAtIndx(rLEOutImg, 1);
+
+fprintf('%f\n', final1);
+fprintf('%f\n', final2);
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m
new file mode 100644
index 000000000000..8b3f85c42e18
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/audioEqualization.m
@@ -0,0 +1,57 @@
+% Constants
+INPUT_LENGTH = 100000000;
+pi = 3.14159265359;
+fc = 300;
+Fs = 8000;
+gainForBass = 2;
+gainForMid = 1.5;
+gainForTreble = 0.8;
+wc = 2 * pi * fc / Fs;
+N = 101;
+
+% Input signal
+input = 0:(INPUT_LENGTH-1);
+
+% Low-pass filter
+lpf = lowPassFIRFilter(wc, N);
+hamming_window = hamming(N)';
+lpf_w = lpf .* hamming_window;
+FIRfilterResponseForLpf = conv(input, lpf_w, 'same');
+gainWithLpf = FIRfilterResponseForLpf * gainForBass;
+
+% High-pass filter
+fc2 = 1500;
+wc2 = 2 * pi * fc2 / Fs;
+hpf = highPassFIRFilter(wc2, N);
+hpf_w = hpf .* hamming_window;
+FIRfilterResponseForHpf = conv(input, hpf_w, 'same');
+gainWithHpf = FIRfilterResponseForHpf * gainForTreble;
+
+% Band-pass filter
+lpf2 = lowPassFIRFilter(wc2, N);
+lpf2_w = lpf2 .* hamming_window;
+bpf_w = lpf2_w - lpf_w;
+FIRfilterResponseForBpf = conv(input, bpf_w, 'same');
+gainWithBpf = FIRfilterResponseForBpf * gainForMid;
+
+% Final audio
+final_audio = gainWithLpf + gainWithHpf + gainWithBpf;
+
+% Print results
+fprintf('Element at index 4: %f\n', final_audio(4));
+disp(final_audio);
+
+% Helper functions
+function h = lowPassFIRFilter(wc, length)
+    n = 0:(length-1);
+    mid = (length - 1) / 2;
+    h = zeros(1, length);
+    h(n ~= mid) = sin(wc * (n(n ~= mid) - mid)) ./ (pi * (n(n ~= mid) - mid));
+    h(mid+1) = wc / pi;
+end
+
+function h = highPassFIRFilter(wc, length)
+    lpf = lowPassFIRFilter(wc, length);
+    h = -lpf;
+    h((length+1)/2) = h((length+1)/2) + 1;
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m
new file mode 100644
index 000000000000..9212913001f2
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/biomedicalSignalProcessing.m
@@ -0,0 +1,60 @@
+% Constants
+INPUT_LENGTH = 20000000;
+MAX_PEAKS = 1000;
+N = 101;
+
+% Signal parameters
+fc1 = 1000;
+fc2 = 7500;
+Fs = 8000;
+distance = 950;
+f_sig = 500;
+f_noise = 3000;
+
+% Generate input signal
+t = (0:0.000125:(INPUT_LENGTH-1)*0.000125)';
+
+% Generate clean signal
+clean_sig = sin(2*pi*f_sig*t);
+
+% Generate noise
+noise = 0.5 * sin(2*pi*f_noise*t);
+
+% Create noisy signal
+noisy_sig = clean_sig + noise;
+
+% Step 1: FIR Bandpass Filter
+wc1 = 2 * pi * fc1 / Fs;
+wc2 = 2 * pi * fc2 / Fs;
+
+% Design lowpass filters
+n = 0:N-1;
+mid = (N-1)/2;
+lpf1 = (wc1/pi) * sinc(wc1*(n-mid)/pi);
+lpf2 = (wc2/pi) * sinc(wc2*(n-mid)/pi);
+
+% Apply Hamming window
+hamming_window = hamming(N)';
+lpf1_w = lpf1 .* hamming_window;
+lpf2_w = lpf2 .* hamming_window;
+
+% Create bandpass filter
+bpf_w = lpf2_w - lpf1_w;
+
+% Apply bandpass filter
+FIRfilterResponseForBpf = filter(bpf_w, 1, noisy_sig);
+
+% Step 2: Artifact Removal (R-peak detection)
+max_val = max(FIRfilterResponseForBpf);
+height = 0.3 * max_val;
+
+% Find peaks
+[~, r_peaks] = findpeaks(FIRfilterResponseForBpf, 'MinPeakHeight', height, 'MinPeakDistance', distance);
+
+% Calculate heart rate
+diff_val = diff(r_peaks);
+diff_mean = mean(diff_val);
+
+avg_hr = (60 * Fs) / diff_mean;
+
+fprintf('%f\n', avg_hr);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m
new file mode 100644
index 000000000000..656fee46c2f3
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/digitalModulation.m
@@ -0,0 +1,58 @@
+% Define constants
+INPUT_LENGTH = 100000000;
+
+% Seed the generator before drawing, otherwise the seed cannot affect the data
+rng('shuffle');
+% Generate random input data
+data = randi([0 1], 1, INPUT_LENGTH);
+
+% QAM Modulation: maps each consecutive bit pair to one symbol (+-1 +-1i); expects an even number of bits
+function symbols = qam_modulate(data)
+    symbols = zeros(1, length(data)/2);
+    for i = 1:2:length(data)
+        bit1 = data(i);
+        bit2 = data(i+1);
+        
+        if bit1 == 0 && bit2 == 0
+            symbols((i+1)/2) = -1 - 1i;
+        elseif bit1 == 0 && bit2 == 1
+            symbols((i+1)/2) = -1 + 1i;
+        elseif bit1 == 1 && bit2 == 0
+            symbols((i+1)/2) = 1 - 1i;
+        elseif bit1 == 1 && bit2 == 1
+            symbols((i+1)/2) = 1 + 1i;
+        end
+    end
+end
+
+% QAM Demodulation: inverse mapping, two bits per symbol (unrecognised symbols leave zeros)
+function bits = qam_demodulate(symbols)
+    bits = zeros(1, length(symbols)*2);
+    for i = 1:length(symbols)
+        symbol = symbols(i);
+        
+        if symbol == -1 - 1i
+            bits(2*i-1) = 0;
+            bits(2*i) = 0;
+        elseif symbol == -1 + 1i
+            bits(2*i-1) = 0;
+            bits(2*i) = 1;
+        elseif symbol == 1 - 1i
+            bits(2*i-1) = 1;
+            bits(2*i) = 0;
+        elseif symbol == 1 + 1i
+            bits(2*i-1) = 1;
+            bits(2*i) = 1;
+        end
+    end
+end
+
+% Main script
+% Perform QAM modulation
+symbols = qam_modulate(data);
+
+% Perform QAM demodulation
+bits = qam_demodulate(symbols);
+
+% Print the 6th bit (equivalent to bits[5] in C, as MATLAB uses 1-based indexing)
+disp(bits(6));
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m
new file mode 100644
index 000000000000..20f4c3517247
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/dtmfDetection.m
@@ -0,0 +1,119 @@
+% DTMF Detection in MATLAB using DFT
+
+% Constants
+SAMPLING_FREQUENCY = 8192;  % Sampling frequency
+DURATION = 0.5;             % Duration of the DTMF signal
+N_SAMPLES = SAMPLING_FREQUENCY * DURATION; % Number of samples for the DTMF signal
+
+% DTMF frequencies
+freqPairs = [
+    941, 1336;   % 0
+    697, 1209;   % 1
+    697, 1336;   % 2
+    697, 1477;   % 3
+    770, 1209;   % 4
+    770, 1336;   % 5
+    770, 1477;   % 6
+    852, 1209;   % 7
+    852, 1336;   % 8
+    852, 1477    % 9
+];
+
+% Main script
+digit = 0; % DTMF digit to be generated
+fs = SAMPLING_FREQUENCY;
+duration = DURATION;
+
+% Generate the DTMF tone
+dtmf_tone = generateDtmf(digit, fs, duration, freqPairs);
+
+% Perform DFT
+[real_out, imag_out] = dft(dtmf_tone);
+
+% Calculate magnitudes and frequencies
+N = length(dtmf_tone);
+magnitudes = sqrt(real_out.^2 + imag_out.^2);
+frequencies = (0:N-1)' * fs / N;
+frequencies(frequencies > fs/2) = frequencies(frequencies > fs/2) - fs;
+
+% Find dominant frequency peaks
+peaks = findDominantPeaks(frequencies, magnitudes);
+
+% Recover the DTMF digit
+recovered_digit = recoverDtmfDigit(peaks, freqPairs);
+
+% Display results
+if recovered_digit >= 0
+    fprintf('Recovered DTMF digit: %d\n', recovered_digit);
+else
+    fprintf('No DTMF digit detected.\n');
+end
+
+% Function definitions
+function dtmf_tone = generateDtmf(digit, fs, duration, freqPairs)
+    f1 = freqPairs(digit + 1, 1);
+    f2 = freqPairs(digit + 1, 2);
+    t = (0:1/fs:duration-1/fs)';
+    dtmf_tone = 10 * (sin(2 * pi * f1 * t) + sin(2 * pi * f2 * t));
+end
+
+function [real_out, imag_out] = dft(signal)
+    N = length(signal);
+    real_out = zeros(N, 1);
+    imag_out = zeros(N, 1);
+    for k = 0:N-1
+        for n = 0:N-1
+            angle = 2 * pi * k * n / N;
+            real_out(k+1) = real_out(k+1) + signal(n+1) * cos(angle);
+            imag_out(k+1) = imag_out(k+1) - signal(n+1) * sin(angle);
+        end
+    end
+end
+
+function peaks = findDominantPeaks(frequencies, magnitudes)
+    max1 = 0; max2 = 0;
+    freq1 = 0; freq2 = 0;
+
+    for i = 1:length(frequencies)
+        currentFreq = frequencies(i);
+        currentMag = magnitudes(i);
+
+        % Check if frequency is positive
+        if currentFreq >= 0
+            % Compare current magnitude with max1
+            if currentMag > max1
+                % Update max2 and freq2 with previous max1 and freq1
+                max2 = max1;
+                freq2 = freq1;
+                % Update max1 and freq1 with current values
+                max1 = currentMag;
+                freq1 = currentFreq;
+            elseif currentMag > max2
+                % Update max2 and freq2 with current values
+                max2 = currentMag;
+                freq2 = currentFreq;
+            end
+        end
+    end
+
+    % Compare freq1 and freq2 to determine the order
+    if freq1 < freq2
+        peaks = [freq1, freq2];
+    else
+        peaks = [freq2, freq1];
+    end
+end
+
+function digit = recoverDtmfDigit(peaks, freqPairs)
+    for i = 1:size(freqPairs, 1)
+        f1 = freqPairs(i, 1);
+        f2 = freqPairs(i, 2);
+
+        if (abs(peaks(1) - f1) < 10 && abs(peaks(2) - f2) < 10) || ...
+           (abs(peaks(1) - f2) < 10 && abs(peaks(2) - f1) < 10)
+            digit = i - 1; % Digit found (subtract 1 because MATLAB is 1-indexed)
+            return;
+        end
+    end
+    digit = -1; % No match found
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m
new file mode 100644
index 000000000000..2fae6b58462e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/echoCancellation.m
@@ -0,0 +1,36 @@
+% Constants
+INPUT_LENGTH = 100000000;
+PI = pi; % MATLAB has pi built-in
+fs = 8000;
+step = 1 / fs;
+
+% Generate input range
+input = (0:step:(INPUT_LENGTH-1)*step)';
+
+% Generate clean signal
+f_sig = 500;
+clean_sig = sin(2 * PI * f_sig * input);
+
+% Generate noise signal with a delay of 2 samples
+noise = [zeros(2, 1); clean_sig(1:end-2)];
+
+% Create noisy signal by adding noise to clean signal
+noisy_sig = clean_sig + noise;
+
+% LMS filter parameters
+mu = 0.01;
+filterSize = 32;
+
+% LMS filter implementation
+w = zeros(filterSize, 1);
+y = zeros(INPUT_LENGTH, 1);
+
+for n = filterSize:INPUT_LENGTH
+    x = noisy_sig(n:-1:n-filterSize+1);
+    y(n) = w' * x;
+    e = clean_sig(n) - y(n);
+    w = w + mu * e * x;
+end
+
+% Print result
+fprintf('%f\n', y);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py
new file mode 100644
index 000000000000..3c5fce6ef316
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/getSize.py
@@ -0,0 +1,107 @@
+import os
+import shutil
+import subprocess
+import pandas as pd
+# Measures the code size of each MATLAB benchmark application.
+# For every application listed in input_files the script:
+#   1. rewrites the "INPUT_LENGTH = ..." line of <app>.m for each configured size,
+#   2. compiles the file with the MATLAB Compiler (mcc) into ./Output,
+#   3. runs `size` on the produced executable and records the reported numbers,
+#   4. writes all rows collected so far to codesize.csv and empties ./Output.
+# Steps to run:
+# Open a terminal at the path of the script (all relative paths start there) --
+# Run: python getSize.py #3.11 validated
+
+# Applications to measure: base names of the .m files next to this script.
+# Names without a readable .m file are reported and skipped.
+input_files = ["audioCompression", "biomedicalSignalProcessing", "dtmfDetection", "lowPassFIRFilterDesign", "noisecancelling", \
+"radarSignalProcessing", "signalSmoothing", "speakerIdentification", "targetDetection", "vibrationAnalysis", "audioEqualizer", \
+"digitalModulation", "echocancelling", "hearingAid", "lowPassFull", "periodogram2Conv1", "spaceCommunication", "spectralAnalysis", \
+"underWaterCommunication", "voiceActivityDetection"]
+
+# Machine-specific MATLAB installation paths.
+mcc_path ="/home/local/ASURITE/apkhedka/Matlab_Installation/bin/mcc"
+mrt_path ="/home/local/ASURITE/apkhedka/Matlab_Runtime/R2024b/"  # not referenced below
+# Folder that mcc writes into, `size` reads from and the cleanup step empties.
+OutputPath = "./Output"
+# Label -> value written into the INPUT_LENGTH line before compiling.
+size_test = {"100M": 100000000}
+# One row per executable that `size` reported on.
+data = []
+
+
+def delete_folder_contents(folder_path):
+    """Delete everything inside folder_path while keeping the folder itself."""
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception as e:
+            # Best effort: report the failure and keep deleting the remaining entries
+            print(f'Failed to delete {file_path}. Reason: {e}')
+
+
+for input_file in input_files:
+    input_file_path = input_file + ".m"
+    print(f"Running Application {input_file_path}")
+
+    # mcc, size and the cleanup below all work on ./Output, so create that folder
+    os.makedirs(OutputPath, exist_ok=True)
+
+    try:
+        with open(input_file_path, "r") as file:
+            lines = file.readlines()
+    except OSError as exc:
+        # Still skip unreadable/missing sources, but say so instead of hiding it
+        print(f"Skipping {input_file_path}: {exc}")
+        continue
+
+    print("", end="\t")
+
+    for key, value in size_test.items():
+        print("\n{}".format(key), end="\t")
+        # Rewrite the source with the requested INPUT_LENGTH; other lines are kept
+        with open(input_file_path, "w") as file:
+            for line in lines:
+                if line.strip().startswith("INPUT_LENGTH = "):
+                    file.write(f"INPUT_LENGTH = {value};\n")
+                else:
+                    file.write(line)
+
+        command = f"{mcc_path} -m {input_file_path} -d 'Output/' -o {input_file}{key}"
+        print(command)
+        result = subprocess.run(command, shell=True, capture_output=True, text=True)
+        if result.returncode != 0:
+            # subprocess.run() does not raise without check=True, so test explicitly
+            print(f"mcc returned {result.returncode} for {input_file_path}\n{result.stderr}")
+
+        command2 = f"size ./Output/{input_file}{key}"
+        result = subprocess.run(command2, shell=True, capture_output=True, text=True)
+        if result.returncode != 0:
+            print(
+                    f"Process failed because did not return a successful return code. "
+                    f"Returned {result.returncode}\n{result.stderr}"
+                    )
+            continue
+
+        # `size` is expected to print a header line followed by one line of numbers
+        output_parts = result.stdout.splitlines()
+        if len(output_parts) > 1:
+            size_data = output_parts[1].split()
+
+            data.append({
+                "filename": input_file_path,
+                # NOTE(review): Berkeley-format `size` prints text, data, bss, dec, hex;
+                # if that is the format here, [:4] adds `dec` (= text+data+bss) on top
+                # of the three sections and doubles the total -- confirm before changing.
+                "total": sum(map(int, size_data[:4]))
+            })
+
+    # Rewrite the CSV after every application with all rows collected so far
+    df = pd.DataFrame(data)
+    df.to_csv("codesize.csv", index=False)
+
+    delete_folder_contents(OutputPath)
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m
new file mode 100644
index 000000000000..47208238ddc7
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/hearingAid.m
@@ -0,0 +1,45 @@
+% Define constants
+INPUT_LENGTH = 100000000;
+fs = 8000;
+step = 1 / fs;
+
+% Generate input range
+input = (0:step:(INPUT_LENGTH-1)*step)';
+
+% Generate clean signal
+f_sig = 500;
+clean_sig = sin(2 * pi * f_sig * input);
+
+% Generate noise signal with frequency of 3000 Hz
+f_noise = 3000;
+noise = 0.5 * sin(2 * pi * f_noise * input);
+
+% Create noisy signal by adding noise to clean signal
+noisy_sig = clean_sig + noise;
+
+% Apply LMS filter (lmsFilterResponse is the local function at the end of this file)
+mu = 0.01;
+filterSize = 32;
+y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+
+% Apply final gain factor G1 to the LMS filter output
+G1 = 1002300;
+sol = G1 * y;
+
+% Display 
+disp(sol);
+
+% LMS filter response function; kept last because MATLAB before R2024a rejects script code after a local function
+function y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize)
+    w = zeros(filterSize, 1); % Filter weights, start at zero
+    y = zeros(size(noisy_sig));
+    
+    for n = 1:length(noisy_sig)
+        x = noisy_sig(max(1, n-filterSize+1):n); % Up to filterSize most recent samples
+        x = [zeros(filterSize - length(x), 1); x]; % Zero-pad at the front while fewer samples exist
+        y(n) = w' * x;
+        e = clean_sig(n) - y(n);
+        w = w + mu * e * x;
+        y(n) = e; % The returned signal is the error e, replacing the filter output stored above
+    end
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m
new file mode 100644
index 000000000000..4b5348e6641f
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/lowPassFiltering.m
@@ -0,0 +1,40 @@
+% Define constants
+PI = pi;
+INPUT_LENGTH = 100000000; % Number of samples in the generated signals
+fs = 8000; % Sampling frequency used to normalise the cutoff below
+
+% Generate input vector: column of sample times spaced by 0.000125 (= 1/fs)
+input = (0:0.000125:(INPUT_LENGTH-1)*0.000125)';
+
+% Signal processing steps: clean sine at f_sig plus a half-amplitude sine at f_noise
+f_sig = 500;
+getSinDuration = 2 * PI * f_sig * input;
+clean_sig = sin(getSinDuration);
+
+f_noise = 3000;
+getNoiseSinDuration = 2 * PI * f_noise * input;
+noise = sin(getNoiseSinDuration);
+
+scaled_noise = 0.5 * noise;
+noisy_sig = clean_sig + scaled_noise;
+
+% Filter design: cutoff fc, cutoff wc expressed relative to fs, N filter taps
+fc = 1000;
+wc = 2 * PI * fc / fs;
+N = 101;
+
+% Low-pass FIR filter; tap offsets n are symmetric around 0
+n = -(N-1)/2:(N-1)/2;
+lpf = (wc / PI) * sinc(wc * n / PI);
+
+% Hamming window of length N
+hamming = 0.54 - 0.46 * cos(2 * PI * (0:N-1) / (N-1));
+
+% Apply window to filter (element-wise product)
+lpf_w = lpf .* hamming;
+
+% Apply FIR filter (denominator 1, numerator lpf_w)
+FIRfilterResponse = filter(lpf_w, 1, noisy_sig);
+
+% Display results: only the 2nd output sample is shown
+disp(FIRfilterResponse(2));
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m
new file mode 100644
index 000000000000..be40a7369d23
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/noiseCancellation.m
@@ -0,0 +1,32 @@
+% Constants
+INPUT_LENGTH = 100000000;
+
+% Sample times 0, 0.000125, ..., (INPUT_LENGTH-1)*0.000125; linspace(0, N*step, N) would stretch the spacing to N*step/(N-1)
+t = (0:INPUT_LENGTH-1) * 0.000125;
+
+f_sig = 500;
+clean_sig = sin(2 * pi * f_sig * t); % Clean reference signal (row vector)
+
+f_noise = 3000;
+noise = 0.5 * sin(2 * pi * f_noise * t); % Half-amplitude interference
+
+noisy_sig = clean_sig + noise;
+
+% LMS filter response: step size mu and number of filter taps
+mu = 0.01;
+filterSize = 32;
+
+% Preallocate arrays (row vectors, matching the signals above)
+w = zeros(1, filterSize);
+y = zeros(1, INPUT_LENGTH);
+
+% Implement LMS filter; entries of y before index filterSize stay 0
+for n = filterSize:INPUT_LENGTH
+    x = noisy_sig(n:-1:n-filterSize+1); % Latest filterSize noisy samples, newest first
+    y(n) = w * x'; % Filter output for sample n
+    e = clean_sig(n) - y(n); % Error against the clean reference
+    w = w + mu * e * x; % Weight update scaled by mu and the error
+end
+
+sol = 10 * y;
+fprintf('%f\n', sol);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m
new file mode 100644
index 000000000000..8484900de377
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/periodogram.m
@@ -0,0 +1,20 @@
+% Number of input samples (a plain script variable)
+INPUT_LENGTH = 10;
+
+% Generate input range: 0, 1, ..., INPUT_LENGTH-1
+input = 0:1:(INPUT_LENGTH-1);
+
+% Reverse input
+reverse_input = flip(input);
+
+% FIR Filter Response (Convolution); 'same' keeps the output as long as input
+conv1d = conv(input, reverse_input, 'same');
+
+% Compute DFT using FFT
+fft_result = fft(conv1d);
+
+% Compute square magnitude of every FFT bin
+sq = abs(fft_result).^2;
+
+% Display results: one value per line
+fprintf('%f\n', sq);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m
new file mode 100644
index 000000000000..3874976f3833
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/radarSignalProcessing.m
@@ -0,0 +1,79 @@
+% Constants
+PI = pi;
+INPUT_LENGTH = 10000;
+
+% Input signal and filter parameters
+input = linspace(0, (INPUT_LENGTH - 1) * 0.000125, INPUT_LENGTH); % Using linspace instead of getrangeofvector
+weights = linspace(-90, 180, 4); % Example antenna weights
+antennas = 4;
+input_fc = 5;
+N = 101;
+fc1 = 1000;
+fc2 = 7500;
+Fs = 8000;
+
+% Generate beamformed signal
+signal = beamForm(antennas, input_fc, input, weights);
+
+% Compute absolute values and power profile
+b1 = abs(signal);
+power = b1 .^ 2; % element-wise square instead of power_profile
+
+% Low-pass and high-pass FIR filters with Hamming window
+wc1 = 2 * PI * fc1 / Fs;
+filter1 = lowPassFIRFilter(wc1, N);
+filter_hamming_1 = filter1 .* hamming(N, 'symmetric')'; % Using 'symmetric' Hamming window
+
+wc2 = 2 * PI * fc2 / Fs;
+filter2 = highPassFIRFilter(wc2, N);
+filter_hamming_2 = filter2 .* hamming(N, 'symmetric')'; % Using 'symmetric' Hamming window
+
+% Band-pass filter by subtracting the filters
+bpf = filter_hamming_2 - filter_hamming_1;
+
+% Apply FIR filter to the power profile (use full convolution)
+firFilterResponse = conv(power, bpf, 'full'); % Use 'full' to match C code
+
+% Output the 2nd sample of the filtered signal (adjust the index if necessary)
+final = firFilterResponse(2); % Adjust to match desired index in C code
+fprintf('final: %f\n', final);
+
+% Functions
+
+function output = beamForm(antennas, frequency, time, weights)
+    % Weighted sum of `antennas` sine waves that share one frequency; wave i is
+    % shifted in phase by (i - 1) * pi / 4 and scaled by weights(i).
+    phase_var = 2 * pi * frequency;
+    signal = zeros(antennas, length(time));
+
+    % One row per antenna: the column of phase offsets expands against the time row
+    offsets = ((1:antennas)' - 1) * pi / 4.0;
+    signal(:, :) = sin(time(:).' * phase_var + offsets);
+    output = sum(signal .* weights', 1); % Beamforming by weighted summation
+end
+
+function output = lowPassFIRFilter(wc, N)
+    % N-tap response sin(wc*k) / (pi*k), where k is each tap's offset from the
+    % centre tap; the centre tap itself (k == 0) is set to wc / pi.
+    midIndex = (N - 1) / 2;
+    output = zeros(1, N);
+
+    % Offset of every tap from the centre
+    k = (1:N) - midIndex - 1;
+    % The 0/0 produced at the centre tap is overwritten on the next line
+    output(:) = sin(wc * k) ./ (pi * k);
+    output(k == 0) = wc / pi;
+end
+
+function output = highPassFIRFilter(wc, N)
+    % N-tap response -sin(wc*k) / (pi*k), where k is each tap's offset from the
+    % centre tap; the centre tap itself (k == 0) is set to 1 - wc / pi.
+    midIndex = (N - 1) / 2;
+    output = zeros(1, N);
+
+    % Offset of every tap from the centre
+    k = (1:N) - midIndex - 1;
+    % The 0/0 produced at the centre tap is overwritten on the next line
+    output(:) = -sin(wc * k) ./ (pi * k);
+    output(k == 0) = 1 - wc / pi;
+end
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m
new file mode 100644
index 000000000000..0753b6e78ef9
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/signalSmoothing.m
@@ -0,0 +1,30 @@
+% Define constants
+INPUT_LENGTH = 1000;
+SAMPLE_RATE = 8000; % NOTE(review): not referenced again in this file
+step = 0.000125; % Spacing between consecutive sample times
+WINDOW_SIZE = 3; % Window length passed to both filters below
+
+% Generate input range: column of sample times from 0 to (INPUT_LENGTH-1)*step
+input = (0:step:(INPUT_LENGTH-1)*step)';
+
+% Signal parameters
+f_sig = 500;
+f_noise = 3000;
+
+% Generate clean signal
+clean_sig = sin(2*pi*f_sig*input);
+
+% Generate noise (half-amplitude sine at f_noise)
+noise = 0.5 * sin(2*pi*f_noise*input);
+
+% Create noisy signal
+noisy_sig = clean_sig + noise;
+
+% Apply median filter
+median_filtered = medfilt1(noisy_sig, WINDOW_SIZE);
+
+% Apply moving average filter to the median-filtered signal
+avg_filtered = movmean(median_filtered, WINDOW_SIZE);
+
+% Print the 4th element of the final result
+disp(avg_filtered(4));
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m
new file mode 100644
index 000000000000..6dbc04445c1e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spaceCommunication.m
@@ -0,0 +1,95 @@
+
+
+function main()
+    % Define constants
+    INPUT_LENGTH = 100000000;
+    
+    % Generate input vector
+    input = getRangeOfVector(0, INPUT_LENGTH, 1);
+    
+    % Threshold
+    binary_sig = thresholdUp(input, INPUT_LENGTH, 50);
+    
+    % Modulate
+    modulated_signal = space_modulate(binary_sig, INPUT_LENGTH);
+    
+    % Transmit and receive (add noise)
+    received_signal = transmit_and_receive(modulated_signal, INPUT_LENGTH, 1.0);
+    
+    % Demodulate
+    demodulated_data = demodulate(received_signal, INPUT_LENGTH);
+    
+    % Error correction
+    corrected_data = error_correction(demodulated_data);
+    
+    % Decode data
+    decoded_data = decode_data(corrected_data);
+    
+    % Display the 9th corrected character, a single '0'/'1' (equivalent to printing corrected_data[8] in C); decoded_data is not printed
+    fprintf('%c\n', corrected_data(9));
+end
+
+% Function to generate a vector with a given range and increment (returns `length` values as a row)
+function vector = getRangeOfVector(start, length, increment)
+    vector = start:increment:(start + (length - 1) * increment);
+end
+
+% Thresholding function (creates a binary string from a vector: '1' above threshold, '0' otherwise)
+function output = thresholdUp(input, length, threshold)
+    output = char(zeros(1, length));  % Preallocate output
+    output(input > threshold) = '1';
+    output(input <= threshold) = '0';
+end
+
+% Space modulation: convert binary string to modulated signal ('1' -> 1, '0' -> -1)
+function output = space_modulate(input, length)
+    output = zeros(1, length);
+    output(input == '1') = 1;
+    output(input == '0') = -1;
+end
+
+% Transmit and receive (add noise based on sine of the signal); length and noise_level are not used
+function received_signal = transmit_and_receive(signal, length, noise_level)
+    received_signal = signal + sin(signal);  % Add noise (sine-based in this case)
+end
+
+% Demodulate: convert received signal back into binary data ('1' for positive samples, '0' otherwise)
+function demodulated_data = demodulate(signal, length)
+    demodulated_data = char(zeros(1, length));
+    demodulated_data(signal > 0) = '1';
+    demodulated_data(signal <= 0) = '0';
+end
+
+% Error correction function
+function corrected = error_correction(data)
+    % Force an even count of '1' characters in every 8-character group by
+    % clearing the group's first character when the count is odd. Characters
+    % after the last complete group are copied through unchanged instead of
+    % indexing past the end of data.
+    nBits = numel(data);
+    corrected = char(zeros(1, nBits));  % Preallocate corrected array
+    lastFull = nBits - mod(nBits, 8);   % Last index covered by a complete group
+    
+    for i = 1:8:lastFull
+        segment = data(i:i+7);
+        if mod(sum(segment == '1'), 2) ~= 0
+            segment(1) = '0';           % Odd parity: clear the first character
+        end
+        corrected(i:i+7) = segment;
+    end
+    
+    corrected(lastFull+1:nBits) = data(lastFull+1:nBits);  % Incomplete trailing group
+end
+
+% Decode binary data to ASCII characters
+function decoded = decode_data(binary)
+    % Turn every consecutive group of 8 '0'/'1' characters into one character
+    % whose code is the group's binary value.
+    nBytes = numel(binary) / 8;
+    decoded = char(zeros(1, nBytes));  % One output character per group
+    
+    for k = 1:nBytes
+        first = 8 * (k - 1) + 1;       % Index of the group's first character
+        decoded(k) = char(bin2dec(binary(first:first+7)));
+    end
+end
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m
new file mode 100644
index 000000000000..bf7e9a18991b
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/speakerIdentification.m
@@ -0,0 +1,46 @@
+% Call the main function; script code comes first because MATLAB before R2024a requires local functions at the end of the file
+main();
+
+% Function to generate voice signature (sinusoidal wave with two frequencies)
+function signal = generateVoiceSignature(freq1, freq2, duration, sample_rate)
+    t = linspace(0, duration, sample_rate * duration);
+    signal = sin(2 * pi * freq1 * t) + cos(2 * pi * freq2 * t);
+end
+
+% Function to compute the dot product (correlation) between two signals
+function result = correlate(signal1, signal2)
+    result = sum(signal1 .* signal2);
+end
+
+% Main function
+function main()
+    % Sample rate and duration
+    sample_rate = 1000;
+    duration = 1;
+    
+    % Generate voice signatures for Alice, Bob, Charlie
+    person1 = generateVoiceSignature(100, 200, duration, sample_rate); % Alice
+    person2 = generateVoiceSignature(150, 250, duration, sample_rate); % Bob
+    person3 = generateVoiceSignature(120, 180, duration, sample_rate); % Charlie
+    
+    % Generate an unknown signal (Bob's signature in this case)
+    unknown_signal = generateVoiceSignature(150, 250, duration, sample_rate); % Change this to test
+    
+    % Correlate unknown signal with each person's signature
+    max1 = correlate(person1, unknown_signal);
+    max2 = correlate(person2, unknown_signal);
+    max3 = correlate(person3, unknown_signal);
+    
+    % Store correlation results
+    total_maxes = [max1, max2, max3];
+    
+    % Find the index of the maximum correlation result
+    [max_value, max_index] = max(total_maxes);
+    
+    % Output results
+    fprintf('Max Index: %d\n', max_index);
+    fprintf('Max Value: %f\n', max_value);
+    fprintf('Correlation with Alice: %f\n', max1);
+    fprintf('Correlation with Bob: %f\n', max2);
+    fprintf('Correlation with Charlie: %f\n', max3);
+end
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m
new file mode 100644
index 000000000000..787269371ac3
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/spectralAnalysis.m
@@ -0,0 +1,22 @@
+% Constants
+INPUT_LENGTH = 100000000;
+
+% Build the input with the local getRange function: 0, 1, ..., INPUT_LENGTH-1
+input = getRange(0, INPUT_LENGTH, 1);
+
+% DFT function (using built-in FFT)
+fft_result = fft(input);
+
+% Square of absolute values
+sq_abs = abs(fft_result).^2;
+
+% Average of the squared magnitudes
+res = mean(sq_abs);
+
+% Display result
+fprintf('%f\n', res);
+
+%  getRange function: noOfSamples values starting at start, spaced by increment
+function output = getRange(start, noOfSamples, increment)
+    output = start + (0:noOfSamples-1) * increment;
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m
new file mode 100644
index 000000000000..fda717133060
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/targetDetection.m
@@ -0,0 +1,51 @@
+% Constants
+INPUT_LENGTH = 100000000;
+MAX_PEAKS = 100; % NOTE(review): not referenced again in this file
+
+% Generate input range: column of sample times spaced by 0.000125
+input = (0:0.000125:(INPUT_LENGTH-1)*0.000125)';
+
+% Generate signals: sig1 uses multiplier 2*pi*10, sig2 is a half-amplitude sine with 2*pi*20
+getMultiplier = 2 * pi * 10;
+getSinDuration = input * getMultiplier;
+sig1 = sin(getSinDuration);
+
+getMultiplier2 = 2 * pi * 20;
+getSinDuration2 = input * getMultiplier2;
+sinsig2 = sin(getSinDuration2);
+sig2 = 0.5 * sinsig2;
+
+% Combine signals
+signal = sig1 + sig2;
+
+% Add delayed noise (signal shifted by 5 samples, zero-padded at the start)
+noise = [zeros(5, 1); signal(1:end-5)];
+noisy_sig = signal + noise;
+
+% LMS Filter; NOTE(review): y is computed but not used by the statements below
+mu = 0.01;
+filterSize = 20;
+y = lmsFilterResponse(noisy_sig, signal, mu, filterSize);
+
+% Find peaks of the combined signal (not of the LMS output)
+[peaks, ~] = findpeaks(signal, 'MinPeakHeight', 1, 'MinPeakDistance', 50);
+
+% Display results: heights of the 2nd and 3rd detected peaks
+fprintf('%d %d\n', peaks(2), peaks(3));
+
+
+% LMS Filter Response Function: returns the error signal e for every sample
+function output = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize)
+    length = numel(noisy_sig); % Shadows the built-in length inside this function
+    w = zeros(filterSize, 1);
+    output = zeros(length, 1);
+    
+    for n = 1:length
+        x = noisy_sig(max(1, n-filterSize+1):n); % Up to filterSize most recent samples
+        x = [zeros(filterSize - numel(x), 1); x]; % Zero-pad at the front while fewer samples exist
+        y = w' * x; % Filter output for sample n
+        e = clean_sig(n) - y; % Error against the reference signal
+        w = w + mu * e * x; % Weight update scaled by mu and the error
+        output(n) = e;
+    end
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m
new file mode 100644
index 000000000000..bd4e3f04fe17
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/underWaterCommunication.m
@@ -0,0 +1,79 @@
+% Constants
+INPUT_LENGTH = 100000000;
+FILTER_ORDER = 5; % Number of taps passed to lowPassFIRFilter and hamming
+
+% Sampling frequency; NOTE(review): fs is not referenced again in this file
+fs = 1000;
+
+% Generate input vector: column 0, 1, ..., INPUT_LENGTH-1
+input = getRangeOfVector(0, INPUT_LENGTH, 1);
+
+% Gain calculation
+getMultiplier = 2 * pi * 5;
+getSinDuration = gain(input, getMultiplier);
+
+% Sine wave generation
+signal = sine(getSinDuration);
+
+% Adding delay (noise): the signal shifted by 5 samples
+noise = delay(signal, 5);
+
+% Adding signal and noise
+noisy_sig = add(signal, noise);
+
+% Low-pass filter parameters
+fc = 1000;
+wc = 2 * pi * fc / 500;  % wc should vary from 0 to pi; NOTE(review): with fc = 1000 this evaluates to 4*pi - confirm
+
+% Low-pass FIR filter design
+lpf = lowPassFIRFilter(wc, FILTER_ORDER);
+hamming_window = hamming(FILTER_ORDER);
+
+% Apply Hamming window to the filter
+lpf_w = lpf .* hamming_window;
+
+% FIR filter response
+FIRfilterResponse = FIRFilterResponse(noisy_sig, lpf_w);
+
+% Thresholding operation: values not above threshold are replaced by 0
+threshold = 0.5;
+GetThresholdReal = thresholdUp(FIRfilterResponse, threshold, 0);
+
+% Display the result (3rd element only)
+disp(GetThresholdReal(3));
+
+% Function implementations
+
+function vector = getRangeOfVector(start, length, increment) % Column of `length` values from start, spaced by increment
+    vector = (start : increment : start + (length-1)*increment)';
+end
+
+function output = gain(input, multiplier) % Scale every element by multiplier
+    output = input * multiplier;
+end
+
+function output = sine(input) % Element-wise sine
+    output = sin(input);
+end
+
+function output = delay(input, delaySamples) % Shift a column down by delaySamples, zero-filling the start
+    output = [zeros(delaySamples, 1); input(1:end-delaySamples)];
+end
+
+function output = add(input1, input2) % Element-wise sum
+    output = input1 + input2;
+end
+
+function filter = lowPassFIRFilter(wc, length) % Column of `length` taps centred on offset 0
+    n = (-(length-1)/2:(length-1)/2)';
+    filter = wc/pi * sinc(wc/pi * n);
+end
+
+function output = FIRFilterResponse(input, filter) % Convolution trimmed to the size of input
+    output = conv(input, filter, 'same');
+end
+
+function output = thresholdUp(input, threshold, defaultValue) % Values <= threshold become defaultValue
+    output = max(input, threshold); % Raise everything below threshold up to threshold
+    output(output == threshold) = defaultValue; % Then replace those entries (and exact matches)
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m
new file mode 100644
index 000000000000..934c4e9501a2
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/vibrationAnalysis.m
@@ -0,0 +1,39 @@
+% Constants
+INPUT_LENGTH = 10000000;
+fs = 1000; % NOTE(review): not referenced again in this file
+
+% Generate input signal: row 0, 1, ..., INPUT_LENGTH-1
+input = 0:(INPUT_LENGTH-1);
+
+% Generate first sinusoidal signal
+getMultiplier = 2 * pi * 50;
+getSinDuration = input * getMultiplier;
+sig1 = sin(getSinDuration);
+
+% Generate second sinusoidal signal (half amplitude)
+getMultiplier2 = 2 * pi * 120;
+getSinDuration2 = input * getMultiplier2;
+sig2 = 0.5 * sin(getSinDuration2);
+
+% Combine signals
+signal = sig1 + sig2;
+
+% Add delayed noise (signal shifted by 5 samples, zero-padded at the start)
+noise = [zeros(1, 5), signal(1:end-5)];
+noisy_sig = signal + noise;
+
+% Perform DFT
+dft_output = fft(noisy_sig);
+
+% Calculate squared magnitude
+sq_abs = abs(dft_output).^2;
+
+% Calculate mean; NOTE(review): res is not used by the statements below
+res = mean(sq_abs);
+
+% Apply threshold: entries below threshold_value become 0, others are kept
+threshold_value = 0.2;
+GetThresholdReal = sq_abs .* (sq_abs >= threshold_value);
+
+% Display results (the whole thresholded vector)
+disp(GetThresholdReal);
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m
new file mode 100644
index 000000000000..a3cc47b620c8
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/BenchmarkTest/Matlab/voiceActivityDetection.m
@@ -0,0 +1,57 @@
+% Constants
+INPUT_LENGTH = 100000000;
+
+% Main script; NOTE(review): fs is not referenced again in this file
+fs = 1000;
+input = getRangeOfVector(0, INPUT_LENGTH, 1);
+
+getMultiplier = 2 * pi * 5;
+getSinDuration = gain(input, getMultiplier);
+
+signal = sine(getSinDuration);
+
+noise = delay(signal, 5); % The signal shifted by 5 samples
+
+noisy_sig = add(signal, noise);
+
+threshold_value = 0.8;
+GetThresholdReal = threshold(noisy_sig, threshold_value); % Samples with magnitude below 0.8 become 0
+
+zcr = zeroCrossCount(GetThresholdReal);
+
+% Display results (4th element of the thresholded signal)
+disp(GetThresholdReal(4));
+
+% Print zero-crossing count
+fprintf('Zero-crossing count: %d\n', zcr);
+
+% Function implementations
+function vector = getRangeOfVector(start, length, increment) % Column of `length` values from start, spaced by increment
+    vector = (start : increment : start + (length-1)*increment)';
+end
+
+function output = gain(input, multiplier) % Scale every element by multiplier
+    output = input * multiplier;
+end
+
+function output = sine(input) % Element-wise sine
+    output = sin(input);
+end
+
+function output = delay(input, delaySamples) % Shift a column down by delaySamples, zero-filling the start
+    output = [zeros(delaySamples, 1); input(1:end-delaySamples)];
+end
+
+function output = add(input1, input2) % Element-wise sum
+    output = input1 + input2;
+end
+
+function output = threshold(input, thresholdValue) % Zero every sample whose magnitude is below thresholdValue
+    output = input;
+    output(abs(input) < thresholdValue) = 0;
+end
+
+function count = zeroCrossCount(input) % Count adjacent samples whose signs are +1 and -1 (either order)
+    signs = sign(input);
+    count = sum(abs(diff(signs)) == 2); % A step to or from an exact 0 changes sign by 1 and is not counted
+end
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py
new file mode 100644
index 000000000000..c271fe777e77
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/DTMFToneDetection.py
@@ -0,0 +1,43 @@
+def main() {
+  # Generate the DTMF signal for digit '5'; f1 and f2 below are only referenced by the commented-out code
+  var fs = 8000;
+  var duration = 0.5;
+  var f1 = 770;
+  var f2 = 1336;
+#   # var step = 1/fs; 
+#   # print(step);
+#   # total instances = fs * duration
+#   var total_instances = fs * duration;
+#   var t = getRangeOfVector(0,4000,0.000125);
+#   var pi = 3.14159265359;
+#   var getMultiplier = 2 * pi * f1;
+#   var getSinDuration = gain(t, getMultiplier);
+#   var sig1 = sin(getSinDuration);
+ 
+
+#   var getMultiplier2 = 2 * pi * f2;
+#   var getSinDuration2 = gain(t, getMultiplier2);
+#   var sig2 = sin(getSinDuration2);
+#   var signal = sig1 + sig2;
+#   var finalsig = gain(signal, 0.5);
+  
+
+
+#   var noise = delay(signal, 5);
+#   var noisy_sig = signal + noise;
+#   var threshold = 4;
+  
+#   var fft_real = fft1dreal(noisy_sig);
+#   var fft_img = fft1dimg(noisy_sig);
+
+#   var magnitude = square(fft_real) + square(fft_img);
+# print(magnitude);  
+#   # res = gain(sum , 1/N)
+#   var len1 = len(t);
+#   # var res = sum1 / len1;
+#   # print(sq_abs);
+#   var GetThresholdReal = threshold( magnitude , threshold);
+var dtmf_sig = generateDtmf(5,duration,fs);
+print(dtmf_sig);
+
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py
similarity index 90%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py
index a7962eb4c8c0..6fe49536d05d 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/ExtractOpName.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/ExtractOpName.py
@@ -2,9 +2,9 @@
 import os
 
 fileNamePath = "mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td"
-BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/"
+# BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/"
 # OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/"
-
+BasePathForLLVM = "/home/local/ASURITE/apkhedka/ForLLVM/"
 fileName = BasePathForLLVM + fileNamePath
 print(fileName)
 # Create 'Output' folder if it doesn't exist
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/HammingWindow.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/HammingWindow.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/HammingWindow.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/HammingWindow.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/LMSNoiseFilter.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/LMSNoiseFilter.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/LMSNoiseFilter.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/LMSNoiseFilter.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/FIRFilterHammingOpt.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/FIRFilterHammingOpt.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/FIRFilterHammingOpt.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/FIRFilterHammingOpt.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForMlirAffine.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForMlirAffine.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForMlirAffine.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForMlirAffine.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForSingleRun.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForSingleRun.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForSingleRun.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/ScriptForSingleRun.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/audioCompression.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/audioCompression.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/audioCompression.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/audioCompression.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/back2backDelay.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/back2backDelay.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/back2backDelay.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/back2backDelay.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py
similarity index 99%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py
index 974d81365d2c..673f4dc80941 100644
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/energyOfSignal.py
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/energyOfSignal.py
@@ -33,8 +33,5 @@ def main() {
   print(res);
   # var final1 = getElemAtIndx(fft_real , [6]); 
   # print(final1);
-
-
-
 }
 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/firFilter10.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/firFilter10.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/firFilter10.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/firFilter10.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFIRFilterDesign1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFIRFilterDesign1.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFIRFilterDesign1.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFIRFilterDesign1.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFull1.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFull1.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/lowPassFull1.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/lowPassFull1.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/noisecancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/noisecancelling.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/noisecancelling.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/noisecancelling.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/periodogram2Conv.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/periodogram2Conv.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/PyDSL/periodogram2Conv.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Quantization.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/Quantization.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Quantization.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/Quantization.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid copy.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid copy.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid copy.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid copy.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/TryHearingAid.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/TryHearingAid.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/audioEqualizer.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/audioEqualizer.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/audioEqualizer.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/audioEqualizer.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/bandPassfilter.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/bandPassfilter.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/bandPassfilter.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/bandPassfilter.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/filterDesign.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/filterDesign.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/filterDesign.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/filterDesign.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/hearingAid.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/hearingAid.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/hearingAid.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/hearingAid.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/highPassfilter.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/highPassfilter.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/highPassfilter.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/highPassfilter.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lmsNoiseCancelling.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lmsNoiseCancelling.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lmsNoiseCancelling.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lmsNoiseCancelling.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lowPassFilterApp.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lowPassFilterApp.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/lowPassFilterApp.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/lowPassFilterApp.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp2.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp2.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/periodogramHelp2.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/ExamplePythonApps/periodogramHelp2.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/ScriptSteps.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/ScriptSteps.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/ScriptSteps.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/ScriptSteps.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/generate_dense_inputs.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/generate_dense_inputs.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/generate_dense_inputs.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/generate_dense_inputs.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/matlab_result.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/matlab_result.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/matlab_result.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/matlab_result.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/working_slidingwind.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/working_slidingwind.py
similarity index 100%
rename from mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/working_slidingwind.py
rename to mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/HelperScripts/working_slidingwind.py
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c
deleted file mode 100644
index 24bcd1f4030b..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/audioCompression.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-#include <stdlib.h>
-
-void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) {
-    for (int i = 0; i < NoOfElements; i++) {
-        input[i] = start + i * Increment;
-    }
-}
-
-void dftReal(double* real, double* input, int length) {
-    for (int k = 0; k < length; k++) {
-        real[k] = 0;
-        for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
-            real[k] += input[n] * cos(angle);
-        }
-    }
-}
-
-void dftImag(double* imag, double* input, int length) {
-    for (int k = 0; k < length; k++) {
-        imag[k] = 0;
-        for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
-            imag[k] -= input[n] * sin(angle);
-        }
-    }
-}
-
-void threshold(double* output, double* input, double thresh, int length) {
-    for (int i = 0; i < length; i++) {
-        if (input[i] >= thresh || input[i] <= -thresh) {
-            output[i] = input[i];
-        } else {
-            output[i] = 0;
-        }
-    }
-}
-
-void quantization(double* output, double* input, int nlevels, double max, double min, int length) {
-    double step = (max - min) / nlevels;
-    for (int i = 0; i < length; i++) {
-        output[i] = round((input[i] - min) / step) * step + min;
-    }
-}
-
-int* runLenEncoding(double* input, int length, int* rleLength) {
-    int* rle = (int*)malloc(length * sizeof(int));
-    int index = 0;
-    for (int i = 1; i < length; i++) {
-        if (input[i] != input[i - 1]) {
-            rle[index++] = input[i - 1];
-            rle[index++] = 1;
-        } else {
-            rle[index - 1]++;
-        }
-    }
-    *rleLength = index;
-    return rle;
-}
-
-double getElemAtIndx(int* rle, int indx) {
-    return rle[indx];
-}
-
-int main() {
-    int input_length = 50000;
-    double input[50000];
-    getRangeOfVector(input, 0, input_length, 1);
-
-    int nlevels = 16;
-    double min = 0;
-    double max = 8;
-
-    double threshold_val = 4;
-
-    double fft10real[50000];
-    double fft10img[50000];
-    
-    dftReal(fft10real, fft10img, input, input_length);
-    dftImag(fft10real, fft10img, input, input_length);
-
-    double GetThresholdReal[50000];
-    double GetThresholdImg[50000];
-    threshold(GetThresholdReal, fft10real, threshold_val, input_length);
-    threshold(GetThresholdImg, fft10img, threshold_val, input_length);
-
-    double QuantOutReal[50000];
-    double QuantOutImg[50000];
-    quantization(QuantOutReal, GetThresholdReal, nlevels, max, min, input_length);
-    quantization(QuantOutImg, GetThresholdImg, nlevels, max, min, input_length);
-
-    int rleLengthReal, rleLengthImg;
-    int* rLEOutReal = runLenEncoding(QuantOutReal, input_length, &rleLengthReal);
-    int* rLEOutImg = runLenEncoding(QuantOutImg, input_length, &rleLengthImg);
-
-    double final1 = getElemAtIndx(rLEOutReal, 6);
-    double final2 = getElemAtIndx(rLEOutImg, 7);
-    printf("%f\n", final1);
-    printf("%f\n", final2);
-
-    free(rLEOutReal);
-    free(rLEOutImg);
-
-    return 0;
-}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c
deleted file mode 100644
index f95be8171a11..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/energyOfSignal.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) {
-    for (int i = 0; i < NoOfElements; i++) {
-        input[i] = start + i * Increment;
-    }
-}
-
-void dftReal(double* real, double* input, int length) {
-    for (int k = 0; k < length; k++) {
-        real[k] = 0;
-        for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
-            real[k] += input[n] * cos(angle);
-        }
-    }
-}
-
-void dftImag(double* imag, double* input, int length) {
-    for (int k = 0; k < length; k++) {
-        imag[k] = 0;
-        for (int n = 0; n < length; n++) {
-            double angle = 2 * M_PI * k * n / length;
-            imag[k] -= input[n] * sin(angle);
-        }
-    }
-}
-
-void square(double* output, double* input, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input[i] * input[i];
-    }
-}
-
-double sum(double* input, int length) {
-    double total = 0;
-    for (int i = 0; i < length; i++) {
-        total += input[i];
-    }
-    return total;
-}
-
-int len(double* input) {
-    return sizeof(input) / sizeof(input[0]);
-}
-
-int main() {
-    int input_length = 10;
-    double input[10];
-    getRangeOfVector(input, 0, input_length, 1);
-
-    double fft_real[10];
-    double fft_img[10];
-    dftReal(fft_real, input, input_length);
-    dftImag(fft_img, input, input_length);
-
-    double sq_real[10];
-    double sq_img[10];
-    square(sq_real, fft_real, input_length);
-    square(sq_img, fft_img, input_length);
-
-    double sq_abs[10];
-    for (int i = 0; i < input_length; i++) {
-        sq_abs[i] = sq_real[i] + sq_img[i];
-    }
-
-    double sum1 = sum(sq_abs, input_length);
-    int len1 = input_length;
-    double res = sum1 / len1;
-
-    printf("%f\n", res);
-
-    return 0;
-}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c
deleted file mode 100644
index 6b8d6b49b9d4..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFIRFilterDesign.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-#include <stdlib.h>
-
-void hamming(double* window, int N) {
-    for (int i = 0; i < N; i++) {
-        window[i] = 0.54 - 0.46 * cos(2 * M_PI * i / (N - 1));
-    }
-}
-
-void lowPassFIRFilter(double* filter, double wc, int N) {
-    int mid = (N - 1) / 2;
-    for (int n = 0; n < N; n++) {
-        if (n == mid) {
-            filter[n] = wc / M_PI;
-        } else {
-            filter[n] = sin(wc * (n - mid)) / (M_PI * (n - mid));
-        }
-    }
-}
-
-void elementWiseMultiplication(double* output, double* array1, double* array2, int N) {
-    for (int i = 0; i < N; i++) {
-        output[i] = array1[i] * array2[i];
-    }
-}
-
-double getElemAtIndx(double* array, int index) {
-    return array[index];
-}
-
-int main() {
-    int N = 51;
-    double pi = 3.14159265359;
-    double fc1 = 500;
-    double Fs = 8000;
-    double wc1 = 2 * pi * fc1 / Fs;
-
-    double lpf[51];
-    lowPassFIRFilter(lpf, wc1, N);
-
-    double hamming_window[51];
-    hamming(hamming_window, N);
-
-    double lpf_w[51];
-    elementWiseMultiplication(lpf_w, lpf, hamming_window, N);
-
-    double final1 = getElemAtIndx(lpf_w, 6);
-
-    printf("%f\n", final1);
-
-    return 0;
-}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c
deleted file mode 100644
index f00a7f8c1bcf..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/lowPassFull1.c
+++ /dev/null
@@ -1,110 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-#define PI 3.14159265359
-
-void getRangeOfVector(double* input, int start, int NoOfElements, double Increment) {
-    for (int i = 0; i < NoOfElements; i++) {
-        input[i] = start + i * Increment;
-    }
-}
-
-void gain(double* output, double* input, double multiplier, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input[i] * multiplier;
-    }
-}
-
-void elementWiseAdd(double* output, double* input1, double* input2, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input1[i] + input2[i];
-    }
-}
-
-void elementWiseMultiply(double* output, double* input1, double* input2, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input1[i] * input2[i];
-    }
-}
-
-void lowPassFIRFilter(double* lpf, double wc, int N) {
-    int mid = (N - 1) / 2;
-    for (int n = 0; n < N; n++) {
-        if (n == mid) {
-            lpf[n] = wc / PI;
-        } else {
-            lpf[n] = (wc / PI) * sin(wc * (n - mid)) / (wc * (n - mid));
-        }
-    }
-}
-
-void hammingWindow(double* hamming, int N) {
-    for (int n = 0; n < N; n++) {
-        hamming[n] = 0.54 - 0.46 * cos(2 * PI * n / (N - 1));
-    }
-}
-
-void FIRFilterResponse(double* output, double* input, double* filter, int input_length, int filter_length) {
-    int i, j;
-    for (i = 0; i < input_length; i++) {
-        output[i] = 0;
-        for (j = 0; j < filter_length; j++) {
-            if (i - j >= 0) {
-                output[i] += input[i - j] * filter[j];
-            }
-        }
-    }
-}
-
-int main() {
-    int fs = 8000;
-    int input_length = 30;
-    double input[30];
-    getRangeOfVector(input, 0, input_length, 0.000125);
-
-    double f_sig = 500;
-    double getMultiplier = 2 * PI * f_sig;
-
-    double getSinDuration[30];
-    gain(getSinDuration, input, getMultiplier, input_length);
-    
-    double clean_sig[30];
-    for (int i = 0; i < input_length; i++) {
-        clean_sig[i] = sin(getSinDuration[i]);
-    }
-
-    double f_noise = 3000;
-    double getNoiseSinDuration[30];
-    gain(getNoiseSinDuration, input, 2 * PI * f_noise, input_length);
-    
-    double noise[30];
-    for (int i = 0; i < input_length; i++) {
-        noise[i] = sin(getNoiseSinDuration[i]);
-    }
-
-    double noisy_sig[30];
-    double scaled_noise[30];
-    gain(scaled_noise, noise, 0.5, input_length);
-    elementWiseAdd(noisy_sig, clean_sig, scaled_noise, input_length);
-
-    double fc = 1000;
-    double wc = 2 * PI * fc / fs;
-    int N = 101;
-    double lpf[101];
-    lowPassFIRFilter(lpf, wc, N);
-
-    double hamming[101];
-    hammingWindow(hamming, N);
-
-    double lpf_w[101];
-    elementWiseMultiply(lpf_w, lpf, hamming, N);
-
-    double FIRfilterResponse[30];
-    FIRFilterResponse(FIRfilterResponse, noisy_sig, lpf_w, input_length, N);
-
-    for (int i = 0; i < input_length; i++) {
-        printf("%f\n", FIRfilterResponse[i]);
-    }
-
-    return 0;
-}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c
deleted file mode 100644
index 235d64705c05..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CCode/noisecancelling.c
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-void getRangeOfVector(double* vector, double start, int length, double increment) {
-    for (int i = 0; i < length; i++) {
-        vector[i] = start + i * increment;
-    }
-}
-
-void gain(double* output, double* input, double multiplier, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input[i] * multiplier;
-    }
-}
-
-void sine(double* output, double* input, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = sin(input[i]);
-    }
-}
-
-void add(double* output, double* input1, double* input2, int length) {
-    for (int i = 0; i < length; i++) {
-        output[i] = input1[i] + input2[i];
-    }
-}
-
-void lmsFilterResponse(double* output, double* noisy_sig, double* clean_sig, double mu, int filterSize, int length) {
-    double w[32] = {0};
-    for (int n = 0; n < length; n++) {
-        double y = 0;
-        for (int i = 0; i < filterSize; i++) {
-            if (n - i >= 0) {
-                y += w[i] * noisy_sig[n - i];
-            }
-        }
-        double e = clean_sig[n] - y;
-        for (int i = 0; i < filterSize; i++) {
-            if (n - i >= 0) {
-                w[i] += mu * e * noisy_sig[n - i];
-            }
-        }
-        output[n] = y;
-    }
-}
-
-int main() {
-    int length = 100;
-    double fs = 8000;
-    double t[100];
-    getRangeOfVector(t, 0, length, 0.000125);
-
-    double f_sig = 500;
-    double pi = 3.14159265359;
-    double getMultiplier = 2 * pi * f_sig;
-
-    double getSinDuration[100];
-    gain(getSinDuration, t, getMultiplier, length);
-
-    double clean_sig[100];
-    sine(clean_sig, getSinDuration, length);
-
-    double f_noise = 3000;
-    double getNoiseMultiplier = 2 * pi * f_noise;
-
-    double getNoiseSinDuration[100];
-    gain(getNoiseSinDuration, t, getNoiseMultiplier, length);
-
-    double noise[100];
-    sine(noise, getNoiseSinDuration, length);
-
-    double noise1[100];
-    gain(noise1, noise, 0.5, length);
-
-    double noisy_sig[100];
-    add(noisy_sig, clean_sig, noise1, length);
-
-    double mu = 0.01;
-    int filterSize = 32;
-    double y[100];
-    lmsFilterResponse(y, noisy_sig, clean_sig, mu, filterSize, length);
-
-    double sol[100];
-    gain(sol, y, 10, length);
-
-    for (int i = 0; i < length; i++) {
-        printf("%f\n", sol[i]);
-    }
-
-    return 0;
-}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py
deleted file mode 100644
index 565e5d680e96..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/CountLinesFile.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import os
-
-# folder1 = "./CCode"  # Replace with your folder path
-# Get the current Python file's directory
-current_dir = os.path.dirname(os.path.abspath(__file__))
-
-# Specify the folder path relative to the current directory
-folderC = os.path.join(current_dir, 'CCode')
-folderPy = os.path.join(current_dir, 'PyDSL')
-
-os.makedirs('Output', exist_ok=True)
-
-# Specify the output file path
-output_fileC = os.path.join(current_dir, 'Output', 'NoOfLinesInC.txt')
-output_filePy = os.path.join(current_dir, 'Output', 'NoOfLinesInPy.txt')
-
-def count_non_empty_linesInC(file_path):
-    with open(file_path, 'r') as file:
-        lines = file.readlines()
-        non_empty_lines = [line for line in lines if line.strip()]
-        return len(non_empty_lines)
-
-def count_valid_code_linesInPyFile(file_path):
-    valid_code_lines = 0
-
-    with open(file_path, 'r') as file:
-        for line in file:
-            stripped_line = line.strip()
-            # Check if the line is not empty and does not start with a comment
-            if stripped_line and not stripped_line.startswith('#'):
-                valid_code_lines += 1
-
-    return valid_code_lines
-    
-
-def list_files_and_write_line_counts(folder, output_path):
-    # List files in the folder and sort them by filename
-    files = sorted(os.listdir(folder))
-    with open(output_path, 'w') as output:
-        for filename in files:
-            file_path = os.path.join(folder, filename)
-            if os.path.isfile(file_path) and filename.endswith('.c'):  # Check if it's a text file
-                line_count = count_non_empty_linesInC(file_path)
-                output.write(f"{filename}: \t{line_count} \n")
-
-def list_files_and_write_line_countsPy(folder, output_path):
-    # List files in the folder and sort them by filename
-    files = sorted(os.listdir(folder))
-    with open(output_path, 'w') as output:
-        for filename in files:
-            file_path = os.path.join(folder, filename)
-            if os.path.isfile(file_path) and filename.endswith('.py'):  # Check if it's a text file
-                line_count = count_valid_code_linesInPyFile(file_path)
-                output.write(f"{filename}: \t{line_count}\n")
-
-
-# Call the function
-list_files_and_write_line_counts(folderC, output_fileC)
-list_files_and_write_line_countsPy(folderPy, output_filePy)
-
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt
deleted file mode 100644
index c033b33ae0b5..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInC.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-audioCompression.c: 	89 
-energyOfSignal.c: 	62 
-lowPassFIRFilterDesign.c: 	42 
-lowPassFull1.c: 	89 
-noisecancelling.c: 	73 
-periodogram2Conv.c: 	64 
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt
deleted file mode 100644
index 2e215af6f463..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/NoOfLinesInPy.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-audioCompression.py: 	19
-energyOfSignal.py: 	10
-lowPassFIRFilterDesign1.py: 	12
-lowPassFull1.py: 	20
-noisecancelling.py: 	19
-periodogram2Conv.py: 	11
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt
deleted file mode 100644
index 7cbda332fad6..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/Output/OpsNameDump.txt
+++ /dev/null
@@ -1 +0,0 @@
-constant, add, cast, func, generic_call, mul, div, print, reshape, return, transpose, delay, gain, sub, zeroCrossCount, FIRFilterResponse, slidingWindowAvg, downsampling, upsampling, lowPassFilter, highPassFilter, fft1d, ifft1d, hamming, dct, filter, sum, sin, cos, square, fft1dreal, fft1dimg, sinc, getElemAtIndx, setElemAtIndx, lowPassFIRFilter, lmsFilter, highPassFIRFilter, getRangeOfVector, FIRFilterHammingOptimized, highPassFIRHammingOptimizedOp, threshold, quantization, lmsFilterResponse, runLenEncoding, FIRFilterResSymmOptimized, len, reverseInput, padding, FIRFilterYSymmOptimized, fft1DRealSymm, fft1DimgConjSymm, 
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py
deleted file mode 100644
index f19baf5fa17d..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PyDSL/periodogram2Conv.py
+++ /dev/null
@@ -1,51 +0,0 @@
-
-def main() {
-
-  #Steps:
-    #calculate x[l] , x[-l]
-    #calculate conv1d of x[l] , x[-l] ie, conv1 = conv(x[l] , x[-l])
-    #calculate fft : res = fft(conv1)
-    #then periodogram = |abs(fft)|^2 = real^2 + img^2 
-
-    #Another way:
-      #pad x[l] & x[-l] with zeroes
-      #calculate fft of x[l] & x[-l] ie, fft_x , fft_reverse_x
-      #multiply them to get final real ans : fft_x * fft_reverse_x
-
-  #size 10
-  # var a10 = [ 10,20,30,40,50,60,70,80,90,100];
-	var input = getRangeOfVector(0, 10, 1);
-  # var input = [1,2,3,4];
-  # print(a10);
-
-  #Get x[-l] ie, reverseInput & 
-  var reverse_input = reverseInput(input);
-  var conv1d = FIRFilterResponse(input, reverse_input);
-  # var fft_real = fft1DRealSymm(conv1d); #fft1DRealSymm
-  var fft_real = fft1dreal(conv1d);
-  var fft_img = fft1dimg(conv1d);
-  # var sq = fft_real * fft_real + fft_img * fft_img;
-  # print(sq);
-  var final1 = getElemAtIndx(fft_real , [6]); 
-  var final2 = getElemAtIndx(fft_real , [7]);
-  print(final1);
-  print(final2);
-  # print(conv1d);
-  # print(fft_real);
-  # print(fft_img);
-  #Pad the input , reverse_input for the size of conv o/p
-  #Calculate 
-    # var padLen = 9 ; #10 + 10 - 1 - 10
-    # var input_padded = padding(input , 0, padLen );
-
-    
-    # var fft10real = fft1dreal(input);
-    # var fft10img = fft1dimg(input);
-    
-    # #try input * -input
-    # var neg_input = gain(input , -1);
-  # var sq = fft10real * fft10real + fft10img * fft10img;
-  # print(sq);
-
-}
-
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py
deleted file mode 100644
index 1ae321552603..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/PythonCodeRough.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy.signal import lfilter, freqz
-
-def TestNumpyFuncs():
-    t1 = np.arange(0,10,2);
-    print(t1)
-
-TestNumpyFuncs()
-
-# if __name__=="main":
-# if __name__=="PythonCodeRough":    
-#     TestNumpyFuncs()
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py
deleted file mode 100644
index 3382e035fcb4..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/PythonCodeForTest/tokenCount.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Let's count the number of tokens in the provided answer.
-answer = """
-Here is a list of 30 DSP algorithms or blocks commonly implemented in software rather than hardware, along with their input and output:
-
-1. Fast Fourier Transform (FFT)
-   - Input: Time-domain signal
-   - Output: Frequency-domain representation
-
-2. Inverse Fast Fourier Transform (IFFT)
-   - Input: Frequency-domain signal
-   - Output: Time-domain representation
-
-3. Finite Impulse Response (FIR) Filter
-   - Input: Input signal
-   - Output: Filtered signal
-
-4. Infinite Impulse Response (IIR) Filter
-   - Input: Input signal
-   - Output: Filtered signal
-
-5. Discrete Fourier Transform (DFT)
-   - Input: Time-domain signal
-   - Output: Frequency-domain representation
-
-6. Convolution
-   - Input: Two signals
-   - Output: Convolved signal
-
-7. Cross-Correlation
-   - Input: Two signals
-   - Output: Cross-correlation sequence
-
-8. Autocorrelation
-   - Input: Single signal
-   - Output: Autocorrelation sequence
-
-9. Spectrogram
-   - Input: Time-domain signal
-   - Output: Time-frequency representation
-
-10. Wavelet Transform
-    - Input: Time-domain signal
-    - Output: Time-scale representation
-
-11. Inverse Wavelet Transform
-    - Input: Time-scale representation
-    - Output: Time-domain signal
-
-12. Hilbert Transform
-    - Input: Real signal
-    - Output: Analytic signal
-
-13. Short-Time Fourier Transform (STFT)
-    - Input: Time-domain signal
-    - Output: Time-frequency representation
-
-14. Linear Predictive Coding (LPC)
-    - Input: Speech signal
-    - Output: Linear prediction coefficients
-
-15. Cepstral Analysis
-    - Input: Speech signal
-    - Output: Cepstral coefficients
-
-16. Adaptive Filtering
-    - Input: Input signal, desired signal
-    - Output: Filtered signal
-
-17. Phase Vocoder
-    - Input: Audio signal
-    - Output: Time-stretched or pitch-shifted audio
-
-18. Echo Cancellation
-    - Input: Input signal, echo signal
-    - Output: Echo-canceled signal
-
-19. Noise Reduction
-    - Input: Noisy signal
-    - Output: Cleaned signal
-
-20. Compression
-    - Input: Audio signal
-    - Output: Compressed signal
-
-21. Decompression
-    - Input: Compressed signal
-    - Output: Audio signal
-
-22. Pitch Detection
-    - Input: Audio signal
-    - Output: Pitch frequency
-
-23. Modulation
-    - Input: Baseband signal
-    - Output: Modulated signal
-
-24. Demodulation
-    - Input: Modulated signal
-    - Output: Baseband signal
-
-25. Channel Equalization
-    - Input: Received signal
-    - Output: Equalized signal
-
-26. Digital Down Conversion (DDC)
-    - Input: High-frequency signal
-    - Output: Baseband signal
-
-27. Digital Up Conversion (DUC)
-    - Input: Baseband signal
-    - Output: High-frequency signal
-
-28. Amplitude Modulation (AM)
-    - Input: Carrier signal, modulating signal
-    - Output: Amplitude modulated signal
-
-29. Frequency Modulation (FM)
-    - Input: Carrier signal, modulating signal
-    - Output: Frequency modulated signal
-
-30. Quantization
-    - Input: Continuous signal
-    - Output: Discrete signal
-
-These blocks and algorithms represent fundamental components of DSP that are frequently implemented in software to leverage the flexibility and processing power of general-purpose processors.
-"""
-
-# Counting the number of tokens using the tiktoken library
-import tiktoken
-
-# Initialize tokenizer
-tokenizer = tiktoken.get_encoding("gpt-3.5-turbo")
-
-# Tokenize the text
-tokens = tokenizer.encode(answer)
-
-# Get the number of tokens
-num_tokens = len(tokens)
-num_tokens
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore
deleted file mode 100644
index 1ee5b456d2cd..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-periodogram/*
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore
deleted file mode 100644
index 698bdfbad524..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-Output/*
-dsp1
-dsp1_Debug
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py
deleted file mode 100644
index 2e6cd6fd2d18..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/EnergyOfSignal.py
+++ /dev/null
@@ -1,40 +0,0 @@
-
-def main() {
-
-  #Steps:
-    #calculate x[l] 
-    #calculate fft : fft1 = fft(conv1)
-    #then sq_abs = |abs(fft)|^2 = real^2 + img^2 
-    # sum = sum(sq_abs)
-    # res = gain(sum , 1/N)
-
-    #Optimized res:
-      #sq1 = input * input
-      #sum1 = sum(sq1)
-      
-
-  #size 10
-  # var a10 = [ 10,20,30,40,50,60,70,80,90,100];
-	var input = getRangeOfVector(0, 100, 1);
-  #calculate x[l] 
-  #calculate fft : fft1 = fft(conv1)
-  var fft_real = fft1dreal(input);
-  var fft_img = fft1dimg(input);
-
-  #then sq_abs = |abs(fft)|^2 = real^2 + img^2 
-  # var sq_abs = fft_real * fft_real + fft_img * fft_img  ;
-  var sq_abs = square(fft_real) + square(fft_img)  ;
-  # sum = sum(sq_abs)
-  var sum1 = sum(sq_abs);
-  # res = gain(sum , 1/N)
-  var len1 = len(input);
-  var res = sum1 / len1;
-
-  print(res);
-  # var final1 = getElemAtIndx(fft_real , [6]); 
-  # print(final1);
-
-
-
-}
-
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py
deleted file mode 100644
index c21d58de1049..000000000000
--- a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/ScriptForCases.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import os
-import subprocess
-import time
-
-# The script does the following
-# Input : filename.py 
-# Output : TimeOfExecution for different IP sizes : 
-# Steps to run:
-    # Open a terminal at the path of the script -- 
-    # Run: python ScriptForCases.py #3.11 validated 
-
-# Pseudo-code:
-    # Iterate for all the input-size & update the input value in file
-    # Update logic -- change the 2nd parameter of line: var c = getRangeOfVector(init , Count, StepSize)
-    # Run the respective commands on the file
-
-# Path to the input file
-# Apps = "noisecancelling.py" , "lowPassFull.py" , " audioCompression.py" ,
-        #  "back2backDelay.py" , "lowPassFIRFilterDesign.py" ,
-input_file_path =  "noisecancelling.py" 
-BasePathForLLVM = "/mnt/sharedDrive/SourceCode/llvm-project/"
-OutputScriptPath = "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/"
-# OutputPath = BasePathForLLVM + "mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/Output/"
-
-# Construct full output path
-OutputPath = os.path.join(BasePathForLLVM, OutputScriptPath, "Output")
-
-# Check if the Output folder exists, create it if it doesn't
-if not os.path.exists(OutputPath):
-    os.makedirs(OutputPath)
-
-# Now OutputPath is ready for use
-print("InputPath:{}".format(BasePathForLLVM))
-print(f"OutputPath: {OutputPath}")
-# exit()
-
-# ************ Don't change unless u required 
-# Define the values dictionary
-inputValues = {
-    # "10": 10,
-    # "100": 100,
-    # "1K": 1000,
-    # "10K": 10000,
-    # "20K": 20000,
-    # "30K": 30000,
-    # "40K": 40000,
-    # "50K": 50000,
-    # "100K": 100000,
-     "1M": 1000000,
-    "10M": 10000000,
-    "20M": 20000000,
-    "30M": 30000000,
-    "40M": 40000000,
-    "50M": 50000000,
-    "100M": 100000000,
-    # "1B": 1000000000
-}
-NoOfIterations = 3
-
-# --------------------------------------------------
-commands_base = [
-    # "./dsp1 lowPassFull.py -emit=mlir-affine",
-    f"./dsp1 {input_file_path} -emit=llvm",
-    # f"{BasePathForLLVM}build/bin/dsp1 {input_file_path} -emit=llvm",
-    "clang-17 -O0 file.ll -o fileexe -lm",
-]
-
-# Define the cases
-cases = [  
-    {"affineOpt": False, "canonOpt": False, "suffix": "fileNoOpt.ll" , "exe" : "fileNoOptExe"}, 
-    {"affineOpt": True, "canonOpt": False, "suffix": "fileAffineOpt.ll" , "exe" : "fileAffineOptExe"},
-    {"affineOpt": True, "canonOpt": True, "suffix": "fileAffineCanonOpt.ll", "exe" : "fileAffineCanonOptExe"},   
-]
-
-# Read the input file
-with open(input_file_path, "r") as file:
-    lines = file.readlines()
-
-print("",end="\t")
-for case in cases:
-    print(f"{case['exe']}",end="\t")
-
-# print("\n")   
-for key, value in inputValues.items():
-    # Update the specific line in the file
-    # print("Updating for {}".format(value))
-    # print("\n")
-    print("\n{}".format(key), end="\t")
-    with open(input_file_path, "w") as file:
-        for line in lines:
-            if line.strip().startswith("var input = getRangeOfVector("):
-            # if line.strip().startswith("var N = "):
-                # Replace the second parameter with the current value
-                updated_line = f"\tvar input = getRangeOfVector(0, {value}, 0.000125);\n"
-                # updated_line = f"\tvar input = getRangeOfVector(0, {value}, 1);\n"
-                # updated_line = f"    var N = {value + 1} ;\n"
-                file.write(updated_line)
-            else:
-                file.write(line)
-        # print(lines)
-
-    # Iterate through the cases and run the commands
-    for case in cases:
-        command_llvm = commands_base[0]
-        if case["affineOpt"]:
-            command_llvm += " -affineOpt"
-        if case["canonOpt"]:
-            command_llvm += " -canonOpt"
-        # command_llvm += f" 2> {case['suffix']}" #OutputPath
-        command_llvm += f" 2> {OutputPath}/{case['suffix']}" #OutputPath
-
-        commands = [
-            command_llvm,
-            # f"clang-17 -O0 {case['suffix']} -o fileexe -lm",
-            f"clang-17 -O0 {OutputPath}/{case['suffix']} -o {OutputPath}/{case['exe']} -lm",
-        ] 
-        # print(case,end="\n")
-        # print("\n")
-        
-        # Iterate over each value and perform the necessary operations       
-        for command in commands:
-            # Run the commands for the current case
-            result = subprocess.run(command, shell=True, capture_output=True, text=True)  
-            
-        sum_exe_time = 0
-        for i in range(0,NoOfIterations):
-            # for command in commands:
-            #     # print("running command {}".format(command))
-            #     # os.system(command)
-            #     result = subprocess.run(command, shell=True, capture_output=True, text=True)
-            
-            # Clear the cache to minimize caching effects
-            # subprocess.run("sync; echo 3 > /proc/sys/vm/drop_caches", shell=True)
-            try:
-                process = subprocess.run("sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'", shell=True, check=True)
-                # process.wait()
-            except subprocess.CalledProcessError:
-                process.terminate()
-            # The command to be executed
-            # command2 = "./fileexe"
-            # Limit execution to a single core
-            # command2 = "taskset -c 0 ./fileexe"  
-            # command2 = f"taskset -c 0 ./{case['exe']}" #{OutputPath}
-            command2 = f"taskset -c 0 ./Output/{case['exe']}"
-
-            # Record the start time
-            start_time = time.time()
-
-            # Execute the command
-            try:
-                subprocess.run(command2, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
-                # subprocess.run(command2, shell=True)
-            except subprocess.CalledProcessError as exc:
-                print(f"Process failed because did not return a successful return code. "
-                        f"Returned {exc.returncode}\n{exc}")
-            
-
-            # Record the end time
-            end_time = time.time()
-
-            # Calculate the elapsed time
-            execution_time = end_time - start_time
-            sum_exe_time = sum_exe_time + execution_time
-            # print("{}".format(execution_time), end="\t")
-        avg_exe_time = sum_exe_time / NoOfIterations
-        print("{}".format(avg_exe_time), end="\t")
-    # print(f"The command took {execution_time} seconds to execute.")
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1 b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1
deleted file mode 100755
index 4c6877a9a9c0..000000000000
Binary files a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1 and /dev/null differ
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug
deleted file mode 100755
index 2835db055a2f..000000000000
Binary files a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/Results/TryResultScript/dsp1_Debug and /dev/null differ
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py
new file mode 100644
index 000000000000..4290babc8e72
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/find_peaks.py
@@ -0,0 +1,8 @@
+def main() {
+  # Detect the peaks of a fixed sample signal and print them.
+  var signal = [0.4, 0.3, 0.6, 1.8, 0.9, 0.5, 0.2, 0.7, 1.2, 0.8, 2.0, 1.9, 1.8, 1.7, 1.8, 1.7];
+  var peaks = find_peaks(signal, 0.5, 1); 
+  
+  print(peaks);
+  
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py
new file mode 100644
index 000000000000..6de430689119
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_demodulate.py
@@ -0,0 +1,10 @@
+def main() {
+    # var input_data = [1,1,1,0,1,1,1,0];
+    # print(input_data);
+    # var modulated_symbols = qam_modulate(input_data);
+    # print(modulated_symbols);
+    var real_part = [1, 1, 1, 1];
+    var img_part = [1, -1, 1, -1];
+    var decoded_data = qam_demodulate(real_part, img_part);
+    print(decoded_data);
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py
new file mode 100644
index 000000000000..87ec2cfbab9e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/qam_modulate.py
@@ -0,0 +1,12 @@
+def main() {
+    var input_data = [0,1,1,0,1,1,1,0];
+    # print(input_data);
+    var real = qam_modulate_real(input_data);
+    var imagine = qam_modulate_imagine(input_data);
+    print(real);
+    print(imagine);
+    # var real_part = [1, 1, 1, 1];
+    # var img_part = [1, -1, 1, -1];
+    # var decoded_data = qam_demodulate(real_part, img_part);
+    # print(decoded_data);
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py
new file mode 100644
index 000000000000..ae1d280d624b
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/Output/TryDSPApps/target_identification.py
@@ -0,0 +1,35 @@
+def main() {
+  # Run an LMS filter over a 500 Hz sine corrupted by a 3000 Hz tone, then
+  # find peaks in the filter output.
+  var fs = 8000;
+  # var step = 1/8000;
+  # print(step);
+  var t = getRangeOfVector(0,100, 0.000125);
+  var f_sig = 500;
+  var pi = 3.14159265359;
+  var getMultiplier = 2 * pi * f_sig;
+  # print(getMultiplier);
+  var getSinDuration = gain(t, getMultiplier);
+  # print(getSinDuration);
+  var clean_sig = sin(getSinDuration );
+
+  # Define a noise signal with freq = 3000, scaled to half amplitude.
+  var f_noise = 3000;
+  var getNoiseSinDuration = gain(t, 2 * pi * f_noise);
+  var noise = sin(getNoiseSinDuration);
+  var noise1 = gain(noise, 0.5);
+
+  var noisy_sig = clean_sig + noise1;
+  # print(noisy_sig);
+  # print(clean_sig);
+  var mu = 0.01;
+  var filterSize = 32;
+  var y = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+  print(y);
+
+  #var signal = [0.4, 0.3, 0.6, 1.8, 0.9, 0.5, 0.2, 0.7, 1.2, 0.8, 2.0, 1.9, 1.8, 1.7, 1.8, 1.7];
+  var peaks = find_peaks(y, 1.0, 20); 
+  
+  print(peaks);
+  
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h b/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h
index c13b287bdd2f..d6b6f2c0e50e 100644
--- a/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/AST.h
@@ -43,6 +43,7 @@ class ExprAST {
     Expr_BinOp,
     Expr_Call,
     Expr_Print,
+    Expr_String,
   };
 
   ExprAST(ExprASTKind kind, Location location)
@@ -107,6 +108,20 @@ class VariableExprAST : public ExprAST {
   static bool classof(const ExprAST *c) { return c->getKind() == Expr_Var; }
 };
 
+/// AST node holding the text of a string literal.
+class StringExprAST : public ExprAST {
+  std::string text;
+
+public:
+  StringExprAST(Location loc, llvm::StringRef string_val)
+      : ExprAST(Expr_String, std::move(loc)), text(string_val) {}
+
+  llvm::StringRef getStringVal() { return text; }
+
+  /// LLVM style RTTI: a node is a StringExprAST iff its kind is Expr_String.
+  static bool classof(const ExprAST *c) { return c->getKind() == Expr_String; }
+};
+
 /// Expression class for defining a variable.
 class VarDeclExprAST : public ExprAST {
   std::string name;
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h b/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h
index d6bc5443fb30..3d5827638470 100644
--- a/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Lexer.h
@@ -36,6 +36,7 @@ enum Token : int {
   tok_bracket_close = '}',
   tok_sbracket_open = '[',
   tok_sbracket_close = ']',
+  tok_comma = ',',
 
   tok_eof = -1,
 
@@ -43,6 +44,7 @@ enum Token : int {
   tok_return = -2,
   tok_var = -3,
   tok_def = -4,
+  tok_string_val = -7,
 
   // primary
   tok_identifier = -5,
@@ -83,6 +85,11 @@ class Lexer {
     return identifierStr;
   }
 
+  llvm::StringRef getString() { // prereq: getCurToken() == tok_string_val
+    assert(curTok == tok_string_val);
+    return stringVal; // text of the current string literal
+  }
+
   /// Return the current number (prereq: getCurToken() == tok_number)
   double getValue() {
     assert(curTok == tok_number);
@@ -145,11 +152,13 @@ class Lexer {
         return tok_def;
       if (identifierStr == "var")
         return tok_var;
+      if(identifierStr == ",")
+          return tok_comma;
       return tok_identifier;
     }
 
     // Number: [0-9.]+
-    if (isdigit(lastChar) || lastChar == '.') {
+    if (lastChar == '-' || isdigit(lastChar) || lastChar == '.') {
       std::string numStr;
       do {
         numStr += lastChar;
@@ -170,6 +179,17 @@ class Lexer {
         return getTok();
     }
 
+    // String val: "..." -- any characters up to the closing quote.
+    if (lastChar == '"') {
+      stringVal.clear();
+      while ((lastChar = Token(getNextChar())) != '"' && lastChar != EOF)
+        stringVal += static_cast<char>(lastChar);
+      // Eat the closing quote; on an unterminated string keep EOF pending.
+      if (lastChar == '"')
+        lastChar = Token(getNextChar());
+      return tok_string_val;
+    }
+
     // Check for end of file.  Don't eat the EOF.
     if (lastChar == EOF)
       return tok_eof;
@@ -188,6 +208,9 @@ class Lexer {
 
   /// If the current Token is an identifier, this string contains the value.
   std::string identifierStr;
+    
+  /// If the current Token is a string literal, this string contains its text.
+  std::string stringVal;
 
   /// If the current Token is a number, this contains the value.
   double numVal = 0;
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td b/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td
index 6d714f9832b6..32b8fff48410 100644
--- a/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Ops.td
@@ -92,6 +92,26 @@ def ConstantOp : Dsp_Op<"constant", [Pure]> {
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// ModuloOp
+//===----------------------------------------------------------------------===//
+
+def ModuloOp : Dsp_Op<"modulo",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "element-wise modulo operation";
+  let description = [{
+    The "modulo" operation performs element-wise modulo op between two tensors.
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+  // Allow building a ModuloOp from the two input operands.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // AddOp
 //===----------------------------------------------------------------------===//
@@ -284,8 +304,61 @@ def DivOp : Dsp_Op<"div",
   let builders = [
     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
   ];
+
+  let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// BitwiseAndOp
+//===----------------------------------------------------------------------===//
+
+def BitwiseAndOp : Dsp_Op<"bitwiseand",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "bit-wise and operation";
+  let description = [{
+    The "bitwiseand" operation performs bit-wise and between two
+    tensors. The shapes of the tensor operands are expected to match.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Indicate that the operation has a custom parser and printer method.
+  let hasCustomAssemblyFormat = 1;
+
+  // Allow building a BitwiseAndOp from the two input operands.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
 }
 
+//===----------------------------------------------------------------------===//
+// PowOp
+//===----------------------------------------------------------------------===//
+
+def PowOp : Dsp_Op<"pow",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "element-wise power operation for tensor";
+  let description = [{
+    The "pow" operation performs element-wise power for base tensor.
+    The accepted operand is restricted to a scalar constant.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // The custom parser and printer are not enabled yet.
+  // FIXME: pow op should have custom assembly format
+  // let hasCustomAssemblyFormat = 1;
+
+  // Allow building a PowOp from two operands.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+  let hasVerifier = 1;
+}
+
+
 //===----------------------------------------------------------------------===//
 // PrintOp
 //===----------------------------------------------------------------------===//
@@ -443,6 +516,7 @@ def DelayOp : Dsp_Op<"delay" ,
   let hasCanonicalizer = 1;
 
   let hasVerifier = 1;
+
 }
 
 
@@ -516,14 +590,56 @@ def SubOp : Dsp_Op<"sub", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInte
    // let hasVerifier = 1;
  }
 
+//===----------------------------------------------------------------------===//
+// FFTRealOp
+//===----------------------------------------------------------------------===//
+
+def FFTRealOp : Dsp_Op<"fftReal", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Returns the real part of the FFT of the input";
+  let description = [{
+      This function accepts a 1D input array of size 2^n and returns the real part of its Fourier transform,
+      producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+
+  let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTImagOp
+//===----------------------------------------------------------------------===//
+
+def FFTImagOp : Dsp_Op<"fftImag", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Returns the imaginary part of the FFT of the input";
+  let description = [{
+      This function accepts a 1D input array of size 2^n and returns the imaginary part of its Fourier transform,
+      producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // zeroCrossCountOp
 //===----------------------------------------------------------------------===//
 def zeroCrossCountOp : Dsp_Op<"zeroCrossCount" ,
     [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
-  let summary = "shifting tensor by given number";
+  let summary = "count the crosses through zero";
   let description = [{
-    The "zeroCrossCountOp" operation detects no of zero crosses in a given array -- 
+    The "zeroCrossCount" operation detects no of zero crosses in a given array -- 
     ex: [-1 , -2 , 3, 0 , 0, -2] has 2 zero-crosses 
   }];
 
@@ -586,6 +702,21 @@ def FIRFilterResponseOp : Dsp_Op<"FIRFilterResponse" ,
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// MedianFilterOp
+//===----------------------------------------------------------------------===//
+
+def MedianFilterOp : Dsp_Op<"medianFilter", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  // NOTE(review): this op has no summary/description yet -- please add them.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // SlidingWindowAvg
 //===----------------------------------------------------------------------===//
@@ -611,6 +742,7 @@ def SlidingWindowAvgOp : Dsp_Op<"slidingWindowAvg",
 
   // Indicate that additional verification for this operation is necessary.
   let hasVerifier = 1;
+  let hasCanonicalizer = 1;
 }
 
 
@@ -829,17 +961,8 @@ def IFFT1DOp : Dsp_Op<"ifft1d",
   let arguments = (ins F64Tensor:$real ,  F64Tensor:$img);
   // let results = (outs F64Tensor:$real, F64Tensor:$img); //(ins F64Tensor:$lhs, F64Tensor:$rhs);
   let results = (outs F64Tensor);
-  // let results = (outs F64Tensor:$real);
-  // let assemblyFormat = [{
-  //   `(` $input `:` type($input) `)` attr-dict `to` type(results)
-  // }];
-
-  // let assemblyFormat = [{
-  //   `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)`
-  // }];
 
-  // Enable registering canonicalization patterns with this operation.
-  // let hasCanonicalizer = 1;
+  let hasCanonicalizer = 1;
 
   // Allow building a IFFT1D with from the input operand.
   let builders = [
@@ -1237,6 +1360,93 @@ def GetElemAtIndxOp : Dsp_Op<"getElemAtIndx",
 }
 
 
+
+
+
+
+//===----------------------------------------------------------------------===//
+// GetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+def GetSingleElemAtIdxOp : Dsp_Op<"getSingleElemAtIndx",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Accesses a ranked tensor at idx and returns a single tensor without dimension.";
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$indx)>
+  ];
+
+  // Additional verification for this operation is currently disabled.
+  // let hasVerifier = 1;
+}
+
+
+
+
+//===----------------------------------------------------------------------===//
+// Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Diff2MeanOptimizedOp : Dsp_Op<"diff2meanOpt",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Implements mean(diff(input)) as (input[-1] - input[0])/len(input). Note that mean uses the length of diff; this operation considers input[-1] == input[length parameter of mean], not input[length parameter of mean - 1]";
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$length)>
+  ];
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// FindPeaks2Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def FindPeaks2Diff2MeanOptimizedOp : Dsp_Op<"findpeaks2diff2meanOpt",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Implements mean(diff(find_peaks(input))) as (peak[-1] - peak[0])/(len(peaks)-1).";
+
+  let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$signal, "Value":$height, "Value":$distance)>
+  ];
+}
+
+
+
+
+
+
+
+//===----------------------------------------------------------------------===//
+// LMS2FindPeaksOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def LMS2FindPeaksOptimizedOp : Dsp_Op<"lms2findPeaks",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Fusing loop for LMSFilterResponseOp and FindPeaksOp";
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$height, F64Tensor:$distance);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen, "Value":$height, "Value":$distance)>
+  ];
+}
+
+
+
+
+
+
 //===----------------------------------------------------------------------===//
 // SetElemAtIndxOp
 //===----------------------------------------------------------------------===//
@@ -1909,4 +2119,856 @@ def FFT1DImgConjSymmOp : Dsp_Op<"fft1DimgConjSymm",
   let hasVerifier = 1;
 }
 
-#endif // TOY_OPS
+
+
+//===----------------------------------------------------------------------===//
+// ShiftRightOp
+//===----------------------------------------------------------------------===//
+
+def ShiftRightOp : Dsp_Op<"shiftRight", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Bit-wise shift right a by b";
+   let description = [{
+     The shift right block shifts each element of a vector by right-hand side integer. 
+   }]; 
+
+   // Open question: both operands are F64 tensors, so it is unclear how a
+   // logical (bit-wise) shift should be defined on floating-point values.
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+
+   // No custom assembly format is declared. A declarative format was sketched
+   // but is disabled:
+   //   let assemblyFormat = [{
+   //     `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   //   }];
+
+   // Allow building a ShiftRightOp from the two input operands.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // No verifier is declared: `let hasVerifier = 1;` is left disabled.
+ }
+
+
+//===----------------------------------------------------------------------===//
+// MatmulOp
+//===----------------------------------------------------------------------===//
+
+def MatmulOp : Dsp_Op<"matmul", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Matrix multiplication a * b";
+   let description = [{
+     Matrix multiplication between the left-hand side and right-hand side.
+   }]; 
+
+   // Both operands and the result are F64 tensors.
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+
+   // No custom assembly format is declared. A declarative format was sketched
+   // but is disabled:
+   //   let assemblyFormat = [{
+   //     `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   //   }];
+
+   // Allow building a MatmulOp from the two input operands.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // Indicate that additional verification for this operation is necessary.
+   let hasVerifier = 1;
+ }
+
+
+
+
+//===----------------------------------------------------------------------===//
+// Conv2DOp
+//===----------------------------------------------------------------------===//
+
+def Conv2DOp : Dsp_Op<"conv2d", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect convolution 2d operation";
+    let description = [{
+        Performs a 2D convolution on the input tensor using specified kernel.
+    }];
+
+    // Operands and result are all F64 tensors.
+    let arguments = (ins
+        F64Tensor:$input, F64Tensor:$kernel, F64Tensor:$bias);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+    // String-name helpers added to the generated C++ op class.
+    let extraClassDeclaration = [{
+        static StringRef getStrideName() { return "stride"; }
+        static StringRef getPaddingName() { return "padding"; }
+    }];
+    let builders = [OpBuilder<(ins
+        "Value":$input, "Value":$kernel, "Value":$bias)>];
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+def ThresholdUpOp : Dsp_Op<"thresholdUp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Converts all the values above threshold to 1 else 0";
+    let description = [{
+        Values that are not above the threshold become 0. Values above the
+        threshold become 1 when returnoriginal is false, and keep their original
+        value when returnoriginal is true.
+    }];
+    let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal);
+    let results = (outs F64Tensor:$output);
+
+    let builders = [
+        OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)>
+    ];
+    let hasVerifier = 1;
+    let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateDTMFOp
+//===----------------------------------------------------------------------===//
+
+def GenerateDTMFOp : Dsp_Op<"generateDtmf", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Generates signal for the digit input.";
+    let description = [{
+        Converts the digit into a signal.
+    }];
+
+    // $digit, $duration and $fs are each passed as an F64 tensor.
+    let arguments = (ins F64Tensor:$digit, F64Tensor:$duration, F64Tensor:$fs);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "Value":$digit, "Value":$duration, "Value":$fs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// FFTFreqOp
+//===----------------------------------------------------------------------===//
+
+def FFTFreqOp : Dsp_Op<"fftfreq", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Based on --> np.fft.fftfreq(N, d=1/fs)";
+    let description = [{
+        Generates frequency bins for fft.
+    }];
+
+    // NOTE(review): $length and $distance presumably play the roles of N and d
+    // in np.fft.fftfreq -- confirm against the lowering.
+    let arguments = (ins F64Tensor:$length, F64Tensor:$distance);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+    let builders = [OpBuilder<(ins "Value":$length, "Value":$distance)>];
+}
+
+//===----------------------------------------------------------------------===//
+// FindDominantPeaksOp
+//===----------------------------------------------------------------------===//
+
+def FindDominantPeaksOp : Dsp_Op<"findDominantPeaks", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "finds two dominant peaks in the frequency array.";
+    let description = [{
+        Designed for the DTMF Application.
+    }];
+
+    // Two F64 tensor operands ($frequencies, $magnitudes), one F64 tensor result.
+    let arguments = (ins F64Tensor:$frequencies, F64Tensor:$magnitudes);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "Value":$frequencies, "Value":$magnitudes)>];
+}
+
+//===----------------------------------------------------------------------===//
+// RecoverDTMFDigitOp
+//===----------------------------------------------------------------------===//
+
+def RecoverDTMFDigitOp : Dsp_Op<"recoverDtmfDigit",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Recover digit from given frequency pair else return -1.";
+    let description = [{
+        Recovers the DTMF digit for the given frequency pair. The result is -1
+        when the frequency pair does not correspond to any digit.
+    }];
+    // NOTE(review): the exact layout expected for $freqPairs is not documented here.
+    let arguments = (ins F64Tensor:$frequencies, F64Tensor:$freqPairs);
+    let results = (outs F64Tensor:$output);
+
+    let builders = [
+        OpBuilder<(ins "Value":$frequencies, "Value":$freqPairs)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTCombineOp
+//===----------------------------------------------------------------------===//
+
+def FFTCombineOp : Dsp_Op<"fftCombine", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Combines the real and imaginary parts to obtain an amplitude array.";
+    let description = [{
+        This function takes arrays of real and imaginary parts of frequency components 
+        and computes the amplitude of each frequency. The amplitudes are returned as an 
+        array representing the magnitudes of the corresponding complex values.
+    }];
+
+    // Real and imaginary parts arrive as two separate F64 tensors.
+    let arguments = (ins F64Tensor:$real, F64Tensor:$imag);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "Value":$real, "Value":$imag)>];
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateVoiceSignatureOp
+//===----------------------------------------------------------------------===//
+
+def GenerateVoiceSignatureOp : Dsp_Op<"generateVoiceSignature", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Generate voice signature of the speaker.";
+    let description = [{
+        This function takes two frequencies as input along with the duration and 
+        the sampling frequency, and generate the voice signature of the speaker. 
+    }];
+
+    // Operand order follows the description: two frequencies, duration, sampling frequency.
+    let arguments = (ins F64Tensor:$f1, F64Tensor:$f2, F64Tensor:$duration, F64Tensor:$fs);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "Value":$f1, "Value":$f2, "Value":$duration, "Value":$fs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def SqrtOp : Dsp_Op<"sqrt", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Takes the element wise square root of the tensor.";
+    let description = [{
+        Takes the element wise square root of the tensor.
+    }];
+
+    // Single F64 tensor in, single F64 tensor out.
+    let arguments = (ins F64Tensor:$input);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateRealOp real
+//===----------------------------------------------------------------------===//
+
+def QamModulateRealOp : Dsp_Op<"qam_modulate_real", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect qam modulation real operation";
+    let description = [{
+        Performs a digital modulation on input tensor.
+    }];
+
+    // One F64 tensor operand ($signal); the result is named $real.
+    let arguments = (ins F64Tensor:$signal);
+    let results = (outs F64Tensor:$real);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    // Allow building the op from its signal operand.
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>];
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateImgOp imagine
+//===----------------------------------------------------------------------===//
+
+def QamModulateImgOp : Dsp_Op<"qam_modulate_imagine", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect qam modulation imagine operation";
+    let description = [{
+        Performs a digital modulation on input tensor.
+    }];
+
+    // One F64 tensor operand ($signal); the result is named $imagine.
+    let arguments = (ins F64Tensor:$signal);
+    let results = (outs F64Tensor:$imagine);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    // Allow building the op from its signal operand.
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// QamDemodulateOp
+//===----------------------------------------------------------------------===//
+
+def QamDemodulateOp : Dsp_Op<"qam_demodulate", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect qam demodulation operation";
+    let description = [{
+        Takes in 2 arrays, one is the real part of a signal the other is the imaginary part of a signal.
+        Returns the decoded binary output.
+    }];
+
+    // Operand names match the results of the two QAM modulate ops ($real, $imagine).
+    let arguments = (ins F64Tensor:$real, F64Tensor:$imagine);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    // Allow building the op from the real and imaginary operands.
+    let builders = [
+        OpBuilder<(ins "Value":$real, "Value":$imagine)>];
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaksOp
+//===----------------------------------------------------------------------===//
+
+def FindPeaksOp : Dsp_Op<"find_peaks", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find peaks from the signal. Since the number of peaks varies, the output is initialized as -1.";
+   let description = [{
+     Input: signal, height, distance
+     Output: indices of peaks. All unused values are initialized to -1, so the
+     number of peaks found can be measured from the output.
+
+     Functionality: see the original Python-level code below.
+
+     def manual_find_peaks(signal, height, distance):
+         peaks = []
+         for i in range(1, len(signal) - 1):
+             # Check if the current point is higher than its neighbors
+             if signal[i] > signal[i-1] and signal[i] > signal[i+1]:
+                 # Check if it meets the height criterion
+                 if signal[i] >= height:
+                     # Check if it's far enough from the previously detected peak
+                     if not peaks or i - peaks[-1] >= distance:
+                         peaks.append(i)
+         return np.array(peaks)
+
+   }];
+
+   let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance);
+   let results = (outs F64Tensor);
+
+   // No custom assembly format is declared. A declarative format was sketched
+   // but is disabled:
+   //   let assemblyFormat = [{
+   //     `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   //   }];
+
+   // Allow building a FindPeaksOp from the signal, height and distance operands.
+   let builders = [
+     OpBuilder<(ins "Value":$signal, "Value":$height, "Value":$distance)>
+   ];
+
+   // No verifier is declared: `let hasVerifier = 1;` is left disabled.
+
+   // Enable registration of canonicalization patterns with this operation.
+   let hasCanonicalizer = 1;
+
+ }
+
+
+//===----------------------------------------------------------------------===//
+// BeamFormOp
+//===----------------------------------------------------------------------===//
+
+def BeamFormOp : Dsp_Op<"beam_form", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect Beam forming operation";
+    let description = [{
+        Performs a beam forming signal encoding on the input tensor using specified weights.
+    }];
+
+    // $antennas and $freq are I64 attributes; $time and $weights are tensor operands.
+    let arguments = (ins I64Attr:$antennas, I64Attr:$freq, F64Tensor:$time, F64Tensor:$weights);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    let builders = [OpBuilder<(ins "int64_t":$antennas, "int64_t":$freq,
+        "Value":$time, "Value":$weights)>];
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceModulateOp
+//===----------------------------------------------------------------------===//
+
+def SpaceModulateOp : Dsp_Op<"space_modulate", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect space modulation operation";
+    let description = [{
+        Takes in string input and convert it to binary.
+    }];
+
+    // The input is carried in an F64 tensor operand named $signal.
+    let arguments = (ins F64Tensor:$signal);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    // Allow building the op from its signal operand.
+    let builders = [OpBuilder<(ins "Value":$signal)>];
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceDemodulateOp
+//===----------------------------------------------------------------------===//
+
+def SpaceDemodulateOp : Dsp_Op<"space_demodulate", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect space demodulation operation";
+    let description = [{
+        Takes in binary input and convert it to string.
+    }];
+
+    // The input is carried in an F64 tensor operand named $binary.
+    let arguments = (ins F64Tensor:$binary);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+    // Enable registration of canonicalization patterns with this operation.
+    let hasCanonicalizer = 1;
+
+    let builders = [OpBuilder<(ins "Value":$binary)>];
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceErrCorrectionOp
+//===----------------------------------------------------------------------===//
+
+def SpaceErrCorrectionOp : Dsp_Op<"space_err_correction", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Dsp dialect space error correction operation";
+    let description = [{
+        Remove noise operation for signal transmission in space.
+    }];
+
+    // Single F64 tensor in ($signal), single F64 tensor out ($output).
+    let arguments = (ins F64Tensor:$signal);
+    let results = (outs F64Tensor:$output);
+
+    // Indicate that additional verification for this operation is necessary.
+    let hasVerifier = 1;
+
+    // Allow building the op from its signal operand.
+    let builders = [OpBuilder<(ins "Value":$signal)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+def MaxOp : Dsp_Op<"max", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find maximum value in tensor";
+   let description = [{
+		This operation finds and returns the maximum value of the tensor.
+   }]; 
+
+   // Input and result are both F64 tensors.
+   let arguments = (ins F64Tensor:$input);
+   let results = (outs F64Tensor);
+
+   // No custom assembly format is declared. A declarative format was sketched
+   // but is disabled:
+   //   let assemblyFormat = [{
+   //     `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   //   }];
+
+   // Allow building a MaxOp from the input operand.
+   let builders = [
+     OpBuilder<(ins "Value":$input)>
+   ];
+
+   // No verifier is declared: `let hasVerifier = 1;` is left disabled.
+
+   // Enable registration of canonicalization patterns with this operation.
+   let hasCanonicalizer = 1;
+ }
+ 
+
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+def MeanOp : Dsp_Op<"mean", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find mean value of tensor. Requires (input, length).";
+   let description = [{
+		This operation finds and returns the mean value of the tensor.
+        Note that it requires length.
+		It would be better if we can implement both versions 
+		- no length argument -> automatically use the length of tensor
+		- with length argument -> use the provided length
+   }]; 
+
+   let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+   let results = (outs F64Tensor);
+
+   // Allow building a MeanOp from the input and length operands.
+   let builders = [
+     OpBuilder<(ins "Value":$input, "Value":$length)>
+   ];
+
+   // No custom assembly format or verifier is declared for this op.
+
+   // Enable registration of canonicalization patterns with this operation.
+   let hasCanonicalizer = 1;
+ }
+ 
+
+
+//===----------------------------------------------------------------------===//
+// DiffOp
+//===----------------------------------------------------------------------===//
+
+def DiffOp : Dsp_Op<"diff", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.diff (out[i] = a[i+1] - a[i]). It receives second argument as length";
+   let description = [{
+		This operation returns a tensor that contains diff (out[i] = a[i+1] - a[i]).
+		The length of the output tensor is len(input)-1, regardless of length parameter.
+		Note that it requires length.
+		It would be better if we can implement both versions 
+		- no length argument -> automatically use the length of tensor
+		- with length argument -> use the provided length
+   }]; 
+
+   // $length is a required operand (see the note in the description).
+   let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+   let results = (outs F64Tensor);
+
+   // Allow building a DiffOp from the input and length operands.
+   let builders = [
+     OpBuilder<(ins "Value":$input, "Value":$length)>
+   ];
+
+   // No custom assembly format is declared for this op.
+   // No verifier is declared: `let hasVerifier = 1;` is left disabled.
+ }
+ 
+//===----------------------------------------------------------------------===//
+// AbsOp
+//===----------------------------------------------------------------------===//
+
+def AbsOp : Dsp_Op<"abs",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.abs -> calculate the absolute value element-wise";
+   let description = [{
+       This operation calculates the absolute value element-wise.
+   }]; 
+
+   // Single F64 tensor in, single F64 tensor out.
+   let results = (outs F64Tensor:$output);
+   let arguments = (ins F64Tensor:$input);
+
+   let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// ArgMaxOp
+//===----------------------------------------------------------------------===//
+
+def ArgMaxOp : Dsp_Op<"argmax", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.argmax -> find the indices of the maximum values along a specified axis in an array.";
+   let description = [{
+       This operation finds the indices of the maximum values along a specified axis in an array.
+   }];
+
+   // $axis is an I64 attribute rather than a tensor operand; the result is an F64 tensor.
+   let arguments = (ins F64Tensor:$input, I64Attr:$axis);
+   let results = (outs F64Tensor:$output);
+   let builders = [
+     OpBuilder<(ins "Value":$input, "int64_t":$axis)>
+   ];
+ }
+//===----------------------------------------------------------------------===//
+// NormalizeOp
+//===----------------------------------------------------------------------===//
+
+def NormalizeOp : Dsp_Op<"normalize",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "normalize operation.";
+   let description = [{
+       normalization dsp operation.
+   }]; 
+
+   // One F64 tensor operand ($signal), one unnamed F64 tensor result.
+   let results = (outs F64Tensor);
+   let arguments = (ins F64Tensor:$signal);
+
+   // Enable registration of canonicalization patterns with this operation.
+   let hasCanonicalizer = 1;
+   let builders = [OpBuilder<(ins "Value":$signal)>];
+}
+
+//===----------------------------------------------------------------------===//
+// NormLMSFilterResponseOptimizeOp
+//===----------------------------------------------------------------------===//
+
+
+def NormLMSFilterResponseOptimizeOp : Dsp_Op<"norm_LMSFilterResponse_opt", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "LMS filter Response + norm optimize";
+  let description = [{
+      norm + lmsfilter
+  }];
+
+  // Four F64 tensor operands: $lhs, $rhs, $mu and $filterLen.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs,
+                       F64Tensor:$mu, F64Tensor:$filterLen);
+  let results = (outs F64Tensor);
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen)>];
+}
+
+//===----------------------------------------------------------------------===//
+// Median2SlidingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Median2SlidingOptimizedOp : Dsp_Op<"median2slidingOp", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  // NOTE(review): no summary/description is declared for this op; the name
+  // suggests a fused median + sliding-window form -- confirm and document.
+  let results = (outs F64Tensor);
+  let arguments = (ins F64Tensor:$input);
+
+  // Allow building the op from its single input operand.
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmThresholdUpOptimizedOp
+//===----------------------------------------------------------------------===//
+def FIRFilterResSymmThresholdUpOptimizedOp : Dsp_Op<"FIRFilterResSymmThresholdUpOptimizedOp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "FIRFilterResSymmThresholdUpOptimizedOp";
+  let description = [{
+    The "FIRFilterResSymmThresholdUpOptimizedOp" operation is basically the convolution of input 1-D and filter vector when the filter is symmetrical ie,
+    h[0] = h[L-1] , h[1] = h[L-2] .. h[middle ie, (L-1)/2] is single element & filter length is odd always
+    ex: x[n] = [2,1,3,2,4], h[l] = [1, -1 ,1]  then
+      y[n] = sum(h(k) . x(n-k)) k=0 to N-1 can be rewritten as 
+      y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 to L-1/2
+
+      Basically, we are trying to reduce the number of load/store operations by half -- so that we can reduce the operations
+  }];
+
+  // $lhs/$rhs feed the convolution described above; $threshold and
+  // $returnoriginal use the same names as ThresholdUpOp's operands.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs,
+                       F64Tensor:$threshold, F64Tensor:$returnoriginal);
+  let results = (outs F64Tensor);
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs,
+      "Value":$threshold, "Value":$returnoriginal)>];
+}
+
+//===----------------------------------------------------------------------===//
+// FFTOp
+//===----------------------------------------------------------------------===//
+
+def FFTOp : Dsp_Op<"fft", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Performs FFT Operation on the input";
+  let description = [{
+      This function accepts a 1D input array of size 2^n and returns its Fourier transform as two
+      results of the same size as the input: the real part ($real) and the imaginary part ($imag).
+      The function is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor:$real, F64Tensor:$imag);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+  // let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTAbsOp
+//===----------------------------------------------------------------------===//
+
+def FFTAbsOp : Dsp_Op<"FFTAbsOp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  // NOTE(review): the description below is still a placeholder.
+  let summary = "FFTAbsOp";
+  let description = [{
+      ------------------
+  }];
+
+  // One F64 tensor operand; the single result is named $amplitude.
+  let results = (outs F64Tensor:$amplitude);
+  let arguments = (ins F64Tensor:$input);
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsOp
+//===----------------------------------------------------------------------===//
+
+def DFTAbsOp : Dsp_Op<"DFTAbsOp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  // NOTE(review): the description below is still a placeholder.
+  let summary = "DFTAbsOp";
+  let description = [{
+      ------------------
+  }];
+
+  // One F64 tensor operand; the single result is named $amplitude.
+  let results = (outs F64Tensor:$amplitude);
+  let arguments = (ins F64Tensor:$input);
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+def DFTAbsThresholdUpOp : Dsp_Op<"DFTAbsThresholdUpOp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  // NOTE(review): the description below is still a placeholder.
+  let summary = "DFTAbsThresholdUpOp";
+  let description = [{
+      ------------------
+  }];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+  let results = (outs F64Tensor:$amplitude);
+  let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal);
+  let builders = [OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// CorrelateOp
+//===----------------------------------------------------------------------===//
+
+def CorrelateOp : Dsp_Op<"correlate", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "This implements scp.correlate. Right now we assume size of lhs == size of rhs";
+   let description = [{
+     This operation computes the correlation of lhs with rhs, following
+     scp.correlate. Both operands are currently assumed to have the same size.
+   }];
+
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+
+   // No custom assembly format is declared. A declarative format was sketched
+   // but is disabled:
+   //   let assemblyFormat = [{
+   //     `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   //   }];
+
+   // Allow building a CorrelateOp from the two input operands.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+   // No verifier is declared: `let hasVerifier = 1;` is left disabled.
+ }
+ 
+ 
+//===----------------------------------------------------------------------===//
+// SetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+def SetSingleElemAtIdxOp : Dsp_Op<"setSingleElemAtIndx",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "This one accesses ranked tensor at idx and sets single tensor with or without dimension.";
+
+  // $indx and $val are themselves passed as F64 tensors.
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx , F64Tensor:$val);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$indx, "Value":$val)>
+  ];
+
+  // Additional verification is currently disabled (hasVerifier is not set).
+}
+
+
+//===----------------------------------------------------------------------===//
+// Correl2MaxOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Correl2MaxOptimizedOp : Dsp_Op<"correl2max",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find the index of maximum value in tensor. outputs with floating point-converted index";
+   let description = [{
+		This operation fuses loops for correlate and max.
+   }]; 
+
+   // Same operand names as CorrelateOp ($lhs, $rhs); one F64 tensor result.
+   let results = (outs F64Tensor);
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+
+   let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+
+
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponse2GainOp
+//===----------------------------------------------------------------------===//
+
+def LMSFilterResponse2GainOp : Dsp_Op<"lmsFilterResponse2gain", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Fusing loop for LMSFilterResponseOp and GainOp";
+  // Operand order: the LMS filter inputs first, then the gain.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu,
+                       F64Tensor:$filterLen, F64Tensor:$gain);
+  let results = (outs F64Tensor);
+
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu,
+      "Value":$filterLen, "Value":$gain)>];
+}
+
+
+
+
+#endif // TOY_OPS
+
+
+
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h b/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h
index 42bd653b156c..a9d673f8f5f0 100644
--- a/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/Parser.h
@@ -167,6 +167,16 @@ class Parser {
     return v;
   }
 
+  /// stringexpr ::= '"' string_val '"'
+  /// Consumes the current string token and wraps its text in a StringExprAST.
+  std::unique_ptr<ExprAST> parseStringExpr() {
+    auto loc = lexer.getLastLocation();
+    std::string string_val(lexer.getString());
+    lexer.consume(tok_string_val);
+    return std::make_unique<StringExprAST>(std::move(loc),
+                                           std::move(string_val));
+  }
+  
   /// identifierexpr
   ///   ::= identifier
   ///   ::= identifier '(' expression ')'
@@ -175,7 +185,7 @@ class Parser {
 
     auto loc = lexer.getLastLocation();
     lexer.getNextToken(); // eat identifier.
-
+    
     if (lexer.getCurToken() != '(') // Simple variable ref.
       return std::make_unique<VariableExprAST>(std::move(loc), name);
 
@@ -216,6 +226,7 @@ class Parser {
   ///   ::= numberexpr
   ///   ::= parenexpr
   ///   ::= tensorliteral
+  ///   ::= stringexpr
   std::unique_ptr<ExprAST> parsePrimary() {
     switch (lexer.getCurToken()) {
     default:
@@ -230,6 +241,8 @@ class Parser {
       return parseParenExpr();
     case '[':
       return parseTensorLiteralExpr();
+    case tok_string_val:
+      return parseStringExpr();
     case ';':
       return nullptr;
     case '}':
@@ -334,7 +347,11 @@ class Parser {
     if (!type)
       type = std::make_unique<VarType>();
     lexer.consume(Token('='));
-    auto expr = parseExpression();
+    std::unique_ptr<ExprAST> expr;
+    if(lexer.getCurToken() == tok_string_val) {
+        expr = parseStringExpr();
+    }
+    else expr = parseExpression();
     return std::make_unique<VarDeclExprAST>(std::move(loc), std::move(id),
                                             std::move(*type), std::move(expr));
   }
@@ -465,6 +482,8 @@ class Parser {
       return 40;
     case '/':
       return 40;
+    case '^':
+      return 60;
     default:
       return -1;
     }
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir b/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir
new file mode 100644
index 000000000000..47dde27be52e
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/noopt.mlir
@@ -0,0 +1,821 @@
+module {
+  func.func @main() {
+    %c2047_i64 = arith.constant 2047 : i64
+    %cst = arith.constant 0.49971199035644531 : f64
+    %c11 = arith.constant 11 : index
+    %c10 = arith.constant 10 : index
+    %cst_0 = arith.constant 3.000000e+00 : f64
+    %c-1 = arith.constant -1 : index
+    %cst_1 = arith.constant 7.700000e+02 : f64
+    %cst_2 = arith.constant 1.209000e+03 : f64
+    %cst_3 = arith.constant 6.970000e+02 : f64
+    %cst_4 = arith.constant 1.336000e+03 : f64
+    %cst_5 = arith.constant 9.410000e+02 : f64
+    %cst_6 = arith.constant 1.220000e-04 : f64
+    %cst_7 = arith.constant 4.096000e+03 : f64
+    %cst_8 = arith.constant -2.000000e+00 : f64
+    %cst_9 = arith.constant 3.1415926535897931 : f64
+    %c12 = arith.constant 12 : index
+    %cst_10 = arith.constant 0.000000e+00 : f64
+    %c1_i64 = arith.constant 1 : i64
+    %c0_i64 = arith.constant 0 : i64
+    %c4096 = arith.constant 4096 : index
+    %c1 = arith.constant 1 : index
+    %cst_11 = arith.constant 1.477000e+03 : f64
+    %cst_12 = arith.constant 8.520000e+02 : f64
+    %cst_13 = arith.constant 1.000000e+01 : f64
+    %cst_14 = arith.constant 6.2831853071800001 : f64
+    %cst_15 = arith.constant 8.192000e+03 : f64
+    %cst_16 = arith.constant 5.000000e-01 : f64
+    %cst_17 = arith.constant 9.000000e+00 : f64
+    %c0 = arith.constant 0 : index
+    %alloc = memref.alloc() : memref<index>
+    %alloc_18 = memref.alloc() : memref<1xf64>
+    %alloc_19 = memref.alloc() : memref<10x2xf64>
+    %alloc_20 = memref.alloc() : memref<2xf64>
+    %alloc_21 = memref.alloc() : memref<4096xf64>
+    %alloc_22 = memref.alloc() : memref<f64>
+    %alloc_23 = memref.alloc() : memref<f64>
+    %alloc_24 = memref.alloc() : memref<4096xf64>
+    %alloc_25 = memref.alloc() : memref<4096xf64>
+    %alloc_26 = memref.alloc() : memref<4096xf64>
+    %alloc_27 = memref.alloc() : memref<4096xf64>
+    %alloc_28 = memref.alloc() : memref<f64>
+    %alloc_29 = memref.alloc() : memref<f64>
+    %alloc_30 = memref.alloc() : memref<f64>
+    affine.store %cst_17, %alloc_30[] : memref<f64>
+    affine.store %cst_16, %alloc_29[] : memref<f64>
+    affine.store %cst_15, %alloc_28[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.divf %7, %cst_15 : f64
+      %9 = arith.mulf %8, %cst_12 : f64
+      %10 = arith.mulf %9, %cst_14 : f64
+      %11 = math.sin %10 : f64
+      %12 = arith.mulf %8, %cst_11 : f64
+      %13 = arith.mulf %12, %cst_14 : f64
+      %14 = math.sin %13 : f64
+      %15 = arith.addf %11, %14 : f64
+      %16 = arith.mulf %15, %cst_13 : f64
+      memref.store %16, %alloc_27[%arg0] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) {
+        %10 = arith.index_cast %arg1 : index to i64
+        %11 = arith.shli %c1_i64, %10 : i64
+        %12 = arith.andi %6, %11 : i64
+        %13 = arith.cmpi ne, %12, %c0_i64 : i64
+        %14 = arith.subi %c11, %arg1 : index
+        %15 = arith.index_cast %14 : index to i64
+        %16 = arith.shli %c1_i64, %15 : i64
+        %17 = arith.select %13, %16, %c0_i64 : i64
+        %18 = arith.ori %arg2, %17 : i64
+        scf.yield %18 : i64
+      }
+      %8 = arith.index_cast %7 : i64 to index
+      %9 = memref.load %alloc_27[%arg0] : memref<4096xf64>
+      memref.store %9, %alloc_26[%8] : memref<4096xf64>
+      memref.store %cst_10, %alloc_25[%8] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c12 step %c1 {
+      %6 = arith.shli %c1, %arg0 : index
+      %7 = arith.shli %6, %c1 : index
+      scf.for %arg1 = %c0 to %c4096 step %7 {
+        scf.for %arg2 = %c0 to %6 step %c1 {
+          %8 = arith.addi %arg1, %arg2 : index
+          %9 = arith.addi %8, %6 : index
+          %10 = arith.index_cast %arg2 : index to i64
+          %11 = arith.sitofp %10 : i64 to f64
+          %12 = arith.index_cast %7 : index to i64
+          %13 = arith.sitofp %12 : i64 to f64
+          %14 = arith.divf %11, %13 : f64
+          %15 = arith.mulf %14, %cst_8 : f64
+          %16 = arith.mulf %15, %cst_9 : f64
+          %17 = math.cos %16 : f64
+          %18 = math.sin %16 : f64
+          %19 = memref.load %alloc_26[%9] : memref<4096xf64>
+          %20 = memref.load %alloc_25[%9] : memref<4096xf64>
+          %21 = arith.mulf %19, %17 : f64
+          %22 = arith.mulf %20, %18 : f64
+          %23 = arith.subf %21, %22 : f64
+          %24 = arith.mulf %19, %18 : f64
+          %25 = arith.mulf %20, %17 : f64
+          %26 = arith.addf %24, %25 : f64
+          %27 = memref.load %alloc_26[%8] : memref<4096xf64>
+          %28 = memref.load %alloc_25[%8] : memref<4096xf64>
+          %29 = arith.addf %27, %23 : f64
+          %30 = arith.addf %28, %26 : f64
+          %31 = arith.subf %27, %23 : f64
+          %32 = arith.subf %28, %26 : f64
+          %33 = arith.mulf %29, %29 : f64
+          %34 = arith.mulf %30, %30 : f64
+          %35 = arith.addf %33, %34 : f64
+          %36 = math.sqrt %35 : f64
+          %37 = arith.mulf %31, %31 : f64
+          %38 = arith.mulf %32, %32 : f64
+          %39 = arith.addf %37, %38 : f64
+          %40 = math.sqrt %39 : f64
+          memref.store %29, %alloc_26[%8] : memref<4096xf64>
+          memref.store %30, %alloc_25[%8] : memref<4096xf64>
+          memref.store %31, %alloc_26[%9] : memref<4096xf64>
+          memref.store %32, %alloc_25[%9] : memref<4096xf64>
+          memref.store %36, %alloc_24[%8] : memref<4096xf64>
+          memref.store %40, %alloc_24[%9] : memref<4096xf64>
+        }
+      }
+    }
+    affine.store %cst_7, %alloc_23[] : memref<f64>
+    affine.store %cst_6, %alloc_22[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.cmpi sle, %6, %c2047_i64 : i64
+      scf.if %8 {
+        %9 = arith.divf %7, %cst : f64
+        memref.store %9, %alloc_21[%arg0] : memref<4096xf64>
+      } else {
+        %9 = arith.subf %7, %cst_7 : f64
+        %10 = arith.divf %9, %cst : f64
+        memref.store %10, %alloc_21[%arg0] : memref<4096xf64>
+      }
+    }
+    %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) {
+      %6 = memref.load %alloc_21[%arg0] : memref<4096xf64>
+      %7 = memref.load %alloc_24[%arg0] : memref<4096xf64>
+      %8 = arith.cmpf ogt, %6, %cst_10 : f64
+      %9:4 = scf.if %8 -> (f64, f64, f64, f64) {
+        %10 = arith.cmpf ogt, %7, %arg1 : f64
+        %11 = arith.select %10, %7, %arg1 : f64
+        %12 = arith.select %10, %6, %arg3 : f64
+        %13:2 = scf.if %10 -> (f64, f64) {
+          scf.yield %arg1, %arg3 : f64, f64
+        } else {
+          %14 = arith.cmpf ogt, %7, %arg2 : f64
+          %15 = arith.select %14, %7, %arg2 : f64
+          %16 = arith.select %14, %6, %arg4 : f64
+          scf.yield %15, %16 : f64, f64
+        }
+        scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64
+      } else {
+        scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64
+      }
+      scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64
+    }
+    memref.store %0#2, %alloc_20[%c0] : memref<2xf64>
+    memref.store %0#3, %alloc_20[%c1] : memref<2xf64>
+    affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64>
+    %1 = memref.load %alloc_20[%c0] : memref<2xf64>
+    %2 = memref.load %alloc_20[%c1] : memref<2xf64>
+    affine.store %c-1, %alloc[] : memref<index>
+    scf.for %arg0 = %c0 to %c10 step %c1 {
+      %6 = memref.load %alloc[] : memref<index>
+      %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64>
+      %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64>
+      %9 = arith.subf %7, %1 : f64
+      %10 = arith.subf %8, %2 : f64
+      %11 = math.absf %9 : f64
+      %12 = math.absf %10 : f64
+      %13 = arith.cmpf ole, %11, %cst_0 : f64
+      %14 = arith.cmpf ole, %12, %cst_0 : f64
+      %15 = arith.andi %13, %14 : i1
+      %16 = arith.select %15, %arg0, %6 : index
+      memref.store %16, %alloc[] : memref<index>
+    }
+    %3 = memref.load %alloc[] : memref<index>
+    %4 = arith.index_cast %3 : index to i64
+    %5 = arith.sitofp %4 : i64 to f64
+    memref.store %5, %alloc_18[%c0] : memref<1xf64>
+root@f68572e75858:/home/DSP_MLIR# /home/DSP_MLIR/build/bin/dsp1 /home/DSP_MLIR/mlir/test/Examples/DspExample/full_dtmf.py -emit=mlir-affine -affineOpt -canonOpt -opt
+module {
+  func.func @main() {
+    %c2047_i64 = arith.constant 2047 : i64
+    %cst = arith.constant 0.49971199035644531 : f64
+    %c11 = arith.constant 11 : index
+    %c10 = arith.constant 10 : index
+    %cst_0 = arith.constant 3.000000e+00 : f64
+    %c-1 = arith.constant -1 : index
+    %cst_1 = arith.constant 7.700000e+02 : f64
+    %cst_2 = arith.constant 1.209000e+03 : f64
+    %cst_3 = arith.constant 6.970000e+02 : f64
+    %cst_4 = arith.constant 1.336000e+03 : f64
+    %cst_5 = arith.constant 9.410000e+02 : f64
+    %cst_6 = arith.constant 1.220000e-04 : f64
+    %cst_7 = arith.constant 4.096000e+03 : f64
+    %cst_8 = arith.constant -2.000000e+00 : f64
+    %cst_9 = arith.constant 3.1415926535897931 : f64
+    %c12 = arith.constant 12 : index
+    %cst_10 = arith.constant 0.000000e+00 : f64
+    %c1_i64 = arith.constant 1 : i64
+    %c0_i64 = arith.constant 0 : i64
+    %c4096 = arith.constant 4096 : index
+    %c1 = arith.constant 1 : index
+    %cst_11 = arith.constant 1.477000e+03 : f64
+    %cst_12 = arith.constant 8.520000e+02 : f64
+    %cst_13 = arith.constant 1.000000e+01 : f64
+    %cst_14 = arith.constant 6.2831853071800001 : f64
+    %cst_15 = arith.constant 8.192000e+03 : f64
+    %cst_16 = arith.constant 5.000000e-01 : f64
+    %cst_17 = arith.constant 9.000000e+00 : f64
+    %c0 = arith.constant 0 : index
+    %alloc = memref.alloc() : memref<index>
+    %alloc_18 = memref.alloc() : memref<1xf64>
+    %alloc_19 = memref.alloc() : memref<10x2xf64>
+    %alloc_20 = memref.alloc() : memref<2xf64>
+    %alloc_21 = memref.alloc() : memref<4096xf64>
+    %alloc_22 = memref.alloc() : memref<f64>
+    %alloc_23 = memref.alloc() : memref<f64>
+    %alloc_24 = memref.alloc() : memref<4096xf64>
+    %alloc_25 = memref.alloc() : memref<4096xf64>
+    %alloc_26 = memref.alloc() : memref<4096xf64>
+    %alloc_27 = memref.alloc() : memref<4096xf64>
+    %alloc_28 = memref.alloc() : memref<f64>
+    %alloc_29 = memref.alloc() : memref<f64>
+    %alloc_30 = memref.alloc() : memref<f64>
+    affine.store %cst_17, %alloc_30[] : memref<f64>
+    affine.store %cst_16, %alloc_29[] : memref<f64>
+    affine.store %cst_15, %alloc_28[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.divf %7, %cst_15 : f64
+      %9 = arith.mulf %8, %cst_12 : f64
+      %10 = arith.mulf %9, %cst_14 : f64
+      %11 = math.sin %10 : f64
+      %12 = arith.mulf %8, %cst_11 : f64
+      %13 = arith.mulf %12, %cst_14 : f64
+      %14 = math.sin %13 : f64
+      %15 = arith.addf %11, %14 : f64
+      %16 = arith.mulf %15, %cst_13 : f64
+      memref.store %16, %alloc_27[%arg0] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) {
+        %10 = arith.index_cast %arg1 : index to i64
+        %11 = arith.shli %c1_i64, %10 : i64
+        %12 = arith.andi %6, %11 : i64
+        %13 = arith.cmpi ne, %12, %c0_i64 : i64
+        %14 = arith.subi %c11, %arg1 : index
+        %15 = arith.index_cast %14 : index to i64
+        %16 = arith.shli %c1_i64, %15 : i64
+        %17 = arith.select %13, %16, %c0_i64 : i64
+        %18 = arith.ori %arg2, %17 : i64
+        scf.yield %18 : i64
+      }
+      %8 = arith.index_cast %7 : i64 to index
+      %9 = memref.load %alloc_27[%arg0] : memref<4096xf64>
+      memref.store %9, %alloc_26[%8] : memref<4096xf64>
+      memref.store %cst_10, %alloc_25[%8] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c12 step %c1 {
+      %6 = arith.shli %c1, %arg0 : index
+      %7 = arith.shli %6, %c1 : index
+      scf.for %arg1 = %c0 to %c4096 step %7 {
+        scf.for %arg2 = %c0 to %6 step %c1 {
+          %8 = arith.addi %arg1, %arg2 : index
+          %9 = arith.addi %8, %6 : index
+          %10 = arith.index_cast %arg2 : index to i64
+          %11 = arith.sitofp %10 : i64 to f64
+          %12 = arith.index_cast %7 : index to i64
+          %13 = arith.sitofp %12 : i64 to f64
+          %14 = arith.divf %11, %13 : f64
+          %15 = arith.mulf %14, %cst_8 : f64
+          %16 = arith.mulf %15, %cst_9 : f64
+          %17 = math.cos %16 : f64
+          %18 = math.sin %16 : f64
+          %19 = memref.load %alloc_26[%9] : memref<4096xf64>
+          %20 = memref.load %alloc_25[%9] : memref<4096xf64>
+          %21 = arith.mulf %19, %17 : f64
+          %22 = arith.mulf %20, %18 : f64
+          %23 = arith.subf %21, %22 : f64
+          %24 = arith.mulf %19, %18 : f64
+          %25 = arith.mulf %20, %17 : f64
+          %26 = arith.addf %24, %25 : f64
+          %27 = memref.load %alloc_26[%8] : memref<4096xf64>
+          %28 = memref.load %alloc_25[%8] : memref<4096xf64>
+          %29 = arith.addf %27, %23 : f64
+          %30 = arith.addf %28, %26 : f64
+          %31 = arith.subf %27, %23 : f64
+          %32 = arith.subf %28, %26 : f64
+          %33 = arith.mulf %29, %29 : f64
+          %34 = arith.mulf %30, %30 : f64
+          %35 = arith.addf %33, %34 : f64
+          %36 = math.sqrt %35 : f64
+          %37 = arith.mulf %31, %31 : f64
+          %38 = arith.mulf %32, %32 : f64
+          %39 = arith.addf %37, %38 : f64
+          %40 = math.sqrt %39 : f64
+          memref.store %29, %alloc_26[%8] : memref<4096xf64>
+          memref.store %30, %alloc_25[%8] : memref<4096xf64>
+          memref.store %31, %alloc_26[%9] : memref<4096xf64>
+          memref.store %32, %alloc_25[%9] : memref<4096xf64>
+          memref.store %36, %alloc_24[%8] : memref<4096xf64>
+          memref.store %40, %alloc_24[%9] : memref<4096xf64>
+        }
+      }
+    }
+    affine.store %cst_7, %alloc_23[] : memref<f64>
+    affine.store %cst_6, %alloc_22[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.cmpi sle, %6, %c2047_i64 : i64
+      scf.if %8 {
+        %9 = arith.divf %7, %cst : f64
+        memref.store %9, %alloc_21[%arg0] : memref<4096xf64>
+      } else {
+        %9 = arith.subf %7, %cst_7 : f64
+        %10 = arith.divf %9, %cst : f64
+        memref.store %10, %alloc_21[%arg0] : memref<4096xf64>
+      }
+    }
+    %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) {
+      %6 = memref.load %alloc_21[%arg0] : memref<4096xf64>
+      %7 = memref.load %alloc_24[%arg0] : memref<4096xf64>
+      %8 = arith.cmpf ogt, %6, %cst_10 : f64
+      %9:4 = scf.if %8 -> (f64, f64, f64, f64) {
+        %10 = arith.cmpf ogt, %7, %arg1 : f64
+        %11 = arith.select %10, %7, %arg1 : f64
+        %12 = arith.select %10, %6, %arg3 : f64
+        %13:2 = scf.if %10 -> (f64, f64) {
+          scf.yield %arg1, %arg3 : f64, f64
+        } else {
+          %14 = arith.cmpf ogt, %7, %arg2 : f64
+          %15 = arith.select %14, %7, %arg2 : f64
+          %16 = arith.select %14, %6, %arg4 : f64
+          scf.yield %15, %16 : f64, f64
+        }
+        scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64
+      } else {
+        scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64
+      }
+      scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64
+    }
+    memref.store %0#2, %alloc_20[%c0] : memref<2xf64>
+    memref.store %0#3, %alloc_20[%c1] : memref<2xf64>
+    affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64>
+    %1 = memref.load %alloc_20[%c0] : memref<2xf64>
+    %2 = memref.load %alloc_20[%c1] : memref<2xf64>
+    affine.store %c-1, %alloc[] : memref<index>
+    scf.for %arg0 = %c0 to %c10 step %c1 {
+      %6 = memref.load %alloc[] : memref<index>
+      %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64>
+      %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64>
+      %9 = arith.subf %7, %1 : f64
+      %10 = arith.subf %8, %2 : f64
+      %11 = math.absf %9 : f64
+      %12 = math.absf %10 : f64
+      %13 = arith.cmpf ole, %11, %cst_0 : f64
+      %14 = arith.cmpf ole, %12, %cst_0 : f64
+      %15 = arith.andi %13, %14 : i1
+      %16 = arith.select %15, %arg0, %6 : index
+      memref.store %16, %alloc[] : memref<index>
+    }
+    %3 = memref.load %alloc[] : memref<index>
+    %4 = arith.index_cast %3 : index to i64
+    %5 = arith.sitofp %4 : i64 to f64
+    memref.store %5, %alloc_18[%c0] : memref<1xf64>
+root@f68572e75858:/home/DSP_MLIR# /home/DSP_MLIR/build/bin/dsp1 /home/DSP_MLIR/mlir/test/Examples/DspExample/full_dtmf.py -emit=mlir-affine 
+module {
+  func.func @main() {
+    %alloc = memref.alloc() : memref<index>
+    %alloc_0 = memref.alloc() : memref<1xf64>
+    %alloc_1 = memref.alloc() : memref<10x2xf64>
+    %alloc_2 = memref.alloc() : memref<2xf64>
+    %alloc_3 = memref.alloc() : memref<4096xf64>
+    %alloc_4 = memref.alloc() : memref<f64>
+    %alloc_5 = memref.alloc() : memref<f64>
+    %alloc_6 = memref.alloc() : memref<4096xf64>
+    %alloc_7 = memref.alloc() : memref<4096xf64>
+    %alloc_8 = memref.alloc() : memref<4096xf64>
+    %alloc_9 = memref.alloc() : memref<4096xf64>
+    %alloc_10 = memref.alloc() : memref<4096xf64>
+    %alloc_11 = memref.alloc() : memref<4096xf64>
+    %alloc_12 = memref.alloc() : memref<4096xf64>
+    %alloc_13 = memref.alloc() : memref<4096xf64>
+    %alloc_14 = memref.alloc() : memref<4096xf64>
+    %alloc_15 = memref.alloc() : memref<4096xf64>
+    %alloc_16 = memref.alloc() : memref<4096xf64>
+    %alloc_17 = memref.alloc() : memref<4096xf64>
+    %alloc_18 = memref.alloc() : memref<4096xf64>
+    %alloc_19 = memref.alloc() : memref<f64>
+    %alloc_20 = memref.alloc() : memref<f64>
+    %alloc_21 = memref.alloc() : memref<f64>
+    %c0 = arith.constant 0 : index
+    %cst = arith.constant 9.000000e+00 : f64
+    affine.store %cst, %alloc_21[] : memref<f64>
+    %c0_22 = arith.constant 0 : index
+    %cst_23 = arith.constant 5.000000e-01 : f64
+    affine.store %cst_23, %alloc_20[] : memref<f64>
+    %c0_24 = arith.constant 0 : index
+    %cst_25 = arith.constant 8.192000e+03 : f64
+    affine.store %cst_25, %alloc_19[] : memref<f64>
+    %cst_26 = arith.constant 6.2831853071800001 : f64
+    %cst_27 = arith.constant 1.000000e+01 : f64
+    %cst_28 = arith.constant 8.192000e+03 : f64
+    %cst_29 = arith.constant 8.520000e+02 : f64
+    %cst_30 = arith.constant 1.477000e+03 : f64
+    %c1 = arith.constant 1 : index
+    %c4096 = arith.constant 4096 : index
+    %c0_31 = arith.constant 0 : index
+    scf.for %arg0 = %c0_31 to %c4096 step %c1 {
+      %19 = arith.index_cast %arg0 : index to i64
+      %20 = arith.sitofp %19 : i64 to f64
+      %21 = arith.divf %20, %cst_28 : f64
+      %22 = arith.mulf %cst_29, %21 : f64
+      %23 = arith.mulf %cst_26, %22 : f64
+      %24 = math.sin %23 : f64
+      %25 = arith.mulf %cst_30, %21 : f64
+      %26 = arith.mulf %cst_26, %25 : f64
+      %27 = math.sin %26 : f64
+      %28 = arith.addf %24, %27 : f64
+      %29 = arith.mulf %cst_27, %28 : f64
+      memref.store %29, %alloc_18[%arg0] : memref<4096xf64>
+    }
+    %c0_32 = arith.constant 0 : index
+    %c4096_33 = arith.constant 4096 : index
+    %c1_34 = arith.constant 1 : index
+    %0 = arith.index_cast %c4096_33 : index to i64
+    %1 = arith.sitofp %0 : i64 to f64
+    %2 = math.log2 %1 : f64
+    %3 = arith.fptosi %2 : f64 to i64
+    %4 = arith.index_cast %3 : i64 to index
+    scf.for %arg0 = %c0_32 to %c4096_33 step %c1_34 {
+      %19 = arith.index_cast %arg0 : index to i64
+      %c0_i64 = arith.constant 0 : i64
+      %20 = scf.for %arg1 = %c0_32 to %4 step %c1_34 iter_args(%arg2 = %c0_i64) -> (i64) {
+        %23 = arith.index_cast %arg1 : index to i64
+        %c1_i64 = arith.constant 1 : i64
+        %24 = arith.shli %c1_i64, %23 : i64
+        %25 = arith.andi %19, %24 : i64
+        %c0_i64_92 = arith.constant 0 : i64
+        %26 = arith.cmpi ne, %25, %c0_i64_92 : i64
+        %c1_93 = arith.constant 1 : index
+        %27 = arith.subi %4, %arg1 : index
+        %28 = arith.subi %27, %c1_93 : index
+        %29 = arith.index_cast %28 : index to i64
+        %c1_i64_94 = arith.constant 1 : i64
+        %30 = arith.shli %c1_i64_94, %29 : i64
+        %c0_i64_95 = arith.constant 0 : i64
+        %31 = arith.select %26, %30, %c0_i64_95 : i64
+        %32 = arith.ori %arg2, %31 : i64
+        scf.yield %32 : i64
+      }
+      %21 = arith.index_cast %20 : i64 to index
+      %22 = memref.load %alloc_18[%arg0] : memref<4096xf64>
+      %cst_91 = arith.constant 0.000000e+00 : f64
+      memref.store %22, %alloc_15[%21] : memref<4096xf64>
+      memref.store %cst_91, %alloc_14[%21] : memref<4096xf64>
+    }
+    %c12 = arith.constant 12 : index
+    %cst_35 = arith.constant 3.1415926535897931 : f64
+    %cst_36 = arith.constant -2.000000e+00 : f64
+    scf.for %arg0 = %c0_32 to %c12 step %c1_34 {
+      %c1_91 = arith.constant 1 : index
+      %19 = arith.shli %c1_91, %arg0 : index
+      %c1_92 = arith.constant 1 : index
+      %20 = arith.shli %19, %c1_92 : index
+      scf.for %arg1 = %c0_32 to %c4096_33 step %20 {
+        scf.for %arg2 = %c0_32 to %19 step %c1_34 {
+          %21 = arith.addi %arg1, %arg2 : index
+          %22 = arith.addi %21, %19 : index
+          %23 = arith.index_cast %arg2 : index to i64
+          %24 = arith.sitofp %23 : i64 to f64
+          %25 = arith.index_cast %20 : index to i64
+          %26 = arith.sitofp %25 : i64 to f64
+          %27 = arith.divf %24, %26 : f64
+          %28 = arith.mulf %cst_36, %27 : f64
+          %29 = arith.mulf %cst_35, %28 : f64
+          %30 = math.cos %29 : f64
+          %31 = math.sin %29 : f64
+          %32 = memref.load %alloc_15[%22] : memref<4096xf64>
+          %33 = memref.load %alloc_14[%22] : memref<4096xf64>
+          %34 = arith.mulf %32, %30 : f64
+          %35 = arith.mulf %33, %31 : f64
+          %36 = arith.subf %34, %35 : f64
+          %37 = arith.mulf %32, %31 : f64
+          %38 = arith.mulf %33, %30 : f64
+          %39 = arith.addf %37, %38 : f64
+          %40 = memref.load %alloc_15[%21] : memref<4096xf64>
+          %41 = memref.load %alloc_14[%21] : memref<4096xf64>
+          %42 = arith.addf %40, %36 : f64
+          %43 = arith.addf %41, %39 : f64
+          %44 = arith.subf %40, %36 : f64
+          %45 = arith.subf %41, %39 : f64
+          memref.store %42, %alloc_15[%21] : memref<4096xf64>
+          memref.store %43, %alloc_14[%21] : memref<4096xf64>
+          memref.store %44, %alloc_15[%22] : memref<4096xf64>
+          memref.store %45, %alloc_14[%22] : memref<4096xf64>
+        }
+      }
+    }
+    %c0_37 = arith.constant 0 : index
+    %c4096_38 = arith.constant 4096 : index
+    %c1_39 = arith.constant 1 : index
+    %5 = arith.index_cast %c4096_38 : index to i64
+    %6 = arith.sitofp %5 : i64 to f64
+    %7 = math.log2 %6 : f64
+    %8 = arith.fptosi %7 : f64 to i64
+    %9 = arith.index_cast %8 : i64 to index
+    scf.for %arg0 = %c0_37 to %c4096_38 step %c1_39 {
+      %19 = arith.index_cast %arg0 : index to i64
+      %c0_i64 = arith.constant 0 : i64
+      %20 = scf.for %arg1 = %c0_37 to %9 step %c1_39 iter_args(%arg2 = %c0_i64) -> (i64) {
+        %23 = arith.index_cast %arg1 : index to i64
+        %c1_i64 = arith.constant 1 : i64
+        %24 = arith.shli %c1_i64, %23 : i64
+        %25 = arith.andi %19, %24 : i64
+        %c0_i64_92 = arith.constant 0 : i64
+        %26 = arith.cmpi ne, %25, %c0_i64_92 : i64
+        %c1_93 = arith.constant 1 : index
+        %27 = arith.subi %9, %arg1 : index
+        %28 = arith.subi %27, %c1_93 : index
+        %29 = arith.index_cast %28 : index to i64
+        %c1_i64_94 = arith.constant 1 : i64
+        %30 = arith.shli %c1_i64_94, %29 : i64
+        %c0_i64_95 = arith.constant 0 : i64
+        %31 = arith.select %26, %30, %c0_i64_95 : i64
+        %32 = arith.ori %arg2, %31 : i64
+        scf.yield %32 : i64
+      }
+      %21 = arith.index_cast %20 : i64 to index
+      %22 = memref.load %alloc_18[%arg0] : memref<4096xf64>
+      %cst_91 = arith.constant 0.000000e+00 : f64
+      memref.store %22, %alloc_11[%21] : memref<4096xf64>
+      memref.store %cst_91, %alloc_10[%21] : memref<4096xf64>
+    }
+    %c12_40 = arith.constant 12 : index
+    %cst_41 = arith.constant 3.1415926535897931 : f64
+    %cst_42 = arith.constant -2.000000e+00 : f64
+    scf.for %arg0 = %c0_37 to %c12_40 step %c1_39 {
+      %c1_91 = arith.constant 1 : index
+      %19 = arith.shli %c1_91, %arg0 : index
+      %c1_92 = arith.constant 1 : index
+      %20 = arith.shli %19, %c1_92 : index
+      scf.for %arg1 = %c0_37 to %c4096_38 step %20 {
+        scf.for %arg2 = %c0_37 to %19 step %c1_39 {
+          %21 = arith.addi %arg1, %arg2 : index
+          %22 = arith.addi %21, %19 : index
+          %23 = arith.index_cast %arg2 : index to i64
+          %24 = arith.sitofp %23 : i64 to f64
+          %25 = arith.index_cast %20 : index to i64
+          %26 = arith.sitofp %25 : i64 to f64
+          %27 = arith.divf %24, %26 : f64
+          %28 = arith.mulf %cst_42, %27 : f64
+          %29 = arith.mulf %cst_41, %28 : f64
+          %30 = math.cos %29 : f64
+          %31 = math.sin %29 : f64
+          %32 = memref.load %alloc_11[%22] : memref<4096xf64>
+          %33 = memref.load %alloc_10[%22] : memref<4096xf64>
+          %34 = arith.mulf %32, %30 : f64
+          %35 = arith.mulf %33, %31 : f64
+          %36 = arith.subf %34, %35 : f64
+          %37 = arith.mulf %32, %31 : f64
+          %38 = arith.mulf %33, %30 : f64
+          %39 = arith.addf %37, %38 : f64
+          %40 = memref.load %alloc_11[%21] : memref<4096xf64>
+          %41 = memref.load %alloc_10[%21] : memref<4096xf64>
+          %42 = arith.addf %40, %36 : f64
+          %43 = arith.addf %41, %39 : f64
+          %44 = arith.subf %40, %36 : f64
+          %45 = arith.subf %41, %39 : f64
+          memref.store %42, %alloc_11[%21] : memref<4096xf64>
+          memref.store %43, %alloc_10[%21] : memref<4096xf64>
+          memref.store %44, %alloc_11[%22] : memref<4096xf64>
+          memref.store %45, %alloc_10[%22] : memref<4096xf64>
+        }
+      }
+    }
+    affine.for %arg0 = 0 to 4096 {
+      %19 = affine.load %alloc_15[%arg0] : memref<4096xf64>
+      %20 = arith.mulf %19, %19 : f64
+      affine.store %20, %alloc_9[%arg0] : memref<4096xf64>
+    }
+    affine.for %arg0 = 0 to 4096 {
+      %19 = affine.load %alloc_10[%arg0] : memref<4096xf64>
+      %20 = arith.mulf %19, %19 : f64
+      affine.store %20, %alloc_8[%arg0] : memref<4096xf64>
+    }
+    affine.for %arg0 = 0 to 4096 {
+      %19 = affine.load %alloc_9[%arg0] : memref<4096xf64>
+      %20 = affine.load %alloc_8[%arg0] : memref<4096xf64>
+      %21 = arith.addf %19, %20 : f64
+      affine.store %21, %alloc_7[%arg0] : memref<4096xf64>
+    }
+    affine.for %arg0 = 0 to 4096 {
+      %19 = affine.load %alloc_7[%arg0] : memref<4096xf64>
+      %20 = math.sqrt %19 : f64
+      affine.store %20, %alloc_6[%arg0] : memref<4096xf64>
+    }
+    %c0_43 = arith.constant 0 : index
+    %cst_44 = arith.constant 4.096000e+03 : f64
+    affine.store %cst_44, %alloc_5[] : memref<f64>
+    %c0_45 = arith.constant 0 : index
+    %cst_46 = arith.constant 1.220000e-04 : f64
+    affine.store %cst_46, %alloc_4[] : memref<f64>
+    %cst_47 = arith.constant 4.096000e+03 : f64
+    %cst_48 = arith.constant 1.2199999764561653E-4 : f64
+    %c0_49 = arith.constant 0 : index
+    %c4096_50 = arith.constant 4096 : index
+    %c1_51 = arith.constant 1 : index
+    %10 = arith.mulf %cst_47, %cst_48 : f64
+    %cst_52 = arith.constant 5.000000e-01 : f64
+    %cst_53 = arith.constant 1.000000e+00 : f64
+    %11 = arith.subf %cst_47, %cst_53 : f64
+    %12 = arith.mulf %11, %cst_52 : f64
+    scf.for %arg0 = %c0_49 to %c4096_50 step %c1_51 {
+      %19 = arith.index_cast %arg0 : index to i64
+      %20 = arith.sitofp %19 : i64 to f64
+      %21 = arith.cmpf ole, %20, %12 : f64
+      %22 = scf.if %21 -> (f64) {
+        %23 = arith.divf %20, %10 : f64
+        memref.store %23, %alloc_3[%arg0] : memref<4096xf64>
+        scf.yield %23 : f64
+      } else {
+        %23 = arith.subf %20, %cst_47 : f64
+        %24 = arith.divf %23, %10 : f64
+        memref.store %24, %alloc_3[%arg0] : memref<4096xf64>
+        scf.yield %24 : f64
+      }
+    }
+    %cst_54 = arith.constant 0.000000e+00 : f64
+    %cst_55 = arith.constant 0.000000e+00 : f64
+    %cst_56 = arith.constant 0.000000e+00 : f64
+    %cst_57 = arith.constant 0.000000e+00 : f64
+    %c0_58 = arith.constant 0 : index
+    %c4096_59 = arith.constant 4096 : index
+    %c1_60 = arith.constant 1 : index
+    %13:4 = scf.for %arg0 = %c0_58 to %c4096_59 step %c1_60 iter_args(%arg1 = %cst_54, %arg2 = %cst_55, %arg3 = %cst_56, %arg4 = %cst_57) -> (f64, f64, f64, f64) {
+      %19 = memref.load %alloc_3[%arg0] : memref<4096xf64>
+      %20 = memref.load %alloc_6[%arg0] : memref<4096xf64>
+      %cst_91 = arith.constant 0.000000e+00 : f64
+      %21 = arith.cmpf ogt, %19, %cst_91 : f64
+      %22:4 = scf.if %21 -> (f64, f64, f64, f64) {
+        %23 = arith.cmpf ogt, %20, %arg1 : f64
+        %24:4 = scf.if %23 -> (f64, f64, f64, f64) {
+          scf.yield %20, %arg1, %19, %arg3 : f64, f64, f64, f64
+        } else {
+          %25 = arith.cmpf ogt, %20, %arg2 : f64
+          %26:4 = scf.if %25 -> (f64, f64, f64, f64) {
+            scf.yield %arg1, %20, %arg3, %19 : f64, f64, f64, f64
+          } else {
+            scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64
+          }
+          scf.yield %26#0, %26#1, %26#2, %26#3 : f64, f64, f64, f64
+        }
+        scf.yield %24#0, %24#1, %24#2, %24#3 : f64, f64, f64, f64
+      } else {
+        scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64
+      }
+      scf.yield %22#0, %22#1, %22#2, %22#3 : f64, f64, f64, f64
+    }
+    %c0_61 = arith.constant 0 : index
+    memref.store %13#2, %alloc_2[%c0_61] : memref<2xf64>
+    %c1_62 = arith.constant 1 : index
+    memref.store %13#3, %alloc_2[%c1_62] : memref<2xf64>
+    %c0_63 = arith.constant 0 : index
+    %c1_64 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %c3 = arith.constant 3 : index
+    %c4 = arith.constant 4 : index
+    %c5 = arith.constant 5 : index
+    %c6 = arith.constant 6 : index
+    %c7 = arith.constant 7 : index
+    %c8 = arith.constant 8 : index
+    %c9 = arith.constant 9 : index
+    %cst_65 = arith.constant 9.410000e+02 : f64
+    affine.store %cst_65, %alloc_1[%c0_63, %c0_63] : memref<10x2xf64>
+    %cst_66 = arith.constant 1.336000e+03 : f64
+    affine.store %cst_66, %alloc_1[%c0_63, %c1_64] : memref<10x2xf64>
+    %cst_67 = arith.constant 6.970000e+02 : f64
+    affine.store %cst_67, %alloc_1[%c1_64, %c0_63] : memref<10x2xf64>
+    %cst_68 = arith.constant 1.209000e+03 : f64
+    affine.store %cst_68, %alloc_1[%c1_64, %c1_64] : memref<10x2xf64>
+    %cst_69 = arith.constant 6.970000e+02 : f64
+    affine.store %cst_69, %alloc_1[%c2, %c0_63] : memref<10x2xf64>
+    %cst_70 = arith.constant 1.336000e+03 : f64
+    affine.store %cst_70, %alloc_1[%c2, %c1_64] : memref<10x2xf64>
+    %cst_71 = arith.constant 6.970000e+02 : f64
+    affine.store %cst_71, %alloc_1[%c3, %c0_63] : memref<10x2xf64>
+    %cst_72 = arith.constant 1.477000e+03 : f64
+    affine.store %cst_72, %alloc_1[%c3, %c1_64] : memref<10x2xf64>
+    %cst_73 = arith.constant 7.700000e+02 : f64
+    affine.store %cst_73, %alloc_1[%c4, %c0_63] : memref<10x2xf64>
+    %cst_74 = arith.constant 1.209000e+03 : f64
+    affine.store %cst_74, %alloc_1[%c4, %c1_64] : memref<10x2xf64>
+    %cst_75 = arith.constant 7.700000e+02 : f64
+    affine.store %cst_75, %alloc_1[%c5, %c0_63] : memref<10x2xf64>
+    %cst_76 = arith.constant 1.336000e+03 : f64
+    affine.store %cst_76, %alloc_1[%c5, %c1_64] : memref<10x2xf64>
+    %cst_77 = arith.constant 7.700000e+02 : f64
+    affine.store %cst_77, %alloc_1[%c6, %c0_63] : memref<10x2xf64>
+    %cst_78 = arith.constant 1.477000e+03 : f64
+    affine.store %cst_78, %alloc_1[%c6, %c1_64] : memref<10x2xf64>
+    %cst_79 = arith.constant 8.520000e+02 : f64
+    affine.store %cst_79, %alloc_1[%c7, %c0_63] : memref<10x2xf64>
+    %cst_80 = arith.constant 1.209000e+03 : f64
+    affine.store %cst_80, %alloc_1[%c7, %c1_64] : memref<10x2xf64>
+    %cst_81 = arith.constant 8.520000e+02 : f64
+    affine.store %cst_81, %alloc_1[%c8, %c0_63] : memref<10x2xf64>
+    %cst_82 = arith.constant 1.336000e+03 : f64
+    affine.store %cst_82, %alloc_1[%c8, %c1_64] : memref<10x2xf64>
+    %cst_83 = arith.constant 8.520000e+02 : f64
+    affine.store %cst_83, %alloc_1[%c9, %c0_63] : memref<10x2xf64>
+    %cst_84 = arith.constant 1.477000e+03 : f64
+    affine.store %cst_84, %alloc_1[%c9, %c1_64] : memref<10x2xf64>
+    %c0_85 = arith.constant 0 : index
+    %c1_86 = arith.constant 1 : index
+    %14 = memref.load %alloc_2[%c0_85] : memref<2xf64>
+    %15 = memref.load %alloc_2[%c1_86] : memref<2xf64>
+    %c-1 = arith.constant -1 : index
+    affine.store %c-1, %alloc[] : memref<index>
+    %cst_87 = arith.constant 3.000000e+00 : f64
+    %c0_88 = arith.constant 0 : index
+    %c10 = arith.constant 10 : index
+    %c1_89 = arith.constant 1 : index
+    scf.for %arg0 = %c0_88 to %c10 step %c1_89 {
+      %19 = memref.load %alloc[] : memref<index>
+      %20 = memref.load %alloc_1[%arg0, %c0_85] : memref<10x2xf64>
+      %21 = memref.load %alloc_1[%arg0, %c1_86] : memref<10x2xf64>
+      %22 = arith.subf %20, %14 : f64
+      %23 = arith.subf %21, %15 : f64
+      %24 = math.absf %22 : f64
+      %25 = math.absf %23 : f64
+      %26 = arith.cmpf ole, %24, %cst_87 : f64
+      %27 = arith.cmpf ole, %25, %cst_87 : f64
+      %28 = arith.andi %26, %27 : i1
+      %29 = arith.select %28, %arg0, %19 : index
+      memref.store %29, %alloc[] : memref<index>
+    }
+    %16 = memref.load %alloc[] : memref<index>
+    %17 = arith.index_cast %16 : index to i64
+    %18 = arith.sitofp %17 : i64 to f64
+    %c0_90 = arith.constant 0 : index
+    memref.store %18, %alloc_0[%c0_90] : memref<1xf64>
+    dsp.print %alloc_0 : memref<1xf64>
+    memref.dealloc %alloc_21 : memref<f64>
+    memref.dealloc %alloc_20 : memref<f64>
+    memref.dealloc %alloc_19 : memref<f64>
+    memref.dealloc %alloc_18 : memref<4096xf64>
+    memref.dealloc %alloc_17 : memref<4096xf64>
+    memref.dealloc %alloc_16 : memref<4096xf64>
+    memref.dealloc %alloc_15 : memref<4096xf64>
+    memref.dealloc %alloc_14 : memref<4096xf64>
+    memref.dealloc %alloc_13 : memref<4096xf64>
+    memref.dealloc %alloc_12 : memref<4096xf64>
+    memref.dealloc %alloc_11 : memref<4096xf64>
+    memref.dealloc %alloc_10 : memref<4096xf64>
+    memref.dealloc %alloc_9 : memref<4096xf64>
+    memref.dealloc %alloc_8 : memref<4096xf64>
+    memref.dealloc %alloc_7 : memref<4096xf64>
+    memref.dealloc %alloc_6 : memref<4096xf64>
+    memref.dealloc %alloc_5 : memref<f64>
+    memref.dealloc %alloc_4 : memref<f64>
+    memref.dealloc %alloc_3 : memref<4096xf64>
+    memref.dealloc %alloc_2 : memref<2xf64>
+    memref.dealloc %alloc_1 : memref<10x2xf64>
+    memref.dealloc %alloc_0 : memref<1xf64>
+    memref.dealloc %alloc : memref<index>
+    return
+  }
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir b/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir
new file mode 100644
index 000000000000..9d3f9b34350a
--- /dev/null
+++ b/mlir/examples/dsp/SimpleBlocks/include/toy/opt.mlir
@@ -0,0 +1,227 @@
+// Output of: build/bin/dsp1 mlir/test/Examples/DspExample/full_dtmf.py -emit=mlir-affine -affineOpt -canonOpt -opt
+module {
+  func.func @main() {
+    %c2047_i64 = arith.constant 2047 : i64
+    %cst = arith.constant 0.49971199035644531 : f64
+    %c11 = arith.constant 11 : index
+    %c10 = arith.constant 10 : index
+    %cst_0 = arith.constant 3.000000e+00 : f64
+    %c-1 = arith.constant -1 : index
+    %cst_1 = arith.constant 7.700000e+02 : f64
+    %cst_2 = arith.constant 1.209000e+03 : f64
+    %cst_3 = arith.constant 6.970000e+02 : f64
+    %cst_4 = arith.constant 1.336000e+03 : f64
+    %cst_5 = arith.constant 9.410000e+02 : f64
+    %cst_6 = arith.constant 1.220000e-04 : f64
+    %cst_7 = arith.constant 4.096000e+03 : f64
+    %cst_8 = arith.constant -2.000000e+00 : f64
+    %cst_9 = arith.constant 3.1415926535897931 : f64
+    %c12 = arith.constant 12 : index
+    %cst_10 = arith.constant 0.000000e+00 : f64
+    %c1_i64 = arith.constant 1 : i64
+    %c0_i64 = arith.constant 0 : i64
+    %c4096 = arith.constant 4096 : index
+    %c1 = arith.constant 1 : index
+    %cst_11 = arith.constant 1.477000e+03 : f64
+    %cst_12 = arith.constant 8.520000e+02 : f64
+    %cst_13 = arith.constant 1.000000e+01 : f64
+    %cst_14 = arith.constant 6.2831853071800001 : f64
+    %cst_15 = arith.constant 8.192000e+03 : f64
+    %cst_16 = arith.constant 5.000000e-01 : f64
+    %cst_17 = arith.constant 9.000000e+00 : f64
+    %c0 = arith.constant 0 : index
+    %alloc = memref.alloc() : memref<index>
+    %alloc_18 = memref.alloc() : memref<1xf64>
+    %alloc_19 = memref.alloc() : memref<10x2xf64>
+    %alloc_20 = memref.alloc() : memref<2xf64>
+    %alloc_21 = memref.alloc() : memref<4096xf64>
+    %alloc_22 = memref.alloc() : memref<f64>
+    %alloc_23 = memref.alloc() : memref<f64>
+    %alloc_24 = memref.alloc() : memref<4096xf64>
+    %alloc_25 = memref.alloc() : memref<4096xf64>
+    %alloc_26 = memref.alloc() : memref<4096xf64>
+    %alloc_27 = memref.alloc() : memref<4096xf64>
+    %alloc_28 = memref.alloc() : memref<f64>
+    %alloc_29 = memref.alloc() : memref<f64>
+    %alloc_30 = memref.alloc() : memref<f64>
+    affine.store %cst_17, %alloc_30[] : memref<f64>
+    affine.store %cst_16, %alloc_29[] : memref<f64>
+    affine.store %cst_15, %alloc_28[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.divf %7, %cst_15 : f64
+      %9 = arith.mulf %8, %cst_12 : f64
+      %10 = arith.mulf %9, %cst_14 : f64
+      %11 = math.sin %10 : f64
+      %12 = arith.mulf %8, %cst_11 : f64
+      %13 = arith.mulf %12, %cst_14 : f64
+      %14 = math.sin %13 : f64
+      %15 = arith.addf %11, %14 : f64
+      %16 = arith.mulf %15, %cst_13 : f64
+      memref.store %16, %alloc_27[%arg0] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = scf.for %arg1 = %c0 to %c12 step %c1 iter_args(%arg2 = %c0_i64) -> (i64) {
+        %10 = arith.index_cast %arg1 : index to i64
+        %11 = arith.shli %c1_i64, %10 : i64
+        %12 = arith.andi %6, %11 : i64
+        %13 = arith.cmpi ne, %12, %c0_i64 : i64
+        %14 = arith.subi %c11, %arg1 : index
+        %15 = arith.index_cast %14 : index to i64
+        %16 = arith.shli %c1_i64, %15 : i64
+        %17 = arith.select %13, %16, %c0_i64 : i64
+        %18 = arith.ori %arg2, %17 : i64
+        scf.yield %18 : i64
+      }
+      %8 = arith.index_cast %7 : i64 to index
+      %9 = memref.load %alloc_27[%arg0] : memref<4096xf64>
+      memref.store %9, %alloc_26[%8] : memref<4096xf64>
+      memref.store %cst_10, %alloc_25[%8] : memref<4096xf64>
+    }
+    scf.for %arg0 = %c0 to %c12 step %c1 {
+      %6 = arith.shli %c1, %arg0 : index
+      %7 = arith.shli %6, %c1 : index
+      scf.for %arg1 = %c0 to %c4096 step %7 {
+        scf.for %arg2 = %c0 to %6 step %c1 {
+          %8 = arith.addi %arg1, %arg2 : index
+          %9 = arith.addi %8, %6 : index
+          %10 = arith.index_cast %arg2 : index to i64
+          %11 = arith.sitofp %10 : i64 to f64
+          %12 = arith.index_cast %7 : index to i64
+          %13 = arith.sitofp %12 : i64 to f64
+          %14 = arith.divf %11, %13 : f64
+          %15 = arith.mulf %14, %cst_8 : f64
+          %16 = arith.mulf %15, %cst_9 : f64
+          %17 = math.cos %16 : f64
+          %18 = math.sin %16 : f64
+          %19 = memref.load %alloc_26[%9] : memref<4096xf64>
+          %20 = memref.load %alloc_25[%9] : memref<4096xf64>
+          %21 = arith.mulf %19, %17 : f64
+          %22 = arith.mulf %20, %18 : f64
+          %23 = arith.subf %21, %22 : f64
+          %24 = arith.mulf %19, %18 : f64
+          %25 = arith.mulf %20, %17 : f64
+          %26 = arith.addf %24, %25 : f64
+          %27 = memref.load %alloc_26[%8] : memref<4096xf64>
+          %28 = memref.load %alloc_25[%8] : memref<4096xf64>
+          %29 = arith.addf %27, %23 : f64
+          %30 = arith.addf %28, %26 : f64
+          %31 = arith.subf %27, %23 : f64
+          %32 = arith.subf %28, %26 : f64
+          %33 = arith.mulf %29, %29 : f64
+          %34 = arith.mulf %30, %30 : f64
+          %35 = arith.addf %33, %34 : f64
+          %36 = math.sqrt %35 : f64
+          %37 = arith.mulf %31, %31 : f64
+          %38 = arith.mulf %32, %32 : f64
+          %39 = arith.addf %37, %38 : f64
+          %40 = math.sqrt %39 : f64
+          memref.store %29, %alloc_26[%8] : memref<4096xf64>
+          memref.store %30, %alloc_25[%8] : memref<4096xf64>
+          memref.store %31, %alloc_26[%9] : memref<4096xf64>
+          memref.store %32, %alloc_25[%9] : memref<4096xf64>
+          memref.store %36, %alloc_24[%8] : memref<4096xf64>
+          memref.store %40, %alloc_24[%9] : memref<4096xf64>
+        }
+      }
+    }
+    affine.store %cst_7, %alloc_23[] : memref<f64>
+    affine.store %cst_6, %alloc_22[] : memref<f64>
+    scf.for %arg0 = %c0 to %c4096 step %c1 {
+      %6 = arith.index_cast %arg0 : index to i64
+      %7 = arith.sitofp %6 : i64 to f64
+      %8 = arith.cmpi sle, %6, %c2047_i64 : i64
+      scf.if %8 {
+        %9 = arith.divf %7, %cst : f64
+        memref.store %9, %alloc_21[%arg0] : memref<4096xf64>
+      } else {
+        %9 = arith.subf %7, %cst_7 : f64
+        %10 = arith.divf %9, %cst : f64
+        memref.store %10, %alloc_21[%arg0] : memref<4096xf64>
+      }
+    }
+    %0:4 = scf.for %arg0 = %c0 to %c4096 step %c1 iter_args(%arg1 = %cst_10, %arg2 = %cst_10, %arg3 = %cst_10, %arg4 = %cst_10) -> (f64, f64, f64, f64) {
+      %6 = memref.load %alloc_21[%arg0] : memref<4096xf64>
+      %7 = memref.load %alloc_24[%arg0] : memref<4096xf64>
+      %8 = arith.cmpf ogt, %6, %cst_10 : f64
+      %9:4 = scf.if %8 -> (f64, f64, f64, f64) {
+        %10 = arith.cmpf ogt, %7, %arg1 : f64
+        %11 = arith.select %10, %7, %arg1 : f64
+        %12 = arith.select %10, %6, %arg3 : f64
+        %13:2 = scf.if %10 -> (f64, f64) {
+          scf.yield %arg1, %arg3 : f64, f64
+        } else {
+          %14 = arith.cmpf ogt, %7, %arg2 : f64
+          %15 = arith.select %14, %7, %arg2 : f64
+          %16 = arith.select %14, %6, %arg4 : f64
+          scf.yield %15, %16 : f64, f64
+        }
+        scf.yield %11, %13#0, %12, %13#1 : f64, f64, f64, f64
+      } else {
+        scf.yield %arg1, %arg2, %arg3, %arg4 : f64, f64, f64, f64
+      }
+      scf.yield %9#0, %9#1, %9#2, %9#3 : f64, f64, f64, f64
+    }
+    memref.store %0#2, %alloc_20[%c0] : memref<2xf64>
+    memref.store %0#3, %alloc_20[%c1] : memref<2xf64>
+    affine.store %cst_5, %alloc_19[0, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[0, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[1, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[1, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[2, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[2, 1] : memref<10x2xf64>
+    affine.store %cst_3, %alloc_19[3, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[3, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[4, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[4, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[5, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[5, 1] : memref<10x2xf64>
+    affine.store %cst_1, %alloc_19[6, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[6, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[7, 0] : memref<10x2xf64>
+    affine.store %cst_2, %alloc_19[7, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[8, 0] : memref<10x2xf64>
+    affine.store %cst_4, %alloc_19[8, 1] : memref<10x2xf64>
+    affine.store %cst_12, %alloc_19[9, 0] : memref<10x2xf64>
+    affine.store %cst_11, %alloc_19[9, 1] : memref<10x2xf64>
+    %1 = memref.load %alloc_20[%c0] : memref<2xf64>
+    %2 = memref.load %alloc_20[%c1] : memref<2xf64>
+    affine.store %c-1, %alloc[] : memref<index>
+    scf.for %arg0 = %c0 to %c10 step %c1 {
+      %6 = memref.load %alloc[] : memref<index>
+      %7 = memref.load %alloc_19[%arg0, %c0] : memref<10x2xf64>
+      %8 = memref.load %alloc_19[%arg0, %c1] : memref<10x2xf64>
+      %9 = arith.subf %7, %1 : f64
+      %10 = arith.subf %8, %2 : f64
+      %11 = math.absf %9 : f64
+      %12 = math.absf %10 : f64
+      %13 = arith.cmpf ole, %11, %cst_0 : f64
+      %14 = arith.cmpf ole, %12, %cst_0 : f64
+      %15 = arith.andi %13, %14 : i1
+      %16 = arith.select %15, %arg0, %6 : index
+      memref.store %16, %alloc[] : memref<index>
+    }
+    %3 = memref.load %alloc[] : memref<index>
+    %4 = arith.index_cast %3 : index to i64
+    %5 = arith.sitofp %4 : i64 to f64
+    memref.store %5, %alloc_18[%c0] : memref<1xf64>
+    dsp.print %alloc_18 : memref<1xf64>
+    memref.dealloc %alloc_30 : memref<f64>
+    memref.dealloc %alloc_29 : memref<f64>
+    memref.dealloc %alloc_28 : memref<f64>
+    memref.dealloc %alloc_27 : memref<4096xf64>
+    memref.dealloc %alloc_26 : memref<4096xf64>
+    memref.dealloc %alloc_25 : memref<4096xf64>
+    memref.dealloc %alloc_24 : memref<4096xf64>
+    memref.dealloc %alloc_23 : memref<f64>
+    memref.dealloc %alloc_22 : memref<f64>
+    memref.dealloc %alloc_21 : memref<4096xf64>
+    memref.dealloc %alloc_20 : memref<2xf64>
+    memref.dealloc %alloc_19 : memref<10x2xf64>
+    memref.dealloc %alloc_18 : memref<1xf64>
+    memref.dealloc %alloc : memref<index>
+    return
+  }
+}
\ No newline at end of file
diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp
index 47f76a2ff96c..93dfaa7fdc9c 100644
--- a/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/mlir/Dialect.cpp
@@ -10,9 +10,9 @@
 // operation verification.
 //
 //===----------------------------------------------------------------------===//
-#include <iostream>
 #include "toy/Dialect.h"
 #include "toy/DebugConfig.h"
+#include <iostream>
 
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -216,7 +216,8 @@ void ConstantOp::print(mlir::OpAsmPrinter &printer) {
 mlir::LogicalResult ConstantOp::verify() {
   // If the return type of the constant is not an unranked tensor, the shape
   // must match the shape of the attribute holding the data.
-  auto resultType = llvm::dyn_cast<mlir::RankedTensorType>(getResult().getType());
+  auto resultType =
+      llvm::dyn_cast<mlir::RankedTensorType>(getResult().getType());
   if (!resultType)
     return success();
 
@@ -241,6 +242,18 @@ mlir::LogicalResult ConstantOp::verify() {
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// ModuloOp
+//===----------------------------------------------------------------------===//
+
+void ModuloOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+void ModuloOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
 //===----------------------------------------------------------------------===//
 // AddOp
 //===----------------------------------------------------------------------===//
@@ -398,6 +411,27 @@ void DivOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
 /// interface.
 void DivOp::inferShapes() { getResult().setType(getLhs().getType()); }
 
+//===----------------------------------------------------------------------===//
+// BitwiseAndOp
+//===----------------------------------------------------------------------===//
+
+void BitwiseAndOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+mlir::ParseResult BitwiseAndOp::parse(mlir::OpAsmParser &parser,
+                                      mlir::OperationState &result) {
+  return parseBinaryOp(parser, result);
+}
+
+void BitwiseAndOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the BitwiseAndOp, this is required by the shape
+/// inference interface.
+void BitwiseAndOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
 //===----------------------------------------------------------------------===//
 // ReturnOp
 //===----------------------------------------------------------------------===//
@@ -426,7 +460,8 @@ mlir::LogicalResult ReturnOp::verify() {
   auto resultType = results.front();
 
   // Check that the result type of the function matches the operand type.
-  if (inputType == resultType || llvm::isa<mlir::UnrankedTensorType>(inputType) ||
+  if (inputType == resultType ||
+      llvm::isa<mlir::UnrankedTensorType>(inputType) ||
       llvm::isa<mlir::UnrankedTensorType>(resultType))
     return mlir::success();
 
@@ -466,40 +501,40 @@ mlir::LogicalResult TransposeOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // DelayOp
 //===----------------------------------------------------------------------===//
 // void DelayOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
 //                          mlir::Value lhs, unsigned rhs){
 void DelayOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                         mlir::Value lhs, mlir::Value rhs){    
-    //
-    // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), builder.getI32Type());
-    state.addTypes(UnrankedTensorType::get(builder.getF64Type())); //working
-    state.addOperands({lhs, rhs});
-    // state.addOperands(value);
-
- }
+                    mlir::Value lhs, mlir::Value rhs) {
+  //
+  // state.addTypes(UnrankedTensorType::get(builder.getF64Type()),
+  // builder.getI32Type());
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // working
+  state.addOperands({lhs, rhs});
+  // state.addOperands(value);
+}
 
- mlir::LogicalResult DelayOp::verify(){
-    // auto inputType1 = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-    // auto inputType2 = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
-    // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
-    // if(!inputType || !resultType)
-    //   return mlir::success();
+mlir::LogicalResult DelayOp::verify() {
+  // Disabled draft of a type check (verify() currently always succeeds):
+  //   inputType1 = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
+  //   inputType2 = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  //   resultType = llvm::dyn_cast<RankedTensorType>(getType());
+  //   if(!inputType || !resultType)
+  //     return mlir::success();
 
-    return mlir::success();
- }
+  return mlir::success();
+}
 
 // void DelayOp::inferShapes() { getResult().setType(getOperand(0).getType()) ;}
-//getLHS defined with Operation as :
-//   fro addOp 
+// getLhs is defined on the operation as:
+//   for AddOp
 //     ::mlir::TypedValue<::mlir::TensorType> AddOp::getLhs() {
-//   return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin());
+//   return
+//   ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin());
 // }
-void DelayOp::inferShapes() { getResult().setType(getLhs().getType()) ;}
-
+void DelayOp::inferShapes() { getResult().setType(getLhs().getType()); }
 
 //===----------------------------------------------------------------------===//
 // GainOp
@@ -507,27 +542,31 @@ void DelayOp::inferShapes() { getResult().setType(getLhs().getType()) ;}
 // void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
 //                          mlir::Value lhs, unsigned rhs){
 // void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-//                          mlir::Value lhs, mlir::Float64Type rhs){    
+//                          mlir::Value lhs, mlir::Float64Type rhs){
 void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                         mlir::Value lhs, mlir::Value rhs){ 
-    // state.addTypes(UnrankedTensorType::get(builder.getF64Type()), builder.getI32Type());
-    // state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-    // state.addTypes({UnrankedTensorType::get(builder.getF64Type()), builder.getF64Type()}); //working
-    state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-    state.addOperands({lhs, rhs});
-    // state.addOperands({rhs});
-    // state.addTypes();
-    // state.addAttribute("rhs", rhs);
-    // state.addAttribute("rhs", builder.getF64FloatAttr(builder.getF64Type()));
-    // state.addAttribute("rhs", builder.getF64Type());
-    // state.addAttribute("rhs", builder.getFloatAttr(builder.getF64Type() , rhs));
-    // state.addOperands(value);
- }
+                   mlir::Value lhs, mlir::Value rhs) {
+  // state.addTypes(UnrankedTensorType::get(builder.getF64Type()),
+  // builder.getI32Type());
+  // state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  // state.addTypes({UnrankedTensorType::get(builder.getF64Type()),
+  // builder.getF64Type()}); //working
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+  // state.addOperands({rhs});
+  // state.addTypes();
+  // state.addAttribute("rhs", rhs);
+  // state.addAttribute("rhs", builder.getF64FloatAttr(builder.getF64Type()));
+  // state.addAttribute("rhs", builder.getF64Type());
+  // state.addAttribute("rhs", builder.getFloatAttr(builder.getF64Type(), rhs));
+  // state.addOperands(value);
+}
 
 //  mlir::LogicalResult GainOp::verify(){
-//     auto inputType1 = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-//     auto inputType2 = llvm::dyn_cast<Float64Type>(getOperand(1).getType());
-//     // auto inputType2 = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+//     auto inputType1 =
+//     llvm::dyn_cast<RankedTensorType>(getOperand(0).getType()); auto
+//     inputType2 = llvm::dyn_cast<Float64Type>(getOperand(1).getType());
+//     // auto inputType2 =
+//     llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
 //     // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 //     // if(!inputType || !resultType)
 //     //   return mlir::success();
@@ -536,92 +575,360 @@ void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
 //  }
 
 // void GainOp::inferShapes() { getResult().setType(getOperand(0).getType()) ;}
-//getLHS defined with Operation as :
-//   fro addOp 
+// getLhs is defined on the Operation as follows,
+//   e.g. for AddOp:
 //     ::mlir::TypedValue<::mlir::TensorType> AddOp::getLhs() {
-//   return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin());
+//   return
+//   ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(0).begin());
 // }
-void GainOp::inferShapes() { getResult().setType(getLhs().getType()) ;}
+void GainOp::inferShapes() { getResult().setType(getLhs().getType()); }
 
 //===----------------------------------------------------------------------===//
- // SubOp
- //===----------------------------------------------------------------------===//
+// SubOp
+//===----------------------------------------------------------------------===//
 
- void SubOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                   mlir::Value lhs, mlir::Value rhs) {
-   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-   state.addOperands({lhs, rhs});
- }
+void SubOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the SubOp, this is required by the shape inference
+/// interface.
+void SubOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// FFTRealOp
+//===----------------------------------------------------------------------===//
+
+void FFTRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value lhs) {
+  state.addTypes(lhs.getType());
+  state.addOperands({lhs});
+}
+
+void FFTRealOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// FFTImagOp
+//===----------------------------------------------------------------------===//
+
+void FFTImagOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value lhs) {
+  state.addTypes(lhs.getType());
+  state.addOperands({lhs});
+}
+
+void FFTImagOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// MatmulOp
+//===----------------------------------------------------------------------===//
+
+void MatmulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+// mlir::ParseResult MatmulOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void MatmulOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+mlir::LogicalResult MatmulOp::verify() {
+  // Operands may still be unranked when the verifier runs (build() creates
+  // unranked results), so the check only applies to ranked 2-D operands.
+  auto lhsType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  auto rhsType = llvm::dyn_cast<RankedTensorType>(getRhs().getType());
+  if (!lhsType || !rhsType)
+    return mlir::success();
+
+  if (lhsType.getRank() != 2 || rhsType.getRank() != 2)
+    return emitOpError("Matmul: both operands must be rank-2 tensors.");
+
+  if (lhsType.getShape()[1] != rhsType.getShape()[0])
+    return emitOpError("Matmul: the second dimension of LHS should be equal to "
+                       "the first dimension of RHS.");
+  return mlir::success();
+}
+
+/// Infer the output shape of the MatmulOp, this is required by the shape
+/// inference interface.
+void MatmulOp::inferShapes() {
+  // The result shape is [lhs dim 0, rhs dim 1], i.e. the rows of the LHS by
+  // the columns of the RHS. Both operands are indexed as 2-D shapes here;
+  // NOTE(review): assumes they are already ranked 2-D tensors -- confirm.
+  auto lhsTensorType = getLhs().getType();
+  auto rhsTensorType = getRhs().getType();
+
+  auto lhsDims = lhsTensorType.getShape();
+  auto rhsDims = rhsTensorType.getShape();
+
+  const int64_t numRows = lhsDims[0];
+  const int64_t numCols = rhsDims[1];
+
+  std::vector<int64_t> resultDims;
+  resultDims.push_back(numRows);
+  resultDims.push_back(numCols);
+
+  // The element type is inherited from the LHS operand.
+  mlir::TensorType resultType = mlir::RankedTensorType::get(
+      resultDims, lhsTensorType.getElementType());
+  getResult().setType(resultType);
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaksOp
+//===----------------------------------------------------------------------===//
+
+void FindPeaksOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value signal, mlir::Value height,
+                        mlir::Value distance) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({signal, height, distance});
+}
+
+void FindPeaksOp::inferShapes() {
+  // At most (len - 1) / distance + 1 peaks can be reported; the result has
+  // one extra trailing slot that provides the number of peaks found.
+  auto signalType = getSignal().getType();
+  const int64_t signalLen = signalType.getShape()[0];
+
+  // The distance is read from the dsp::ConstantOp defining operand 2; bail
+  // out with a diagnostic instead of dereferencing a null defining op.
+  auto distanceConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!distanceConst) {
+    emitOpError("expects the distance operand to be a constant");
+    return;
+  }
+  DenseElementsAttr distanceAttr = distanceConst.getValue();
+  auto elements = distanceAttr.getValues<FloatAttr>();
+  const double distance = elements[0].getValueAsDouble();
+  if (!(distance > 0.0)) { // also rejects NaN; avoids dividing by zero
+    emitOpError("expects a strictly positive distance");
+    return;
+  }
+
+  const auto outputLen = static_cast<int64_t>((signalLen - 1) / distance + 2);
+  std::vector<int64_t> outputShape{outputLen};
+
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+void MaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value input) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({input});
+}
+
+/// Infer the output shape of the MaxOp, this is required by the shape inference
+/// interface.
+void MaxOp::inferShapes() {
+  // The result is a rank-0 tensor: an empty dimension list combined with the
+  // element type of the input.
+  auto inputType = getInput().getType();
+
+  const std::vector<int64_t> scalarShape;
+
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(scalarShape, inputType.getElementType());
+  getResult().setType(resultType);
+}
 
- // mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser,
- //                                mlir::OperationState &result) {
- //   return parseBinaryOp(parser, result);
- // }
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+void MeanOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value input, mlir::Value length) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({input, length});
+}
+
+void MeanOp::inferShapes() {
+  // The result is a rank-0 tensor with the input's element type; the `length`
+  // operand is not consulted when inferring the type.
+  auto inputType = getInput().getType();
+  const std::vector<int64_t> scalarShape;
+
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(scalarShape, inputType.getElementType());
+  getResult().setType(resultType);
+}
+
+//===----------------------------------------------------------------------===//
+// DiffOp
+//===----------------------------------------------------------------------===//
+
+void DiffOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value input, mlir::Value length) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({input, length});
+}
+
+void DiffOp::inferShapes() {
+  // The result is a 1-D tensor one element shorter than dimension 0 of the
+  // input, with the same element type.
+  auto inputType = getInput().getType();
+  const int64_t inputLen = inputType.getShape()[0];
+
+  std::vector<int64_t> outputShape{inputLen - 1};
+
+  mlir::TensorType outputType =
+      mlir::RankedTensorType::get(outputShape, inputType.getElementType());
+  getResult().setType(outputType);
+}
+
+//===----------------------------------------------------------------------===//
+// AbsOp
+//===----------------------------------------------------------------------===//
+
+void AbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value input) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({input});
+}
+
+void AbsOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+//===----------------------------------------------------------------------===//
+// ArgMaxOp
+//===----------------------------------------------------------------------===//
+
+void ArgMaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input, int64_t axis) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addAttribute("axis", builder.getI64IntegerAttr(axis));
+  state.addOperands({input});
+}
+
+void ArgMaxOp::inferShapes() {
+  // Rank-1 input: the result is typed as a one-element tensor.
+  // Otherwise one dimension of the input is kept: dimension 0 when axis == 1,
+  // dimension 1 for any other axis value.
+  auto inputType = getInput().getType();
+  auto inputRank = inputType.getRank();
+  auto inputShape = inputType.getShape();
+
+  if (inputRank == 1) {
+    std::vector<int64_t> outputShape(1, 1);
+    auto outputType =
+        mlir::RankedTensorType::get(outputShape, inputType.getElementType());
+    getResult().setType(outputType);
+    return;
+  }
+
+  int64_t axis = getAxis();
+  int64_t dim = axis == 1 ? 0 : 1;
+  auto outputType =
+      mlir::RankedTensorType::get(inputShape[dim], inputType.getElementType());
+  getResult().setType(outputType);
+}
+
+//===----------------------------------------------------------------------===//
+// PowOp
+//===----------------------------------------------------------------------===//
+
+void PowOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+void PowOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+mlir::LogicalResult PowOp::verify() {
+  auto lhsType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
+
+  if (!lhsType || !resultType)
+    return mlir::success();
 
- // void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+  // The result must have exactly the lhs shape. Compare the ArrayRefs directly
+  // (rank and extents, same order) rather than against the reversed shape.
+  auto resultShape = resultType.getShape();
+  if (lhsType.getShape() != resultShape) {
+    return emitError()
+           << "expected result shape to be the same as the lhs input operand.";
+  }
 
- /// Infer the output shape of the SubOp, this is required by the shape inference
- /// interface.
- void SubOp::inferShapes() { getResult().setType(getLhs().getType()); }
+  return mlir::success();
+}
 
 //===----------------------------------------------------------------------===//
 // zeroCrossCountOp
 //===----------------------------------------------------------------------===//
 
-void zeroCrossCountOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs) {
+void zeroCrossCountOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value lhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   // state.addTypes(builder.getF64Type()));
   // state.addTypes(builder.getI64Type());
   state.addOperands({lhs});
 }
 
-/// Infer the output shape of the zeroCrossCountOp, this is required by the shape inference
- /// interface.
- void zeroCrossCountOp::inferShapes() { getResult().setType(getLhs().getType()); }
-
+/// Infer the output shape of the zeroCrossCountOp, this is required by the
+/// shape inference interface.
+void zeroCrossCountOp::inferShapes() {
+  getResult().setType(getLhs().getType());
+}
 
 //===----------------------------------------------------------------------===//
 // FIRFilterResponseOp
 //===----------------------------------------------------------------------===//
 
-void FIRFilterResponseOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+void FIRFilterResponseOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value lhs,
+                                mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the FIRFilterResponseOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of Lhs + Rhs - 1
-void FIRFilterResponseOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType();
+/// Infer the output shape of the FIRFilterResponseOp, this is required by the
+/// shape inference interface.
+// ToDo -- shape should be the length of Lhs + Rhs - 1
+void FIRFilterResponseOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   auto shapeOfInput = tensorInput.getShape();
 
   auto tensorFilter = getRhs().getType();
   auto shapeOfFilter = tensorFilter.getShape();
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
     shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getLhs().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getLhs().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
-  }
+}
 
-//get rank of Input & Filter -- make sure it is of rank 1 
+// get rank of Input & Filter -- make sure it is of rank 1
 mlir::LogicalResult FIRFilterResponseOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -634,41 +941,72 @@ mlir::LogicalResult FIRFilterResponseOp::verify() {
   // }
 
   return mlir::success();
-} 
+}
+
+//===----------------------------------------------------------------------===//
+// MedianFilterOp
+//===----------------------------------------------------------------------===//
+
+void MedianFilterOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value value) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands(value);
+}
+
+void MedianFilterOp::inferShapes() {
+  // Each dimension of the result is two elements shorter than the input
+  // (output size = input_size - 2).
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  if (!inputType) {
+    // dyn_cast yields a null type for a non-ranked operand; report it rather
+    // than dereferencing the null type below.
+    emitOpError("expects a ranked tensor operand");
+    return;
+  }
+
+  std::vector<int64_t> shapeForOutput;
+
+  // Iterate for each rank : tensor<1x2x3x2> = rank 4
+  for (int64_t dimSize : inputType.getShape())
+    shapeForOutput.push_back(dimSize - 2);
+
+  mlir::TensorType outputType =
+      mlir::RankedTensorType::get(shapeForOutput, inputType.getElementType());
 
+  getResult().setType(outputType);
+}
 
 //===----------------------------------------------------------------------===//
 // SlidingWindowAvgOp
 //===----------------------------------------------------------------------===//
 
-void SlidingWindowAvgOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+void SlidingWindowAvgOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value value) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
 }
 
 void SlidingWindowAvgOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size - 2
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size - 2
   auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
 
   auto shapeOfInput = inputType.getShape();
 
   std::vector<int64_t> shapeForOutput;
 
-  //Iterate for each rank : tensor<1x2x3x2> = rank 4
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
+  // Iterate for each rank : tensor<1x2x3x2> = rank 4
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
     shapeForOutput.push_back(shapeOfInput[i] - 2);
   }
 
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getInput().getType().getElementType());
-    // getOperand().getType());
-    // getOperand().getType().getElementType());
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
+  // getOperand().getType());
+  // getOperand().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult SlidingWindowAvgOp::verify() {
@@ -683,7 +1021,8 @@ mlir::LogicalResult SlidingWindowAvgOp::verify() {
 
   // for(size_t i=0; i < shapeOfInput.size() ; i++){
   //   if(shapeOfInput[i] < 3){
-  //     llvm::errs() << "Warning:SlidingWindowAvgOp = Input size < 3 " << "size= " << shapeOfInput[i] << "\n"  ;
+  //     llvm::errs() << "Warning:SlidingWindowAvgOp = Input size < 3 " <<
+  //     "size= " << shapeOfInput[i] << "\n"  ;
   //   }
   // }
 
@@ -694,221 +1033,226 @@ mlir::LogicalResult SlidingWindowAvgOp::verify() {
 // DownsamplingOp
 //===----------------------------------------------------------------------===//
 
-void DownsamplingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+void DownsamplingOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value lhs,
+                           mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the DownsamplingOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of Lhs + Rhs - 1
-void DownsamplingOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType();
+/// Infer the output shape of the DownsamplingOp, this is required by the shape
+/// inference interface.
+// ToDo -- shape should be the length of Lhs + Rhs - 1
+void DownsamplingOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   auto shapeOfInput = tensorInput.getShape();
 
-  // auto tensorDownsampling = getRhs().getType(); 
-  // auto shapeOfDownsampling = tensorDownsampling.getShape(); //shape is the dimension
-  
+  // auto tensorDownsampling = getRhs().getType();
+  // auto shapeOfDownsampling = tensorDownsampling.getShape(); //shape is the
+  // dimension
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t SecondValueInt = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   Value downsampling2ndArg = getOperand(1);
-  dsp::ConstantOp constantOp2ndArg = downsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
-  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+  dsp::ConstantOp constantOp2ndArg =
+      downsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
+  // The rate is taken from the first element of the constant's value.
+  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
   auto elements = constantRhsValue.getValues<FloatAttr>();
   float SecondValue = elements[0].getValueAsDouble();
-  SecondValueInt = (int64_t) SecondValue;
-  // llvm::errs() << "Downsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate
-    
-
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
-    double GetLenForOutput  = static_cast<double>(shapeOfInput[i] )/ SecondValueInt ;
-    if(fmod(GetLenForOutput, 1.0) != 0) {
-      //if remainder remains
+  SecondValueInt = (int64_t)SecondValue;
+  // llvm::errs() << "Downsampling: SamplingRate: " << SecondValueInt << " \n";
+  // //downsamplingRate
+
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
+    double GetLenForOutput =
+        static_cast<double>(shapeOfInput[i]) / SecondValueInt;
+    if (fmod(GetLenForOutput, 1.0) != 0) {
+      // if remainder remains
       GetLenForOutput = ceil(GetLenForOutput);
     }
-    int64_t OutlenInt = static_cast<int64_t> (GetLenForOutput);
+    int64_t OutlenInt = static_cast<int64_t>(GetLenForOutput);
     llvm::errs() << "Downsampling: OutlenInt: " << OutlenInt << " \n";
     shapeForOutput.push_back(OutlenInt);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getLhs().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getLhs().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
-  }
+}
 
-//get rank of Input & Downsampling -- make sure it is of rank 1 
+// get rank of Input & Downsampling -- make sure it is of rank 1
 mlir::LogicalResult DownsamplingOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto samplingRateType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto samplingRateType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
   // auto samplingRateRank = samplingRateType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  // //once ensured only 1 rank from above -- also make sure there is just 1 elem  
-  // if( inputRank != 1 || samplingRateRank != 0 )
+  // // llvm::errs() << "inputRank: " << inputRank
+  // //              << " samplingRateRank: " << samplingRateRank << "\n";
+  // // once the ranks are ensured above, also make sure there is just 1 elem
+  // if( inputRank != 1 || samplingRateRank != 0 )
   // {
-  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  //   return emitError()
+  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " <<
+  //   samplingRateRank << "\n"; return emitError()
   //          << "expected rank of input & Downsampling is 1";
   // }
   return mlir::success();
-} 
+}
 
 //===----------------------------------------------------------------------===//
 // UpsamplingOp
 //===----------------------------------------------------------------------===//
 
 void UpsamplingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+                         mlir::Value lhs, mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the UpsamplingOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs
-void UpsamplingOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType();
+/// Infer the output shape of the UpsamplingOp, this is required by the shape
+/// inference interface.
+// ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs
+void UpsamplingOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   auto shapeOfInput = tensorInput.getShape();
 
-  // auto tensorUpsampling = getRhs().getType(); 
+  // auto tensorUpsampling = getRhs().getType();
   // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length
-  
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t SecondValueInt = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   Value upsampling2ndArg = getOperand(1);
-  dsp::ConstantOp constantOp2ndArg = upsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
-  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+  dsp::ConstantOp constantOp2ndArg =
+      upsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
+  // The rate is taken from the first element of the constant's value.
+  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
   auto elements = constantRhsValue.getValues<FloatAttr>();
   float SecondValue = elements[0].getValueAsDouble();
-  SecondValueInt = (int64_t) SecondValue;
-  // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate
-    
-
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
-    double GetLenForOutput  = static_cast<double>(shapeOfInput[i] ) * SecondValueInt ;
-    int64_t OutlenInt = static_cast<int64_t> (GetLenForOutput);
+  SecondValueInt = (int64_t)SecondValue;
+  // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n";
+  // //downsamplingRate
+
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
+    double GetLenForOutput =
+        static_cast<double>(shapeOfInput[i]) * SecondValueInt;
+    int64_t OutlenInt = static_cast<int64_t>(GetLenForOutput);
     llvm::errs() << "Upsampling: OutlenInt: " << OutlenInt << " \n";
     shapeForOutput.push_back(OutlenInt);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getLhs().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getLhs().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
-  }
+}
 
-//get rank of Input & Upsampling -- make sure it is of rank 1 
+// get rank of Input & Upsampling -- make sure it is of rank 1
 mlir::LogicalResult UpsamplingOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto samplingRateType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto samplingRateType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
   // auto samplingRateRank = samplingRateType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  // //once ensured only 1 rank from above -- also make sure there is just 1 elem  
-  // if( inputRank != 1 || samplingRateRank != 0 )
+  // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " <<
+  // samplingRateRank << "\n";
+  // // once ensured only 1 rank from above -- also ensure there is just 1 elem
+  // if( inputRank != 1 || samplingRateRank != 0 )
   // {
-  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  //   return emitError()
+  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " <<
+  //   samplingRateRank << "\n"; return emitError()
   //          << "expected rank of input is 1 & Upsampling is 0";
   // }
   return mlir::success();
-} 
-
+}
 
 //===----------------------------------------------------------------------===//
 // LowPassFilter1stOrderOp
 //===----------------------------------------------------------------------===//
 
-void LowPassFilter1stOrderOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+void LowPassFilter1stOrderOp::build(mlir::OpBuilder &builder,
+                                    mlir::OperationState &state,
+                                    mlir::Value lhs, mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the LowPassFilter1stOrderOp, this is required by the shape inference
-/// interface.
-void LowPassFilter1stOrderOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType(); 
+/// Infer the output shape of the LowPassFilter1stOrderOp, this is required by
+/// the shape inference interface.
+void LowPassFilter1stOrderOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   getResult().setType(tensorInput);
 }
 
-//get rank of Input & alphaValue -- make sure it is of rank 1 
+// get rank of Input & alphaValue -- make sure it is of rank 1
 mlir::LogicalResult LowPassFilter1stOrderOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto alphaValueType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto alphaValueType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
   // auto alphaValueRank = alphaValueType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above -- also make sure there is just 1 elem  
-  // if( inputRank != 1 || alphaValueRank != 0 )
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // // once ensured only 1 rank from above -- also ensure there is just 1 elem
+  // if( inputRank != 1 || alphaValueRank != 0 )
   // {
-  //   llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  //   return emitError()
+  //   llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  //   alphaValueRank << "\n"; return emitError()
   //          << "expected rank of input & Upsampling is 1";
   // }
   return mlir::success();
-} 
+}
 
 //===----------------------------------------------------------------------===//
 // HighPassFilterOp
 //===----------------------------------------------------------------------===//
 
-void HighPassFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+void HighPassFilterOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value value) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
 }
 
 void HighPassFilterOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   getResult().setType(tensorInput);
-
 }
 
 mlir::LogicalResult HighPassFilterOp::verify() {
@@ -916,7 +1260,7 @@ mlir::LogicalResult HighPassFilterOp::verify() {
   // auto inputRank = inputType.getRank();
 
   // llvm::errs() << "inputRank: " << inputRank <<  "\n";
-  // //once ensured only 1 rank from above --   
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -926,25 +1270,24 @@ mlir::LogicalResult HighPassFilterOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // FFT1DOp
 //===----------------------------------------------------------------------===//
 
 void FFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
-  DEBUG_PRINT_NO_ARGS() ;
-  state.addTypes({UnrankedTensorType::get(builder.getF64Type()), 
-                UnrankedTensorType::get(builder.getF64Type())});
+                    mlir::Value value) {
+  DEBUG_PRINT_NO_ARGS();
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type()),
+                  UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands(value);
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FFT1DOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   // getResult().setType(tensorInput);
   getResult(0).setType(tensorInput);
   getResult(1).setType(tensorInput);
@@ -952,12 +1295,13 @@ void FFT1DOp::inferShapes() {
 }
 
 mlir::LogicalResult FFT1DOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -967,36 +1311,36 @@ mlir::LogicalResult FFT1DOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // IFFT1DOp
 //===----------------------------------------------------------------------===//
 
 void IFFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value real, mlir::Value img) {
-  DEBUG_PRINT_NO_ARGS() ;
+                     mlir::Value real, mlir::Value img) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({real , img});
-  DEBUG_PRINT_NO_ARGS() ;
+  state.addOperands({real, img});
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void IFFT1DOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getReal().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getReal().getType();
   getResult().setType(tensorInput);
   // getResult(0).setType(tensorInput);
   // getResult(1).setType(tensorInput);
 }
 
 mlir::LogicalResult IFFT1DOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1010,55 +1354,54 @@ mlir::LogicalResult IFFT1DOp::verify() {
 // HammingWindowOp
 //===----------------------------------------------------------------------===//
 
-void HammingWindowOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+void HammingWindowOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value value) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
 }
 
 void HammingWindowOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t FirstOpInt = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract the value from the SSA value:
+  //   1. get the Operand
+  //   2. convert it to ConstantOp
+  //   3. convert it to the corresponding elements attribute
+  //   4. extract the value as float, then convert to int
   // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
   Value hammingLen = getOperand();
-  dsp::ConstantOp constantOp1stArg = hammingLen.getDefiningOp<dsp::ConstantOp>();
+  dsp::ConstantOp constantOp1stArg =
+      hammingLen.getDefiningOp<dsp::ConstantOp>();
   // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float FirstValue = elements[0].getValueAsDouble();
-  FirstOpInt = (int64_t) FirstValue;
+  FirstOpInt = (int64_t)FirstValue;
   // llvm::errs() << "FirstOpInt " << FirstOpInt << "\n" ;
   // llvm::errs() << "shapeOfInput.size() " << shapeOfInput.size() << "\n" ;
 
   // for(size_t i=0; i < shapeOfInput.size() ; i++){
-    // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
-    shapeForOutput.push_back(FirstOpInt);
+  // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
+  shapeForOutput.push_back(FirstOpInt);
   // }
 
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getInput().getType().getElementType());
-    // getOperand().getType());
-    // getOperand().getType().getElementType());
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
+  // getOperand().getType());
+  // getOperand().getType().getElementType());
 
   getResult().setType(outputType);
   // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
-  
-
 }
 
 mlir::LogicalResult HammingWindowOp::verify() {
@@ -1073,7 +1416,8 @@ mlir::LogicalResult HammingWindowOp::verify() {
 
   // for(size_t i=0; i < shapeOfInput.size() ; i++){
   //   if(shapeOfInput[i] < 3){
-  //     llvm::errs() << "Warning:HammingWindowOp = Input size < 3 " << "size= " << shapeOfInput[i] << "\n"  ;
+  //     llvm::errs() << "Warning:HammingWindowOp = Input size < 3 " << "size= "
+  //     << shapeOfInput[i] << "\n"  ;
   //   }
   // }
 
@@ -1085,7 +1429,7 @@ mlir::LogicalResult HammingWindowOp::verify() {
 //===----------------------------------------------------------------------===//
 
 void DCTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+                  mlir::Value value) {
   // DEBUG_PRINT_NO_ARGS() ;
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
@@ -1093,10 +1437,10 @@ void DCTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
 }
 
 void DCTOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   getResult().setType(tensorInput);
   // getResult(0).setType(tensorInput);
   // getResult(1).setType(tensorInput);
@@ -1107,46 +1451,42 @@ mlir::LogicalResult DCTOp::verify() {
   auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   auto inputRank = inputType.getRank();
 
-  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  //once ensured only 1 rank from above --   
-  if( inputRank != 1 )
-  {
-    llvm::errs() << "inputRank: " << inputRank <<  "\n";
-    return emitError()
-           << "expected rank of input  is 1";
+  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // once ensured only 1 rank from above --
+  if (inputRank != 1) {
+    llvm::errs() << "inputRank: " << inputRank << "\n";
+    return emitError() << "expected rank of input  is 1";
   }
   return mlir::success();
 }
 
-
-
 //===----------------------------------------------------------------------===//
 // filterOp
 //===----------------------------------------------------------------------===//
 
 void filterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value b, mlir::Value a, mlir::Value x) {
+                     mlir::Value b, mlir::Value a, mlir::Value x) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({b, a, x});
 }
 
+/// Infer the output shape of the filterOp, this is required by the shape
+/// inference interface.
+// ToDo -- shape should be the length of Lhs + Rhs - 1
+void filterOp::inferShapes() {
+  // The shape rule in the ToDo above is not implemented yet;
+  // for now the result simply takes the type of operand `x`.
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getX().getType();
+  getResult().setType(tensorInput);
+}
 
-
-/// Infer the output shape of the filterOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of Lhs + Rhs - 1
-void filterOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getX().getType();
-  getResult().setType(tensorInput );
-  }
-
-//get rank of Input & Filter -- make sure it is of rank 1 
+// get rank of Input & Filter -- make sure it is of rank 1
 mlir::LogicalResult filterOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -1159,15 +1499,14 @@ mlir::LogicalResult filterOp::verify() {
   // }
 
   return mlir::success();
-} 
-
+}
 
 //===----------------------------------------------------------------------===//
 // SumOp
 //===----------------------------------------------------------------------===//
 
 void SumOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+                  mlir::Value value) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
 }
@@ -1179,8 +1518,8 @@ void SumOp::inferShapes() {
 
   shapeForOutput.push_back(1);
 
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
   getResult().setType(manipulatedType);
 }
 
@@ -1199,96 +1538,99 @@ mlir::LogicalResult SumOp::verify() {
   return mlir::success();
 }
 
- //===----------------------------------------------------------------------===//
- // CosOp
- //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// CosOp
+//===----------------------------------------------------------------------===//
 
- void CosOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                         mlir::Value value) {
-   // DEBUG_PRINT_NO_ARGS() ;
-   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-   state.addOperands(value);
-   // DEBUG_PRINT_NO_ARGS() ;
- }
+void CosOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  // DEBUG_PRINT_NO_ARGS() ;
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands(value);
+  // DEBUG_PRINT_NO_ARGS() ;
+}
 
- void CosOp::inferShapes() {
-   //for each rank
-   //Get the shape/size of input 
-   //output size = input_size 
-   auto tensorInput =  getInput().getType(); 
-   getResult().setType(tensorInput);
-   // getResult(0).setType(tensorInput);
-   // getResult(1).setType(tensorInput);
- }
+void CosOp::inferShapes() {
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
+  getResult().setType(tensorInput);
+  // getResult(0).setType(tensorInput);
+  // getResult(1).setType(tensorInput);
+}
 
- mlir::LogicalResult CosOp::verify() {
-   // DEBUG_PRINT_NO_ARGS() ;
+mlir::LogicalResult CosOp::verify() {
+  // DEBUG_PRINT_NO_ARGS() ;
   //  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   //  auto inputRank = inputType.getRank();
 
-  //  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  //  //once ensured only 1 rank from above --   
+  //  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  //  alphaValueRank << "\n";
+  //  //once ensured only 1 rank from above --
   //  if( inputRank != 1 )
   //  {
   //    llvm::errs() << "inputRank: " << inputRank <<  "\n";
   //    return emitError()
   //           << "expected rank of input  is 1";
   //  }
-   return mlir::success();
- }
+  return mlir::success();
+}
 
- //===----------------------------------------------------------------------===//
- // SinOp
- //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// SinOp
+//===----------------------------------------------------------------------===//
 
- void SinOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                         mlir::Value value) {
-   // DEBUG_PRINT_NO_ARGS() ;
-   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-   state.addOperands(value);
-   // DEBUG_PRINT_NO_ARGS() ;
- }
+void SinOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  // DEBUG_PRINT_NO_ARGS() ;
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands(value);
+  // DEBUG_PRINT_NO_ARGS() ;
+}
 
- void SinOp::inferShapes() {
-   //for each rank
-   //Get the shape/size of input 
-   //output size = input_size 
-   auto tensorInput =  getInput().getType(); 
-   getResult().setType(tensorInput);
-   // getResult(0).setType(tensorInput);
-   // getResult(1).setType(tensorInput);
- }
+void SinOp::inferShapes() {
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
+  getResult().setType(tensorInput);
+  // getResult(0).setType(tensorInput);
+  // getResult(1).setType(tensorInput);
+}
 
- mlir::LogicalResult SinOp::verify() {
-   // DEBUG_PRINT_NO_ARGS() ;
+mlir::LogicalResult SinOp::verify() {
+  // DEBUG_PRINT_NO_ARGS() ;
   //  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   //  auto inputRank = inputType.getRank();
 
-  //  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  //  //once ensured only 1 rank from above --   
+  //  // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  //  alphaValueRank << "\n";
+  //  //once ensured only 1 rank from above --
   //  if( inputRank != 1 )
   //  {
   //    llvm::errs() << "inputRank: " << inputRank <<  "\n";
   //    return emitError()
   //           << "expected rank of input  is 1";
   //  }
-   return mlir::success();
- }
+  return mlir::success();
+}
 
 //===----------------------------------------------------------------------===//
 // SquareOp
 //===----------------------------------------------------------------------===//
 
 void SquareOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
+                     mlir::Value value) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(value);
 }
 
 void SquareOp::inferShapes() {
-  auto tensorInput =  getInput().getType();
-  // mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          // getInput().getType().getElementType());
+  auto tensorInput = getInput().getType();
+  // mlir::TensorType manipulatedType =
+  // mlir::RankedTensorType::get(shapeForOutput,
+  // getInput().getType().getElementType());
   getResult().setType(tensorInput);
 }
 
@@ -1313,17 +1655,17 @@ mlir::LogicalResult SquareOp::verify() {
 
 void FFT1DRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
                         mlir::Value value) {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands(value);
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FFT1DRealOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   // getResult().setType(tensorInput);
   getResult().setType(tensorInput);
   // getResult(2).setType(tensorInput);
@@ -1334,8 +1676,9 @@ mlir::LogicalResult FFT1DRealOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1350,30 +1693,31 @@ mlir::LogicalResult FFT1DRealOp::verify() {
 //===----------------------------------------------------------------------===//
 
 void FFT1DImgOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
-  DEBUG_PRINT_NO_ARGS() ;
+                       mlir::Value value) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands(value);
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FFT1DImgOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   // getResult().setType(tensorInput);
   getResult().setType(tensorInput);
   // getResult(2).setType(tensorInput);
 }
 
 mlir::LogicalResult FFT1DImgOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1388,30 +1732,30 @@ mlir::LogicalResult FFT1DImgOp::verify() {
 //===----------------------------------------------------------------------===//
 
 void SincOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value wc, mlir::Value n) {
-  DEBUG_PRINT_NO_ARGS() ;
+                   mlir::Value wc, mlir::Value n) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({wc, n});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void SincOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract the value from the SSA value:
+  //   1. get the Operand
+  //   2. convert it to ConstantOp
+  //   3. convert it to the corresponding elements attribute
+  //   4. extract the value as float, then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1419,26 +1763,25 @@ void SincOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (int64_t) LenN;
+  GetLen = (int64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getWc().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getWc().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult SincOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1452,11 +1795,12 @@ mlir::LogicalResult SincOp::verify() {
 // GetElemAtIndxOp
 //===----------------------------------------------------------------------===//
 
-void GetElemAtIndxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input, mlir::Value indx) {
+void GetElemAtIndxOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value input,
+                            mlir::Value indx) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input, indx} );
+  state.addOperands({input, indx});
   DEBUG_PRINT_NO_ARGS();
 }
 
@@ -1467,8 +1811,8 @@ void GetElemAtIndxOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
   shapeForOutput.push_back(1);
 
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
   getResult().setType(manipulatedType);
   DEBUG_PRINT_NO_ARGS();
 }
@@ -1488,16 +1832,151 @@ mlir::LogicalResult GetElemAtIndxOp::verify() {
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// GetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+void GetSingleElemAtIdxOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state, mlir::Value input,
+                                 mlir::Value indx) {
+  // Operands are recorded as (input, index). The result is declared as an
+  // unranked f64 tensor here; inferShapes() assigns the ranked type.
+  state.addOperands({input, indx});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+void GetSingleElemAtIdxOp::inferShapes() {
+  // Empty shape => rank-0 tensor with the element type of the input.
+  getResult().setType(mlir::RankedTensorType::get(
+      /*shape=*/{}, getInput().getType().getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+void Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state, mlir::Value input,
+                                 mlir::Value length) {
+  // Result starts unranked (f64); inferShapes() makes it a rank-0 tensor.
+  state.addOperands({input, length});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+void Diff2MeanOptimizedOp::inferShapes() {
+  getResult().setType(mlir::RankedTensorType::get(
+      /*shape=*/{}, getInput().getType().getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaks2Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+void FindPeaks2Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder,
+                                           mlir::OperationState &state,
+                                           mlir::Value signal,
+                                           mlir::Value height,
+                                           mlir::Value distance) {
+  // Result starts unranked (f64); inferShapes() makes it a rank-0 tensor.
+  state.addOperands({signal, height, distance});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+void FindPeaks2Diff2MeanOptimizedOp::inferShapes() {
+  getResult().setType(mlir::RankedTensorType::get(
+      /*shape=*/{}, getSignal().getType().getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// Median2SlidingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+void Median2SlidingOptimizedOp::build(mlir::OpBuilder &builder,
+                                      mlir::OperationState &state,
+                                      mlir::Value input) {
+  // Result starts as an unranked f64 tensor; inferShapes() ranks it.
+  state.addOperands(input);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+void Median2SlidingOptimizedOp::inferShapes() {
+  // Every extent of the result is the matching input extent minus 4.
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  // dyn_cast yields null for an unranked input: nothing can be inferred yet,
+  // so keep the result type as is instead of dereferencing null.
+  if (!inputType)
+    return;
+
+  std::vector<int64_t> shapeForOutput;
+  shapeForOutput.reserve(inputType.getShape().size());
+  for (int64_t extent : inputType.getShape()) {
+    // Dynamic extents stay dynamic; static ones are clamped at zero because
+    // a negative extent is not a valid tensor dimension.
+    if (!mlir::ShapedType::isDynamic(extent))
+      extent = extent > 4 ? extent - 4 : 0;
+    shapeForOutput.push_back(extent);
+  }
+
+  getResult().setType(
+      mlir::RankedTensorType::get(shapeForOutput, inputType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// LMS2FindPeaksOptimizedOp
+//===----------------------------------------------------------------------===//
+
+void LMS2FindPeaksOptimizedOp::build(mlir::OpBuilder &builder,
+                                     mlir::OperationState &state,
+                                     mlir::Value lhs, mlir::Value rhs,
+                                     mlir::Value mu, mlir::Value filterLen,
+                                     mlir::Value height, mlir::Value distance) {
+  // Result starts as an unranked f64 tensor; inferShapes() ranks it.
+  state.addOperands({lhs, rhs, mu, filterLen, height, distance});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+void LMS2FindPeaksOptimizedOp::inferShapes() {
+  // Result layout: at most (len - 1) / distance + 1 peaks plus one trailing
+  // slot for the number of peaks, i.e. (len - 1) / distance + 2 elements.
+  auto signalType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  // No static leading extent => size unknown; keep the result type as is.
+  if (!signalType || signalType.getRank() < 1 || signalType.isDynamicDim(0))
+    return;
+  const int64_t signalLen = signalType.getDimSize(0);
+
+  // The distance (operand 5) is read at compile time, so it must come from
+  // a dsp::ConstantOp; getDefiningOp returns null for anything else.
+  auto distanceOp = getOperand(5).getDefiningOp<dsp::ConstantOp>();
+  if (!distanceOp) {
+    llvm::errs() << "LMS2FindPeaksOptimizedOp: distance must be a constant\n";
+    return;
+  }
+  DenseElementsAttr distanceAttr = distanceOp.getValue();
+  if (distanceAttr.empty())
+    return;
+  const double distance =
+      distanceAttr.getValues<FloatAttr>()[0].getValueAsDouble();
+  // A non-positive (or NaN) distance cannot give a valid extent.
+  if (!(distance > 0.0)) {
+    llvm::errs() << "LMS2FindPeaksOptimizedOp: distance must be > 0\n";
+    return;
+  }
+
+  const auto outputLen = static_cast<int64_t>((signalLen - 1) / distance + 2);
+  getResult().setType(
+      mlir::RankedTensorType::get({outputLen}, signalType.getElementType()));
+}
 
 //===----------------------------------------------------------------------===//
 // SetElemAtIndxOp
 //===----------------------------------------------------------------------===//
 
-void SetElemAtIndxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input, mlir::Value indx, mlir::Value val) {
+void SetElemAtIndxOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value input,
+                            mlir::Value indx, mlir::Value val) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input, indx, val} );
+  state.addOperands({input, indx, val});
   DEBUG_PRINT_NO_ARGS();
 }
 
@@ -1508,45 +1987,44 @@ void SetElemAtIndxOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
   shapeForOutput.push_back(1);
 
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
   getResult().setType(manipulatedType);
   DEBUG_PRINT_NO_ARGS();
 }
 
-mlir::LogicalResult SetElemAtIndxOp::verify() {
-  return mlir::success();
-}
+mlir::LogicalResult SetElemAtIndxOp::verify() { return mlir::success(); }
 
 //===----------------------------------------------------------------------===//
 // LowPassFIRFilterOp
 //===----------------------------------------------------------------------===//
 
-void LowPassFIRFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value wc, mlir::Value n) {
-  DEBUG_PRINT_NO_ARGS() ;
+void LowPassFIRFilterOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value wc,
+                               mlir::Value n) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({wc, n});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void LowPassFIRFilterOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1554,30 +2032,27 @@ void LowPassFIRFilterOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //int64_t N = tensorType.getShape()[0];  
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
+  // int64_t N = tensorType.getShape()[0];
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getWc().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getWc().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult LowPassFIRFilterOp::verify() {
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1585,17 +2060,16 @@ mlir::LogicalResult LowPassFIRFilterOp::verify() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //filter-order even not supported -- so making it odd
-  if(GetLen % 2 == 0 )
-  {
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
+
+  // An even filter length is not supported -- reject it.
+  if (GetLen % 2 == 0) {
     // GetLen = GetLen + 1;
     llvm::errs() << "N for lowPassFilter must be odd but is " << GetLen << "\n";
-    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); 
-    return mlir::failure(); 
+    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen);
+    return mlir::failure();
   }
   return mlir::success();
 }
@@ -1605,19 +2079,20 @@ mlir::LogicalResult LowPassFIRFilterOp::verify() {
 //===----------------------------------------------------------------------===//
 
 void LMSFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                   mlir::Value lhs, mlir::Value rhs, mlir::Value mu, mlir::Value filterLen, mlir::Value iters) {
-  
+                        mlir::Value lhs, mlir::Value rhs, mlir::Value mu,
+                        mlir::Value filterLen, mlir::Value iters) {
+
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-   state.addOperands({lhs, rhs, mu, filterLen, iters});
+  state.addOperands({lhs, rhs, mu, filterLen, iters});
 }
 
-
 void LMSFilterOp::inferShapes() { getResult().setType(getLhs().getType()); }
 
 mlir::LogicalResult LMSFilterOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -1632,36 +2107,36 @@ mlir::LogicalResult LMSFilterOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // HighPassFIRFilterOp
 //===----------------------------------------------------------------------===//
 
-void HighPassFIRFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value wc, mlir::Value n) {
-  DEBUG_PRINT_NO_ARGS() ;
+void HighPassFIRFilterOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value wc,
+                                mlir::Value n) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({wc, n});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void HighPassFIRFilterOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1669,26 +2144,25 @@ void HighPassFIRFilterOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (int64_t) LenN;
+  GetLen = (int64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getWc().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getWc().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult HighPassFIRFilterOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1702,31 +2176,32 @@ mlir::LogicalResult HighPassFIRFilterOp::verify() {
 // GetRangeOfVectorOp
 //===----------------------------------------------------------------------===//
 
-void GetRangeOfVectorOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value first, mlir::Value N, mlir::Value step) {
-  DEBUG_PRINT_NO_ARGS() ;
+void GetRangeOfVectorOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value first,
+                               mlir::Value N, mlir::Value step) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({first, N, step});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void GetRangeOfVectorOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1734,26 +2209,25 @@ void GetRangeOfVectorOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (int64_t) LenN;
+  GetLen = (int64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getFirst().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getFirst().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult GetRangeOfVectorOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -1767,31 +2241,32 @@ mlir::LogicalResult GetRangeOfVectorOp::verify() {
 // FIRFilterHammingOptimizedOp
 //===----------------------------------------------------------------------===//
 
-void FIRFilterHammingOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value wc, mlir::Value n) {
-  DEBUG_PRINT_NO_ARGS() ;
+void FIRFilterHammingOptimizedOp::build(mlir::OpBuilder &builder,
+                                        mlir::OperationState &state,
+                                        mlir::Value wc, mlir::Value n) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({wc, n});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FIRFilterHammingOptimizedOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1799,30 +2274,27 @@ void FIRFilterHammingOptimizedOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //int64_t N = tensorType.getShape()[0];  
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
+  // int64_t N = tensorType.getShape()[0];
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getWc().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getWc().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() {
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1830,17 +2302,16 @@ mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //filter-order even not supported -- so making it odd
-  if(GetLen % 2 == 0 )
-  {
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
+
+  // An even filter length is not supported -- reject it.
+  if (GetLen % 2 == 0) {
     // GetLen = GetLen + 1;
     llvm::errs() << "N for lowPassFilter must be odd but is " << GetLen << "\n";
-    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); 
-    return mlir::failure(); 
+    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen);
+    return mlir::failure();
   }
   return mlir::success();
 }
@@ -1849,31 +2320,32 @@ mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() {
 // HighPassFIRHammingOptimizedOp
 //===----------------------------------------------------------------------===//
 
-void HighPassFIRHammingOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value wc, mlir::Value n) {
-  DEBUG_PRINT_NO_ARGS() ;
+void HighPassFIRHammingOptimizedOp::build(mlir::OpBuilder &builder,
+                                          mlir::OperationState &state,
+                                          mlir::Value wc, mlir::Value n) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands({wc, n});
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void HighPassFIRHammingOptimizedOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  // auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  //  auto inputType = llvm::dyn_cast<RankedTensorType>(getN().getType());
 
   // auto shapeOfInput = inputType.getShape();
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1881,30 +2353,27 @@ void HighPassFIRHammingOptimizedOp::inferShapes() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //int64_t N = tensorType.getShape()[0];  
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
 
+  // int64_t N = tensorType.getShape()[0];
 
   shapeForOutput.push_back(GetLen);
-  mlir::TensorType outputType = mlir::RankedTensorType::get(shapeForOutput, 
-    getWc().getType().getElementType());
-
+  mlir::TensorType outputType = mlir::RankedTensorType::get(
+      shapeForOutput, getWc().getType().getElementType());
 
   getResult().setType(outputType);
-
 }
 
 mlir::LogicalResult HighPassFIRHammingOptimizedOp::verify() {
   uint64_t GetLen = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value inputLen = getOperand(1);
   dsp::ConstantOp constantOp1stArg = inputLen.getDefiningOp<dsp::ConstantOp>();
@@ -1912,22 +2381,20 @@ mlir::LogicalResult HighPassFIRHammingOptimizedOp::verify() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float LenN = elements[0].getValueAsDouble();
-  GetLen = (uint64_t) LenN;
+  GetLen = (uint64_t)LenN;
   DEBUG_PRINT_WITH_ARGS(GetLen);
-  DEBUG_PRINT_WITH_ARGS("GetLen= " , GetLen);
-  
-  //filter-order even not supported -- so making it odd
-  if(GetLen % 2 == 0 )
-  {
+  DEBUG_PRINT_WITH_ARGS("GetLen= ", GetLen);
+
+  // An even filter length is not supported -- reject it.
+  if (GetLen % 2 == 0) {
     // GetLen = GetLen + 1;
     llvm::errs() << "N for lowPassFilter must be odd but is " << GetLen << "\n";
-    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen); 
-    return mlir::failure(); 
+    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetLen);
+    return mlir::failure();
   }
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // ThresholdOp
 //===----------------------------------------------------------------------===//
@@ -1936,23 +2403,23 @@ void ThresholdOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
                         mlir::Value input, mlir::Value threshld) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input, threshld} );
+  state.addOperands({input, threshld});
   DEBUG_PRINT_NO_ARGS();
 }
 
 void ThresholdOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
-  auto tensorInput =  getInput().getType();
+  auto tensorInput = getInput().getType();
   getResult().setType(tensorInput);
   DEBUG_PRINT_NO_ARGS();
 }
 
 mlir::LogicalResult ThresholdOp::verify() {
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value threshold = getOperand(1);
   dsp::ConstantOp constantOp1stArg = threshold.getDefiningOp<dsp::ConstantOp>();
@@ -1960,52 +2427,54 @@ mlir::LogicalResult ThresholdOp::verify() {
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
   float GetThresholdVal = elements[0].getValueAsDouble();
-  
+
   DEBUG_PRINT_WITH_ARGS(GetThresholdVal);
-  DEBUG_PRINT_WITH_ARGS("GetThresholdVal= " , GetThresholdVal);
-  
-  //filter-order even not supported -- so making it odd
-  if(GetThresholdVal <= 0 )
-  {
+  DEBUG_PRINT_WITH_ARGS("GetThresholdVal= ", GetThresholdVal);
+
+  // The threshold must be strictly positive; reject zero or negative values.
+  if (GetThresholdVal <= 0) {
     // GetThresholdVal = GetThresholdVal + 1;
-    llvm::errs() << "threshold value must be >= 0 but got: " << GetThresholdVal << "\n";
-    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetThresholdVal); 
-    return mlir::failure(); 
+    llvm::errs() << "threshold value must be >= 0 but got: " << GetThresholdVal
+                 << "\n";
+    // DEBUG_PRINT_WITH_ARGS("Making LowPassFilterLen Odd= " , GetThresholdVal);
+    return mlir::failure();
   }
   return mlir::success();
-
 }
 
 //===----------------------------------------------------------------------===//
 // QuantizationOp
 //===----------------------------------------------------------------------===//
 
-void QuantizationOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input, mlir::Value nLevels, mlir::Value max, mlir::Value min) {
+void QuantizationOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value input,
+                           mlir::Value nLevels, mlir::Value max,
+                           mlir::Value min) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input, nLevels, max, min} );
+  state.addOperands({input, nLevels, max, min});
   DEBUG_PRINT_NO_ARGS();
 }
 
 void QuantizationOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
-  auto tensorInput =  getInput().getType();
+  auto tensorInput = getInput().getType();
   getResult().setType(tensorInput);
   DEBUG_PRINT_NO_ARGS();
 }
 
 mlir::LogicalResult QuantizationOp::verify() {
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   // DEBUG_PRINT_NO_ARGS();
-  // check max > min && NoOfLevels = powerOf2 
+  // check max > min && NoOfLevels = powerOf2
 
   Value maxOperand = getOperand(2);
-  dsp::ConstantOp constantOp1stArg = maxOperand.getDefiningOp<dsp::ConstantOp>();
+  dsp::ConstantOp constantOp1stArg =
+      maxOperand.getDefiningOp<dsp::ConstantOp>();
   DEBUG_PRINT_NO_ARGS();
   DenseElementsAttr constantLhsValue = constantOp1stArg.getValue();
   auto elements = constantLhsValue.getValues<FloatAttr>();
@@ -2014,45 +2483,48 @@ mlir::LogicalResult QuantizationOp::verify() {
   Value minOperand = getOperand(3);
   constantOp1stArg = minOperand.getDefiningOp<dsp::ConstantOp>();
 
-  if(!constantOp1stArg){
-    llvm::errs() << "QuantizationOp: unable to get Constant for minOp -- 4th opernad " << "\n";
-    return mlir::failure(); 
+  if (!constantOp1stArg) {
+    llvm::errs()
+        << "QuantizationOp: unable to get Constant for minOp -- 4th opernad "
+        << "\n";
+    return mlir::failure();
   }
   DEBUG_PRINT_NO_ARGS();
   constantLhsValue = constantOp1stArg.getValue();
   elements = constantLhsValue.getValues<FloatAttr>();
   float getMin = elements[0].getValueAsDouble();
 
-  if(getMax < getMin){
-    llvm::errs() << "QuantizatnOp : Max < Min --" << " Max: " << getMax ;
-    llvm::errs() << " Min: " << getMin ;
+  if (getMax < getMin) {
+    llvm::errs() << "QuantizatnOp : Max < Min --" << " Max: " << getMax;
+    llvm::errs() << " Min: " << getMin;
     return mlir::failure();
   }
-  
 
   return mlir::success();
-
 }
 
-
 //===----------------------------------------------------------------------===//
 // LMSFilterResponseOp
 //===----------------------------------------------------------------------===//
 
-void LMSFilterResponseOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                   mlir::Value lhs, mlir::Value rhs, mlir::Value mu, mlir::Value filterLen) {
-  
+void LMSFilterResponseOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value lhs,
+                                mlir::Value rhs, mlir::Value mu,
+                                mlir::Value filterLen) {
+
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
-   state.addOperands({lhs, rhs, mu, filterLen});
+  state.addOperands({lhs, rhs, mu, filterLen});
 }
 
-
-void LMSFilterResponseOp::inferShapes() { getResult().setType(getLhs().getType()); }
+void LMSFilterResponseOp::inferShapes() {
+  getResult().setType(getLhs().getType());
+}
 
 mlir::LogicalResult LMSFilterResponseOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -2071,90 +2543,89 @@ mlir::LogicalResult LMSFilterResponseOp::verify() {
 // RunLenEncodingOp
 //===----------------------------------------------------------------------===//
 
-void RunLenEncodingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input) {
+void RunLenEncodingOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value input) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input} );
+  state.addOperands({input});
   DEBUG_PRINT_NO_ARGS();
 }
 
 void RunLenEncodingOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
-  auto tensorInput =  getInput().getType();
+  auto tensorInput = getInput().getType();
   auto shapeOfInput = tensorInput.getShape();
 
-  // auto tensorUpsampling = getRhs().getType(); 
+  // auto tensorUpsampling = getRhs().getType();
   // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length
-  //Assume rank is 1 , then get the shape of output
+  // Assume rank is 1 , then get the shape of output
   // shapeOfInput
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t LengthOfInput = shapeOfInput[0];
   int64_t lenOfOutput = 2 * LengthOfInput;
   shapeForOutput.push_back(lenOfOutput);
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
+
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
 
   getResult().setType(manipulatedType);
   DEBUG_PRINT_NO_ARGS();
 }
 
 mlir::LogicalResult RunLenEncodingOp::verify() {
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   // DEBUG_PRINT_NO_ARGS();
-  
-  return mlir::success();
 
+  return mlir::success();
 }
 
 //===----------------------------------------------------------------------===//
 // FIRFilterResSymmOptimizedOp
 //===----------------------------------------------------------------------===//
 
-void FIRFilterResSymmOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+void FIRFilterResSymmOptimizedOp::build(mlir::OpBuilder &builder,
+                                        mlir::OperationState &state,
+                                        mlir::Value lhs, mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the FIRFilterResSymmOptimizedOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of Lhs + Rhs - 1
-void FIRFilterResSymmOptimizedOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType();
+/// Infer the output shape of the FIRFilterResSymmOptimizedOp, this is required
+/// by the shape inference interface.
+// ToDo -- shape should be the length of Lhs + Rhs - 1
+void FIRFilterResSymmOptimizedOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   auto shapeOfInput = tensorInput.getShape();
 
   auto tensorFilter = getRhs().getType();
   auto shapeOfFilter = tensorFilter.getShape();
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
     shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getLhs().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getLhs().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
 }
 
-//get rank of Input & Filter -- make sure it is of rank 1 
+// get rank of Input & Filter -- make sure it is of rank 1
 mlir::LogicalResult FIRFilterResSymmOptimizedOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -2169,16 +2640,15 @@ mlir::LogicalResult FIRFilterResSymmOptimizedOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // LengthOp
 //===----------------------------------------------------------------------===//
 
 void LengthOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input) {
+                     mlir::Value input) {
   DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
-  state.addOperands({input} );
+  state.addOperands({input});
   DEBUG_PRINT_NO_ARGS();
 }
 
@@ -2189,8 +2659,8 @@ void LengthOp::inferShapes() {
   DEBUG_PRINT_NO_ARGS();
   shapeForOutput.push_back(1);
 
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
   getResult().setType(manipulatedType);
   DEBUG_PRINT_NO_ARGS();
 }
@@ -2214,16 +2684,17 @@ mlir::LogicalResult LengthOp::verify() {
 // ReverseInputOp
 //===----------------------------------------------------------------------===//
 
-void ReverseInputOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value input) {
+void ReverseInputOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value input) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands(input);
 }
 
 void ReverseInputOp::inferShapes() {
-  auto tensorInput =  getInput().getType();
-  // mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          // getInput().getType().getElementType());
+  auto tensorInput = getInput().getType();
+  // mlir::TensorType manipulatedType =
+  // mlir::RankedTensorType::get(shapeForOutput,
+  // getInput().getType().getElementType());
   getResult().setType(tensorInput);
 }
 
@@ -2242,129 +2713,131 @@ mlir::LogicalResult ReverseInputOp::verify() {
   return mlir::success();
 }
 
-
 //===----------------------------------------------------------------------===//
 // PaddingOp
 //===----------------------------------------------------------------------===//
 
 void PaddingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value input, mlir::Value PadValue, mlir::Value PadLen) {
+                      mlir::Value input, mlir::Value PadValue,
+                      mlir::Value PadLen) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({input, PadValue, PadLen});
 }
 
-
-
-/// Infer the output shape of the PaddingOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs
-void PaddingOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getInput().getType();
+/// Infer the output shape of the PaddingOp, this is required by the shape
+/// inference interface.
+// ToDo -- shape should be the length of input * UpsamplingRate ie, Rhs
+void PaddingOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getInput().getType();
   auto shapeOfInput = tensorInput.getShape();
 
-  // auto tensorUpsampling = getRhs().getType(); 
+  // auto tensorUpsampling = getRhs().getType();
   // auto shapeOfUpsampling = tensorUpsampling.getShape(); //shape is the length
-  
 
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
   int64_t SecondValueInt = 1;
 
-  //To extract value from the SSA value:
-    //get the Operand 
-    //convert it to ConstantOp
-    //convert it to corresponding elements attribute
-    //extract the value as float then convert to int
+  // To extract value from the SSA value:
+  // get the Operand
+  // convert it to ConstantOp
+  // convert it to corresponding elements attribute
+  // extract the value as float then convert to int
   DEBUG_PRINT_NO_ARGS();
   Value padding3rdArg = getOperand(2);
-  dsp::ConstantOp constantOp2ndArg = padding3rdArg.getDefiningOp<dsp::ConstantOp>();
+  dsp::ConstantOp constantOp2ndArg =
+      padding3rdArg.getDefiningOp<dsp::ConstantOp>();
   DEBUG_PRINT_NO_ARGS();
-  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+  DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
+  ;
   auto elements = constantRhsValue.getValues<FloatAttr>();
   float SecondValue = elements[0].getValueAsDouble();
-  SecondValueInt = (int64_t) SecondValue;
-  // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n"; //downsamplingRate
-    
-  DEBUG_PRINT_NO_ARGS();
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
-    double GetLenForOutput  = static_cast<double>(shapeOfInput[i] ) + SecondValueInt ;
-    int64_t OutlenInt = static_cast<int64_t> (GetLenForOutput);
-    DEBUG_PRINT_WITH_ARGS("PaddingLen= " , OutlenInt);
+  SecondValueInt = (int64_t)SecondValue;
+  // llvm::errs() << "Upsampling: SamplingRate: " << SecondValueInt << " \n";
+  // //downsamplingRate
+
+  DEBUG_PRINT_NO_ARGS();
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
+    double GetLenForOutput =
+        static_cast<double>(shapeOfInput[i]) + SecondValueInt;
+    int64_t OutlenInt = static_cast<int64_t>(GetLenForOutput);
+    DEBUG_PRINT_WITH_ARGS("PaddingLen= ", OutlenInt);
     shapeForOutput.push_back(OutlenInt);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getInput().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getInput().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
-  }
+}
 
-//get rank of Input & Upsampling -- make sure it is of rank 1 
+// get rank of Input & Upsampling -- make sure it is of rank 1
 mlir::LogicalResult PaddingOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto samplingRateType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto samplingRateType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
   // auto samplingRateRank = samplingRateType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  // //once ensured only 1 rank from above -- also make sure there is just 1 elem  
-  // if( inputRank != 1 || samplingRateRank != 0 )
+  // // llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " <<
+  // samplingRateRank << "\n";
+  // //once ensured only 1 rank from above -- also make sure there is just 1
+  // elem if( inputRank != 1 || samplingRateRank != 0 )
   // {
-  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " << samplingRateRank << "\n";
-  //   return emitError()
+  //   llvm::errs() << "inputRank: " << inputRank << " samplingRateRank: " <<
+  //   samplingRateRank << "\n"; return emitError()
   //          << "expected rank of input is 1 & Upsampling is 0";
   // }
   return mlir::success();
-} 
-
+}
 
 //===----------------------------------------------------------------------===//
 // FIRFilterYSymmOptimizedOp
 //===----------------------------------------------------------------------===//
 
-void FIRFilterYSymmOptimizedOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                  mlir::Value lhs, mlir::Value rhs) {
+void FIRFilterYSymmOptimizedOp::build(mlir::OpBuilder &builder,
+                                      mlir::OperationState &state,
+                                      mlir::Value lhs, mlir::Value rhs) {
   state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
   state.addOperands({lhs, rhs});
 }
 
-
-
-/// Infer the output shape of the FIRFilterYSymmOptimizedOp, this is required by the shape inference
-/// interface.
-//ToDo -- shape should be the length of Lhs + Rhs - 1
-void FIRFilterYSymmOptimizedOp::inferShapes() { 
-  //get the shape of Lhs & rhs 
-  //add the shape for each dimension
-  // auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
-  auto tensorInput =  getLhs().getType();
+/// Infer the output shape of the FIRFilterYSymmOptimizedOp, this is required by
+/// the shape inference interface.
+// ToDo -- shape should be the length of Lhs + Rhs - 1
+void FIRFilterYSymmOptimizedOp::inferShapes() {
+  // get the shape of Lhs & rhs
+  // add the shape for each dimension
+  //  auto tensorInput =  llvm::cast<RankedTensorType>(getLhs().getType());
+  auto tensorInput = getLhs().getType();
   auto shapeOfInput = tensorInput.getShape();
 
   auto tensorFilter = getRhs().getType();
   auto shapeOfFilter = tensorFilter.getShape();
-  std::vector<int64_t> shapeForOutput ;
+  std::vector<int64_t> shapeForOutput;
 
-  for(size_t i=0; i < shapeOfInput.size() ; i++){
+  for (size_t i = 0; i < shapeOfInput.size(); i++) {
     shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1);
   }
-  
-  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(shapeForOutput, 
-          getLhs().getType().getElementType());
 
-  // getResult().setType(getLhs().getType()); 
+  mlir::TensorType manipulatedType = mlir::RankedTensorType::get(
+      shapeForOutput, getLhs().getType().getElementType());
+
+  // getResult().setType(getLhs().getType());
   getResult().setType(manipulatedType);
 }
 
-//get rank of Input & Filter -- make sure it is of rank 1 
+// get rank of Input & Filter -- make sure it is of rank 1
 mlir::LogicalResult FIRFilterYSymmOptimizedOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand(0).getType());
-  // auto filterType = llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+  // auto filterType =
+  // llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
   // // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
 
   // auto inputRank = inputType.getRank();
@@ -2383,19 +2856,19 @@ mlir::LogicalResult FIRFilterYSymmOptimizedOp::verify() {
 // FFT1DRealSymmOp
 //===----------------------------------------------------------------------===//
 
-void FFT1DRealSymmOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
-  DEBUG_PRINT_NO_ARGS() ;
+void FFT1DRealSymmOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value value) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands(value);
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FFT1DRealSymmOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   // getResult().setType(tensorInput);
   getResult().setType(tensorInput);
   // getResult(2).setType(tensorInput);
@@ -2406,8 +2879,9 @@ mlir::LogicalResult FFT1DRealSymmOp::verify() {
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -2421,31 +2895,32 @@ mlir::LogicalResult FFT1DRealSymmOp::verify() {
 // FFT1DImgConjSymmOp
 //===----------------------------------------------------------------------===//
 
-void FFT1DImgConjSymmOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
-                        mlir::Value value) {
-  DEBUG_PRINT_NO_ARGS() ;
+void FFT1DImgConjSymmOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value value) {
+  DEBUG_PRINT_NO_ARGS();
   state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
   state.addOperands(value);
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
 }
 
 void FFT1DImgConjSymmOp::inferShapes() {
-  //for each rank
-  //Get the shape/size of input 
-  //output size = input_size 
-  auto tensorInput =  getInput().getType(); 
+  // for each rank
+  // Get the shape/size of input
+  // output size = input_size
+  auto tensorInput = getInput().getType();
   // getResult().setType(tensorInput);
   getResult().setType(tensorInput);
   // getResult(2).setType(tensorInput);
 }
 
 mlir::LogicalResult FFT1DImgConjSymmOp::verify() {
-  DEBUG_PRINT_NO_ARGS() ;
+  DEBUG_PRINT_NO_ARGS();
   // auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
   // auto inputRank = inputType.getRank();
 
-  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " << alphaValueRank << "\n";
-  // //once ensured only 1 rank from above --   
+  // // llvm::errs() << "inputRank: " << inputRank << " alphaValueRank: " <<
+  // alphaValueRank << "\n";
+  // //once ensured only 1 rank from above --
   // if( inputRank != 1 )
   // {
   //   llvm::errs() << "inputRank: " << inputRank <<  "\n";
@@ -2455,6 +2930,815 @@ mlir::LogicalResult FFT1DImgConjSymmOp::verify() {
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// ShiftRightOp
+//===----------------------------------------------------------------------===//
+/// Builds a ShiftRightOp; the result type is an unranked f64 tensor.
+void ShiftRightOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value lhs, mlir::Value rhs) {
+  state.addOperands({lhs, rhs});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+// Infer the output shape of the ShiftRightOp; this is required by the shape
+// inference interface. The result takes the type of the lhs operand.
+void ShiftRightOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// Conv2DOp
+//===----------------------------------------------------------------------===//
+/// Builds a Conv2DOp; the result type is an unranked f64 tensor.
+void Conv2DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input, mlir::Value weight, mlir::Value bias) {
+  state.addOperands({input, weight, bias});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+void Conv2DOp::inferShapes() {
+  // Each output dimension is (input - kernel + 1) along that dimension.
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getInput().getType());
+  auto kernelType = llvm::dyn_cast<RankedTensorType>(getKernel().getType());
+  // A failed dyn_cast is null and dims 0 and 1 are indexed below, so leave the
+  // result untouched unless both operands are ranked with rank >= 2.
+  if (!inputType || !kernelType || inputType.getRank() < 2 ||
+      kernelType.getRank() < 2)
+    return;
+  auto in = inputType.getShape(), k = kernelType.getShape();
+  SmallVector<int64_t, 2> dims = {in[0] - k[0] + 1, in[1] - k[1] + 1};
+  getResult().setType(RankedTensorType::get(dims, inputType.getElementType()));
+}
+
+mlir::LogicalResult Conv2DOp::verify() {
+  // Failures are reported with emitOpError so that each message is attached
+  // to this op as a diagnostic.
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getInput().getType());
+  auto kernelType = llvm::dyn_cast<RankedTensorType>(getKernel().getType());
+  auto biasType = llvm::dyn_cast<RankedTensorType>(getBias().getType());
+
+  // All three operands must already be ranked tensors.
+  if (!inputType) {
+    return emitOpError("expects a ranked tensor for input, got ")
+           << getInput().getType();
+  }
+  if (!kernelType) {
+    return emitOpError("expects a ranked tensor for kernel, got ")
+           << getKernel().getType();
+  }
+  // Only the rankedness of bias is checked; its shape is not inspected.
+  if (!biasType) {
+    return emitOpError("expects a ranked tensor for bias, got ")
+           << getBias().getType();
+  }
+
+  // Both shapes are indexed at dimensions 0 and 1 below, so input and kernel
+  // must be exactly two-dimensional.
+  if (inputType.getRank() != 2) {
+    return emitOpError("expects a 2-dimensional input, got rank ")
+           << inputType.getRank();
+  }
+  if (kernelType.getRank() != 2) {
+    return emitOpError("expects a 2-dimensional kernel, got rank ")
+           << kernelType.getRank();
+  }
+
+  // The kernel may not be larger than the input along either dimension.
+  auto inputShape = inputType.getShape();
+  auto kernelShape = kernelType.getShape();
+  if (inputShape[0] < kernelShape[0]) {
+    return emitOpError("input shape < kernel shape at 1st dimension");
+  }
+  if (inputShape[1] < kernelShape[1]) {
+    return emitOpError("input shape < kernel shape at 2nd dimension");
+  }
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult ThresholdUpOp::verify() {
+  // returnoriginal (operand 2) is read as a constant flag; getDefiningOp
+  // yields a null op for anything else, so reject that before using it.
+  auto flagConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!flagConst)
+    return emitOpError("expects returnoriginal to be a dsp constant");
+  DenseElementsAttr flagAttr = flagConst.getValue();
+  auto elements = flagAttr.getValues<FloatAttr>();
+  float flag = elements[0].getValueAsDouble();
+  int64_t returnOriginal = (int64_t)flag; // truncated; must be 0 or 1
+  if (returnOriginal != 0 && returnOriginal != 1)
+    return emitOpError("expects returnoriginal to be 0 or 1");
+  return mlir::success();
+}
+
+/// Builds a ThresholdUpOp; the result type is an unranked f64 tensor.
+void ThresholdUpOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                          mlir::Value input, mlir::Value threshold,
+                          mlir::Value returnoriginal) {
+  state.addOperands({input, threshold, returnoriginal});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// The result takes the type of the input operand.
+void ThresholdUpOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+//===----------------------------------------------------------------------===//
+// GenerateDTMFOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult GenerateDTMFOp::verify() {
+  // Every operand must be a ranked tensor holding exactly one element; each
+  // failed check returns the emitted diagnostic directly.
+  auto digitType = llvm::dyn_cast<RankedTensorType>(getDigit().getType());
+  auto durationType = llvm::dyn_cast<RankedTensorType>(getDuration().getType());
+  auto fsType = llvm::dyn_cast<RankedTensorType>(getFs().getType());
+
+  if (!digitType) {
+    return emitError() << "Digit must be a ranked tensor";
+  }
+  if (!durationType) {
+    return emitError() << "Duration must be a ranked tensor";
+  }
+  if (!fsType) {
+    return emitError() << "Frequency must be a ranked tensor";
+  }
+
+  if (digitType.getNumElements() != 1) {
+    return emitError() << "Digit must contain exactly one element";
+  }
+  if (durationType.getNumElements() != 1) {
+    return emitError() << "Duration must contain exactly one element";
+  }
+  if (fsType.getNumElements() != 1) {
+    return emitError() << "Frequency must contain exactly one element";
+  }
+
+  // The digit value is read from the constant that defines the operand.
+  // getDefiningOp yields a null op when the operand is produced by anything
+  // else, so report that instead of dereferencing it.
+  auto digitConst = getDigit().getDefiningOp<dsp::ConstantOp>();
+  if (!digitConst) {
+    return emitError() << "Digit must be defined by a dsp constant";
+  }
+  auto digitValue = digitConst.getValue();
+  auto digitFloat = digitValue.getValues<FloatAttr>();
+  auto dig = digitFloat[0].getValueAsDouble();
+
+  // Only the integral values 0 through 9 are accepted.
+  bool isDecimalDigit = false;
+  for (int candidate = 0; candidate <= 9; ++candidate) {
+    isDecimalDigit = isDecimalDigit || dig == candidate;
+  }
+  if (!isDecimalDigit) {
+    return emitError() << "Digit can only take one of the following values: "
+                          "0,1,2,3,4,5,6,7,8,9";
+  }
+
+  return mlir::success();
+}
+
+/// Builds a GenerateDTMFOp; the result type is an unranked f64 tensor.
+void GenerateDTMFOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value digit,
+                           mlir::Value duration, mlir::Value fs) {
+  state.addOperands({digit, duration, fs});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Sets the result type to a 1-D tensor of (int64_t)(duration * fs) elements;
+/// both values are read from the constants that define those operands.
+void GenerateDTMFOp::inferShapes() {
+  auto digitType = llvm::dyn_cast<RankedTensorType>(getDigit().getType());
+  auto durationConst = getDuration().getDefiningOp<dsp::ConstantOp>();
+  auto fsConst = getFs().getDefiningOp<dsp::ConstantOp>();
+  // Both lookups yield null for non-constant operands and the cast yields
+  // null for an unranked digit; report that rather than dereference them.
+  if (!digitType || !durationConst || !fsConst) {
+    emitOpError("expects a ranked digit and constant duration and fs");
+    return;
+  }
+
+  auto durationFloat = durationConst.getValue().getValues<FloatAttr>();
+  auto fsFloat = fsConst.getValue().getValues<FloatAttr>();
+  auto dur = durationFloat[0].getValueAsDouble();
+  auto freq = fsFloat[0].getValueAsDouble();
+  auto outputShape = (int64_t)(dur * freq);
+
+  getResult().setType(
+      RankedTensorType::get(outputShape, digitType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FFTFreqOp
+//===----------------------------------------------------------------------===//
+/// Builds an FFTFreqOp; the result type is an unranked f64 tensor.
+void FFTFreqOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value length, mlir::Value distance) {
+  state.addOperands({length, distance});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// FFTFreqOp checks no additional constraints; verification always succeeds.
+mlir::LogicalResult FFTFreqOp::verify() { return mlir::success(); }
+
+void FFTFreqOp::inferShapes() {
+  auto lengthType = llvm::dyn_cast<RankedTensorType>(getLength().getType());
+  auto lengthConst = getLength().getDefiningOp<dsp::ConstantOp>();
+  if (!lengthType || !lengthConst) { // null: not a ranked dsp constant
+    emitOpError("expects length to be a ranked dsp constant");
+    return;
+  }
+  auto lengthFloat = lengthConst.getValue().getValues<FloatAttr>();
+  auto outputShape = (int64_t)lengthFloat[0].getValueAsDouble(); // 1-D size
+  getResult().setType(
+      RankedTensorType::get(outputShape, lengthType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FindDominantPeaksOp
+//===----------------------------------------------------------------------===//
+/// Builds a FindDominantPeaksOp; the result type is an unranked f64 tensor.
+void FindDominantPeaksOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state,
+                                mlir::Value frequencies,
+                                mlir::Value magnitudes) {
+  state.addOperands({frequencies, magnitudes});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+void FindDominantPeaksOp::inferShapes() {
+  // The result is always a 2-element 1-D tensor; only the element type is
+  // taken from `frequencies`, so the operand does not need to be ranked here.
+  auto tensorType = llvm::cast<mlir::TensorType>(getFrequencies().getType());
+  SmallVector<int64_t, 1> resultShape{2};
+  getResult().setType(
+      RankedTensorType::get(resultShape, tensorType.getElementType()));
+}
+
+mlir::LogicalResult FindDominantPeaksOp::verify() {
+  // Nothing is checked for `frequencies` or `magnitudes` at the moment, so
+  // verification always succeeds.
+  // TODO(review): confirm whether ranked operands of equal length should be
+  // required here.
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// RecoverDTMFDigitOp
+//===----------------------------------------------------------------------===//
+/// Builds a RecoverDTMFDigitOp; the result type is an unranked f64 tensor.
+void RecoverDTMFDigitOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state,
+                               mlir::Value frequencies, mlir::Value freqPairs) {
+  state.addOperands({frequencies, freqPairs});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Infers the result type of RecoverDTMFDigitOp: a 1-D tensor with a single
+/// element that shares the element type of the `frequencies` operand.
+void RecoverDTMFDigitOp::inferShapes() {
+  // cast<> asserts on an unranked operand; dyn_cast<> would hand back a null
+  // type that is dereferenced just below.
+  auto frequenciesType =
+      llvm::cast<RankedTensorType>(getFrequencies().getType());
+  SmallVector<int64_t, 1> resultShape{1};
+  getResult().setType(
+      RankedTensorType::get(resultShape, frequenciesType.getElementType()));
+}
+
+/// Verifier for RecoverDTMFDigitOp. No operand constraints are enforced, so
+/// verification always succeeds.
+mlir::LogicalResult RecoverDTMFDigitOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FFTCombineOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFTCombineOp from the `real` and `imag` operands. The result is
+/// created as an unranked f64 tensor; inferShapes() assigns the ranked type.
+void FFTCombineOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value real, mlir::Value imag) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({real, imag});
+  state.addTypes({resultType});
+}
+
+/// Verifies that the `real` and `imag` operands carry the same number of
+/// elements. The check is skipped while either operand is unranked or not
+/// statically shaped, because no element count is available then.
+mlir::LogicalResult FFTCombineOp::verify() {
+  auto realType = llvm::dyn_cast<RankedTensorType>(getReal().getType());
+  auto imagType = llvm::dyn_cast<RankedTensorType>(getImag().getType());
+
+  // dyn_cast<> yields a null type for unranked operands, and
+  // getNumElements() is only valid on a static shape.
+  if (!realType || !imagType || !realType.hasStaticShape() ||
+      !imagType.hasStaticShape())
+    return mlir::success();
+
+  if (realType.getNumElements() != imagType.getNumElements())
+    return emitError()
+           << "Real and Imaginary parts should have same number of elements.\n";
+  return mlir::success();
+}
+
+/// Shape inference: the result takes exactly the type of the `real` operand.
+void FFTCombineOp::inferShapes() {
+  auto realType = getReal().getType();
+  getResult().setType(realType);
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateVoiceSignatureOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a GenerateVoiceSignatureOp from its four operands (f1, f2, duration,
+/// fs). The result is created as an unranked f64 tensor; inferShapes() assigns
+/// the ranked type.
+void GenerateVoiceSignatureOp::build(mlir::OpBuilder &builder,
+                                     mlir::OperationState &state,
+                                     mlir::Value f1, mlir::Value f2,
+                                     mlir::Value duration, mlir::Value fs) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({f1, f2, duration, fs});
+  state.addTypes({resultType});
+}
+
+/// Verifies that f1, f2, duration and fs are each a ranked tensor holding
+/// exactly one element.
+///
+/// All rank checks run before any element-count check, so the first reported
+/// diagnostic matches the order f1, f2, Duration, Frequency within each phase.
+mlir::LogicalResult GenerateVoiceSignatureOp::verify() {
+  struct NamedOperandType {
+    llvm::StringRef name; // Label used in the diagnostic text.
+    mlir::Type type;
+  };
+  const NamedOperandType operandTypes[] = {
+      {"f1", getF1().getType()},
+      {"f2", getF2().getType()},
+      {"Duration", getDuration().getType()},
+      {"Frequency", getFs().getType()},
+  };
+
+  for (const NamedOperandType &operand : operandTypes)
+    if (!llvm::isa<RankedTensorType>(operand.type))
+      return emitError() << operand.name << " must be a ranked tensor";
+
+  for (const NamedOperandType &operand : operandTypes)
+    if (llvm::cast<RankedTensorType>(operand.type).getNumElements() != 1)
+      return emitError() << operand.name
+                         << " must contain exactly one element";
+
+  return mlir::success();
+}
+
+/// Infers the result type of GenerateVoiceSignatureOp: a 1-D tensor with
+/// `duration * fs` elements (truncated toward zero) and the element type of
+/// the `fs` operand. Both values are read from the first element of the
+/// constants that define the operands.
+///
+/// If `fs` is not a ranked tensor, or either operand is not produced by a
+/// non-empty dsp::ConstantOp, an error is emitted and the result type is left
+/// untouched rather than dereferencing a null handle.
+void GenerateVoiceSignatureOp::inferShapes() {
+  auto fsType = llvm::dyn_cast<RankedTensorType>(getFs().getType());
+  auto durationConst = getDuration().getDefiningOp<dsp::ConstantOp>();
+  auto fsConst = getFs().getDefiningOp<dsp::ConstantOp>();
+  if (!fsType || !durationConst || !fsConst) {
+    emitOpError("expects 'fs' to be a ranked tensor and both 'duration' and "
+                "'fs' to be defined by constants");
+    return;
+  }
+
+  DenseElementsAttr durationValue = durationConst.getValue();
+  DenseElementsAttr fsValue = fsConst.getValue();
+  if (durationValue.getNumElements() == 0 || fsValue.getNumElements() == 0) {
+    emitOpError("expects 'duration' and 'fs' to hold at least one element");
+    return;
+  }
+
+  auto durationFloat = durationValue.getValues<FloatAttr>();
+  auto fsFloat = fsValue.getValues<FloatAttr>();
+  const double dur = durationFloat[0].getValueAsDouble();
+  const double freq = fsFloat[0].getValueAsDouble();
+
+  // Number of samples = duration * sampling frequency, truncated.
+  const auto outputShape = static_cast<int64_t>(dur * freq);
+
+  getResult().setType(
+      RankedTensorType::get(outputShape, fsType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SqrtOp from its single operand. The result is created as an
+/// unranked f64 tensor; inferShapes() assigns the final type.
+void SqrtOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value input) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input});
+  state.addTypes({resultType});
+}
+
+/// Verifier for SqrtOp. No operand constraints are enforced, so verification
+/// always succeeds.
+mlir::LogicalResult SqrtOp::verify() { return mlir::success(); }
+
+/// Shape inference: the result takes exactly the type of the input operand.
+void SqrtOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+//===----------------------------------------------------------------------===//
+// QamDemodulateOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a QamDemodulateOp from the `real` and `imagine` operands. The result
+/// is created as an unranked f64 tensor; inferShapes() assigns the ranked type.
+void QamDemodulateOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value real,
+                            mlir::Value imagine) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({real, imagine});
+  state.addTypes({resultType});
+}
+
+/// Infers the result type of QamDemodulateOp: same rank and element type as
+/// the `real` operand, with every dimension doubled.
+void QamDemodulateOp::inferShapes() {
+  // cast<> asserts on an unranked operand; dyn_cast<> would hand back a null
+  // type that is dereferenced just below.
+  auto realType = llvm::cast<RankedTensorType>(getReal().getType());
+
+  // Shapes are int64_t in MLIR; `long int` is not the same type on every
+  // platform.
+  SmallVector<int64_t, 2> outputShape;
+  outputShape.reserve(realType.getRank());
+  for (int64_t dim : realType.getShape())
+    outputShape.push_back(dim * 2);
+
+  getResult().setType(
+      RankedTensorType::get(outputShape, realType.getElementType()));
+}
+
+/// Verifier for QamDemodulateOp. No operand constraints are enforced, so
+/// verification always succeeds.
+mlir::LogicalResult QamDemodulateOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// QamModulateRealOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a QamModulateRealOp from its `signal` operand. The result is created
+/// as an unranked f64 tensor; inferShapes() assigns the ranked type.
+void QamModulateRealOp::build(mlir::OpBuilder &builder,
+                              mlir::OperationState &state, mlir::Value signal) {
+  state.addOperands({signal});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+/// Infers the result type of QamModulateRealOp: same rank and element type as
+/// the `signal` operand, with every dimension halved (integer division).
+void QamModulateRealOp::inferShapes() {
+  // cast<> asserts on an unranked operand; dyn_cast<> would hand back a null
+  // type that is dereferenced just below.
+  auto signalType = llvm::cast<RankedTensorType>(getSignal().getType());
+
+  // Shapes are int64_t in MLIR; `long int` is not the same type on every
+  // platform.
+  SmallVector<int64_t, 8> outputShape;
+  outputShape.reserve(signalType.getRank());
+  for (int64_t dim : signalType.getShape())
+    outputShape.push_back(dim / 2);
+
+  getResult().setType(
+      RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+/// Verifier for QamModulateRealOp. It currently accepts every op.
+mlir::LogicalResult QamModulateRealOp::verify() {
+
+  // Disabled checks, kept for reference. They would reject a `signal` operand
+  // that is not a ranked tensor, and any signal whose rank is not 1.
+  //
+  // auto signalType = llvm::dyn_cast<RankedTensorType>(getSignal().getType());
+  //
+  // if (!signalType) {
+  //   llvm::errs() << "expect a ranked tensor for signal input, get "
+  //                << getSignal();
+  //   return mlir::failure();
+  // }
+  //
+  // auto signalRank = signalType.getRank();
+  //
+  // if (signalRank != 1) {
+  //   llvm::errs() << "expect 1 dimensional signal, get " << signalRank;
+  //   return mlir::failure();
+  // }
+  //
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateImgOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a QamModulateImgOp from its `signal` operand. The result is created
+/// as an unranked f64 tensor; inferShapes() assigns the ranked type.
+void QamModulateImgOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value signal) {
+  state.addOperands({signal});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+/// Infers the result type of QamModulateImgOp: same rank and element type as
+/// the `signal` operand, with every dimension halved (integer division).
+void QamModulateImgOp::inferShapes() {
+  // cast<> asserts on an unranked operand; dyn_cast<> would hand back a null
+  // type that is dereferenced just below.
+  auto signalType = llvm::cast<RankedTensorType>(getSignal().getType());
+
+  // Shapes are int64_t in MLIR; `long int` is not the same type on every
+  // platform.
+  SmallVector<int64_t, 8> outputShape;
+  outputShape.reserve(signalType.getRank());
+  for (int64_t dim : signalType.getShape())
+    outputShape.push_back(dim / 2);
+
+  getResult().setType(
+      RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+/// Verifier for QamModulateImgOp. No constraints are enforced, so verification
+/// always succeeds.
+mlir::LogicalResult QamModulateImgOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// BeamFormOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a BeamFormOp. `antennas` and `freq` are stored as i64 integer
+/// attributes; `time` and `weights` become the operands. The result is created
+/// as an unranked f64 tensor; inferShapes() assigns the final type.
+void BeamFormOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                       int64_t antennas, int64_t freq, mlir::Value time,
+                       mlir::Value weights) {
+  auto antennasAttr = builder.getI64IntegerAttr(antennas);
+  auto freqAttr = builder.getI64IntegerAttr(freq);
+
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+  state.addAttribute("antennas", antennasAttr);
+  state.addAttribute("freq", freqAttr);
+  state.addOperands({time, weights});
+}
+
+/// Shape inference: the result takes exactly the type of the `time` operand.
+void BeamFormOp::inferShapes() {
+  auto timeType = getTime().getType();
+  getResult().setType(timeType);
+}
+
+/// Verifier for BeamFormOp. No constraints are enforced, so verification
+/// always succeeds.
+mlir::LogicalResult BeamFormOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SpaceModulateOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SpaceModulateOp from its single operand. The result is created as
+/// an unranked f64 tensor; inferShapes() assigns the final type.
+void SpaceModulateOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value signals) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({signals});
+  state.addTypes({resultType});
+}
+
+/// Shape inference: the result takes exactly the type of the signal operand.
+void SpaceModulateOp::inferShapes() {
+  auto signalType = getSignal().getType();
+  getResult().setType(signalType);
+}
+
+/// Verifier for SpaceModulateOp. No constraints are enforced, so verification
+/// always succeeds.
+mlir::LogicalResult SpaceModulateOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceDemodulateOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SpaceDemodulateOp from its single operand. The result is created
+/// as an unranked f64 tensor; inferShapes() assigns the final type.
+void SpaceDemodulateOp::build(mlir::OpBuilder &builder,
+                              mlir::OperationState &state, mlir::Value binary) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({binary});
+  state.addTypes({resultType});
+}
+
+/// Shape inference: the result takes exactly the type of the `binary` operand.
+void SpaceDemodulateOp::inferShapes() {
+  auto binaryType = getBinary().getType();
+  getResult().setType(binaryType);
+}
+
+/// Verifier for SpaceDemodulateOp. No constraints are enforced, so
+/// verification always succeeds.
+mlir::LogicalResult SpaceDemodulateOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceErrCorrectionOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SpaceErrCorrectionOp from its single operand. The result is
+/// created as an unranked f64 tensor; inferShapes() assigns the final type.
+void SpaceErrCorrectionOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state,
+                                 mlir::Value signal) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({signal});
+  state.addTypes({resultType});
+}
+
+/// Shape inference: the result takes exactly the type of the `signal` operand.
+void SpaceErrCorrectionOp::inferShapes() {
+  auto signalType = getSignal().getType();
+  getResult().setType(signalType);
+}
+
+/// Verifier for SpaceErrCorrectionOp. No constraints are enforced, so
+/// verification always succeeds.
+mlir::LogicalResult SpaceErrCorrectionOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// NormalizeOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a NormalizeOp from its single operand. The result is created as an
+/// unranked f64 tensor; inferShapes() assigns the final type.
+void NormalizeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value signal) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({signal});
+  state.addTypes({resultType});
+}
+
+/// Shape inference: the result takes exactly the type of the `signal` operand.
+void NormalizeOp::inferShapes() {
+  auto signalType = getSignal().getType();
+  getResult().setType(signalType);
+}
+
+//===----------------------------------------------------------------------===//
+// NormLMSFilterResponseOptimizeOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a NormLMSFilterResponseOptimizeOp from its four operands (lhs, rhs,
+/// mu, filterLen). The result is created as an unranked f64 tensor;
+/// inferShapes() assigns the final type.
+void NormLMSFilterResponseOptimizeOp::build(mlir::OpBuilder &builder,
+                                            mlir::OperationState &state,
+                                            mlir::Value lhs, mlir::Value rhs,
+                                            mlir::Value mu,
+                                            mlir::Value filterLen) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs, mu, filterLen});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result takes exactly the type of the `lhs` operand.
+void NormLMSFilterResponseOptimizeOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+/// Verifier for NormLMSFilterResponseOptimizeOp. No constraints are enforced,
+/// so verification always succeeds.
+mlir::LogicalResult NormLMSFilterResponseOptimizeOp::verify() {
+  const mlir::LogicalResult verdict = mlir::success();
+  return verdict;
+}
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmThresholdUpOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FIRFilterResSymmThresholdUpOptimizedOp from its four operands
+/// (lhs, rhs, threshold, returnoriginal). The result is created as an unranked
+/// f64 tensor; inferShapes() assigns the ranked type.
+void FIRFilterResSymmThresholdUpOptimizedOp::build(
+    mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs,
+    mlir::Value rhs, mlir::Value threshold, mlir::Value returnoriginal) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs, threshold, returnoriginal});
+  state.addTypes(resultType);
+}
+
+/// Infer the output shape of the FIRFilterResSymmThresholdUpOptimizedOp, this
+/// is required by the shape inference interface.
+///
+/// For every dimension of `lhs`, the output extent is
+/// `lhs[i] + rhs[i] - 1`; the element type is taken from `lhs`.
+/// If `rhs` has fewer dimensions than `lhs`, an error is emitted and the
+/// result type is left untouched instead of reading past the end of the
+/// filter shape.
+void FIRFilterResSymmThresholdUpOptimizedOp::inferShapes() {
+  auto inputType = getLhs().getType();
+  auto shapeOfInput = inputType.getShape();
+  auto shapeOfFilter = getRhs().getType().getShape();
+
+  // The loop below indexes the filter shape with the input's dimension index.
+  if (shapeOfFilter.size() < shapeOfInput.size()) {
+    emitOpError("expects 'rhs' to have at least as many dimensions as 'lhs'");
+    return;
+  }
+
+  std::vector<int64_t> shapeForOutput;
+  shapeForOutput.reserve(shapeOfInput.size());
+  for (size_t i = 0; i < shapeOfInput.size(); ++i)
+    shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1);
+
+  getResult().setType(
+      mlir::RankedTensorType::get(shapeForOutput, inputType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FFTOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFTOp from its single operand. The op has two results, both
+/// created with the operand's type.
+void FFTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs) {
+  mlir::Type operandType = lhs.getType();
+  state.addOperands({lhs});
+  state.addTypes({operandType, operandType});
+}
+
+/// Shape inference: results #0 and #1 both take the type of the `lhs` operand.
+void FFTOp::inferShapes() {
+  auto operandType = getLhs().getType();
+  for (unsigned resultIdx = 0; resultIdx < 2; ++resultIdx)
+    getResult(resultIdx).setType(operandType);
+}
+
+//===----------------------------------------------------------------------===//
+// FFTAbsOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFTAbsOp from its single operand; the result is created with the
+/// operand's type.
+void FFTAbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input) {
+  mlir::Type resultType = input.getType();
+  state.addOperands({input});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result takes exactly the type of the input operand.
+void FFTAbsOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a DFTAbsOp from its single operand; the result is created with the
+/// operand's type.
+void DFTAbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input) {
+  mlir::Type resultType = input.getType();
+  state.addOperands({input});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result takes exactly the type of the input operand.
+void DFTAbsOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a DFTAbsThresholdUpOp from its three operands (input, threshold,
+/// returnoriginal); the result is created with the type of `input`.
+void DFTAbsThresholdUpOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value input,
+                                mlir::Value threshold,
+                                mlir::Value returnoriginal) {
+  mlir::Type resultType = input.getType();
+  state.addOperands({input, threshold, returnoriginal});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result takes exactly the type of the input operand.
+void DFTAbsThresholdUpOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+/// Verifies the `returnoriginal` operand (operand #2): it must be produced by
+/// a non-empty dsp::ConstantOp whose first element, truncated to an integer,
+/// is 0 or 1. Every failure path emits a diagnostic.
+mlir::LogicalResult DFTAbsThresholdUpOp::verify() {
+  // getDefiningOp<> yields a null op for block arguments and for values
+  // produced by any other operation; it must not be dereferenced blindly.
+  auto flagConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!flagConst)
+    return emitOpError("expects 'returnoriginal' to be defined by a constant");
+
+  DenseElementsAttr flagValue = flagConst.getValue();
+  if (flagValue.getNumElements() == 0)
+    return emitOpError("expects 'returnoriginal' to hold at least one element");
+
+  auto elements = flagValue.getValues<FloatAttr>();
+  const auto returnOriginal =
+      static_cast<int64_t>(elements[0].getValueAsDouble());
+
+  // The flag is boolean-like: only 0 and 1 are accepted.
+  if (returnOriginal != 0 && returnOriginal != 1)
+    return emitOpError("expects 'returnoriginal' to be 0 or 1, got ")
+           << returnOriginal;
+  return mlir::success();
+}
+
+
+//===----------------------------------------------------------------------===//
+ // CorrelateOp
+ //===----------------------------------------------------------------------===//
+
+/// Builds a CorrelateOp from its two operands. The result is created as an
+/// unranked f64 tensor; inferShapes() assigns the ranked type.
+void CorrelateOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value lhs, mlir::Value rhs) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(resultType);
+}
+
+/// Infers the result type of CorrelateOp: a 1-D tensor with
+/// `2 * lhs.dim(0) - 1` elements and the element type of `lhs`. Only the
+/// first dimension of `lhs` is consulted; `rhs` does not affect the shape.
+///
+/// If `lhs` has no dimensions, an error is emitted and the result type is left
+/// untouched instead of indexing an empty shape.
+void CorrelateOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  auto lhsShape = lhsType.getShape();
+  if (lhsShape.empty()) {
+    emitOpError("expects 'lhs' to have at least one dimension");
+    return;
+  }
+
+  std::vector<int64_t> shapeForOutput{lhsShape[0] * 2 - 1};
+  getResult().setType(
+      mlir::RankedTensorType::get(shapeForOutput, lhsType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SetSingleElemAtIdxOp from its three operands (input, indx, val).
+/// The result is created as an unranked f64 tensor; inferShapes() assigns the
+/// ranked type.
+void SetSingleElemAtIdxOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state, mlir::Value input,
+                                 mlir::Value indx, mlir::Value val) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, indx, val});
+  state.addTypes({resultType});
+}
+
+/// Shape inference: the result becomes a ranked tensor with an empty shape
+/// (rank 0) whose element type is that of the `input` operand.
+void SetSingleElemAtIdxOp::inferShapes() {
+  auto elementType = getInput().getType().getElementType();
+  const std::vector<int64_t> emptyShape;
+  getResult().setType(mlir::RankedTensorType::get(emptyShape, elementType));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Correl2MaxOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a Correl2MaxOptimizedOp from its two operands. The result is created
+/// as an unranked f64 tensor; inferShapes() assigns the ranked type.
+void Correl2MaxOptimizedOp::build(mlir::OpBuilder &builder,
+                                  mlir::OperationState &state, mlir::Value lhs,
+                                  mlir::Value rhs) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result becomes a ranked tensor with an empty shape
+/// (rank 0) whose element type is that of the `lhs` operand.
+void Correl2MaxOptimizedOp::inferShapes() {
+  auto elementType = getLhs().getType().getElementType();
+  const std::vector<int64_t> emptyShape;
+  getResult().setType(mlir::RankedTensorType::get(emptyShape, elementType));
+}
+
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponse2GainOp
+//===----------------------------------------------------------------------===//
+
+
+/// Builds an LMSFilterResponse2GainOp from its five operands (lhs, rhs, mu,
+/// filterLen, gain). The result is created as an unranked f64 tensor;
+/// inferShapes() assigns the final type.
+void LMSFilterResponse2GainOp::build(mlir::OpBuilder &builder,
+                                     mlir::OperationState &state,
+                                     mlir::Value lhs, mlir::Value rhs,
+                                     mlir::Value mu, mlir::Value filterLen,
+                                     mlir::Value gain) {
+  auto resultType = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs, mu, filterLen, gain});
+  state.addTypes(resultType);
+}
+
+/// Shape inference: the result takes exactly the type of the `lhs` operand.
+void LMSFilterResponse2GainOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+
 
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp
index 537989becb84..d564d89b4a2e 100644
--- a/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/mlir/LowerToAffineLoops.cpp
@@ -12,9 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#pragma GCC diagnostic push 
+#pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wall"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinDialect.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -23,20 +30,14 @@
 #include "mlir/IR/DialectRegistry.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/ValueRange.h"
+#include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Support/TypeID.h"
-#include "toy/Dialect.h"
+#include "mlir/Transforms/DialectConversion.h"
 #include "toy/DebugConfig.h"
+#include "toy/Dialect.h"
 #include "toy/Passes.h"
-
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/Math/IR/Math.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
@@ -47,9 +48,9 @@
 #include <memory>
 #include <utility>
 
-//For IntegerSet
-#include "mlir/IR/IntegerSet.h"
+// For IntegerSet
 #include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/IntegerSet.h"
 #include <iostream>
 using namespace mlir;
 using namespace std;
@@ -59,7 +60,6 @@ using namespace dsp;
 // ToyToAffine RewritePatterns
 //===----------------------------------------------------------------------===//
 
-
 // #pragma warning(push, 0)
 /// Convert the given RankedTensorType into the corresponding MemRefType.
 static MemRefType convertTensorToMemRef(RankedTensorType type) {
@@ -73,12 +73,15 @@ static Value insertAllocAndDealloc(MemRefType type, Location loc,
 
   // Make sure to allocate at the beginning of the block.
   auto *parentBlock = alloc->getBlock();
-  alloc->moveBefore(&parentBlock->front()); //Abhinav-- move allock->block->front before alloc operation??
+  alloc->moveBefore(
+      &parentBlock->front()); // Abhinav-- move allock->block->front before
+                              // alloc operation??
 
   // Make sure to deallocate this alloc at the end of the block. This is fine
   // as dsp functions have no control flow.
   auto dealloc = rewriter.create<memref::DeallocOp>(loc, alloc);
-  dealloc->moveBefore(&parentBlock->back()); //move alloc->block->back before dealloc
+  dealloc->moveBefore(
+      &parentBlock->back()); // move alloc->block->back before dealloc
   return alloc;
 }
 
@@ -94,29 +97,32 @@ static void lowerOpToLoops(Operation *op, ValueRange operands,
                            PatternRewriter &rewriter,
                            LoopIterationFn processIteration) {
   auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
-    
-    // for (auto i : tensorType.getShape())
-    // {
-    //    llvm::errs() << "tensortype =" << i << "\n" ;
-    // }
-    // llvm::errs() << "tensortype.getElementType =" << tensorType.getElementType() << "\n" ;
-    // llvm::errs() << "op->getLoc = " << op->getLoc() << "\n"; //getDialect
-    // llvm::errs() << "op->getDialect = " << op->getDialect() << "\n";
-    // llvm::errs() << "op->getName = " << op->getName() << "\n";
-    // // llvm::errs() << "op->getType = " << op->getType() << "\n";
-    // llvm::errs() << "op->getParentRegion = " << op->getParentRegion() << "\n";
-    // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n";
-    
-    // llvm::errs() << "op->getNumOperands = " << op->getNumOperands() << "\n";
-    // for (auto i : op->getOperands())
-    // {
-    //   llvm::errs() << "op->Operand = " << i << "\n";
-    // }
-    
-    // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n";
-    // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n";
-    // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() << "\n";
-  
+
+  // for (auto i : tensorType.getShape())
+  // {
+  //    llvm::errs() << "tensortype =" << i << "\n" ;
+  // }
+  // llvm::errs() << "tensortype.getElementType =" <<
+  // tensorType.getElementType() << "\n" ; llvm::errs() << "op->getLoc = " <<
+  // op->getLoc() << "\n"; //getDialect llvm::errs() << "op->getDialect = " <<
+  // op->getDialect() << "\n"; llvm::errs() << "op->getName = " << op->getName()
+  // << "\n";
+  // // llvm::errs() << "op->getType = " << op->getType() << "\n";
+  // llvm::errs() << "op->getParentRegion = " << op->getParentRegion() << "\n";
+  // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() <<
+  // "\n";
+
+  // llvm::errs() << "op->getNumOperands = " << op->getNumOperands() << "\n";
+  // for (auto i : op->getOperands())
+  // {
+  //   llvm::errs() << "op->Operand = " << i << "\n";
+  // }
+
+  // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName() <<
+  // "\n"; llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName()
+  // << "\n"; llvm::errs() << "op->getParentOp = " <<
+  // op->getParentOp()->getName() << "\n";
+
   auto loc = op->getLoc();
 
   // Insert an allocation and deallocation for the result of this operation.
@@ -156,22 +162,21 @@ static void lowerOpToLoops(Operation *op, ValueRange operands,
   rewriter.replaceOp(op, alloc);
 }
 
-
-#define TryJustAffineLoop 0  //working
-#define TryAffineForAndAffineIf 0  // working 
+#define TryJustAffineLoop 0       // working
+#define TryAffineForAndAffineIf 0 // working
 #define TryAffineIf2 0
-#define TryAffineMap  0   //working basic -- TO do --try with symbols
-#define TrySumOfVector 0  //Working
-#define TryMultiDimLoop 0  //Working
-#define TryFIRFilter 1 
-#define TryMultiDimForAndIf 0 //
-#define TryMultiDimLoopAndAffineMap 0  //Working
-#define TryMultiDimLoopAndAffineSet 0  //Working
+#define TryAffineMap 0    // working basic -- TO do --try with symbols
+#define TrySumOfVector 0  // Working
+#define TryMultiDimLoop 0 // Working
+#define TryFIRFilter 1
+#define TryMultiDimForAndIf 0         //
+#define TryMultiDimLoopAndAffineMap 0 // Working
+#define TryMultiDimLoopAndAffineSet 0 // Working
 static void lowerOpToLoopsFIR(Operation *op, ValueRange operands,
-                           PatternRewriter &rewriter,
-                           LoopIterationFn processIteration) {
+                              PatternRewriter &rewriter,
+                              LoopIterationFn processIteration) {
   auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
-      
+
   auto loc = op->getLoc();
 
   // Insert an allocation and deallocation for the result of this operation.
@@ -186,1000 +191,1044 @@ static void lowerOpToLoopsFIR(Operation *op, ValueRange operands,
   SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
   // llvm::errs() << "tensorType.getRank() " << tensorType.getRank() << "\n";
-    // cout << "tensorType.getRank() .. " << tensorType.getRank() << "\n";
-    // for (auto i : tensorType.getRank())
-    // {
-    //   llvm::errs() << "tensorType.getRank() = " << i << "\n";
-    // }
-    // for (auto i : tensorType.getShape())
-    // {
-    //   llvm::errs() << "tensorType.getShape() = " << i << "\n";
-    // }
-    // llvm::errs() << "tensorType.getShape() " << tensorType.getShape() << "\n";
+  // cout << "tensorType.getRank() .. " << tensorType.getRank() << "\n";
+  // for (auto i : tensorType.getRank())
+  // {
+  //   llvm::errs() << "tensorType.getRank() = " << i << "\n";
+  // }
+  // for (auto i : tensorType.getShape())
+  // {
+  //   llvm::errs() << "tensorType.getShape() = " << i << "\n";
+  // }
+  // llvm::errs() << "tensorType.getShape() " << tensorType.getShape() << "\n";
 
-    // affine::AffineForOp forOp = rewriter.create<affine::AffineForOp>(
-      //   loc, lowerBounds, tensorType.getShape() , steps, ValueRange());
-      // mlir::IntegerSet set1 = mlir::IntegerSet::get(1, 0, map, {true});
+  // affine::AffineForOp forOp = rewriter.create<affine::AffineForOp>(
+  //   loc, lowerBounds, tensorType.getShape() , steps, ValueRange());
+  // mlir::IntegerSet set1 = mlir::IntegerSet::get(1, 0, map, {true});
 
-      //create an affineFor
-      // affineFor It has one region containing its body & the region must contain a block terminating with affine.yield
-      //block has argument of index type
-      //
+  // Create an affine.for:
+  //  - it has one region containing its body, and that region must contain
+  //    a block terminating with affine.yield;
+  //  - the block has an argument of index type.
+  //
 
 #if TryJustAffineLoop
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];
-    int64_t step = 1;
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+      loc, constant15, alloc, forOp1.getInductionVar());
 
-    //create AffineMap and set
-    // %1 = affine.load 
-    //  if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
-    AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
-    // AffineMap map = AffineMap::get(1, 0, dimExpr);
-    // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
-    IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-    
-    Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(15));
-    
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
-    auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, constant15, alloc, forOp1.getInductionVar());
-
-#endif 
+#endif
 
 #if TryAffineForAndAffineIf
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];
-    int64_t step = 1;
-
-    //create AffineMap and set
-    // %1 = affine.load 
-    //  if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
-    AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
-    // AffineExpr dimExpr2 = rewriter
-    // AffineMap map = AffineMap::get(1, 0, dimExpr);
-    // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
-    IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
-
-     //affine.if %arg1 >= 0 and %5 <= %1 - 1
-     // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
-     // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-
-    // #set affine_set<(d0) : (d0 - 5 <= 0)>
-    // affine.for %arg0 = 0 to 10 {
-    //   %3 = affine.if #set (%arg0) {
-    //         %1 = arith.const 25
-    //         affine.yield %1
-    //     }
-    // else{
-    //       %2 = arith.const 15
-    //       affine.yield %2
-    //   }
-    //     affine.store %3, alloc[%arg0]
-    // } 
-
-    // auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv} , false /*no else*/ );
-    // auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv} , true /*no else*/ );
-    
-    //use typeRange too:
-    Type floatType = rewriter.getF64Type();
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, TypeRange{ floatType },set1 , ValueRange{iv} , true /*no else*/ );
-
-    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
-    
-    FIRFilterResponseAdaptor firFilterOperands(operands);
-
-    //load from the input
-    Value loadInput = rewriter.create<AffineLoadOp>(loc, firFilterOperands.getLhs(), iv);
-    Value constant25 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(25));
-    Value constsq25 = rewriter.create<arith::MulFOp>(loc, loadInput, constant25)  ;                                                   
-    
-    rewriter.create<AffineStoreOp>(loc, constsq25 , alloc, iv);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{constsq25});
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineExpr dimExpr2 = rewriter
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set affine_set<(d0) : (d0 - 5 >= 0)>
+  // affine.for %arg0 = 0 to 10 {
+  //   %3 = affine.if #set (%arg0) {
+  //         %1 = arith.const 25
+  //         affine.yield %1
+  //     }
+  // else{
+  //       %2 = arith.const 15
+  //       affine.yield %2
+  //   }
+  //     affine.store %3, alloc[%arg0]
+  // }
 
-    rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    Value loadInput2 = rewriter.create<AffineLoadOp>(loc, firFilterOperands.getRhs(), iv);
-    Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(15));
-    Value elseResult = rewriter.create<arith::MulFOp>(loc, loadInput2, constant15)  ; 
-    rewriter.create<AffineStoreOp>(loc, elseResult , alloc, iv);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{elseResult});
-    // rewriter.setInsertionPointToEnd(ifOp.getElseBlock());
-    rewriter.setInsertionPointAfter(ifOp);
-    ifOp->dump();
-    // forOp1->dump();
-    rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
-    //getParentBlock then use 
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
-    // rewriter.setInsertionPointToEnd(ifOp->getBlock());
-    // rewriter.setInsertionPoint(ifOp->getParentOp());
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
-    
-    // rewriter.setInsertionPointAfter(ifOp);
-    // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
-    
-    //try to add the affine.If condition 
-    //create affine.If , 
-    // use integer set to represent the condition 
-    //check the AffineArgs 
-    // affine.if operation contains two regions for the “then” and “else” clauses
-      //each region of affine.if must contain a single block with no args and terminated by affine.yield op
-      // if affine.if defines no values --> no need for affine.yield
-    
-    // affineIf.setConditional(set1, forOp1.getInductionVar());
-    //start then "block"
-    // "then" block
-    
-    // Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-    //                                                      rewriter.getF64FloatAttr(15));
-     
-    //  rewriter.create<affine::AffineYieldOp>(loc, ValueRange{constant15});
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
-    //else block
-    // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    
-    // Set insertion point to the end of the "then" block
-    // rewriter.setInsertionPointAfter(ifOp.getThenBlock()->getTerminator());
-   
-
-    // rewriter.create<affine::AffineYieldOp>(loc, constant25);
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
-    //Back to parentOp -- ifOp stops here
-    // rewriter.setInsertionPointAfter(ifOp);
-    
-
-    //also use affine::AffineStore to store at the loop induction variable
-    // auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, ifOp.getResult(0), alloc, forOp1.getInductionVar());
-    // auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, constant25, alloc, forOp1.getInductionVar());
-    // Back to parentOp -- forOp1
-    // rewriter.setInsertionPointAfter(storeOp);
-
-    llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
-    //create affine yield for the loop
-    // rewriter.create<affine::AffineYieldOp>(loc);  
+  // auto ifOp = rewriter.create<affine::AffineIfOp>(
+  //     loc, set1, ValueRange{iv}, false /*no else*/);
+  // (the same call with `true`, i.e. with an else region, was also tried)
+
+  // use typeRange too (the affine.if yields one f64 result):
+  Type floatType = rewriter.getF64Type();
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, TypeRange{floatType}, set1, ValueRange{iv}, true /*with else*/);
+
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+  FIRFilterResponseAdaptor firFilterOperands(operands);
+
+  // load from the input
+  Value loadInput =
+      rewriter.create<AffineLoadOp>(loc, firFilterOperands.getLhs(), iv);
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value constsq25 = rewriter.create<arith::MulFOp>(loc, loadInput, constant25);
+
+  rewriter.create<AffineStoreOp>(loc, constsq25, alloc, iv);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{constsq25});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value loadInput2 =
+      rewriter.create<AffineLoadOp>(loc, firFilterOperands.getRhs(), iv);
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  Value elseResult =
+      rewriter.create<arith::MulFOp>(loc, loadInput2, constant15);
+  rewriter.create<AffineStoreOp>(loc, elseResult, alloc, iv);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{elseResult});
+  // rewriter.setInsertionPointToEnd(ifOp.getElseBlock());
+  rewriter.setInsertionPointAfter(ifOp);
+  ifOp->dump();
+  // forOp1->dump();
+  rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0), alloc, iv);
+  // getParentBlock then use
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
+  //  rewriter.setInsertionPointToEnd(ifOp->getBlock());
+  //  rewriter.setInsertionPoint(ifOp->getParentOp());
+  //  rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
+
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(
+  //     loc, rewriter.getF64Type(),
+  //     rewriter.getF64FloatAttr(15));
+
+  //  rewriter.create<affine::AffineYieldOp>(loc, ValueRange{constant15});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+  // else block
+  // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+
+  // Set insertion point to the end of the "then" block
+  // rewriter.setInsertionPointAfter(ifOp.getThenBlock()->getTerminator());
+
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  //  rewriter.setInsertionPointAfter(ifOp);
+
+  // also use affine::AffineStore to store at the loop induction variable
+  //  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+  //      loc, ifOp.getResult(0), alloc, forOp1.getInductionVar());
+  //  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+  //      loc, constant25, alloc, forOp1.getInductionVar());
+  //  Back to parentOp -- forOp1: rewriter.setInsertionPointAfter(storeOp);
+
+  llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+  // create affine yield for the loop
+  //  rewriter.create<affine::AffineYieldOp>(loc);
 
 #endif
 
 #if TryAffineIf2
 
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];
-    int64_t step = 1;
-
-    //create AffineMap and set
-    // %1 = affine.load 
-    //  if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
-    AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
-    // AffineExpr dimExpr2 = rewriter
-    // AffineMap map = AffineMap::get(1, 0, dimExpr);
-    // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
-    IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
-
-     //affine.if %arg1 >= 0 and %5 <= %1 - 1
-     // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
-     // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineExpr dimExpr2 = rewriter
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set affine_set<(d0) : (d0 - 5 >= 0)>
+  // affine.for %arg0 = 0 to 10 {
+  //   %3 = affine.if #set (%arg0) {
+  //         %1 = arith.const 25
+  //         affine.yield %1
+  //     }
+  //     affine.store %3, alloc[%arg0]
+  // }
 
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-
-    // #set affine_set<(d0) : (d0 - 5 <= 0)>
-    // affine.for %arg0 = 0 to 10 {
-    //   %3 = affine.if #set (%arg0) {
-    //         %1 = arith.const 25
-    //         affine.yield %1
-    //     }
-    //     affine.store %3, alloc[%arg0]
-    // } 
+  // auto ifOp = rewriter.create<affine::AffineIfOp>(loc, set1, ValueRange{iv},
+  //                                                 false /*no else*/);
+  auto ifOp = rewriter.create<affine::AffineIfOp>(loc, set1, ValueRange{iv},
+                                                  true /*with else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value constsq25 = rewriter.create<arith::MulFOp>(loc, constant25, constant25);
+
+  // ifOp.setR
+  // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv);
+  // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  rewriter.create<AffineStoreOp>(loc, constsq25, alloc, iv);
+
+  // getParentBlock then use
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
+  //  rewriter.setInsertionPointToEnd(ifOp->getBlock());
+  rewriter.setInsertionPoint(ifOp->getParentOp());
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+  // rewriter.cre
 
-    // auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv} , false /*no else*/ );
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv} , true /*no else*/ );
-    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
-    Value constant25 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(25));
-    Value constsq25 = rewriter.create<arith::MulFOp>(loc, constant25, constant25)  ;                                                   
-    
-    // ifOp.setR
-    // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv);
-    // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(15));
-    rewriter.create<AffineStoreOp>(loc, constsq25 , alloc, iv);
-
-
-    //getParentBlock then use 
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
-    // rewriter.setInsertionPointToEnd(ifOp->getBlock());
-    rewriter.setInsertionPoint(ifOp->getParentOp());
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
-    // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
-    
-    // rewriter.setInsertionPointAfter(ifOp);
-    // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
-    // rewriter.cre
-    
 #endif
 
 #if TryAffineMap
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0] - 2;
-    int64_t step = 1;
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-
-    
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-        //create affine for
-    // use affine-map expression for dimension then symbol then combination
-    // affine-map expression for dimension: affine_map<d0, d1)[s0] -> (d0 , d1 + s0, d1 - s0)
-    // use affine map 
-    // Define an affine map: #map2 = affine_map<(d0) -> (d0 + 2)>
-    auto symbol1 = tensorType.getShape()[0];
-    AffineExpr indx = rewriter.getAffineDimExpr(0);
-    AffineExpr constantExpr = rewriter.getAffineConstantExpr(2);
-    AffineMap addMap = AffineMap::get(1, 0, symbol1 - indx);
-    auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap , iv);
-
-    // Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
-    
-    
-    //try replace constant15 ie, with input & filter
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-
-    Value inputForFilter = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs() , iv);
-    // Value inputForFilterMapped = rewriter.create<affine::AffineLoadOp>(loc,  firOpAdaptor.getLhs() , addMap, iv);
-
-    Value impulseFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , iv);
-
-    auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, inputForFilter,      alloc,ValueRange{outputIndex});
-
-    
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0] - 2;
+  int64_t step = 1;
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+  // create affine for
+  // use affine-map expression for dimension then symbol then combination
+  // affine-map expression for dimension:
+  //   affine_map<(d0, d1)[s0] -> (d0, d1 + s0, d1 - s0)>
+  // use affine map. Define an affine map: #map2 = affine_map<(d0) -> (d0 + 2)>
+  auto symbol1 = tensorType.getShape()[0];
+  AffineExpr indx = rewriter.getAffineDimExpr(0);
+  AffineExpr constantExpr = rewriter.getAffineConstantExpr(2);
+  AffineMap addMap = AffineMap::get(1, 0, symbol1 - indx);
+  auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap, iv);
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  // try replace constant15 ie, with input & filter
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value inputForFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+  // Value inputForFilterMapped = rewriter.create<affine::AffineLoadOp>(loc,
+  // firOpAdaptor.getLhs() , addMap, iv);
+
+  Value impulseFilter =
+      rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv);
+
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+      loc, inputForFilter, alloc, ValueRange{outputIndex});
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
 
 #endif
 
 #if TrySumOfVector
-    // here, we have to use iter
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0] ;
-    int64_t step = 1;
-
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step , ValueRange{constant0} );
-
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-    
-
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    // Initial sum set to 0.
-        // %sum_0 = arith.constant 0.0 : f32
-        // // iter_args binds initial values to the loop's region arguments.
-        // %sum = affine.for %i = 0 to 10 step 1
-        //     iter_args(%sum_iter = %sum_0) -> (f32) {
-        //   %t = affine.load %buffer[%i] : memref<10xf32>
-        //   %sum_next = arith.addf %sum_iter, %t : f32
-        //   // Yield current iteration sum to next iteration %sum_iter or to %sum
-        //   // if final iteration.
-        //   affine.yield %sum_next : f32
-        // }
-        // return %sum : f32
-        // }
-
-
-      // Inside the loop body:
-
-    //try replace constant15 ie, with input & filter
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-
-    Value inputForFilter = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs() , iv);
-
-    //Get iter_arg 
-    auto getIterArg =  forOp1.getBody()->getArgument(1);       //forOp1.getIterOperands();
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, inputForFilter, getIterArg);
-    // Value sumNext = rewriter.create<arith::AddFOp>(loc, inputForFilter, constant0);
-
-    //here, at indx 0 , o/p = in[0]
-    // at indx 1 , o/p = in[0] + in[1] & so on
-    //at indx last o/p[9] = sum of all input elements
-    auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, sumNext,  alloc,ValueRange{iv});
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext} );
-    // rewriter.create<AffineYieldOp>(loc);
-    // auto result = forOp1.getResult(0);
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{constant0});
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  //  Initial sum set to 0.
+  //  %sum_0 = arith.constant 0.0 : f32
+  //  // iter_args binds initial values to the loop's region arguments.
+  //  %sum = affine.for %i = 0 to 10 step 1
+  //      iter_args(%sum_iter = %sum_0) -> (f32) {
+  //    %t = affine.load %buffer[%i] : memref<10xf32>
+  //    %sum_next = arith.addf %sum_iter, %t : f32
+  //    // Yield current iteration sum to next iteration %sum_iter or to %sum
+  //    // if final iteration.
+  //    affine.yield %sum_next : f32
+  //  }
+  //  return %sum : f32
+  //  }
+
+  // Inside the loop body:
+
+  // try replace constant15 ie, with input & filter
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value inputForFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // Get iter_arg
+  auto getIterArg =
+      forOp1.getBody()->getArgument(1); // forOp1.getIterOperands();
+  Value sumNext =
+      rewriter.create<arith::AddFOp>(loc, inputForFilter, getIterArg);
+  // Value sumNext = rewriter.create<arith::AddFOp>(loc, inputForFilter,
+  // constant0);
+
+  // here, at indx 0 , o/p = in[0]
+  //  at indx 1 , o/p = in[0] + in[1] & so on
+  // at indx last o/p[9] = sum of all input elements
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, sumNext, alloc,
+                                                        ValueRange{iv});
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  // rewriter.create<AffineYieldOp>(loc);
+  // auto result = forOp1.getResult(0);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
 
 #endif
 
 #if TryMultiDimLoop
-    // here, we have to use iter
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0] ;
-    int64_t step = 1;
-
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step  );
-
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-
-    //create loadOp
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-
-    Value loadInput = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs() , iv);
-
-    //create another loop --
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step , ValueRange{loadInput} );
-
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    auto iv2 = forOp2.getInductionVar();
-    Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , iv2);
-    
-    // get iterArg
-    auto getIterArg =  forOp2.getBody()->getArgument(1);
-    auto sumNext = rewriter.create<arith::AddFOp>(loc, loadInput, loadFilter);
-
-    
-
-    //store the result to output
-    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-    rewriter.setInsertionPointAfter(forOp2);
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv );
-    //
-    //yield the 
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    // Initial sum set to 0.
-        // affine.for %arg0 = 0 to 10 {
-        //   %1 = affine.load input[%arg0]
-        //   %4 = affine.for %arg1 = 0 to 10 step 1 
-        //     iter_args(%sum_iter = %1) {
-        //       %2 = affine.load filter[%arg1]
-        //       %3 = arith.add sum_iter , %2
-        //         affine.yield %3 : f64
-        //   }
-        //   affine.store %4, output[%arg0]
-        // }
-
-
-      // Inside the loop body:
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+  Value loadFilter =
+      rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv2);
+
+  // get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, loadInput, loadFilter);
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
 
-    
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Inside the loop body:
 
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
 
 #endif
 
 #if TryMultiDimForAndIf
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];
-    int64_t step = 1;
-
-    //create AffineMap and set
-    // %1 = affine.load 
-    //  if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
-  
-     //affine.if %arg1 >= 0 and %5 <= %1 - 1
-     // n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
-     // %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-
-    AffineExpr dimExpr = rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
-    IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // create 2nd loop
+  // use loop induction variable for 2nd loop
+  // use if condition on 2nd loop induction variable
+  // get the result of inner for loop and store at output
+
+  affine::AffineForOp forOp2 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+  AffineExpr dimExpr2 =
+      rewriter.getAffineDimExpr(1) - rewriter.getAffineConstantExpr(6);
+  IntegerSet set2 = IntegerSet::get(1, 0, {dimExpr, dimExpr2}, {false});
+
+  auto ifOp = rewriter.create<affine::AffineIfOp>(loc, set2, ValueRange{iv},
+                                                  false /*no else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value resultFromInnerLoop =
+      rewriter.create<arith::MulFOp>(loc, constant25, constant25);
+
+  // rewriter.setInsertionPointAfter(forOp2);
+  // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+  // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv2);
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
+  // rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, constant25, alloc, iv);
+  // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
+  // affine.for %arg0 = 0 to 10 {
+  //     %N = len(output)
+  //   %4 =  affine.for %arg1 = 0 to 10 {
+  //         affine.if #set2(%arg0 , %arg1 )[%N] {
+  //             %1 = const 5
+  //             %2 = const 3
+  //             %3 = arith.mulf %1 , %2
+  //             affine.yield %3
+  //         }
+  //     }
+  //   affine.store %4, alloc[%arg0]
+  // }
 
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
 
-    // create 2nd loop
-    // use loop inductn variable for 2nd loop
-    // use if condition on 2nd loop inductn variable
-    // get the result of inner for loop and store at output 
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
 
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    auto iv2 = forOp2.getInductionVar();
-    AffineExpr dimExpr2 = rewriter.getAffineDimExpr(1) - rewriter.getAffineConstantExpr(6);
-    IntegerSet set2 = IntegerSet::get(1, 0, {dimExpr,dimExpr2}, {false});
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
 
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set2 , ValueRange{iv} , false /*no else*/ );
-    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
-    Value constant25 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(25));
-    Value resultFromInnerLoop = rewriter.create<arith::MulFOp>(loc, constant25 , constant25);
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  //  rewriter.setInsertionPointAfter(ifOp);
 
-    // rewriter.setInsertionPointAfter(forOp2);
-    // rewriter.setInsertionPointToEnd(forOp2->getBlock());
-    // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv2);
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
-    // rewriter.setInsertionPointAfter(ifOp);
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
-    // rewriter.setInsertionPointAfter(forOp2);
-    rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv);
-          // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
-          // affine.for %arg0 = 0 to 10 {
-          //     %N = len(output)
-          //   %4 =  affine.for %arg1 = 0 to 10 {
-          //         affine.if #set2(%arg0 , %arg1 )[%N] {
-          //             %1 = const 5
-          //             %2 = const 3
-          //             %3 = arith.mulf %1 , %2
-          //             affine.yield %3 
-          //         }
-          //     }
-          //   affine.store %4, alloc[%arg0]                
-          // }
-
-   
-
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
-    // rewriter.setInsertionPointAfter(ifOp);
-    // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
-    
-    //try to add the affine.If condition 
-    //create affine.If , 
-    // use integer set to represent the condition 
-    //check the AffineArgs 
-    // affine.if operation contains two regions for the “then” and “else” clauses
-      //each region of affine.if must contain a single block with no args and terminated by affine.yield op
-      // if affine.if defines no values --> no need for affine.yield
-    
-    // affineIf.setConditional(set1, forOp1.getInductionVar());
-    //start then "block"
-    // "then" block
-    
-    // rewriter.create<affine::AffineYieldOp>(loc, constant25);
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
-    //Back to parentOp -- ifOp stops here
-    // rewriter.setInsertionPointAfter(ifOp);
-    
-    llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+  llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
 
 #endif
 
 #if TryMultiDimLoopAndAffineMap
-    // here, we have to use iter
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0] ;
-    int64_t step = 1;
-
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step  );
-
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-
-    //create loadOp
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-
-    Value loadInput = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs() , iv);
-
-    //create another loop --
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step , ValueRange{loadInput} );
-
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    auto iv2 = forOp2.getInductionVar();
-
-    //Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
-    AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
-    AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
-    AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
-    // auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap , ValueRange{iv,iv2});
-
-    // Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
-    
-
-    // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv2,iv});
-    Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv,iv2});
-    // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , outputIndex);
-    // get iterArg
-    auto getIterArg =  forOp2.getBody()->getArgument(1);
-    auto sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg, loadFilter);
-
-    
-
-    //store the result to output
-    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-    rewriter.setInsertionPointAfter(forOp2);
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv );
-    //
-    //yield the 
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    // Initial sum set to 0.
-        // affine.for %arg0 = 0 to 10 {
-        //   %1 = affine.load input[%arg0]
-        //   %4 = affine.for %arg1 = 0 to 10 step 1 
-        //     iter_args(%sum_iter = %1) {
-        //       %2 = affine.load filter[%arg1]
-        //       %3 = arith.add sum_iter , %2
-        //         affine.yield %3 : f64
-        //   }
-        //   affine.store %4, output[%arg0]
-        // }
-
-
-      // Inside the loop body:
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  // Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
+  AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
+  AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
+  AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
+  // auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap ,
+  // ValueRange{iv,iv2});
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , addMap, ValueRange{iv2,iv});
+  Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(),
+                                                   addMap, ValueRange{iv, iv2});
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , outputIndex);  -- next step: get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg, loadFilter);
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
 
-    
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Inside the loop body:
 
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
 
 #endif
 
 #if TryMultiDimLoopAndAffineSet
-    // here, we have to use iter
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0] ;
-    int64_t step = 1;
-
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step  );
-
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-
-    //create loadOp
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-
-    Value loadInput = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs() , iv);
-
-    //create another loop --
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step , ValueRange{loadInput} );
-
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    auto iv2 = forOp2.getInductionVar();
-
-    //Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
-    AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
-    AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
-    AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
-    auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap , ValueRange{iv,iv2});
-
-    // Value constant15 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
-    AffineExpr dimExpr = OuterIndx - InnerIndx;
-    IntegerSet set1 = IntegerSet::get(2, 0, {dimExpr}, {false});
-
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ );
-    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
-    // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv2,iv});
-    Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs() , addMap, ValueRange{iv,iv2});
-    // get iterArg
-    auto getIterArg =  forOp2.getBody()->getArgument(1);
-    auto sumNext = rewriter.create<arith::AddFOp>(loc, loadFilter, loadFilter);
-    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-
-    //store the result to output
-    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
-    rewriter.setInsertionPointAfter(ifOp);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-    rewriter.setInsertionPointAfter(forOp2);
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv );
-    //
-    //yield the 
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    // Initial sum set to 0.
-        // affine.for %arg0 = 0 to 10 {
-        //   %1 = affine.load input[%arg0]
-        //   %4 = affine.for %arg1 = 0 to 10 step 1 
-        //     iter_args(%sum_iter = %1) {
-        //       %2 = affine.load filter[%arg1]
-        //       %3 = arith.add sum_iter , %2
-        //         affine.yield %3 : f64
-        //   }
-        //   affine.store %4, output[%arg0]
-        // }
-
-
-      // Inside the loop body:
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  // Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
+  AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
+  AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
+  AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
+  auto outputIndex =
+      rewriter.create<affine::AffineApplyOp>(loc, addMap, ValueRange{iv, iv2});
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  AffineExpr dimExpr = OuterIndx - InnerIndx;
+  IntegerSet set1 = IntegerSet::get(2, 0, {dimExpr}, {false});
+
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , addMap, ValueRange{iv2,iv});
+  Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(),
+                                                   addMap, ValueRange{iv, iv2});
+  // get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, loadFilter, loadFilter);
+  // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.setInsertionPointAfter(ifOp);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
 
-    
-    llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Inside the loop body:
 
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
 
 #endif
 
 #if TryFIRFilter
 
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];
-    int64_t step = 1;
-
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
-    rewriter.setInsertionPointToStart(forOp1.getBody());
-    auto iv = forOp1.getInductionVar();
-
-    // Value sum0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), 
-    //                                             rewriter.getF64FloatAttr(0));
-    //get filter len
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->operand_type_begin()));
-    auto operandIt = op->operand_type_begin();
-    auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
-    int64_t ubForInput = tensorTypeInput.getShape()[0];
-    //get second operand
-    operandIt = operandIt + 1;
-
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
-    auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
-    int64_t ubForFilter = tensorTypeFilter.getShape()[0];
-
-    // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
-    //create a constant for sum
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ubForFilter, step , ValueRange{constant0});
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    auto iv2 = forOp2.getInductionVar();
-
-    auto getIterArg =  forOp2.getBody()->getArgument(1);       //forOp1.getIterOperands();
-    
-    // AffineExpr dimExpr = rewriter.getAffineDimExpr(0);
-    AffineExpr dimExpr2 = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1);
-    //n-k <= inputLen -1 or, k-n >= 1 - inputLen ie, k - n + inputLen - 1 >= 0
-    AffineExpr ExprForUpperBoundCheck = rewriter.getAffineConstantExpr(ubForInput) + rewriter.getAffineDimExpr(1)
-                     - rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1)  ;
-    IntegerSet set2 = IntegerSet::get(2, 0, {dimExpr2,ExprForUpperBoundCheck}, {false, false});
-    
-    //use typeRange too:
-    Type floatType = rewriter.getF64Type();
-    //  if n-k >= 0 
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, TypeRange{floatType}, set2 , ValueRange{iv,iv2} , true /*else*/ ); 
-    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
-    
-    AffineMap addMap = AffineMap::get(2, 0, dimExpr2);
-    // auto inputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap , ValueRange{iv,iv2});
-
-    FIRFilterResponseOpAdaptor firOpAdaptor(operands);
-    Value loadInput = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getLhs(), addMap , ValueRange{iv,iv2});
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // Value sum0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+  //                                             rewriter.getF64FloatAttr(0));
+  // get filter len
+  // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->getOperand(1)));
+  // //operand_type_end auto tensorTypeFilter =
+  // llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+  auto operandIt = op->operand_type_begin();
+  auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
+  int64_t ubForInput = tensorTypeInput.getShape()[0];
+  // get second operand
+  operandIt = operandIt + 1;
+
+  // auto tensorTypeFilter =
+  // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+  auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
+  int64_t ubForFilter = tensorTypeFilter.getShape()[0];
+
+  // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
+  // create a constant for sum
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ubForFilter, step, ValueRange{constant0});
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  auto getIterArg =
+      forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
+
+  // AffineExpr dimExpr = rewriter.getAffineDimExpr(0);
+  AffineExpr dimExpr2 =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1);
+  // n-k <= inputLen -1 or, k-n >= 1 - inputLen ie, k - n + inputLen - 1 >= 0
+  AffineExpr ExprForUpperBoundCheck =
+      rewriter.getAffineConstantExpr(ubForInput) +
+      rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0) -
+      rewriter.getAffineConstantExpr(1);
+  IntegerSet set2 =
+      IntegerSet::get(2, 0, {dimExpr2, ExprForUpperBoundCheck}, {false, false});
+
+  // use typeRange too:
+  Type floatType = rewriter.getF64Type();
+  //  if n-k >= 0 and n-k <= inputLen - 1 (both constraints of set2)
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, TypeRange{floatType}, set2, ValueRange{iv, iv2}, true /*else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+  AffineMap addMap = AffineMap::get(2, 0, dimExpr2);
+  // auto inputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap ,
+  // ValueRange{iv,iv2});
+
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+  Value loadInput = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getLhs(),
+                                                  addMap, ValueRange{iv, iv2});
+
+  rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
+  // else block
+  rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value const0ForElse = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+  rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
+  rewriter.setInsertionPointAfter(ifOp);
+
+  // load filter and then mult and then sum
+  Value loadFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv2);
+  // Value constant25 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(),
+  //                                                      rewriter.getF64FloatAttr(25));
+  Value filterMulInput =
+      rewriter.create<arith::MulFOp>(loc, ifOp.getResult(0), loadFilter);
+  Value sumNext =
+      rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  rewriter.setInsertionPointAfter(forOp1);
+
+  // ifOp->dump();
+
+  // FIRFilterResponse code -- x[n] , h[n]
+
+  // iterate for output
+  // start with sum=0
+  // iterate for filter len
+  // check for input_indx must be within bounds
+  // load filter and input[indx]
+  // multiply them
+  // add this to sum
+  // update output with sum
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
+  // affine.for %arg0 = 0 to 10 {
+  //     %N = len(output)
+  //   %4 =  affine.for %arg1 = 0 to 10 {
+  //         affine.if #set2(%arg0 , %arg1 )[%N] {
+  //             %1 = const 5
+  //             %2 = const 3
+  //             %3 = arith.mulf %1 , %2
+  //             affine.yield %3
+  //         }
+  //     }
+  //   affine.store %4, alloc[%arg0]
+  // }
 
-    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
-    //else block
-    rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    Value const0ForElse = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
-    rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
 
-    //load filter and then mult and then sum
-    Value loadFilter = rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getRhs() ,  iv2);
-    // Value constant25 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-    //                                                      rewriter.getF64FloatAttr(25));
-    Value filterMulInput = rewriter.create<arith::MulFOp>(loc, ifOp.getResult(0) , loadFilter);
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-    // rewriter.setInsertionPointToEnd(forOp2->getBlock());
-    rewriter.setInsertionPointAfter(forOp2);
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
-    rewriter.setInsertionPointAfter(forOp1);
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
 
-    // ifOp->dump();
-    
-
-    //FIRFilterResponse code -- x[n] , h[n]
-   
-    //iterate for output
-        //start with sum=0
-        //iterate for filter len
-            //check for input_indx must be within bounds
-            //load filter and input[indx]
-            //multiply them
-            //add this to sum
-    //update output with sum
-
-    
-
-    //inside the forOp body --> create the operations & then close the body
-    // OpBuilder::InsertionGuard guard(rewriter);
-    
-    //start adding operations like a arith::constant = 100.0 to the body of forOp1
-      // Inside the loop body:
-
-
-              // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
-          // affine.for %arg0 = 0 to 10 {
-          //     %N = len(output)
-          //   %4 =  affine.for %arg1 = 0 to 10 {
-          //         affine.if #set2(%arg0 , %arg1 )[%N] {
-          //             %1 = const 5
-          //             %2 = const 3
-          //             %3 = arith.mulf %1 , %2
-          //             affine.yield %3 
-          //         }
-          //     }
-          //   affine.store %4, alloc[%arg0]                
-          // }
-
-   
-
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
-    // rewriter.setInsertionPointAfter(ifOp);
-    // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
-    
-    //try to add the affine.If condition 
-    //create affine.If , 
-    // use integer set to represent the condition 
-    //check the AffineArgs 
-    // affine.if operation contains two regions for the “then” and “else” clauses
-      //each region of affine.if must contain a single block with no args and terminated by affine.yield op
-      // if affine.if defines no values --> no need for affine.yield
-    
-    // affineIf.setConditional(set1, forOp1.getInductionVar());
-    //start then "block"
-    // "then" block
-    
-    // rewriter.create<affine::AffineYieldOp>(loc, constant25);
-    // llvm::errs() << "LINE = " << __LINE__ << "\n";
-    //Back to parentOp -- ifOp stops here
-    // rewriter.setInsertionPointAfter(ifOp);
-    
-    // llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
 
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  // llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  // rewriter.setInsertionPointAfter(ifOp);
 
+  // llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
 
 #endif
-    // Terminate the loop body with affine.yield.
-    // rewriter.create<affine::AffineYieldOp>(loc);
-
+  // Terminate the loop body with affine.yield.
+  // rewriter.create<affine::AffineYieldOp>(loc);
 
   // Replace this operation with the generated alloc.
   rewriter.replaceOp(op, alloc);
 }
 
-namespace {
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FFT1DImg operations
 //===----------------------------------------------------------------------===//
 
-
 struct FFT1DImgConjSymmOpLowering : public ConversionPattern {
   FFT1DImgConjSymmOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::FFT1DImgConjSymmOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::FFT1DImgConjSymmOp::getOperationName(), 1, ctx) {
+  }
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = y_real[k] + j *y_img[k] 
-      // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
-
-    // For k=0: 
-        //y[0] = 0
-
-      // for k=1 to (N+1)/2
-          // sum = 0
-          // for n=0 to N
-              // sum = sum + x[n] * sin(2*pi*k*n/N)
-          //y[k] = -1 * sum
-          //y[N-k] = sum
-      //init  output mem for y_real & y_img as 0 
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k]
-          // 
-      // replace this upsampling op with the output_mem_allocation op
-
-    DEBUG_PRINT_NO_ARGS() ;
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-        
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j * y_img[k]
+    //   y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi*k*n/N))
+
+    // For k=0:
+    //   y[0] = 0
+
+    // for k=1 to (N+1)/2
+    //   sum = 0
+    //   for n=0 to N
+    //     sum = sum + x[n] * sin(2*pi*k*n/N)
+    //   y[k] = -1 * sum
+    //   y[N-k] = sum
+    // init output mem for y_real & y_img as 0
+    // iterate for output from k=0 to last
+    //   iterate for all x from n=0 to last
+    //     perform the calculations, i.e. x[n] * sin(2*pi*k*n/N), and sum and
+    //     store them at y[k]
+    //
+    // replace this FFT1DImgConjSymm op with the output_mem_allocation op
+
+    DEBUG_PRINT_NO_ARGS();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 1 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
-    int64_t ubBy2 = (ub+1)/2;
+    int64_t ubBy2 = (ub + 1) / 2;
     int64_t step = 1;
 
-    // affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
-    // auto iv = forOp1.getInductionVar();
+    // affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub,
+    // step); auto iv = forOp1.getInductionVar();
     // rewriter.setInsertionPointToStart(forOp1.getBody());
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
-    // rewriter.setInsertionPointAfter(forOp1);
-    DEBUG_PRINT_NO_ARGS() ;  
-    //for k=0
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc_img,
+    // ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1);
+    DEBUG_PRINT_NO_ARGS();
+    // for k=0
     Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{Indx0});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img,
+                                   ValueRange{Indx0});
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb+1, ubBy2, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb + 1, ubBy2, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
     auto ivX = forOpX.getInductionVar();
-    auto getIterArg =  forOpX.getBody()->getArgument(1);
+    auto getIterArg = forOpX.getBody()->getArgument(1);
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
+    // load from X, & y1 & y2
     FFT1DImgConjSymmOpAdaptor fft1DImgConjSymmAdaptor(operands);
-    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DImgConjSymmAdaptor.getInput(), ValueRange{ivX});
-    // Value loadYImg = rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DImgConjSymmAdaptor.getInput(), ValueRange{ivX});
+    // Value loadYImg = rewriter.create<AffineLoadOp>(loc, alloc_img,
+    // ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
 
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
 
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Value N = inputTensorType.getShape()[0];
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
-    
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
     // Img part = -1 * Sum(x[i] * sin(div) )
     Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
-    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX , GetSin);   
-    Value imgSum = rewriter.create<arith::SubFOp>(loc, getIterArg ,xMulSin) ;
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, getIterArg, xMulSin);
 
     rewriter.create<AffineYieldOp>(loc, ValueRange{imgSum});
     rewriter.setInsertionPointAfter(forOpX);
-    
-    //store imgSum at y[k]
-    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_img, ValueRange{ivY}); 
-
-    //store -1 * imgSum at y[N-k]
-    AffineExpr ExprNminusK = rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0);
-    AffineMap mapNminusK = AffineMap::get(1, 0 , ExprNminusK);
-    Value constMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(-1));
-    Value NegImgSum = rewriter.create<arith::MulFOp>(loc, constMinus1 , forOpX.getResult(0));
- 
-    rewriter.create<AffineStoreOp>(loc, NegImgSum, alloc_img, mapNminusK, ValueRange{ivY});
+
+    // store imgSum at y[k]
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_img,
+                                   ValueRange{ivY});
+
+    // store -1 * imgSum at y[N-k]
+    AffineExpr ExprNminusK =
+        rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0);
+    AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK);
+    Value constMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value NegImgSum =
+        rewriter.create<arith::MulFOp>(loc, constMinus1, forOpX.getResult(0));
+
+    rewriter.create<AffineStoreOp>(loc, NegImgSum, alloc_img, mapNminusK,
+                                   ValueRange{ivY});
 
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc_real[%y] : memref<4xf64>
-        //         //    dsp.print %alloc_real : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     // rewriter.replaceOp(op, alloc_real);
     rewriter.replaceOp(op, alloc_img);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FFT1DRealSymmOp operations
 //===----------------------------------------------------------------------===//
 
-
 struct FFT1DRealSymmOpLowering : public ConversionPattern {
   FFT1DRealSymmOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::FFT1DRealSymmOp::getOperationName(), 1, ctx) {}
@@ -1188,164 +1237,151 @@ struct FFT1DRealSymmOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-     // y[k] = sumOver_n(x[n]*cos[2*pi * k *n/N ] , 0<=k < (N+1)/2
-     //        & y[N-k] = y[k]  (N+1)/2<= k< N
-      // For k=0: 
-        //sum=0
-        // for n= 0 to N
-            //sum = sum + x[n]
-        //y[0] = sum
-
-      // for k=1 to (N+1)/2
-          // sum = 0
-          // for n=0 to N
-              // sum = sum + x[n] * cos(2*pi*k*n/N)
-          //y[k] = sum
-          //y[N-k] = sum
-
-      //Actual definition
-        //  y[k] = y_real[k] + j *y_img[k] 
-        // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] 
-        // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
-        //init  output mem for y_real & y_img as 0 
-        // replace this upsampling op with the output_mem_allocation op
 
+    // Pseudo-code:
+    //   y[k] = sumOver_n(x[n] * cos(2*pi*k*n/N)),  0 <= k < (N+1)/2
+    //   & y[N-k] = y[k],                           (N+1)/2 <= k < N
+    //   For k=0:
+    //     sum = 0
+    //     for n=0 to N
+    //       sum = sum + x[n]
+    //     y[0] = sum
 
+    // for k=1 to (N+1)/2
+    //   sum = 0
+    //   for n=0 to N
+    //     sum = sum + x[n] * cos(2*pi*k*n/N)
+    //   y[k] = sum
+    //   y[N-k] = sum
+
+    // Actual definition
+    //   y[k] = y_real[k] + j * y_img[k]
+    //   y_real[k] = sumOver_n(x[n] * cos(2*pi*k*n/N))
+    //   y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi*k*n/N))
+    // init output mem for y_real & y_img as 0
+    // replace this FFT1DRealSymm op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-    // auto tensorType1 =  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
 
-    // DEBUG_PRINT_NO_ARGS() ; 
-    //tensorType.getShape()[0]
-    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; 
-    
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
 
-    // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
+    // affine.for %y = 0 to 4 {
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 1 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
-    int64_t ubBy2 = (ub+1)/2;
+    int64_t ubBy2 = (ub + 1) / 2;
     int64_t step = 1;
 
-    //load from X, & y1 & y2
-    FFT1DRealSymmOpAdaptor fft1DRealSymmAdaptor(operands);
-
-    // affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
-    // auto iv = forOp1.getInductionVar();
+    // affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub,
+    // step); auto iv = forOp1.getInductionVar();
     // rewriter.setInsertionPointToStart(forOp1.getBody());
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
-    // rewriter.setInsertionPointAfter(forOp1);
-
-    //k=0
-    //sum=0
-    // for n= 0 to N
-        //sum = sum + x[n]
-    //y[0] = sum
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step , ValueRange{constant0});
-    auto iv2 = forOp2.getInductionVar();
-    rewriter.setInsertionPointToStart(forOp2.getBody());
-    //get previous sum
-    auto getIterArg1 =  forOp2.getBody()->getArgument(1);
-    Value loadX = rewriter.create<AffineLoadOp>(loc, fft1DRealSymmAdaptor.getInput(), ValueRange{iv2});
-    Value sumNext1 = rewriter.create<arith::AddFOp>(loc, loadX, getIterArg1);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext1});
-    rewriter.setInsertionPointAfter(forOp2);
-
-    //store result for k=0
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc_real,
+    // ValueRange{iv}); rewriter.setInsertionPointAfter(forOp1);
+    DEBUG_PRINT_NO_ARGS();
+    // for k=0 -- NOTE(review): pseudo-code says y[0] = sum(x[n]), not 0
     Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc_real, ValueRange{Indx0});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real,
+                                   ValueRange{Indx0});
 
-    // for k=1 to (N+1)/2
-          // sum = 0
-          // for n=0 to N
-              // sum = sum + x[n] * cos(2*pi*k*n/N)
-          //y[k] = sum
-          //y[N-k] = sum
-    //loop for Y ie, k
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb+1, ubBy2, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb + 1, ubBy2, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
     auto ivX = forOpX.getInductionVar();
-    //get sum
-    auto getIterArg =  forOpX.getBody()->getArgument(1);
+    auto getIterArg = forOpX.getBody()->getArgument(1);
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
-    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DRealSymmAdaptor.getInput(), ValueRange{ivX});
-    // Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
-    
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
-
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
-
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
-
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
-    // Value N = inputTensorType.getShape()[0];
+    // load from X, & y1 & y2
+    FFT1DRealSymmOpAdaptor fft1DRealSymmAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DRealSymmAdaptor.getInput(), ValueRange{ivX});
+    // Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc_real,
+    // ValueRange{ivY});
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    // Real part = Sum(x[i] * cos(div) )
-    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
-    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX , GetCos);  
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
 
-    //realSu 
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg ,xMulCos) ;
-    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc_real, ValueRange{ivX}); 
-    
-    // DEBUG_PRINT_NO_ARGS() ;
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // getOperand().getType()
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    // Value N = inputTensorType.getShape()[0];
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+
+    // realSum: add x[n] * cos(...) to the running sum
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg, xMulCos);
+    // rewriter.create<AffineStoreOp>(loc, sumNext, alloc_real,
+    // ValueRange{ivX});
+
+    // DEBUG_PRINT_NO_ARGS() ;
     rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
     rewriter.setInsertionPointAfter(forOpX);
     // forOpX->dump();
-    //store realSum at y[k] 
-    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0) , alloc_real, ValueRange{ivY});
+    // store realSum at y[k]
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_real,
+                                   ValueRange{ivY});
 
-    //store realSum at y[N-k]
-    AffineExpr ExprNminusK = rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0);
-    AffineMap mapNminusK = AffineMap::get(1, 0 , ExprNminusK);
-    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_real, mapNminusK, ValueRange{ivY});
+    // store realSum at y[N-k]
+    AffineExpr ExprNminusK =
+        rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0);
+    AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK);
+
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_real,
+                                   mapNminusK, ValueRange{ivY});
 
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
     rewriter.replaceOp(op, alloc_real);
-    
+
     return success();
   }
 };
@@ -1353,138 +1389,150 @@ struct FFT1DRealSymmOpLowering : public ConversionPattern {
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FIRFilterYSymmOptimizedOp operations
 //===----------------------------------------------------------------------===//
-struct FIRFilterYSymmOptimizedOpLowering: public ConversionPattern {
-      FIRFilterYSymmOptimizedOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::FIRFilterYSymmOptimizedOp::getOperationName(), 1 , ctx) {}
-
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.FIRFilterYSymmOptimizedOp has 2 operands -- both of type tensor f64 
-
-      //Get the location of FIRFilterYSymmOptimizedOp
-      auto loc = op->getLoc();
-      
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+struct FIRFilterYSymmOptimizedOpLowering : public ConversionPattern {
+  FIRFilterYSymmOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterYSymmOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterYSymmOptimizedOp has 2 operands -- both of type tensor f64
+
+    // Get the location of FIRFilterYSymmOptimizedOp
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    //Pseudo-code:
-        //N=lenY , M=lenX here, output is symm ie, y[n] = y[N-1-n]
-        //y[n] = x[n] conv x[-n] ie, x[M-1-n] ie, x2[n]
-        //y[n] = SumOverAllk x[k] * x2[n-k]  , 0<=k<M  , 0<=n<N
-        //     = SumOverAllk x[k] * x[M-1-(n-k)] , check for 0<=M+k-1-n<M
-
-        //code:
-            //for n=0 to (N+1)/2
-                // sum =0
-                // for k=0 to M
-                    // if( 0<= M+k-n-1 <M)
-                        // sum = sum + x[k] * x[M+k-n-1]
-                        //return sum
-                //y[n]= sum
-                //y[N-1-n] = sum
- 
-
-    int64_t lb = 0 ;
+    // Pseudo-code:
+    // N=lenY , M=lenX here, output is symm ie, y[n] = y[N-1-n]
+    // y[n] = x[n] conv x[-n] ie, x[M-1-n] ie, x2[n]
+    // y[n] = SumOverAllk x[k] * x2[n-k]  , 0<=k<M  , 0<=n<N
+    //      = SumOverAllk x[k] * x[M-1-(n-k)] , check for 0<=M+k-1-n<M
+
+    // code:
+    // for n=0 to (N+1)/2
+    //  sum =0
+    //  for k=0 to M
+    //  if( 0<= M+k-n-1 <M)
+    //  sum = sum + x[k] * x[M+k-n-1]
+    // return sum
+    // y[n]= sum
+    // y[N-1-n] = sum
+
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
-    int ubBy2 = (ub+1)/2;
+    int64_t ubBy2 = (ub + 1) / 2; // ceil(N / 2), kept at ub's 64-bit width
     int64_t step = 1;
     DEBUG_PRINT_NO_ARGS();
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ubBy2, step );
+    affine::AffineForOp forOp1 =
+        rewriter.create<affine::AffineForOp>(loc, lb, ubBy2, step);
     rewriter.setInsertionPointToStart(forOp1.getBody());
     auto iv = forOp1.getInductionVar();
 
-    //for n=0 to N
-            // sum = 0, temp =0
-    //for n=0 to (N+1)/2
-                // sum =0
-    //get filter len 
+    // for n=0 to N
+    //  sum = 0, temp =0
+    // for n=0 to (N+1)/2
+    //  sum =0
+    // get filter len
     auto operandIt = op->operand_type_begin();
     auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
     int64_t ubForInput = tensorTypeInput.getShape()[0];
     DEBUG_PRINT_NO_ARGS();
-    DEBUG_PRINT_WITH_ARGS("ubForInput=" , ubForInput );
+    DEBUG_PRINT_WITH_ARGS("ubForInput=", ubForInput);
 
-    //create a constant for sum
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ubForInput, step , ValueRange{constant0});
+    // create a constant for sum
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+        loc, lb, ubForInput, step, ValueRange{constant0});
     rewriter.setInsertionPointToStart(forOp2.getBody());
     auto iv2 = forOp2.getInductionVar();
-    //get sum
-    auto getIterArg =  forOp2.getBody()->getArgument(1); 
+    // get sum
+    auto getIterArg = forOp2.getBody()->getArgument(1);
     DEBUG_PRINT_NO_ARGS();
     FIRFilterYSymmOptimizedOpAdaptor firFilterYSymmOpAdaptor(operands);
 
-    
     // if( 0<= M+k-n-1 <M)
-            // sum = sum + x[k] * x[M+k-n-1]
-    //For M+k-n-1 
-    //LowerBoundSet: M+k-n-1 >=0  ie, 2 dimensions =n & k 
-    //UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0
-    
-    //LowerBound Expr: M+k-n-1 >=0 ie, M-1 + k -n >= 0
-    AffineExpr ExprLowerBound = rewriter.getAffineConstantExpr(ubForInput - 1) + rewriter.getAffineDimExpr(1) -
-                rewriter.getAffineDimExpr(0);
-    //UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0
-    AffineExpr ExprUpperBound = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1) ;
-    IntegerSet setForIf = IntegerSet::get(2,0, {ExprLowerBound , ExprUpperBound}, {false, false});
+    // sum = sum + x[k] * x[M+k-n-1]
+    // For M+k-n-1
+    // LowerBoundSet: M+k-n-1 >=0  ie, 2 dimensions =n & k
+    // UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0
+
+    // LowerBound Expr: M+k-n-1 >=0 ie, M-1 + k -n >= 0
+    AffineExpr ExprLowerBound = rewriter.getAffineConstantExpr(ubForInput - 1) +
+                                rewriter.getAffineDimExpr(1) -
+                                rewriter.getAffineDimExpr(0);
+    // UpperBoundSet: M+k-n-1 <= M-1 ie, n-k>=0
+    AffineExpr ExprUpperBound =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1);
+    IntegerSet setForIf =
+        IntegerSet::get(2, 0, {ExprLowerBound, ExprUpperBound}, {false, false});
     DEBUG_PRINT_NO_ARGS();
 
     // if( 0<= M+k-n-1 <M)
     Type floatType = rewriter.getF64Type();
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, TypeRange{floatType}, setForIf , ValueRange{iv,iv2} , true /*else*/ ); 
+    auto ifOp =
+        rewriter.create<affine::AffineIfOp>(loc, TypeRange{floatType}, setForIf,
+                                            ValueRange{iv, iv2}, true /*else*/);
     rewriter.setInsertionPointToStart(ifOp.getThenBlock());
     DEBUG_PRINT_NO_ARGS();
 
     // sum = sum + x[k] * x[M+k-n-1]
-    //load x[M+k-n-1]
-    AffineMap mapMPlusKMinusNmin1 = AffineMap::get(2, 0 , ExprLowerBound);
-    Value loadInputIndx2 = rewriter.create<AffineLoadOp>(loc, firFilterYSymmOpAdaptor.getLhs(), mapMPlusKMinusNmin1 , ValueRange{iv,iv2});
+    // load x[M+k-n-1]
+    AffineMap mapMPlusKMinusNmin1 = AffineMap::get(2, 0, ExprLowerBound);
+    Value loadInputIndx2 =
+        rewriter.create<AffineLoadOp>(loc, firFilterYSymmOpAdaptor.getLhs(),
+                                      mapMPlusKMinusNmin1, ValueRange{iv, iv2});
     rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputIndx2});
 
-    //else return 0
+    // else return 0
     rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    Value const0ForElse = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value const0ForElse = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
     rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
     rewriter.setInsertionPointAfter(ifOp);
 
-    //outside if 
-    //Now, sum = sum + val2 * x[k]
-    Value loadX = rewriter.create<AffineLoadOp>(loc, firFilterYSymmOpAdaptor.getLhs(), ValueRange{iv2});
+    // outside if
+    // Now, sum = sum + val2 * x[k]
+    Value loadX = rewriter.create<AffineLoadOp>(
+        loc, firFilterYSymmOpAdaptor.getLhs(), ValueRange{iv2});
     DEBUG_PRINT_NO_ARGS();
 
-    //x[k] * x[M+k-n-1]   here, val2 = x[M+k-n-1]
-    Value XMulReverseXIndx = rewriter.create<arith::MulFOp>(loc, loadX , ifOp.getResult(0));
-    //sum = sum + x[k] * x[M+k-n-1]
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, XMulReverseXIndx, getIterArg);
+    // x[k] * x[M+k-n-1]   here, val2 = x[M+k-n-1]
+    Value XMulReverseXIndx =
+        rewriter.create<arith::MulFOp>(loc, loadX, ifOp.getResult(0));
+    // sum = sum + x[k] * x[M+k-n-1]
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, XMulReverseXIndx, getIterArg);
     rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
-    
+
     DEBUG_PRINT_NO_ARGS();
     rewriter.setInsertionPointAfter(forOp2);
     // forOp2->dump();
     DEBUG_PRINT_NO_ARGS();
 
-    //y[n] = sum ie, y[n] = sumNext 
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
-    //y[N-1-n] = sum
-    AffineExpr ExprNminus1minYn = rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0);
-    AffineMap mapNminus1minYn = AffineMap::get(1, 0 , ExprNminus1minYn);
+    // y[n] = sum ie, y[n] = sumNext
+    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+    // y[N-1-n] = sum
+    AffineExpr ExprNminus1minYn =
+        rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0);
+    AffineMap mapNminus1minYn = AffineMap::get(1, 0, ExprNminus1minYn);
 
-    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, mapNminus1minYn ,  ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc,
+                                   mapNminus1minYn, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
     DEBUG_PRINT_NO_ARGS();
-    
+
     rewriter.replaceOp(op, alloc);
     return success();
-    }
+  }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: PaddingOp operations
 //===----------------------------------------------------------------------===//
@@ -1497,93 +1545,97 @@ struct PaddingOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[n] = x[n]  0<=n<N
-      //  y[n] = val  N<=n < N+len
-      //ie,
-        //for i=0 to N --inputLen
-            //y[n] = x[n]
-        //for i=N to N+len
-            //y[n] = val
-
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y[n] = x[n]  0<=n<N
+    //   y[n] = val  N<=n < N+len
+    // ie,
+    // for i=0 to N --inputLen
+    // y[n] = x[n]
+    // for i=N to N+len
+    // y[n] = val
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
+
     DEBUG_PRINT_NO_ARGS();
-    //construct affine loops for the input
+    // construct affine loops for the input
     PaddingOpAdaptor paddingOpAdaptor(operands);
     Value GetPadLenOperand = op->getOperand(2);
-    dsp::ConstantOp constantOp3rdArg = GetPadLenOperand.getDefiningOp<dsp::ConstantOp>();
+    dsp::ConstantOp constantOp3rdArg =
+        GetPadLenOperand.getDefiningOp<dsp::ConstantOp>();
 
-    if(!constantOp3rdArg){
+    if (!constantOp3rdArg) {
       llvm::errs() << "Fail:padding op 3rd operand is not constant\n";
       return failure();
     }
-    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();;
+    // The pad length is the first element of the constant third operand.
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
     auto elements1 = constant3rdValue.getValues<FloatAttr>();
     float Padlen = elements1[0].getValueAsDouble();
-    DEBUG_PRINT_WITH_ARGS("Padlen is" , Padlen);
-    //first from 0 <= i < N
-    auto inputType = llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
-    int64_t lb = 0 ;
-    int64_t ub = inputType.getShape()[0];   
+    DEBUG_PRINT_WITH_ARGS("Padlen is", Padlen);
+    // first from 0 <= i < N
+    auto inputType =
+        llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
     int64_t step = 1;
 
-    DEBUG_PRINT_NO_ARGS(); 
-  
-    //loop from 0 <= i < N
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    DEBUG_PRINT_NO_ARGS();
+
+    // loop from 0 <= i < N
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    Value InputX = rewriter.create<AffineLoadOp>(loc, paddingOpAdaptor.getInput(), ivY);
-    rewriter.create<AffineStoreOp>(loc, InputX, alloc, ivY); 
+    Value InputX =
+        rewriter.create<AffineLoadOp>(loc, paddingOpAdaptor.getInput(), ivY);
+    rewriter.create<AffineStoreOp>(loc, InputX, alloc, ivY);
     rewriter.setInsertionPointAfter(forOpY);
 
-    //loop from N to N+PadLen
+    // loop from N to N+PadLen
     int64_t lb2 = ub;
-    int64_t ub2 = ub + (int64_t) Padlen;
+    int64_t ub2 = ub + (int64_t)Padlen;
 
-    affine::AffineForOp forOp2 = rewriter.create<AffineForOp>(loc, lb2, ub2, step);
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb2, ub2, step);
     auto iv2 = forOp2.getInductionVar();
     rewriter.setInsertionPointToStart(forOp2.getBody());
-    Value PaddingValue = rewriter.create<AffineLoadOp>(loc, paddingOpAdaptor.getPadValue(), ValueRange{}); //getPadValue
-    rewriter.create<AffineStoreOp>(loc, PaddingValue, alloc, iv2); 
+    Value PaddingValue = rewriter.create<AffineLoadOp>(
+        loc, paddingOpAdaptor.getPadValue(), ValueRange{}); // getPadValue
+    rewriter.create<AffineStoreOp>(loc, PaddingValue, alloc, iv2);
     rewriter.setInsertionPointAfter(forOp2);
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
@@ -1600,70 +1652,68 @@ struct ReverseInputOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //output = 0
-      //iterate for len = 0 to N
-      //  output[i] = a[N-1-i]
 
+    // Pseudo-code:
+    // output = 0
+    // iterate for len = 0 to N
+    //   output[i] = a[N-1-i]
+
+    DEBUG_PRINT_NO_ARGS();
 
-    DEBUG_PRINT_NO_ARGS() ;
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
     SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    
-    //For loop
+    // For loop
     ReverseInputOpAdaptor reverseInputOpAdaptor(operands);
     // DEBUG_PRINT_NO_ARGS() ;
-    
-    int64_t lb = 0 ;
+
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    //for loop
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // for loop
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    
+
     // DEBUG_PRINT_NO_ARGS() ;
     //: N-1 - i
-    AffineExpr reverseIndxExpr = rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0);
+    AffineExpr reverseIndxExpr =
+        rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0);
 
     AffineMap addMap2 = AffineMap::get(1, 0, reverseIndxExpr);
-    //load x[N-1-i]
+    // load x[N-1-i]
     DEBUG_PRINT_NO_ARGS();
-    Value loadInputFrmReverseIndx = rewriter.create<AffineLoadOp>(loc, reverseInputOpAdaptor.getInput(), addMap2 , ValueRange{iv});
-
+    Value loadInputFrmReverseIndx = rewriter.create<AffineLoadOp>(
+        loc, reverseInputOpAdaptor.getInput(), addMap2, ValueRange{iv});
 
-     
-    //store the result at indx i
+    // store the result at indx i
     rewriter.create<AffineStoreOp>(loc, loadInputFrmReverseIndx, alloc, iv);
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-    //   affine.for %arg0 = 0 to 5 {
-    //   %0 = affine.load %alloc_6[%arg0] : memref<5xf64>
-    //   %1 = arith.mulf %0, %0 : f64
-    //   affine.store %1, %alloc_5[%arg0] : memref<5xf64>
-    // }
+    // debug
+    //  forOp1->dump();
+    //    affine.for %arg0 = 0 to 5 {
+    //    %0 = affine.load %alloc_6[%arg0] : memref<5xf64>
+    //    %1 = arith.mulf %0, %0 : f64
+    //    affine.store %1, %alloc_5[%arg0] : memref<5xf64>
+    //  }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: LengthOp operations
 //===----------------------------------------------------------------------===//
@@ -1675,43 +1725,433 @@ struct LengthOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  output = len(input)
+
+    // Pseudo-code:
+    //   output = len(input)
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-   
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
 
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    auto inputType = llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType()); //op->getOperand(
+    auto inputType =
+        llvm::cast<RankedTensorType>(op->getOperand(0).getType());
 
     int64_t ub = inputType.getShape()[0];
-    Value constantUb = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(ub));
-
+    Value constantUb = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(ub));
 
     DEBUG_PRINT_WITH_ARGS("\nCheck for index --here");
-    //load from X, using 2nd operand as index
-    // DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt);
+    // load from X, using 2nd operand as index
+    //  DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt);
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    rewriter.create<AffineStoreOp>(loc, constantUb, alloc, ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, constantUb, alloc,
+                                   ValueRange{constantIndx0});
 
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
     rewriter.replaceOp(op, alloc);
-    
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTRealOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::FFTRealOp to scf loops: a bit-reversal permutation of the
+/// input followed by an in-place iterative radix-2 Cooley-Tukey butterfly
+/// network. The op is replaced by the buffer holding the real part.
+/// Radix-2 needs a power-of-two length, so any other (or dynamic) length is
+/// rejected instead of emitting out-of-bounds accesses.
+struct FFTRealOpLowering : public ConversionPattern {
+  FFTRealOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTRealOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Reject unsupported lengths before any IR is created. `N <= 0` also
+    // catches dynamic dimensions, which are encoded as negative sentinels.
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0) {
+      llvm::errs() << "Fail:FFTRealOp length must be a power of two\n";
+      return failure();
+    }
+
+    // stages = log2(N), computed exactly in integers. It is both the number
+    // of butterfly stages and the bit width of the bit-reversed indices.
+    int64_t stages = 0;
+    while ((int64_t{1} << stages) < N)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto log2N = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Working real/imag buffers: bit-reversed fill, then in-place butterflies.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Bit-reversal permutation: reversed[rev(i)] = input[i] for every i.
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // rev(i) is accumulated bit by bit in a loop-carried i64 starting at 0.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, log2N, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // If bit j of i is set, set the mirrored bit (log2N - 1 - j) of rev(i).
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, log2N, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // OR the selected bit (or zero) into the carried index.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // Yield the updated value to carry it forward
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // Scatter the input sample to its bit-reversed slot. The input is purely
+    // real, so the imaginary part starts at zero.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Constants for complex arithmetic
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    // Cooley-Tukey FFT implementation: stage s merges blocks of
+    // half_size = 2^s points into blocks of full_size = 2^(s+1) points.
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, log2N, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Calculate indices for even and odd elements
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Calculate twiddle factor: cos(a) + i*sin(a), a = -2*pi*k / full_size
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Multiply by twiddle factor
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly operation
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // replace the operation with the final value
+    rewriter.replaceOp(op, alloc_reversed_real);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTImagOp operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp.fftImag to scf loops on memrefs: an iterative radix-2
+// Cooley-Tukey FFT of a real input. The op is replaced by the buffer that
+// holds the imaginary part; the real part is computed but not returned.
+struct FFTImagOpLowering : public ConversionPattern {
+  // Registers the pattern on dsp::FFTImagOp with benefit 1.
+  FFTImagOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTImagOp::getOperationName(), 1, ctx) {}
+
+  // Emits a bit-reversal permutation of the input into a real/imag buffer
+  // pair, then log2(N) in-place butterfly stages over those buffers.
+  // Fails to match unless the result is 1-D with a static power-of-two
+  // length N; for other lengths the butterfly indices can run past N - 1.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Validate before emitting any IR so a rejected op leaves nothing behind.
+    if (tensorType.getRank() != 1)
+      return rewriter.notifyMatchFailure(op, "expected a 1-D result");
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(op, "FFT length must be a power of 2");
+
+    // log2(N): the number of index bits to reverse and of butterfly stages.
+    // N is static, so this is computed here instead of in the emitted IR.
+    int64_t stages = 0;
+    while ((int64_t{1} << stages) < N)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+    // NOTE(review): this reuses FFTRealOp's adaptor; presumably FFTImagOp has
+    // the same lhs operand -- confirm, or switch to FFTImagOp's own adaptor.
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Working buffers: filled in bit-reversed order, then updated in place.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Phase 1: store input[i] at the bit-reversed position of i, i in [0, N).
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // Reversed index, accumulated as an i64 loop-carried value.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // Test bit j of i ...
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+    // ... and, when it is set, mirror it to bit (stages - 1 - j).
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, stagesValue, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // OR in the mirrored bit, or zero when bit j of i is clear.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // Yield the updated value to carry it forward
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+    rewriter.setInsertionPointAfter(innerLoop);
+
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // Only a real input is read; the imaginary part is seeded with 0.0.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Phase 2: in-place butterflies; stage s uses half_size = 2^s.
+    // Constants for the twiddle angle -2 * pi * k / full_size.
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Even element at start + k, odd element half_size further on.
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor: cos/sin of angle = -2 * pi * k / full_size.
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Complex multiply: t = odd * (cos + i * sin).
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly: even' = even + t, odd' = even - t.
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results back into the same slots (in-place update).
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // Replace the op with the imaginary-part buffer.
+    rewriter.replaceOp(op, alloc_reversed_imag);
     return success();
   }
 };
@@ -1719,227 +2159,260 @@ struct LengthOpLowering : public ConversionPattern {
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FIRFilterResSymmOptimizedOp operations
 //===----------------------------------------------------------------------===//
-struct FIRFilterResSymmOptimizedOpLowering: public ConversionPattern {
-      FIRFilterResSymmOptimizedOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::FIRFilterResSymmOptimizedOp::getOperationName(), 1 , ctx) {}
-
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.FIRFilterResSymmOptimizedOp has 2 operands -- both of type tensor f64 
-
-      //Get the location of FIRFilterResSymmOptimizedOp
-      auto loc = op->getLoc();
-      
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+struct FIRFilterResSymmOptimizedOpLowering : public ConversionPattern {
+  FIRFilterResSymmOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterResSymmOptimizedOp::getOperationName(),
+                          1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterResSymmOptimizedOp has 2 operands -- both of type tensor f64
+
+    // Get the location of FIRFilterResSymmOptimizedOp
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-      //Pseudo-Code
-      //y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 to L-1/2
-        // N = lenY , M = lenX ,  L = lenH
-        //for n=0 to N
-            // sum = 0, temp =0
-            // for k = 0 to L-1/2
-                //if 0 <= n-k < M
-                    //val1 = x[n-k] else, val1 = 0
-                // if 0 <= n+k - (L-1) < M
-                    // val2 = x[n+k-(L-1)] else, val2 = 0
-                //temp = val1 + val2
-                // sum = sum + h[k] . temp
-
-        //middle-one
-            // if 0 <= n - (L-1)/2 < M
-                // sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)]
-            // y[n] = sum2
-
- 
-
-    int64_t lb = 0 ;
+    // Pseudo-code (N = len(y), M = len(x), L = len(h); h assumed symmetric):
+    //   y[n] = sum_{k=0}^{L/2-1} h[k] * (x[n-k] + x[n-(L-1-k)])
+    //          + h[(L-1)/2] * x[n-(L-1)/2]
+    //
+    //   for n = 0 .. N-1:
+    //     sum = 0
+    //     for k = 0 .. L/2 - 1:
+    //       val1 = (0 <= n-k < M)       ? x[n-k]       : 0
+    //       val2 = (0 <= n+k-(L-1) < M) ? x[n+k-(L-1)] : 0
+    //       sum += h[k] * (val1 + val2)
+    //
+    //     // middle tap
+    //     val3 = (0 <= n-(L-1)/2 < M) ? x[n-(L-1)/2] : 0
+    //     y[n] = sum + h[(L-1)/2] * val3
+    //
+    // NOTE(review): the middle tap is added unconditionally, which matches an
+    // odd L; for even L, h[(L-1)/2] is already covered by the k loop --
+    // confirm that callers only pass odd-length filters.
+
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
     DEBUG_PRINT_NO_ARGS();
-    affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ub, step );
+    affine::AffineForOp forOp1 =
+        rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
     rewriter.setInsertionPointToStart(forOp1.getBody());
     auto iv = forOp1.getInductionVar();
 
-    //for n=0 to N
-            // sum = 0, temp =0
-    //get filter len
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+    // for n=0 to N
+    //  sum = 0, temp =0
+    // get filter len
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->operand_type_begin()));
     auto operandIt = op->operand_type_begin();
     auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
     int64_t ubForInput = tensorTypeInput.getShape()[0];
-    //get second operand
+    // get second operand
     operandIt = operandIt + 1;
 
-    // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+    // auto tensorTypeFilter =
+    // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
     auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
     int64_t ubForFilter = tensorTypeFilter.getShape()[0];
     DEBUG_PRINT_NO_ARGS();
     // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
-    //create a constant for sum
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(loc, 
-                lb, ubForFilter/2, step , ValueRange{constant0});
+    // create a constant for sum
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+        loc, lb, ubForFilter / 2, step, ValueRange{constant0});
     rewriter.setInsertionPointToStart(forOp2.getBody());
     auto iv2 = forOp2.getInductionVar();
 
-    auto getIterArg =  forOp2.getBody()->getArgument(1);       //forOp1.getIterOperands();
+    auto getIterArg =
+        forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
     DEBUG_PRINT_NO_ARGS();
     FIRFilterResSymmOptimizedOpAdaptor firFilterResSymmOpAdaptor(operands);
 
-    //if 0 <= n-k < M
-        //val1 = x[n-k] else, val1 = 0
-    //For n-k 
-    //if 0 <= n-k < M or, 0 <= n-k <= M -1
-    AffineExpr d0, d1,s0, s1 ;
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // For n-k
+    // if 0 <= n-k < M or, 0 <= n-k <= M -1
+    AffineExpr d0, d1, s0, s1;
     bindDims(rewriter.getContext(), d0, d1);
     AffineExpr ExprNMinusK = d0 - d1;
-    AffineMap mapNMinusK = AffineMap::get(2, 0 , ExprNMinusK);
+    AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK);
     // n-k <= M -1 or, n-k-(M-1) <= 0
-    bindSymbols(rewriter.getContext() , s0, s1);
-    Value constantMMinus1Indx = rewriter.create<arith::ConstantIndexOp>(loc, ubForInput -1);
+    bindSymbols(rewriter.getContext(), s0, s1);
+    Value constantMMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
 
-    AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1 ;
-    IntegerSet setForIf = IntegerSet::get(2,1, {ExprNMinusK , ExprNMinusKMinusMPlus1}, {false, false});
+    AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1;
+    IntegerSet setForIf = IntegerSet::get(
+        2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false});
     DEBUG_PRINT_NO_ARGS();
 
-    //if 0 <= n-k <= M -1
-    //use typeRange too:
+    // if 0 <= n-k <= M -1
+    // use typeRange too:
     Type floatType = rewriter.getF64Type();
     //  if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, TypeRange{floatType}, setForIf , ValueRange{iv,iv2, constantMMinus1Indx} , true /*else*/ ); 
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf,
+        ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/);
     rewriter.setInsertionPointToStart(ifOp.getThenBlock());
 
-    //val1 = x[n-k] else, val1 = 0
-    //load x[n-k]
+    // val1 = x[n-k] else, val1 = 0
+    // load x[n-k]
     DEBUG_PRINT_NO_ARGS();
-    Value loadInput = rewriter.create<AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getLhs(), mapNMinusK , ValueRange{iv,iv2});
+    Value loadInput =
+        rewriter.create<AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getLhs(),
+                                      mapNMinusK, ValueRange{iv, iv2});
     rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
-    //else block
+    // else block
     rewriter.setInsertionPointToStart(ifOp.getElseBlock());
-    Value const0ForElse = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value const0ForElse = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
     rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
     rewriter.setInsertionPointAfter(ifOp);
 
-
     // if 0 <= n+k - (L-1) < M
-                    // val2 = x[n+k-(L-1)] else, val2 = 0
-    //val2 lower bound
-    // AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
-    // AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
-     //Val2 LowerBound: n+k - (L-1) >= 0
-    AffineExpr ExprLowerBoundVal2 = rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) - 
-                rewriter.getAffineConstantExpr(ubForFilter - 1);
-    //Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie, (M+L-2) - k -n >= 0
-    // AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 + M -1
-    AffineExpr ExprUpperBoundVal2 = rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) - rewriter.getAffineDimExpr(1) -
-                rewriter.getAffineDimExpr(0);
-    //s0 = L -1
-    // Value s0LMin1Indx = rewriter.create<arith::ConstantIndexOp>(loc, ubForFilter - 1);
-    // s1 = M + L -2 for val2 upperBound
-    // Value s1MPlusLPlus2Indx = rewriter.create<arith::ConstantIndexOp>(loc, ubForInput + ubForFilter - 2);
-    // Value s1MMin1Indx = rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
-
-    IntegerSet setForIf2 = IntegerSet::get(2,0, {ExprLowerBoundVal2 , ExprUpperBoundVal2}, {false, false});
-
-    auto ifOp2 = rewriter.create<affine::AffineIfOp>( loc, TypeRange{floatType}, setForIf2 , ValueRange{iv,iv2} , true /*else*/ ); 
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // val2 lower bound
+    //  AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    //  AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    // Val2 LowerBound: n+k - (L-1) >= 0
+    AffineExpr ExprLowerBoundVal2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) -
+        rewriter.getAffineConstantExpr(ubForFilter - 1);
+    // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie,
+    // (M+L-2) - k -n >= 0
+    //  AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 +
+    //  M -1
+    AffineExpr ExprUpperBoundVal2 =
+        rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) -
+        rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0);
+    // Both bounds are folded into the expressions above as affine constants,
+    // so this set needs no symbol operands. The earlier symbol-based variant
+    // used s0 = L - 1 and s1 = M + L - 2, materialized as
+    //   arith::ConstantIndexOp(ubForFilter - 1)
+    //   arith::ConstantIndexOp(ubForInput + ubForFilter - 2)
+    //   arith::ConstantIndexOp(ubForInput - 1)
+
+    IntegerSet setForIf2 = IntegerSet::get(
+        2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false});
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2},
+        true /*else*/);
     rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
 
-    //val2 = x[n+k-(L-1)] else, val2 = 0
+    // val2 = x[n+k-(L-1)] else, val2 = 0
     AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2);
-    //load x[n+k-(L-1)]
+    // load x[n+k-(L-1)]
     DEBUG_PRINT_NO_ARGS();
-    Value loadInputForVal2 = rewriter.create<AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getLhs(), addMap2 , ValueRange{iv,iv2 });
+    Value loadInputForVal2 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getLhs(), addMap2, ValueRange{iv, iv2});
     rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal2});
-    //else block
+    // else block
     rewriter.setInsertionPointToStart(ifOp2.getElseBlock());
-    Value const0ForElse2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value const0ForElse2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
     rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse2});
     rewriter.setInsertionPointAfter(ifOp2);
 
-    //temp = val1 + val2
-    // sum = sum + h[k] . temp
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    Value Val1Plus2 = rewriter.create<arith::AddFOp>(loc, ifOp.getResult(0),
+                                                     ifOp2.getResult(0));
 
-    Value Val1Plus2 = rewriter.create<arith::AddFOp>(loc, ifOp.getResult(0) , ifOp2.getResult(0));
+    // load filter and then mult and then sum
+    Value loadFilter = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getRhs(), iv2);
 
-    //load filter and then mult and then sum
-    Value loadFilter = rewriter.create<affine::AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getRhs() ,  iv2);
-    
-    Value filterMulInput = rewriter.create<arith::MulFOp>(loc, Val1Plus2 , loadFilter);
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+    Value filterMulInput =
+        rewriter.create<arith::MulFOp>(loc, Val1Plus2, loadFilter);
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
     rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
     // rewriter.setInsertionPointToEnd(forOp2->getBlock());
     rewriter.setInsertionPointAfter(forOp2);
     DEBUG_PRINT_NO_ARGS();
-        // Middle - point
-        // if 0 <= n - (L-1)/2 < M
-        // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
-        // y[n] = sum2
+    // Middle - point
+    // if 0 <= n - (L-1)/2 < M
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    // y[n] = sum2
 
     // if 0 <= n - (L-1)/2 < M
     // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2
     // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2
-    int64_t midFilterLen = (ubForFilter - 1)/2;
-    AffineExpr ExprLowerBoundVal3 = rewriter.getAffineDimExpr(0) -  
-                rewriter.getAffineConstantExpr(midFilterLen);
-    //UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n
-    AffineExpr ExprUpperBoundVal3 = rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) -
-                rewriter.getAffineDimExpr(0);
+    int64_t midFilterLen = (ubForFilter - 1) / 2;
+    AffineExpr ExprLowerBoundVal3 =
+        rewriter.getAffineDimExpr(0) -
+        rewriter.getAffineConstantExpr(midFilterLen);
+    // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n
+    AffineExpr ExprUpperBoundVal3 =
+        rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) -
+        rewriter.getAffineDimExpr(0);
 
     AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3);
- 
-    IntegerSet setForIf3 = IntegerSet::get(1,0, {ExprLowerBoundVal3 , ExprUpperBoundVal3}, {false, false});
 
-    auto ifOp3 = rewriter.create<affine::AffineIfOp>( loc, TypeRange{floatType}, setForIf3 , ValueRange{iv} , true /*else*/ ); 
+    IntegerSet setForIf3 = IntegerSet::get(
+        1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false});
+
+    auto ifOp3 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/);
     rewriter.setInsertionPointToStart(ifOp3.getThenBlock());
 
-    //val3 = x[n-(L-1)/2)] else, val3 = 0
-    //load x[n-(L-1)/2)]
+    // val3 = x[n-(L-1)/2)] else, val3 = 0
+    // load x[n-(L-1)/2)]
     DEBUG_PRINT_NO_ARGS();
-    Value loadInputForVal3 = rewriter.create<AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getLhs(), addMap3 , ValueRange{iv});
+    Value loadInputForVal3 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getLhs(), addMap3, ValueRange{iv});
     rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal3});
-    //else block
+    // else block
     rewriter.setInsertionPointToStart(ifOp3.getElseBlock());
-    Value const0ForElse3 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value const0ForElse3 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
     rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse3});
     rewriter.setInsertionPointAfter(ifOp3);
 
-    //sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
-    // y[n] = sum2
-    //load filter and then mult and then sum
-    Value midFilterLenIndx = rewriter.create<arith::ConstantIndexOp>(loc, midFilterLen);
-
-    Value loadFilterMid = rewriter.create<affine::AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getRhs() ,  midFilterLenIndx);
-    Value filterMulInput2 = rewriter.create<arith::MulFOp>(loc, ifOp3.getResult(0) , loadFilterMid);
-    Value sum2 = rewriter.create<arith::AddFOp>(loc, filterMulInput2, forOp2.getResult(0));
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    //  y[n] = sum2
+    // load filter and then mult and then sum
+    Value midFilterLenIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, midFilterLen);
+
+    Value loadFilterMid = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getRhs(), midFilterLenIndx);
+    Value filterMulInput2 =
+        rewriter.create<arith::MulFOp>(loc, ifOp3.getResult(0), loadFilterMid);
+    Value sum2 = rewriter.create<arith::AddFOp>(loc, filterMulInput2,
+                                                forOp2.getResult(0));
     // rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
-    rewriter.create<AffineStoreOp>(loc, sum2 , alloc, iv);
+    rewriter.create<AffineStoreOp>(loc, sum2, alloc, iv);
     rewriter.setInsertionPointAfter(forOp1);
     DEBUG_PRINT_NO_ARGS();
     // ifOp->dump();
     rewriter.replaceOp(op, alloc);
     return success();
-    }
+  }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: RunLenEncodingOp operations
 
-
 //===----------------------------------------------------------------------===//
 
 #define TryWhileLoop 0
 #define TryLoadStoreForWhile 0
-#define TryPassIterIndex 0  //Not working
+#define TryPassIterIndex 0 // Not working
 #define TryScf 0
-#define TryRLE  1 
+#define TryRLE 1
 struct RunLenEncodingOpLowering : public ConversionPattern {
   RunLenEncodingOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::RunLenEncodingOp::getOperationName(), 1, ctx) {}
@@ -1948,351 +2421,388 @@ struct RunLenEncodingOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y_rle[i] =  x[i] , if x[i] != x[i-1] , 1<=i<n
-           // CountOfXi , at n<=i < 2n -1
-
-        //steps:
-        // count = 1 , y[0] = x[0] , k = 0 
-        // for i=1 to len/2 
-           // load prev = a[i-1] , current = a[i]
-           // if prev == current
-                // count = count + 1
-           // else
-                // store count at index k + N/2
-                // y[k] = current
-                // y[k + N/2] = count
-                // count = 1 and k = k+1
-          //if count > 1 ie, for last element
-            // store the count value at k + N/2
+
+    // Pseudo-code:
+    //   y_rle[i] =  x[i] , if x[i] != x[i-1] , 1<=i<n
+    //  CountOfXi , at n<=i < 2n -1
+
+    // steps:
+    //  count = 1 , y[0] = x[0] , k = 0
+    //  for i=1 to len/2
+    //    load prev = a[i-1] , current = a[i]
+    //    if prev == current
+    //      count = count + 1
+    //    else
+    //      store count at index k + N/2
+    //      y[k + N/2] = count
+    //      k = k + 1
+    //      y[k] = current and count = 1
+    //  after the loop, for the last run
+    //    store the count value at k + N/2
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
     auto tensorType1 = RankedTensorType::get({1}, rewriter.getIndexType());
 
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto memRefType2 = convertTensorToMemRef(tensorType1);
 
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
     auto allocK = insertAllocAndDealloc(memRefType2, loc, rewriter);
 
-    // count = 1 , y[0] = x[0] , 
+    // count = 1 , y[0] = x[0] ,
     // loop from 0 to len
     RunLenEncodingOpAdaptor runLenEncodingAdaptor(operands);
     DEBUG_PRINT_NO_ARGS();
 
-    
-
-    
-   
     //  len/2,k = n ie, len/2
-    int64_t lb = 1 ;
-    int64_t N = tensorType.getShape()[0];  
-    int64_t ub = N/2 ; //output len is twice the input len
+    int64_t lb = 1;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = N / 2; // output len is twice the input len
     int64_t step = 1;
     int64_t k = 0;
     int64_t lb1 = 0;
 
-    Value const0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
+    Value const0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //init all output memory with zero
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb1, N, step);
+    // init all output memory with zero
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb1, N, step);
     DEBUG_PRINT_NO_ARGS();
     auto iv1 = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    rewriter.create<AffineStoreOp>(loc,const0, alloc, iv1 );
+    rewriter.create<AffineStoreOp>(loc, const0, alloc, iv1);
     rewriter.setInsertionPointAfter(forOp1);
 
     DEBUG_PRINT_NO_ARGS();
-    //load from X, 
+    // load from X,
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value inputX0 = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(), ValueRange{constantIndx0});
-    rewriter.create<AffineStoreOp>(loc, inputX0, alloc, ValueRange{constantIndx0});
-
-
-#if TryRLE 
-
-    // Initial count and k values as SSA values, count = 1 , k = 0 
-    // for i=1 to len/2 
-           // load prev = a[i-1] , current = a[i]
-           // if prev == current
-                // count = count + 1
-           // else
-                // store count at index k + N/2
-                // y[k + N/2] = count
-                // k = k +1
-                // y[k] = current 
-                // count = 1 
-          //for last element
-            // store the count value at k + N/2
-            //y[k + N/2] = count
-    Value countVal = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
+    Value inputX0 = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, inputX0, alloc,
+                                   ValueRange{constantIndx0});
+
+#if TryRLE
+
+    // Initial count and k values as SSA values, count = 1 , k = 0
+    // for i=1 to len/2
+    //   load prev = a[i-1] , current = a[i]
+    //   if prev == current
+    //     count = count + 1
+    //   else
+    //     store count at index k + N/2
+    //     y[k + N/2] = count
+    //     k = k +1
+    //     y[k] = current
+    //     count = 1
+    // for last element
+    //   store the count value at k + N/2
+    //   y[k + N/2] = count
+    Value countVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
     Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
 
     Value IndxNBy2 = rewriter.create<arith::ConstantIndexOp>(loc, ub);
     Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, k);
-    rewriter.create<AffineStoreOp>(loc, kVal, allocK , ValueRange{Indx0});
-    
+    rewriter.create<AffineStoreOp>(loc, kVal, allocK, ValueRange{Indx0});
+
     Type floatType = rewriter.getF64Type();
     // Type indexType = rewriter.getIndexType();
-    //// // for i=1 to len/2 
-           // load prev = a[i-1] , current = a[i]
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{countVal});
+    // for i=1 to len/2
+    //   load prev = a[i-1] , current = a[i]
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{countVal});
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
     DEBUG_PRINT_NO_ARGS();
 
     auto countArg = forOpY.getRegionIterArgs()[0];
-  
-    Value current = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(), ivY );
+
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
     //
     AffineExpr d0;
     bindDims(rewriter.getContext(), d0);
     AffineExpr ExprIMinus1 = d0 - rewriter.getAffineConstantExpr(1);
-    AffineMap mapExprIMinus1 = AffineMap::get(1,0, ExprIMinus1);
-    Value prev = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(),mapExprIMinus1, ValueRange{ivY} );
+    AffineMap mapExprIMinus1 = AffineMap::get(1, 0, ExprIMinus1);
+    Value prev = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), mapExprIMinus1, ValueRange{ivY});
     DEBUG_PRINT_NO_ARGS();
-    // for i=1 to len/2 
-           // load prev = a[i-1] , current = a[i]
-           // if prev == current
-                // count = count + 1
-           // else
-                // store count at index k + N/2
-                // y[k + N/2] = count
-                // k = k +1
-                // y[k] = current 
-                // count = 1 
-          //for last element
-            // store the count value at k + N/2
-            //y[k + N/2] = count
-    // TypeRange typeRange = TypeRange{rewriter.getF64Type() , rewriter.getIndexType()};
-    // TypeRange typeRange = TypeRange({rewriter.getF64Type(), rewriter.getIndexType()});
-    
-    // auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{rewriter.getF64Type(), rewriter.getIndexType()}, rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, prev, current), true, true); 
-    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, prev, current);
-
-
-    //create if block with else condition
+    // for i=1 to len/2
+    // load prev = a[i-1] , current = a[i]
     // if prev == current
-                // count = count + 1
-    // auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType , indexType}, CmpPrevCurrent , true /* else=1 */);   
-    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType }, CmpPrevCurrent , true /* else=1 */);   
+    //   count = count + 1
+    // else
+    //   store count at index k + N/2
+    //   y[k + N/2] = count
+    //   k = k +1
+    //   y[k] = current
+    //   count = 1
+    // for last element
+    //   store the count value at k + N/2
+    //   y[k + N/2] = count
+    // TypeRange typeRange = TypeRange{rewriter.getF64Type() ,
+    // rewriter.getIndexType()}; TypeRange typeRange =
+    // TypeRange({rewriter.getF64Type(), rewriter.getIndexType()});
+
+    // auto ifOp = rewriter.create<scf::IfOp>(loc,
+    // TypeRange{rewriter.getF64Type(), rewriter.getIndexType()},
+    // rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, prev,
+    // current), true, true);
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, prev, current);
+
+    // create if block with else condition
+    //  if prev == current
+    //  count = count + 1
+    // auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType ,
+    // indexType}, CmpPrevCurrent , true /* else=1 */);
+    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType},
+                                           CmpPrevCurrent, true /* else=1 */);
 
     rewriter.setInsertionPointToStart(ifOp.thenBlock());
     DEBUG_PRINT_NO_ARGS();
-    
+
     auto CountPlusOne = rewriter.create<arith::AddFOp>(loc, countArg, countVal);
     DEBUG_PRINT_NO_ARGS();
-    rewriter.create<scf::YieldOp>(loc, ValueRange{CountPlusOne} );
-     // else
-                // store count at index k + N/2
-                // y[k + N/2] = count
-                // k = k +1
-                // y[k] = current 
-                // count = 1 
+    rewriter.create<scf::YieldOp>(loc, ValueRange{CountPlusOne});
+    // else
+    //   store count at index k + N/2
+    //   y[k + N/2] = count
+    //   k = k +1
+    //   y[k] = current
+    //   count = 1
     rewriter.setInsertionPointToStart(ifOp.elseBlock());
-    // // out[k + N/2]= count   
-    Value loadKVal = rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0} );
+    // out[k + N/2] = count
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0});
 
-    Value kPlusNBy2 = rewriter.create<arith::AddIOp>(loc,rewriter.getIndexType(), loadKVal, IndxNBy2);
+    Value kPlusNBy2 = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, IndxNBy2);
     rewriter.create<memref::StoreOp>(loc, countArg, alloc, kPlusNBy2);
-    //k = k+1
+    // k = k+1
     Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    Value kPlusOne = rewriter.create<arith::AddIOp>(loc,rewriter.getIndexType(), loadKVal, Indx1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, Indx1);
     rewriter.create<AffineStoreOp>(loc, kPlusOne, allocK, ValueRange{Indx0});
 
     // y[k + 1] = current
     rewriter.create<memref::StoreOp>(loc, current, alloc, kPlusOne);
-    
+
     DEBUG_PRINT_NO_ARGS();
     rewriter.create<scf::YieldOp>(loc, ValueRange{countVal});
     rewriter.setInsertionPointAfter(ifOp);
     // ifOp.dump();
     Value countRes = ifOp.getResult(0);
-    
-     
-    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes });
+
+    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes});
     rewriter.setInsertionPointAfter(forOpY);
     // forOpY->dump();
 
-    //check for last countArg value if countArg > 1, then store it at last 
+    // store the last run's count at k + N/2 (the count > 1 check is disabled)
     Value finalCountArg = forOpY.getResult(0);
-    Value finalkArg = rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0} );
-    
+    Value finalkArg =
+        rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0});
+
     // //if count>1 ,then store count at index k + N/2
-      // auto ifOp1 = rewriter.create<scf::IfOp>(loc, CmpCountGt1 , false /* else=0 */);   
+    // auto ifOp1 = rewriter.create<scf::IfOp>(loc, CmpCountGt1 , false /*
+    // else=0 */);
     // rewriter.setInsertionPointToStart(ifOp1.thenBlock());
     DEBUG_PRINT_NO_ARGS();
-    Value finalkPlusNBy2 = rewriter.create<arith::AddIOp>(loc,rewriter.getIndexType(), finalkArg, IndxNBy2);
+    Value finalkPlusNBy2 = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), finalkArg, IndxNBy2);
 
     rewriter.create<memref::StoreOp>(loc, finalCountArg, alloc, finalkPlusNBy2);
     DEBUG_PRINT_NO_ARGS();
-    // rewriter.setInsertionPointAfter(ifOp1);       
+    // rewriter.setInsertionPointAfter(ifOp1);
 #endif
 
 #if TryPassIterIndex
-    //store k at its location & load and do addition to 1 and 
-    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub-1);
+    // store k at its location, then load it and add 1
+    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub - 1);
     Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
 
-    auto kValStore = rewriter.create<AffineStoreOp>(loc, kVal, alloc2 , ValueRange{Indx0});
-    
+    auto kValStore =
+        rewriter.create<AffineStoreOp>(loc, kVal, alloc2, ValueRange{Indx0});
+
     Type floatType = rewriter.getF64Type();
     Type indexType = rewriter.getIndexType();
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{inputX0, kVal});
-    // affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{countVal, kVal});
+    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(
+        loc, lb, ub, step, ValueRange{inputX0, kVal});
+    // affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub,
+    // step, ValueRange{countVal, kVal});
 
     auto ivY = forOpY.getInductionVar();
     auto prev = forOpY.getRegionIterArgs()[0];
     auto kArg = forOpY.getRegionIterArgs()[1];
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    
-    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0); 
-    Value current = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(), ivY );
-    Value loadKVal = rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0} );
-    Value const1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
+
+    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0});
+    Value const1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
     Value currentPlus1 = rewriter.create<arith::AddFOp>(loc, prev, const1);
 
-    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE, current , const1 );
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, current, const1);
 
+    // create if block with else condition
+    //  if prev == current, count++
+    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType},
+                                           CmpPrevCurrent, true /* else=1 */);
+    // auto ifOp = rewriter.create<scf::IfOp>(loc,  CmpPrevCurrent , true /*
+    // else=1 */);
 
-    //create if block with else condition
-    // if prev == current, count++
-    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType }, CmpPrevCurrent , true /* else=1 */);  
-    // auto ifOp = rewriter.create<scf::IfOp>(loc,  CmpPrevCurrent , true /* else=1 */);   
- 
     rewriter.setInsertionPointToStart(ifOp.thenBlock());
     DEBUG_PRINT_NO_ARGS();
 
-    //store count at N+i
-    // Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg, countVal);
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
     Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    Value kPlusOne = rewriter.create<arith::AddIOp>(loc, rewriter.getIndexType() , kArg , Indx1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), kArg, Indx1);
 
     rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kArg});
-    // rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kPlusOne});
+    // rewriter.create<AffineStoreOp>(loc, current, alloc,
+    // ValueRange{kPlusOne});
     rewriter.create<memref::StoreOp>(loc, current, alloc, ValueRange{kPlusOne});
     rewriter.create<AffineStoreOp>(loc, kPlusOne, alloc2, ValueRange{Indx0});
     rewriter.create<scf::YieldOp>(loc, ValueRange{currentPlus1});
 
     rewriter.setInsertionPointToStart(ifOp.elseBlock());
     rewriter.create<AffineStoreOp>(loc, currentPlus1, alloc, ValueRange{ivY});
-    //yield the values
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
+    // yield the values
+    //  rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
     rewriter.create<scf::YieldOp>(loc, ValueRange{currentPlus1});
 
     rewriter.setInsertionPointAfter(ifOp);
     Value countRes = ifOp.getResult(0);
-    // Value kRes = ifOp.getResult(1); 
+    // Value kRes = ifOp.getResult(1);
     // rewriter.create<AffineYieldOp>(loc, ValueRange{countRes,kRes });
-    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes, Indx00 });
+    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes, Indx00});
 
     rewriter.setInsertionPointAfter(forOpY);
 
-
 #endif
 
 #if TryWhileLoop
 
     auto kVal = rewriter.create<arith::ConstantIndexOp>(loc, k);
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{kVal});
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{kVal});
     auto ivY = forOpY.getInductionVar();
     // auto countArg = forOpY.getRegionIterArgs()[0];
     auto kArg = forOpY.getRegionIterArgs()[0];
     rewriter.setInsertionPointToStart(forOpY.getBody());
-     
-    Value current = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(), ivY );
 
-    //store count at N+i
-    // Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg, countVal);
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
     Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    Value kPlusOne = rewriter.create<arith::AddIOp>(loc,rewriter.getIndexType(), kArg, Indx1);
-    // Value constInt1 = rewriter.create<arith::ConstantIntOp>(loc,rewriter.getI64IntegerAttr(1), rewriter.getI64Type() );
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), kArg, Indx1);
+    // Value constInt1 =
+    // rewriter.create<arith::ConstantIntOp>(loc,rewriter.getI64IntegerAttr(1),
+    // rewriter.getI64Type() );
 
-    // Value kPlusOneIndex = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(), kPlusOne);
+    // Value kPlusOneIndex = rewriter.create<arith::IndexCastOp>(loc,
+    // rewriter.getIndexType(), kPlusOne);
 
     // kPlusOne.dump();
-    // Value kArg1 = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIndexType(), kArg);
+    // Value kArg1 = rewriter.create<arith::IndexCastUIOp>(loc,
+    // rewriter.getIndexType(), kArg);
 
-    // rewriter.create<AffineStoreOp>(loc, countPlus1, alloc, mapExprNPlusI, ValueRange{kPlusOne});
-    // rewriter.create<AffineStoreOp>(loc, countPlus1, alloc, ValueRange{kArg});
-    // Store the result
+    // rewriter.create<AffineStoreOp>(loc, countPlus1, alloc, mapExprNPlusI,
+    // ValueRange{kPlusOne}); rewriter.create<AffineStoreOp>(loc, countPlus1,
+    // alloc, ValueRange{kArg}); Store the result
     // rewriter.create<AffineStoreOp>(loc, current, alloc, ivY); //working
     rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kArg});
-    //yield the values
-    rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
+    // yield the values
+    rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne});
     // rewriter.create<AffineYieldOp>(loc, ValueRange{countPlus1 , kPlusOne});
     rewriter.setInsertionPointAfter(forOpY);
 
 #endif
 
 #if TryLoadStoreForWhile
-    //store k at its location & load and do addition to 1 and 
-    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub-1);
+    // store k at its location, then load it and add 1
+    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub - 1);
     Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
 
-    auto kValStore = rewriter.create<AffineStoreOp>(loc, kVal, alloc2 , ValueRange{Indx0});
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{inputX0});
+    auto kValStore =
+        rewriter.create<AffineStoreOp>(loc, kVal, alloc2, ValueRange{Indx0});
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{inputX0});
     auto ivY = forOpY.getInductionVar();
     auto prev = forOpY.getRegionIterArgs()[0];
     // auto kArg = forOpY.getRegionIterArgs()[0];
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    
-    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0); 
-    Value current = rewriter.create<AffineLoadOp>(loc, runLenEncodingAdaptor.getInput(), ivY );
-    Value loadKVal = rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0} );
-    Value const1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
+
+    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0});
+    Value const1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
     Value currentPlus1 = rewriter.create<arith::AddFOp>(loc, prev, const1);
 
-    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE, current , const1 );
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, current, const1);
 
+    // create if block with else condition
+    //  if prev == current, count++
+    //  auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType ,
+    //  indexType}, CmpPrevCurrent , true /* else=1 */);
+    auto ifOp =
+        rewriter.create<scf::IfOp>(loc, CmpPrevCurrent, true /* else=1 */);
 
-    //create if block with else condition
-    // if prev == current, count++
-    // auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType , indexType}, CmpPrevCurrent , true /* else=1 */);  
-    auto ifOp = rewriter.create<scf::IfOp>(loc,  CmpPrevCurrent , true /* else=1 */);   
- 
     rewriter.setInsertionPointToStart(ifOp.thenBlock());
     DEBUG_PRINT_NO_ARGS();
 
-    //store count at N+i
-    // Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg, countVal);
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
     Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    Value kPlusOne = rewriter.create<arith::AddIOp>(loc, rewriter.getIndexType() , loadKVal , Indx1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, Indx1);
 
     rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{ivY});
-    // rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kPlusOne});
+    // rewriter.create<AffineStoreOp>(loc, current, alloc,
+    // ValueRange{kPlusOne});
     rewriter.create<memref::StoreOp>(loc, current, alloc, ValueRange{kPlusOne});
     rewriter.create<AffineStoreOp>(loc, kPlusOne, alloc2, ValueRange{Indx0});
 
     rewriter.setInsertionPointToStart(ifOp.elseBlock());
     rewriter.create<AffineStoreOp>(loc, currentPlus1, alloc, ValueRange{ivY});
-    //yield the values
-    // rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
+    // yield the values
+    //  rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
     rewriter.setInsertionPointAfter(ifOp);
-    rewriter.create<AffineYieldOp>(loc, ValueRange{current });
+    rewriter.create<AffineYieldOp>(loc, ValueRange{current});
 
     rewriter.setInsertionPointAfter(forOpY);
 
-
 #endif
 
-    //debug
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
+    // debug
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
+
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -2303,26 +2813,29 @@ struct RunLenEncodingOpLowering : public ConversionPattern {
 
 struct LMSFilterResponseOpLowering : public ConversionPattern {
   LMSFilterResponseOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::LMSFilterResponseOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::LMSFilterResponseOp::getOperationName(), 1,
+                          ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-    // for (int n = 0; n < NUM_SAMPLES; n++) {
-    //     // Calculate the filter output y[n]
-    //     y[n] = 0;
-    //     for (int i = 0; i < FILTER_LENGTH; i++) {
-    //         if (n - i >= 0) { // affine if 
-    //             y[n] = y[n] + (w[i] * x[n - i]); 
-    //         }
-    //     }
-        
+
+    // Pseudo-code:
+    //  for (int n = 0; n < NUM_SAMPLES; n++) {
+    //      // we also need to initialize w
+    //      // w[n] = 0;
+    //      // Calculate the filter output y[n]
+    //      y[n] = 0;
+    //      for (int i = 0; i < FILTER_LENGTH; i++) {
+    //          if (n - i >= 0) { // affine if
+    //              y[n] = y[n] + (w[i] * x[n - i]);
+    //          }
+    //      }
+
     //     // Calculate the error e[n]
     //     e[n] = d[n] - y[n];
-        
+
     //     // Update the filter weights w[i]
     //     for (int i = 0; i < FILTER_LENGTH; i++) {
     //         if (n - i >= 0) {
@@ -2331,130 +2844,137 @@ struct LMSFilterResponseOpLowering : public ConversionPattern {
     //     }
     // }
 
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     LMSFilterOpAdaptor lmsFilterAdaptor(operands);
-    // Value alpha = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value alpha = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(1));
-    Value zeroval = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-     Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu()); 
-    
-    //For loop -- iterate from 0 to last
-    int64_t lb = 0 ;
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t numSamples = tensorType.getShape()[0];
     int64_t step = 1;
 
     Value GetFilterLOp = op->getOperand(3);
-    dsp::ConstantOp constantOp3rdArg = GetFilterLOp.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();;
+    dsp::ConstantOp constantOp3rdArg =
+        GetFilterLOp.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
+    ;
     auto elements1 = constant3rdValue.getValues<FloatAttr>();
     float filterlenval = elements1[0].getValueAsDouble();
-    auto FilterLength = (uint64_t) filterlenval;
+    auto FilterLength = (uint64_t)filterlenval;
 
     auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
     auto wAlloc = rewriter.create<memref::AllocOp>(loc, yMemRefType);
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
 
-    //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    // For affine expression: #map1 = affine_map<(d0, d1) -> (d0 - d1)>
     AffineExpr d0, d1, s0;
     bindDims(rewriter.getContext(), d0, d1);
-    // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; 
+    // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) -
+    // rewriter.getAffineDimExpr(1); //d0 - d1;
     AffineExpr ExprForXSlice = d0 - d1;
     AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
     IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
 
+    // w[n] = 0;
     // y[n] = 0;
     // rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
     // Allocate and initialize array for y
     // Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    
+
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc, ValueRange{iv});
     rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
 
-    affine::AffineForOp forOp2 = rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
     auto iv2 = forOp2.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp2.getBody());
 
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ );
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
     rewriter.setInsertionPointToStart(ifOp.getThenBlock());
 
-    Value inputX = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter,
-                  ValueRange{iv,iv2}); 
-    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc, 
-                  ValueRange{iv2}); //memRefType
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc,
+                                            ValueRange{iv2}); // memRefType
 
-    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX ,w );
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
     Value ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
     Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
     rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
     rewriter.setInsertionPointAfter(ifOp);
     rewriter.setInsertionPointAfter(forOp2);
 
-
     //  get e[n] = d[n] - y[n]
 
-    Value desiredX = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
-    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc,
-                      ValueRange{iv}); 
-          
-    Value err = rewriter.create<arith::SubFOp>(loc, desiredX ,ynew );
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
 
-    
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
 
-    affine::AffineForOp forOp3 = rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
-        auto iv3 = forOp3.getInductionVar();
-        
-        rewriter.setInsertionPointToStart(forOp3.getBody());
-
-        auto ifOp2 = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv,iv3} , false /*no else*/ );
-        rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
-
-        Value inputX2 = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter,
-                      ValueRange{iv,iv3}); 
-         
-        Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, 
-                      ValueRange{iv3}); 
-
-        // f(u(n),e(n),μ)=μe(n)u∗(n)
-        Value mul1 = rewriter.create<arith::MulFOp>(loc, err ,inputX2 );
-        Value mul2 = rewriter.create<arith::MulFOp>(loc, mu ,mul1 );
-
-        // FInal w[n]
-        Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2 ,mul2 );              
-                      
-        rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
-        rewriter.setInsertionPointAfter(ifOp2);
-        rewriter.setInsertionPointAfter(forOp3);
-
-        rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+    // f(u(n),e(n),μ)=μe(n)u∗(n)
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // Final w[i] = w[i] + mu * e[n] * x[n - i]
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    rewriter.setInsertionPointAfter(forOp1);
+    // debug
+    //  forOp1->dump();
 
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
-};  
+};
 
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: Quantization operations
 //===----------------------------------------------------------------------===//
 
-
 struct QuantizationOpLowering : public ConversionPattern {
   QuantizationOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::QuantizationOp::getOperationName(), 1, ctx) {}
@@ -2463,103 +2983,112 @@ struct QuantizationOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      // y_quantized[i] = Round(a[i] - min) / step) * step + min
-      //   where, step = (max-min)/ NoOfLevels , NoOLevels = 2^NoOfBits 
 
-      // 	steps:
-      // 		1) given NoOfLevels 
-      // 		2) Then calculate stepSize = (Max-Min)/NoOfLevels
-      // 		3) iterate for all the elements and calculate quantizedCoeff
+    // Pseudo-code:
+    //  y_quantized[i] = Round((a[i] - min) / step) * step + min
+    //    where, step = (max-min)/ NoOfLevels , NoOfLevels = 2^NoOfBits
+
+    // 	steps:
+    // 		1) given NoOfLevels
+    // 		2) Then calculate stepSize = (Max-Min)/NoOfLevels
+    // 		3) iterate for all the elements and calculate quantizedCoeff
 
-      // 			GetLevelForVal =  (a[i] - min)/step
-      // 			RoundedVal = arith.FPToSI(GetLevelForVal)
-      // 			QuantVal = RoundedVal * step + min_val
+    // 			GetLevelForVal =  (a[i] - min)/step
+    // 			RoundedVal = arith.FPToSI(GetLevelForVal)
+    // 			QuantVal = RoundedVal * step + min_val
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
 
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //create another memory location for getting NoOfLevels
+    // create another memory location for getting NoOfLevels
 
-    // Value constant1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value constant1 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(1));
 
-    
-    //1) Then calculate stepSize = (Max-Min)/NoOfLevels
- 
+    // 1) Then calculate stepSize = (Max-Min)/NoOfLevels
+
     QuantizationOpAdaptor quantizationAdaptor(operands);
     DEBUG_PRINT_NO_ARGS();
     Value getMaxMemref = quantizationAdaptor.getMax();
-    auto getMax = rewriter.create<AffineLoadOp>(loc, getMaxMemref, ValueRange{});
+    auto getMax =
+        rewriter.create<AffineLoadOp>(loc, getMaxMemref, ValueRange{});
 
     Value getMinMemref = quantizationAdaptor.getMin();
-    auto getMin = rewriter.create<AffineLoadOp>(loc, getMinMemref, ValueRange{});
+    auto getMin =
+        rewriter.create<AffineLoadOp>(loc, getMinMemref, ValueRange{});
 
     Value getNLevelsMemref = quantizationAdaptor.getNlevels();
-    auto getNlevels = rewriter.create<AffineLoadOp>(loc, getNLevelsMemref, ValueRange{});
-
-    Value MaxMinusMin = rewriter.create<arith::SubFOp>(loc, getMax ,getMin );
-    Value StepSize = rewriter.create<arith::DivFOp>(loc, MaxMinusMin, getNlevels);
+    auto getNlevels =
+        rewriter.create<AffineLoadOp>(loc, getNLevelsMemref, ValueRange{});
 
+    Value MaxMinusMin = rewriter.create<arith::SubFOp>(loc, getMax, getMin);
+    Value StepSize =
+        rewriter.create<arith::DivFOp>(loc, MaxMinusMin, getNlevels);
 
     // iterate for all the elements and calculate quantizedCoeff
 
     // 			GetLevelForVal =  (a[i] - min)/step
     // 			RoundedVal = arith.FPToSI(GetLevelForVal)
     // 			QuantVal = RoundedVal * step + min_val
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];   
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
-    
-    //for loop from 0 to len
-    // use iter_arg as passing value for the loop 
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+
+    // for loop from 0 to len
+    //  no iter_args are needed: each element is quantized independently
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //Use iter_arg for taking prev_val 
-    //Get iter_arg 
-    
+    // Per-element computation (nothing is carried between iterations):
+    //   load a[i], map it to a level index, then rescale it
+
     // 			GetLevelForVal =  (a[i] - min)/step
-	
-	  // 			QuantVal = RoundedVal * step + min_val
 
-    Value inputX = rewriter.create<AffineLoadOp>(loc, quantizationAdaptor.getInput(), ivY );
-    Value inputMinusMin = rewriter.create<arith::SubFOp>(loc, inputX, getMin );
-    Value aMinusMinDivStep = rewriter.create<arith::DivFOp>(loc, inputMinusMin, StepSize );
+    // 			QuantVal = RoundedVal * step + min_val
+
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, quantizationAdaptor.getInput(), ivY);
+    Value inputMinusMin = rewriter.create<arith::SubFOp>(loc, inputX, getMin);
+    Value aMinusMinDivStep =
+        rewriter.create<arith::DivFOp>(loc, inputMinusMin, StepSize);
 
     // 	RoundedVal = arith.FPToSI(GetLevelForVal)
-    Value RoundedVal = rewriter.create<arith::FPToSIOp>(loc,rewriter.getI64Type(),  aMinusMinDivStep);
-    Value RoundValFloat = rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type() , RoundedVal);
+    Value RoundedVal = rewriter.create<arith::FPToSIOp>(
+        loc, rewriter.getI64Type(), aMinusMinDivStep);
+    Value RoundValFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), RoundedVal);
 
     // 	QuantVal = RoundedVal * step + min_val
-    Value RoundedMulStep = rewriter.create<arith::MulFOp>(loc, RoundValFloat , StepSize);
-    Value QuantVal = rewriter.create<arith::AddFOp>(loc, RoundedMulStep, getMin);
-    rewriter.create<AffineStoreOp>(loc, QuantVal, alloc, ValueRange{ivY}); 
+    Value RoundedMulStep =
+        rewriter.create<arith::MulFOp>(loc, RoundValFloat, StepSize);
+    Value QuantVal =
+        rewriter.create<arith::AddFOp>(loc, RoundedMulStep, getMin);
+    rewriter.create<AffineStoreOp>(loc, QuantVal, alloc, ValueRange{ivY});
     rewriter.setInsertionPointAfter(forOpY);
-   
-    //debug
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
+
+    // debug
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
+
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: lmsFilter operations
 //===----------------------------------------------------------------------===//
@@ -2572,20 +3101,20 @@ struct LMSFilterOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-    // for (int n = 0; n < NUM_SAMPLES; n++) {
-    //     // Calculate the filter output y[n]
-    //     y[n] = 0;
-    //     for (int i = 0; i < FILTER_LENGTH; i++) {
-    //         if (n - i >= 0) { // affine if 
-    //             y[n] = y[n] + (w[i] * x[n - i]); 
-    //         }
-    //     }
-        
+
+    // Pseudo-code:
+    //  for (int n = 0; n < NUM_SAMPLES; n++) {
+    //      // Calculate the filter output y[n]
+    //      y[n] = 0;
+    //      for (int i = 0; i < FILTER_LENGTH; i++) {
+    //          if (n - i >= 0) { // affine if
+    //              y[n] = y[n] + (w[i] * x[n - i]);
+    //          }
+    //      }
+
     //     // Calculate the error e[n]
     //     e[n] = d[n] - y[n];
-        
+
     //     // Update the filter weights w[i]
     //     for (int i = 0; i < FILTER_LENGTH; i++) {
     //         if (n - i >= 0) {
@@ -2594,59 +3123,64 @@ struct LMSFilterOpLowering : public ConversionPattern {
     //     }
     // }
 
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     LMSFilterOpAdaptor lmsFilterAdaptor(operands);
-    // Value alpha = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value alpha = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(1));
-    Value zeroval = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-     Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu()); 
-    
-    //For loop -- iterate from 0 to last
-    int64_t lb = 0 ;
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t numSamples = tensorType.getShape()[0];
     int64_t step = 1;
 
     Value GetFilterLOp = op->getOperand(3);
-    dsp::ConstantOp constantOp3rdArg = GetFilterLOp.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();;
+    dsp::ConstantOp constantOp3rdArg =
+        GetFilterLOp.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
+    ;
     auto elements1 = constant3rdValue.getValues<FloatAttr>();
     float filterlenval = elements1[0].getValueAsDouble();
-    auto FilterLength = (uint64_t) filterlenval;
+    auto FilterLength = (uint64_t)filterlenval;
 
     Value GetItersLOp = op->getOperand(4);
-    dsp::ConstantOp constantOp4thArg = GetItersLOp.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constant4thValue = constantOp4thArg.getValue();;
+    dsp::ConstantOp constantOp4thArg =
+        GetItersLOp.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constant4thValue = constantOp4thArg.getValue();
+    ;
     auto elements = constant4thValue.getValues<FloatAttr>();
     float interationsval = elements[0].getValueAsDouble();
-    auto TotalIterations = (uint64_t) interationsval;
-    
-  
-    
+    auto TotalIterations = (uint64_t)interationsval;
+
     auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
     auto yAlloc = rewriter.create<memref::AllocOp>(loc, yMemRefType);
 
-    affine::AffineForOp forOpiter = rewriter.create<AffineForOp>(loc, lb, TotalIterations, step);
+    affine::AffineForOp forOpiter =
+        rewriter.create<AffineForOp>(loc, lb, TotalIterations, step);
     rewriter.setInsertionPointToStart(forOpiter.getBody());
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
 
-    //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    // For affine expression: #map1 = affine_map<(d0, d1) -> (d0 - d1)>
     AffineExpr d0, d1, s0;
     bindDims(rewriter.getContext(), d0, d1);
-    // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; 
+    // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) -
+    // rewriter.getAffineDimExpr(1); //d0 - d1;
     AffineExpr ExprForXSlice = d0 - d1;
     AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
     IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
@@ -2655,72 +3189,74 @@ struct LMSFilterOpLowering : public ConversionPattern {
     // rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
     // Allocate and initialize array for y
     // Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    
+
     rewriter.create<AffineStoreOp>(loc, zeroval, yAlloc, ValueRange{iv});
 
-    affine::AffineForOp forOp2 = rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
     auto iv2 = forOp2.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp2.getBody());
 
-    auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv,iv2} , false /*no else*/ );
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
     rewriter.setInsertionPointToStart(ifOp.getThenBlock());
 
-    Value inputX = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter,
-                  ValueRange{iv,iv2}); 
-    Value Prevw = rewriter.create<AffineLoadOp>(loc, alloc, 
-                  ValueRange{iv2}); //memRefType
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value Prevw = rewriter.create<AffineLoadOp>(loc, alloc,
+                                                ValueRange{iv2}); // memRefType
 
-    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX ,Prevw );
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, Prevw);
     Value ybefore = rewriter.create<AffineLoadOp>(loc, yAlloc, ValueRange{iv});
     Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
     rewriter.create<AffineStoreOp>(loc, sumNext, yAlloc, ValueRange{iv});
     rewriter.setInsertionPointAfter(ifOp);
     rewriter.setInsertionPointAfter(forOp2);
 
-
     //  get e[n] = d[n] - y[n]
 
-    Value desiredX = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
-    Value ynew = rewriter.create<AffineLoadOp>(loc, yAlloc,
-                      ValueRange{iv}); 
-          
-    Value err = rewriter.create<arith::SubFOp>(loc, desiredX ,ynew );
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, yAlloc, ValueRange{iv});
 
-    
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
 
-    affine::AffineForOp forOp3 = rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
-        auto iv3 = forOp3.getInductionVar();
-        
-        rewriter.setInsertionPointToStart(forOp3.getBody());
-
-        auto ifOp2 = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv,iv3} , false /*no else*/ );
-        rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
-
-        Value inputX2 = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(), addMapForLMSFilter,
-                      ValueRange{iv,iv3}); 
-         
-        Value Prevw2 = rewriter.create<AffineLoadOp>(loc, alloc, 
-                      ValueRange{iv3}); 
-
-        // f(u(n),e(n),μ)=μe(n)u∗(n)
-        Value mul1 = rewriter.create<arith::MulFOp>(loc, err ,inputX2 );
-        Value mul2 = rewriter.create<arith::MulFOp>(loc, mu ,mul1 );
-
-        // FInal w[n]
-        Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2 ,mul2 );              
-                      
-        rewriter.create<AffineStoreOp>(loc, answer, alloc, ValueRange{iv3});
-        rewriter.setInsertionPointAfter(ifOp2);
-        rewriter.setInsertionPointAfter(forOp3);
-
-        rewriter.setInsertionPointAfter(forOp1);
-        rewriter.setInsertionPointAfter(forOpiter);
-    //debug
-    // forOp1->dump();
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv3});
+
+    // f(u(n),e(n),μ)=μe(n)u∗(n)
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // Final w[i] = w[i] + mu * e[n] * x[n - i]
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+    rewriter.create<AffineStoreOp>(loc, answer, alloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    rewriter.setInsertionPointAfter(forOp1);
+    rewriter.setInsertionPointAfter(forOpiter);
+    // debug
+    //  forOp1->dump();
 
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -2729,7 +3265,6 @@ struct LMSFilterOpLowering : public ConversionPattern {
 // ToyToAffine RewritePatterns: Threshold operations
 //===----------------------------------------------------------------------===//
 
-
 struct ThresholdOpLowering : public ConversionPattern {
   ThresholdOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::ThresholdOp::getOperationName(), 1, ctx) {}
@@ -2738,207 +3273,227 @@ struct ThresholdOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld
-              //    = 0 , else
+
+    // Pseudo-code:
+    //   y[n] = a[n] , if a[n] > threshold or a[n] < -threshold
+    //        = 0    , else (i.e. -threshold <= a[n] <= threshold)
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
 
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
+    // y[n] = a[n] , if a[n] > threshold or a[n] < -threshold (else 0)
+    // loop from 0 to len
 
-    //y[n] = a[n] , if a[i] >= threshld or, a[i] <= -threshld
-    //loop from 0 to len
-
-    //load from X, 
+    // load from X,
     ThresholdOpAdaptor thresholdAdaptor(operands);
     DEBUG_PRINT_NO_ARGS();
 
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];   
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
-    
-    //for loop from 0 to len(Output)
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+
+    // for loop from 0 to len(Output)
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-     
-    Value inputX = rewriter.create<AffineLoadOp>(loc, thresholdAdaptor.getInput(), ivY );
-    
+
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, thresholdAdaptor.getInput(), ivY);
+
     // Load the threshold value from the memref
     auto thresholdMemRef = thresholdAdaptor.getThreshld();
-    auto threshold = rewriter.create<AffineLoadOp>(loc, thresholdMemRef, ValueRange{});
+    auto threshold =
+        rewriter.create<AffineLoadOp>(loc, thresholdMemRef, ValueRange{});
 
     // Compare a[i] <= threshold
-    auto cmp1 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLE, inputX, threshold);
-    
+    auto cmp1 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLE,
+                                               inputX, threshold);
+
     // Compare a[i] >= -threshold
     auto negThreshold = rewriter.create<arith::NegFOp>(loc, threshold);
-    auto cmp2 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE, inputX, negThreshold);
+    auto cmp2 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
+                                               inputX, negThreshold);
 
     // Combine the comparisons using AND
     auto cmpAnd = rewriter.create<arith::AndIOp>(loc, cmp1, cmp2);
 
     // Use select to choose between 0 and a[i]
-    auto selectOp = rewriter.create<arith::SelectOp>(loc, cmpAnd, constant0, inputX);
+    auto selectOp =
+        rewriter.create<arith::SelectOp>(loc, cmpAnd, constant0, inputX);
 
     // Store the result
     rewriter.create<AffineStoreOp>(loc, selectOp, alloc, ivY);
 
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
+    // debug
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
+
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: HighPassFIRHammingOptimizedOp operations
 //===----------------------------------------------------------------------===//
 
 struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern {
   HighPassFIRHammingOptimizedOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::HighPassFIRHammingOptimizedOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(
+            dsp::HighPassFIRHammingOptimizedOp::getOperationName(), 1, ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      // y_highFIRHamming[n] = -1 * [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : 
-           // = 1 - wc/pi , n = (N-1)/2
 
-           // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n 
+    // Pseudo-code:
+    //  y[n] = -sin(wc*(n-M)) / (pi*(n-M)) * (0.54 - 0.46*cos(2*pi*n/(N-1))),
+    //  for 0 <= n < M, where M = (N-1)/2; and y[M] = 1 - wc/pi
 
-      // 1 loops : first from 0 <= n < (N-1)/2 - 1
-      //     
+    // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+    // 1 loop : from 0 <= n <= (N-1)/2 - 1
+    //
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //first from 0 <= i < (N-1)/2 - 1
-    int64_t lb = 0 ;
-    int64_t N = tensorType.getShape()[0];   
-    int64_t ub = (N-1) / 2 ;
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // loop range: 0 <= i <= (N-1)/2 - 1
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
-    HighPassFIRHammingOptimizedOpAdaptor highPassFIRHammingOptimizedOpAdaptor(operands);
-    //Handle middle y[mid] = wc / pi
-    int64_t midIndx = ub ;
-    Value constantIndxMid = rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-    Value wc = rewriter.create<AffineLoadOp>(loc, highPassFIRHammingOptimizedOpAdaptor.getWc(), ValueRange{});
-    Value constant1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
-    Value constantMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(-1));
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(3.14159265359));
+    HighPassFIRHammingOptimizedOpAdaptor highPassFIRHammingOptimizedOpAdaptor(
+        operands);
+    // Handle middle y[mid] = 1 - wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, highPassFIRHammingOptimizedOpAdaptor.getWc(), ValueRange{});
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constantMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
     Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
-    Value OneMinusWcByPi = rewriter.create<arith::SubFOp>(loc, constant1, wcByPi); 
-    rewriter.create<AffineStoreOp>(loc, OneMinusWcByPi, alloc, ValueRange{constantIndxMid});
-
-    //first from 0 <= i < (N-1)/2 - 1
-
-    //calculate i-(N-1)/2 
-
-    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) ub));
-    
-    //calculate 0.54 - 0.46 cos(2 *pi * n/N-1)
-    Value constant0_54 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.54));
-    Value constant0_46 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.46));
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value NMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) N - 1));
-
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    Value OneMinusWcByPi =
+        rewriter.create<arith::SubFOp>(loc, constant1, wcByPi);
+    rewriter.create<AffineStoreOp>(loc, OneMinusWcByPi, alloc,
+                                   ValueRange{constantIndxMid});
+
+    // loop range: 0 <= i <= (N-1)/2 - 1
+
+    // calculate i-(N-1)/2
+
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
+
+    // calculate 0.54 - 0.46 cos(2 *pi * n/N-1)
+    Value constant0_54 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54));
+    Value constant0_46 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46));
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value NMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)N - 1));
+
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //get sin(wc * (i - (N-1)/ 2))
+    // get sin(wc * (i - (N-1)/ 2))
     Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
-    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid);  
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
 
     Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
-        
+
     // sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
 
-    Value piMuliMinusMid = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid);   
-    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin ,piMuliMinusMid) ;
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
 
     // [sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)] * [0.54-0.46 cos(2*pi*i/N-1)
 
-    //get 2*pi * k / (N -1)
-    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi , i);  
-    Value divIndxByNMinus1 = rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1 )  ;     
+    // get 2*pi * k / (N -1)
+    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi, i);
+    Value divIndxByNMinus1 =
+        rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1);
 
     // get cos(2*pi * k/(N-1)
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByNMinus1);
-    Value MulCos0_46 = rewriter.create<arith::MulFOp>(loc, constant0_46 , GetCos);   
-    Value Sub0_54_Cos = rewriter.create<arith::SubFOp>(loc, constant0_54 ,MulCos0_46) ;
-
-    //Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
-    Value MulFilterHamming = rewriter.create<arith::MulFOp>(loc, GetDiv , Sub0_54_Cos);
-    Value MulByMinus1 = rewriter.create<arith::MulFOp>(loc, constantMinus1 ,MulFilterHamming) ;
-    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, ValueRange{ivY}); 
-    
-    //also , store same value at N-1-i using affine-Map
-    //For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0)
+    Value MulCos0_46 =
+        rewriter.create<arith::MulFOp>(loc, constant0_46, GetCos);
+    Value Sub0_54_Cos =
+        rewriter.create<arith::SubFOp>(loc, constant0_54, MulCos0_46);
+
+    // Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
+    Value MulFilterHamming =
+        rewriter.create<arith::MulFOp>(loc, GetDiv, Sub0_54_Cos);
+    Value MulByMinus1 =
+        rewriter.create<arith::MulFOp>(loc, constantMinus1, MulFilterHamming);
+    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, ValueRange{ivY});
+
+    // also , store same value at N-1-i using affine-Map
+    // Affine map: (d0)[s0] -> (s0 - d0), with d0 = i and s0 = N - 1
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     bindSymbols(rewriter.getContext(), s0);
-    //calulate N - 1 - i
-    AffineExpr ExprForNMinus1minusI = s0 - d0 ;
-    AffineMap addMapForNMinus1minusI = AffineMap::get(1, 1, ExprForNMinus1minusI);
-
-    //store at N-1-i index , result
-    Value constantNMinus1Indx = rewriter.create<arith::ConstantIndexOp>(loc, N -1);
-    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, addMapForNMinus1minusI, 
-                  ValueRange{ivY,constantNMinus1Indx});
+    // calculate N - 1 - i
+    AffineExpr ExprForNMinus1minusI = s0 - d0;
+    AffineMap addMapForNMinus1minusI =
+        AffineMap::get(1, 1, ExprForNMinus1minusI);
+
+    // store at N-1-i index , result
+    Value constantNMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, N - 1);
+    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc,
+                                   addMapForNMinus1minusI,
+                                   ValueRange{ivY, constantNMinus1Indx});
     rewriter.setInsertionPointAfter(forOpY);
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
 
     // affine.for %arg0 = 0 to 3 {
     //   %12 = arith.index_castui %arg0 : index to i32
@@ -2959,11 +3514,10 @@ struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern {
     //   affine.store %25, %alloc[-%arg0 + 6] : memref<7xf64>
     // }
 
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
@@ -2974,203 +3528,222 @@ struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern {
 
 struct FIRFilterHammingOptimizedOpLowering : public ConversionPattern {
   FIRFilterHammingOptimizedOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::FIRFilterHammingOptimizedOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::FIRFilterHammingOptimizedOp::getOperationName(),
+                          1, ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y_FIRHamming[n] = [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 cos(2 *pi * n/N-1)], 0<= n < (N-1)/2 : 
-         // = wc/pi * 1 , n = (N-1)/2
 
-         // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n 
+    // Pseudo-code:
+    //   y[n] = sin(wc*(n-M)) / (pi*(n-M)) * (0.54 - 0.46*cos(2*pi*n/(N-1))),
+    //   for 0 <= n < M, where M = (N-1)/2;
+    //   y[M] = wc/pi
+
+    // and also, y_FIRHamming[N-1-n] = y[n] ie, store at n and also at N-1-n
 
-      // 1 loops : first from 0 <= n < (N-1)/2 - 1
-      //     
+    // 1 loop : from 0 <= n <= (N-1)/2 - 1
+    //
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //first from 0 <= i < (N-1)/2 - 1
-    int64_t lb = 0 ;
-    int64_t N = tensorType.getShape()[0];   
-    int64_t ub = (N-1) / 2 ;
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // loop range: 0 <= i <= (N-1)/2 - 1
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
-    FIRFilterHammingOptimizedOpAdaptor firFilterHammingOptimizedOpAdaptor(operands);
-    //Handle middle y[mid] = wc / pi
-    int64_t midIndx = ub ;
-    Value constantIndxMid = rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-    Value wc = rewriter.create<AffineLoadOp>(loc, firFilterHammingOptimizedOpAdaptor.getWc(), ValueRange{});
-
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(3.14159265359));
+    FIRFilterHammingOptimizedOpAdaptor firFilterHammingOptimizedOpAdaptor(
+        operands);
+    // Handle middle y[mid] = wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, firFilterHammingOptimizedOpAdaptor.getWc(), ValueRange{});
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
     Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
 
-    rewriter.create<AffineStoreOp>(loc, wcByPi, alloc, ValueRange{constantIndxMid});
+    rewriter.create<AffineStoreOp>(loc, wcByPi, alloc,
+                                   ValueRange{constantIndxMid});
+
+    // loop range: 0 <= i <= (N-1)/2 - 1
 
-    //first from 0 <= i < (N-1)/2 - 1
+    // calculate i-(N-1)/2
 
-    //calculate i-(N-1)/2 
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
 
-    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) ub));
-    
-    //calculate 0.54 - 0.46 cos(2 *pi * n/N-1)
-    Value constant0_54 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.54));
-    Value constant0_46 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.46));
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value NMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) N - 1));
+    // calculate 0.54 - 0.46 cos(2 *pi * n/N-1)
+    Value constant0_54 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54));
+    Value constant0_46 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46));
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value NMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)N - 1));
 
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //get sin(wc * (i - (N-1)/ 2))
+    // get sin(wc * (i - (N-1)/ 2))
     Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
-    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid);  
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
 
     Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
-        
+
     // sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
 
-    Value piMuliMinusMid = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid);   
-    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin ,piMuliMinusMid) ;
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
 
     // [sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)] * [0.54-0.46 cos(2*pi*i/N-1)
 
-    //get 2*pi * k / (N -1)
-    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi , i);  
-    Value divIndxByNMinus1 = rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1 )  ;     
+    // get 2*pi * k / (N -1)
+    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi, i);
+    Value divIndxByNMinus1 =
+        rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1);
 
     // get cos(2*pi * k/(N-1)
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByNMinus1);
-    Value MulCos0_46 = rewriter.create<arith::MulFOp>(loc, constant0_46 , GetCos);   
-    Value Sub0_54_Cos = rewriter.create<arith::SubFOp>(loc, constant0_54 ,MulCos0_46) ;
-
-    //Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
-    Value MulFilterHamming = rewriter.create<arith::MulFOp>(loc, GetDiv , Sub0_54_Cos);
-    rewriter.create<AffineStoreOp>(loc, MulFilterHamming, alloc, ValueRange{ivY}); 
-    
-    //also , store same value at N-1-i using affine-Map
-    //For affine expression: #map1 = affine_map<(%arg0)[N] : (N - 1 -%arg0)
+    Value MulCos0_46 =
+        rewriter.create<arith::MulFOp>(loc, constant0_46, GetCos);
+    Value Sub0_54_Cos =
+        rewriter.create<arith::SubFOp>(loc, constant0_54, MulCos0_46);
+
+    // Multiply Sub0_54_Cos and GetDiv -- sin(wc*(i-(N-1)/2)) / pi * (i-(N-1)/2)
+    Value MulFilterHamming =
+        rewriter.create<arith::MulFOp>(loc, GetDiv, Sub0_54_Cos);
+    rewriter.create<AffineStoreOp>(loc, MulFilterHamming, alloc,
+                                   ValueRange{ivY});
+
+    // also , store same value at N-1-i using affine-Map
+    // Affine map: (d0)[s0] -> (s0 - d0), with d0 = i and s0 = N - 1
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     bindSymbols(rewriter.getContext(), s0);
-    //calulate N - 1 - i
-    AffineExpr ExprForNMinus1minusI = s0 - d0 ;
-    AffineMap addMapForNMinus1minusI = AffineMap::get(1, 1, ExprForNMinus1minusI);
-
-    //store at N-1-i index , result
-    Value constantNMinus1Indx = rewriter.create<arith::ConstantIndexOp>(loc, N -1);
-    rewriter.create<AffineStoreOp>(loc, MulFilterHamming, alloc, addMapForNMinus1minusI, 
-                  ValueRange{ivY,constantNMinus1Indx});
+    // calculate N - 1 - i
+    AffineExpr ExprForNMinus1minusI = s0 - d0;
+    AffineMap addMapForNMinus1minusI =
+        AffineMap::get(1, 1, ExprForNMinus1minusI);
+
+    // store at N-1-i index , result
+    Value constantNMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, N - 1);
+    rewriter.create<AffineStoreOp>(loc, MulFilterHamming, alloc,
+                                   addMapForNMinus1minusI,
+                                   ValueRange{ivY, constantNMinus1Indx});
     rewriter.setInsertionPointAfter(forOpY);
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: GetRangeOfVectorOp operations
 //===----------------------------------------------------------------------===//
 
 struct GetRangeOfVectorOpLowering : public ConversionPattern {
   GetRangeOfVectorOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::GetRangeOfVectorOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::GetRangeOfVectorOp::getOperationName(), 1, ctx) {
+  }
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[0] = first: 
-      //  y[i] = y[i-1] + step for  1<=i<N
-      //  
-      //Alt:  y[0] = first , prev_val = first
-       //  for i =1 to N 
-      //    y[i] = prev_val 
-      //    prev_val = prev_val + step
-
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y[0] = first:
+    //   y[i] = y[i-1] + step for  1<=i<N
+    //
+    // Alt:  y[0] = first , prev_val = first
+    //   for i =1 to N
+    //    y[i] = prev_val
+    //    prev_val = prev_val + step
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
     GetRangeOfVectorOpAdaptor getRangeOfVectorOpOpAdaptor(operands);
 
     Value GetValueAtIndx2ndArg = op->getOperand(0);
-    dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+    dsp::ConstantOp constantOp2ndArg =
+        GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
+    ;
     auto elements = constantRhsValue.getValues<FloatAttr>();
     float FirstValue = elements[0].getValueAsDouble();
-    
-    DEBUG_PRINT_WITH_ARGS("FirstValue is" , FirstValue);
+
+    DEBUG_PRINT_WITH_ARGS("FirstValue is", FirstValue);
     Value GetStepOp = op->getOperand(2);
-    dsp::ConstantOp constantOp3rdArg = GetStepOp.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();;
+    dsp::ConstantOp constantOp3rdArg =
+        GetStepOp.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
+    ;
     auto elements1 = constant3rdValue.getValues<FloatAttr>();
     float StepValue = elements1[0].getValueAsDouble();
 
-    //first from 1 <= i < N
-    int64_t lb = 1 ;
-    int64_t ub = tensorType.getShape()[0];   
+    // first from 1 <= i < N
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
     // int64_t ub = (N-1) / 2 ;
     int64_t step = 1;
 
@@ -3179,58 +3752,58 @@ struct GetRangeOfVectorOpLowering : public ConversionPattern {
     float valAtIndxI = FirstValue;
 
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value constantFirst = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(valAtIndxI));
-    Value constantStep = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(StepValue));
+    Value constantFirst = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(valAtIndxI));
+    Value constantStep = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(StepValue));
 
-    rewriter.create<AffineStoreOp>(loc, constantFirst, alloc, ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, constantFirst, alloc,
+                                   ValueRange{constantIndx0});
 
-    //loop from 1 <= i < N
+    // loop from 1 <= i < N
 
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constantFirst});
+    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(
+        loc, lb, ub, step, ValueRange{constantFirst});
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //Use iter_arg for taking prev_val 
-    //Get iter_arg 
-    auto getIterArg =  forOpY.getBody()->getArgument(1);
+    // Use iter_arg for taking prev_val
+    // Get iter_arg
+    auto getIterArg = forOpY.getBody()->getArgument(1);
     // getIterArg.dump();
 
-    Value sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg,constantStep );
-    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{ivY}); 
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, getIterArg, constantStep);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{ivY});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
-    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext} );
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
     rewriter.setInsertionPointAfter(forOpY);
 
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
-
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
@@ -3241,283 +3814,309 @@ struct GetRangeOfVectorOpLowering : public ConversionPattern {
 
 struct HighPassFIRFilterOpLowering : public ConversionPattern {
   HighPassFIRFilterOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::HighPassFIRFilterOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::HighPassFIRFilterOp::getOperationName(), 1,
+                          ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : 
-      //           = wc/pi , n = (N-1)/2
-     //  y_hpf[n] = dirac(n- (N-1)/2) - y_lpf[n] = -1 * wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : 
-      //           = 1 - wc/pi , n = (N-1)/2
-
-      // 2 loops : first from 0 <= n <= (N-1)/2 - 1
-      //      2nd from (N-1)/2 +1 <= n < N
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 :
+    //            = wc/pi , n = (N-1)/2
+    //   y_hpf[n] = dirac(n - (N-1)/2) - y_lpf[n]
+    //            = -1 * wc/pi * sinc(wc * (n - (N-1)/2)) , n != (N-1)/2 :
+    //            = 1 - wc/pi , n = (N-1)/2
+
+    // 2 loops : first from 0 <= n <= (N-1)/2 - 1
+    //      2nd from (N-1)/2 +1 <= n < N
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //first from 0 <= i <= (N-1)/2 - 1
-    int64_t lb = 0 ;
-    int64_t N = tensorType.getShape()[0];   
-    int64_t ub = (N-1) / 2 ;
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // first from 0 <= i <= (N-1)/2 - 1
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
     HighPassFIRFilterOpAdaptor highPassfirFilterOpAdaptor(operands);
-    //Handle middle y[mid] = wc / pi
-    int64_t midIndx = ub ;
-    Value constantIndxMid = rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-    Value constant1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
-    Value constantMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(-1));
-
-    Value wc = rewriter.create<AffineLoadOp>(loc, highPassfirFilterOpAdaptor.getWc(), ValueRange{});
-
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(3.14159265359));
+    // Handle middle y[mid] = 1 - wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constantMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, highPassfirFilterOpAdaptor.getWc(), ValueRange{});
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
     Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
-    Value OneMinusWcByPi = rewriter.create<arith::SubFOp>(loc, constant1, wcByPi);
-    rewriter.create<AffineStoreOp>(loc, OneMinusWcByPi, alloc, ValueRange{constantIndxMid});
-
-    //first from 0 <= i <= (N-1)/2 - 1
-
-    //calculate i-(N-1)/2 
-    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) ub));
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    Value OneMinusWcByPi =
+        rewriter.create<arith::SubFOp>(loc, constant1, wcByPi);
+    rewriter.create<AffineStoreOp>(loc, OneMinusWcByPi, alloc,
+                                   ValueRange{constantIndxMid});
+
+    // first from 0 <= i <= (N-1)/2 - 1
+
+    // calculate i-(N-1)/2
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //get sin(wc * (i - (N-1)/ 2))
+    // get sin(wc * (i - (N-1)/ 2))
     Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
-    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid);  
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
 
     Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
-        
+
     // get sin(wc*i) / pi * i
 
-    Value piMuliMinusMid = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid);   
-    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin ,piMuliMinusMid) ;
-    Value MulByMinus1 = rewriter.create<arith::MulFOp>(loc, constantMinus1 ,GetDiv) ;
-    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, ValueRange{ivY}); 
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
+    Value MulByMinus1 =
+        rewriter.create<arith::MulFOp>(loc, constantMinus1, GetDiv);
+    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, ValueRange{ivY});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
     rewriter.setInsertionPointAfter(forOpY);
 
-    //2nd loop from (N-1)/2 + 1 <= i < N
-    lb = ub + 1 ; 
-    ub = N ;
+    // 2nd loop from (N-1)/2 + 1 <= i < N
+    lb = ub + 1;
+    ub = N;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv1 = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    //convert index to f64
-    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), iv1);
-    Value i1 = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
+    // convert index to f64
+    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), iv1);
+    Value i1 =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
 
-    //get sin(wc * (i1 - (N-1)/ 2))
+    // get sin(wc * (i1 - (N-1)/ 2))
     Value iMinusMid1 = rewriter.create<arith::SubFOp>(loc, i1, Nminus1By2);
-    Value mulwc_iMinusMid1 = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid1);  
+    Value mulwc_iMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, wc, iMinusMid1);
     Value GetSin1 = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid1);
 
-    //get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi 
-    // get sin(wc*i1) / pi * i1
+    // get sin(wc * (i1 - (N-1)/2)) / (pi * (i1 - (N-1)/2));
+    // the high-pass tap is this quotient multiplied by -1
 
-    Value piMuliMinusMid1 = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid1);   
-    Value GetDiv1 = rewriter.create<arith::DivFOp>(loc, GetSin1 ,piMuliMinusMid1) ;
+    Value piMuliMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid1);
+    Value GetDiv1 =
+        rewriter.create<arith::DivFOp>(loc, GetSin1, piMuliMinusMid1);
 
-    Value GetDiv1MulNeg1 = rewriter.create<arith::MulFOp>(loc, constantMinus1 ,GetDiv1) ;
+    Value GetDiv1MulNeg1 =
+        rewriter.create<arith::MulFOp>(loc, constantMinus1, GetDiv1);
 
-    rewriter.create<AffineStoreOp>(loc, GetDiv1MulNeg1, alloc, ValueRange{iv1}); 
+    rewriter.create<AffineStoreOp>(loc, GetDiv1MulNeg1, alloc, ValueRange{iv1});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
     rewriter.setInsertionPointAfter(forOp1);
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: LowPassFIRFilterOp operations
 //===----------------------------------------------------------------------===//
 
 struct LowPassFIRFilterOpLowering : public ConversionPattern {
   LowPassFIRFilterOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::LowPassFIRFilterOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::LowPassFIRFilterOp::getOperationName(), 1, ctx) {
+  }
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 : 
-      //           = wc/pi , n = (N-1)/2
-
-      // 2 loops : first from 0 <= n <= (N-1)/2 - 1
-      //      2nd from (N-1)/2 +1 <= n < N
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 :
+    //            = wc/pi , n = (N-1)/2
+
+    // 2 loops : first from 0 <= n <= (N-1)/2 - 1
+    //      2nd from (N-1)/2 +1 <= n < N
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //first from 0 <= i <= (N-1)/2 - 1
-    int64_t lb = 0 ;
-    int64_t N = tensorType.getShape()[0];   
-    int64_t ub = (N-1) / 2 ;
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // first from 0 <= i <= (N-1)/2 - 1
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
     LowPassFIRFilterOpAdaptor lowPassfirFilterOpAdaptor(operands);
-    //Handle middle y[mid] = wc / pi
-    int64_t midIndx = ub ;
-    Value constantIndxMid = rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-    Value wc = rewriter.create<AffineLoadOp>(loc, lowPassfirFilterOpAdaptor.getWc(), ValueRange{});
-
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(3.14159265359));
+    // Handle middle y[mid] = wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, lowPassfirFilterOpAdaptor.getWc(), ValueRange{});
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
     Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
 
-    rewriter.create<AffineStoreOp>(loc, wcByPi, alloc, ValueRange{constantIndxMid});
+    rewriter.create<AffineStoreOp>(loc, wcByPi, alloc,
+                                   ValueRange{constantIndxMid});
 
-    //first from 0 <= i <= (N-1)/2 - 1
+    // first from 0 <= i <= (N-1)/2 - 1
 
-    //calculate i-(N-1)/2 
-    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr((float) ub));
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // calculate i-(N-1)/2
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //get sin(wc * (i - (N-1)/ 2))
+    // get sin(wc * (i - (N-1)/ 2))
     Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
-    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid);  
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
 
     Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
-        
+
     // get sin(wc*i) / pi * i
 
-    Value piMuliMinusMid = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid);   
-    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin ,piMuliMinusMid) ;
-    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY}); 
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
+    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
     rewriter.setInsertionPointAfter(forOpY);
 
-    //2nd loop from (N-1)/2 + 1 <= i < N
-    lb = ub + 1 ; 
-    ub = N ;
+    // 2nd loop from (N-1)/2 + 1 <= i < N
+    lb = ub + 1;
+    ub = N;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv1 = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    //convert index to f64
-    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), iv1);
-    Value i1 = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
+    // convert index to f64
+    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), iv1);
+    Value i1 =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
 
-    //get sin(wc * (i1 - (N-1)/ 2))
+    // get sin(wc * (i1 - (N-1)/ 2))
     Value iMinusMid1 = rewriter.create<arith::SubFOp>(loc, i1, Nminus1By2);
-    Value mulwc_iMinusMid1 = rewriter.create<arith::MulFOp>(loc, wc , iMinusMid1);  
+    Value mulwc_iMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, wc, iMinusMid1);
     Value GetSin1 = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid1);
 
-    //get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi 
-    // get sin(wc*i1) / pi * i1
+    // get sin(wc * (i1 - (N-1)/2)) / (pi * (i1 - (N-1)/2));
+    // this quotient is the low-pass tap stored at index i1
 
-    Value piMuliMinusMid1 = rewriter.create<arith::MulFOp>(loc, constpi , iMinusMid1);   
-    Value GetDiv1 = rewriter.create<arith::DivFOp>(loc, GetSin1 ,piMuliMinusMid1) ;
-    rewriter.create<AffineStoreOp>(loc, GetDiv1, alloc, ValueRange{iv1}); 
+    Value piMuliMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid1);
+    Value GetDiv1 =
+        rewriter.create<arith::DivFOp>(loc, GetSin1, piMuliMinusMid1);
+    rewriter.create<AffineStoreOp>(loc, GetDiv1, alloc, ValueRange{iv1});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
     rewriter.setInsertionPointAfter(forOp1);
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: SetElemAtIndx operations
 //===----------------------------------------------------------------------===//
@@ -3530,71 +4129,82 @@ struct SetElemAtIndxOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  output = input[index] 
 
-      // replace this upsampling op with the output_mem_allocation op
+    // Pseudo-code:
+    //   input[index] = val[0]
+
+    // replace this SetElemAtIndx op with the output_mem_allocation op
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
+    // output for result type
     SetElemAtIndxOpAdaptor setElemAtIndxAdaptor(operands);
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    // auto tensorType = llvm::cast<RankedTensorType>(setElemAtIndxAdaptor.getInput());
-    //iterate to result1 --not needed for now but for future reference  
-   
-    //allocation & deallocation for the result of this operation
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // auto tensorType =
+    // llvm::cast<RankedTensorType>(setElemAtIndxAdaptor.getInput());
+    // iterate to result1 --not needed for now but for future reference
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //For loop -- iterate from 1 to last
-    // int64_t lb = 0 ;
-    // int64_t ub = tensorType.getShape()[0];   
-    // int64_t step = 1;
-    // affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
-    // auto ivY = forOpY.getInductionVar();
-    // rewriter.setInsertionPointToStart(forOpY.getBody());
+    // For loop -- iterate from 1 to last
+    //  int64_t lb = 0 ;
+    //  int64_t ub = tensorType.getShape()[0];
+    //  int64_t step = 1;
+    //  affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub,
+    //  step); auto ivY = forOpY.getInductionVar();
+    //  rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    // Value inputX = rewriter.create<AffineLoadOp>(loc, setElemAtIndxAdaptor.getInput(), ValueRange{ivY});
+    // Value inputX = rewriter.create<AffineLoadOp>(loc,
+    // setElemAtIndxAdaptor.getInput(), ValueRange{ivY});
     // rewriter.create<AffineStoreOp>(loc, inputX, alloc, ValueRange{ivY});
 
     // rewriter.setInsertionPointAfter(forOpY);
     DEBUG_PRINT_WITH_ARGS("\nCheck for index --here");
-    //load from X, using 2nd operand as index
+    // load from X, using 2nd operand as index
 
-    // Value GetValueAtIndx2ndArg = setElemAtIndxAdaptor.getIndx(); // getOperand(1);
+    // Value GetValueAtIndx2ndArg = setElemAtIndxAdaptor.getIndx(); //
+    // getOperand(1);
     DEBUG_PRINT_NO_ARGS();
     Value GetValueAtIndx2ndArg = op->getOperand(1);
-    dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+    dsp::ConstantOp constantOp2ndArg =
+        GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
+    ;
     auto elements = constantRhsValue.getValues<FloatAttr>();
     float SecondValue = elements[0].getValueAsDouble();
-    int SecondValueInt = (int64_t) SecondValue;
-    DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt);
+    int SecondValueInt = (int64_t)SecondValue;
+    DEBUG_PRINT_WITH_ARGS("Indx is", SecondValueInt);
 
-    Value constantIndx2Indx = rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
+    Value constantIndx2Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    // Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value constant0 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(15));
 
-    // Value ValToStore = setElemAtIndxAdaptor.getVal(); 
+    // Value ValToStore = setElemAtIndxAdaptor.getVal();
     // Value ValToStore = op->getOperand(2);
-    Value ValToStore = rewriter.create<AffineLoadOp>(loc, setElemAtIndxAdaptor.getVal(), ValueRange{constantIndx0});
-    // Value ValToStore = rewriter.create<AffineLoadOp>(loc, setElemAtIndxAdaptor.getVal(), ValueRange{});
-
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx2Indx});
-    rewriter.create<AffineStoreOp>(loc, ValToStore, setElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx});
+    Value ValToStore = rewriter.create<AffineLoadOp>(
+        loc, setElemAtIndxAdaptor.getVal(), ValueRange{constantIndx0});
+    // Value ValToStore = rewriter.create<AffineLoadOp>(loc,
+    // setElemAtIndxAdaptor.getVal(), ValueRange{});
+
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx2Indx});
+    rewriter.create<AffineStoreOp>(loc, ValToStore,
+                                   setElemAtIndxAdaptor.getInput(),
+                                   ValueRange{constantIndx2Indx});
+
+    // debug
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
 
-
-    //debug
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -3603,7 +4213,6 @@ struct SetElemAtIndxOpLowering : public ConversionPattern {
 // ToyToAffine RewritePatterns: GetElemAtIndx operations
 //===----------------------------------------------------------------------===//
 
-
 struct GetElemAtIndxOpLowering : public ConversionPattern {
   GetElemAtIndxOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::GetElemAtIndxOp::getOperationName(), 1, ctx) {}
@@ -3612,62 +4221,65 @@ struct GetElemAtIndxOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  output = input[index] 
 
-      // replace this upsampling op with the output_mem_allocation op
+    // Pseudo-code:
+    //   output = input[index]
+
+    // replace this GetElemAtIndx op with the output_mem_allocation op
 
     DEBUG_PRINT_NO_ARGS();
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-   
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    // Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value constant0 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(0));
 
-
-
     DEBUG_PRINT_WITH_ARGS("\nCheck for index --here");
-    //load from X, using 2nd operand as index
+    // load from X, using 2nd operand as index
     GetElemAtIndxOpAdaptor getElemAtIndxAdaptor(operands);
-    // Value GetValueAtIndx2ndArg = getElemAtIndxAdaptor.getIndx(); // getOperand(1);
+    // Value GetValueAtIndx2ndArg = getElemAtIndxAdaptor.getIndx(); //
+    // getOperand(1);
     DEBUG_PRINT_NO_ARGS();
     Value GetValueAtIndx2ndArg = op->getOperand(1);
-    dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+    auto indexConst = GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
+    if (!indexConst) // null unless the index is produced by dsp::ConstantOp
+      return rewriter.notifyMatchFailure(op, "index not from dsp::ConstantOp");
+    DenseElementsAttr constantRhsValue = indexConst.getValue();
     auto elements = constantRhsValue.getValues<FloatAttr>();
     float SecondValue = elements[0].getValueAsDouble();
-    int SecondValueInt = (int64_t) SecondValue;
-    DEBUG_PRINT_WITH_ARGS("Indx is" , SecondValueInt);
+    int SecondValueInt = (int64_t)SecondValue;
+    DEBUG_PRINT_WITH_ARGS("Indx is", SecondValueInt);
 
-    Value constantIndx2Indx = rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
+    Value constantIndx2Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    
-    Value inputX = rewriter.create<AffineLoadOp>(loc, getElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx});
-    rewriter.create<AffineStoreOp>(loc, inputX, alloc, ValueRange{constantIndx0});
 
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, getElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx});
+    rewriter.create<AffineStoreOp>(loc, inputX, alloc,
+                                   ValueRange{constantIndx0});
+
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-      // affine.store %cst, %alloc_10[] : memref<f64>
-      // %0 = affine.load %alloc_11[4] : memref<10xf64>
-      // affine.store %0, %alloc[0] : memref<1xf64>
-    
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: SincOp operations
 //===----------------------------------------------------------------------===//
@@ -3680,100 +4292,100 @@ struct SincOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y = sinc(wc * n) = [1, sin(wc)/pi , sin(2* wc)/2*pi , ... sin(n * wc)/n*pi] , 0<=n<=N 
-    
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-     
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y[0] = 1
+    //   y[n] = sin(wc * n) / (pi * n) , 1 <= n < N (N = output length)
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //For loop -- iterate from 1 to last
-    int64_t lb = 1 ;
-    int64_t ub = tensorType.getShape()[0];   
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // For loop -- iterate from 1 to last
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
-    //get constants -- 0.54 & 0.46
+    // get constants -- index 0, 1.0 and pi
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-    
-    Value constant1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(1));
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(3.14159265359));
-    rewriter.create<AffineStoreOp>(loc, constant1, alloc, ValueRange{constantIndx0});
-
-    //For loop
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
+    rewriter.create<AffineStoreOp>(loc, constant1, alloc,
+                                   ValueRange{constantIndx0});
+
+    // For loop
     SincOpAdaptor sincOpAdaptor(operands);
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
+    // get wc * i
+    Value wc =
+        rewriter.create<AffineLoadOp>(loc, sincOpAdaptor.getWc(), ValueRange{});
 
-    //get wc * i 
-    Value wc = rewriter.create<AffineLoadOp>(loc, sincOpAdaptor.getWc(), ValueRange{});
-    
-    Value mulwc_i = rewriter.create<arith::MulFOp>(loc, wc , i);  
+    Value mulwc_i = rewriter.create<arith::MulFOp>(loc, wc, i);
 
     // get sin(wc*i) / pi * i
     Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_i);
-    Value piMuli = rewriter.create<arith::MulFOp>(loc, constpi , i);   
-    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin ,piMuli) ;
-    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY}); 
+    Value piMuli = rewriter.create<arith::MulFOp>(loc, constpi, i);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuli);
+    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY});
     // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
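+    // NOTE: the IR listing above does not match the ops this pattern emits
+    // (it shows 0.54 - 0.46 * cos(...)); treat it as a stale example.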
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FFT1DImg operations
 //===----------------------------------------------------------------------===//
 
-
 struct FFT1DImgOpLowering : public ConversionPattern {
   FFT1DImgOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::FFT1DImgOp::getOperationName(), 1, ctx) {}
@@ -3782,171 +4394,181 @@ struct FFT1DImgOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = y_real[k] + j *y_img[k] 
-      // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
-      //init  output mem for y_real & y_img as 0 
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k]
-          // 
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j * y_img[k]; this pattern produces only y_img:
+    //   y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi * k * n / N))
+    // init output mem for y_img as 0
+    // iterate for output from k=0 to last
+    //   iterate for all x from n=0 to last
+    //     compute x[n] * sin(2*pi * k * n / N) and subtract it from the
+    //     running value stored at y_img[k]
+    //
+    // replace this FFT1DImg op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-    // auto tensorType1 =  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
 
-    // DEBUG_PRINT_NO_ARGS() ; 
-    //tensorType.getShape()[0]
-    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; 
-    
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
     rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivX = forOpX.getInductionVar();
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
+    // load x[n] and the running y_img[k]
     FFT1DImgOpAdaptor fft1DImgAdaptor(operands);
-    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DImgAdaptor.getInput(), ValueRange{ivX});
-    Value loadYImg = rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DImgAdaptor.getInput(), ValueRange{ivX});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
 
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
 
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Value N = inputTensorType.getShape()[0];
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
-    
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
     // Img part = -1 * Sum(x[i] * sin(div) )
     Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
-    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX , GetSin);   
-    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg ,xMulSin) ;
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
 
-    // Value constMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value constMinus1 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(-1));
-    // Value NegImgSum = rewriter.create<arith::MulFOp>(loc, constMinus1 , imgSum);
-    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY}); 
+    // Value NegImgSum = rewriter.create<arith::MulFOp>(loc, constMinus1 ,
+    // imgSum);
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
     // x[n-1]
     rewriter.setInsertionPointAfter(forOpX);
     // Calculate y[k] = 1/N * y[k]
-    
+
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc_real[%y] : memref<4xf64>
-        //         //    dsp.print %alloc_real : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     // rewriter.replaceOp(op, alloc_real);
     rewriter.replaceOp(op, alloc_img);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FFT1DReal operations
 //===----------------------------------------------------------------------===//
 
-
 struct FFT1DRealOpLowering : public ConversionPattern {
   FFT1DRealOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::FFT1DRealOp::getOperationName(), 1, ctx) {}
@@ -3955,160 +4577,170 @@ struct FFT1DRealOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = y_real[k] + j *y_img[k] 
-      // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] 
-      // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
-      //init  output mem for y_real & y_img as 0 
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k]
-          // 
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j * y_img[k]; this pattern produces only y_real:
+    //   y_real[k] = sumOver_n(x[n] * cos(2*pi * k * n / N))
+    //   (y_img is produced separately by FFT1DImgOpLowering)
+    // init output mem for y_real as 0
+    // iterate for output from k=0 to last
+    //   iterate for all x from n=0 to last
+    //     compute x[n] * cos(2*pi * k * n / N) and add it to the running
+    //     value stored at y_real[k]
+    //
+    // replace this FFT1DReal op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-    // auto tensorType1 =  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
 
-    // DEBUG_PRINT_NO_ARGS() ; 
-    //tensorType.getShape()[0]
-    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; 
-    
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
 
-    // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
+    // affine.for %y = 0 to 4 {
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
     rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivX = forOpX.getInductionVar();
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
+    // load x[n] and the running y_real[k]
     FFT1DRealOpAdaptor fft1DrealAdaptor(operands);
-    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DrealAdaptor.getInput(), ValueRange{ivX});
-    Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
-    
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
-
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
-
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DrealAdaptor.getInput(), ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Value N = inputTensorType.getShape()[0];
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
 
     // Real part = Sum(x[i] * cos(div) )
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
-    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX , GetCos);   
-    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal ,xMulCos) ;
-    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY}); 
-    
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
 
     // DEBUG_PRINT_NO_ARGS() ;
-  
+
     rewriter.setInsertionPointAfter(forOpX);
     // forOpX->dump();
     // rewriter.create<AffineYieldOp>(loc, ValueRange{alloc_real, alloc_img});
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc_real[%y] : memref<4xf64>
-        //         //    dsp.print %alloc_real : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     // rewriter.replaceOp(op, alloc_real);
     rewriter.replaceOp(op, alloc_real);
-    
+
     return success();
   }
 };
@@ -4125,64 +4757,64 @@ struct SquareOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //output = 0
-      //iterate for len = 0 to inputLen
-      //  elem = a[i]
-      //  output[i] = elem * elem 
-      //  store output
-
-    //DEBUG_PRINT_NO_ARGS() ;
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    // output = 0
+    // iterate for len = 0 to inputLen
+    //   elem = a[i]
+    //   output[i] = elem * elem
+    //   store output
+
+    // DEBUG_PRINT_NO_ARGS() ;
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
     SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    
-    //For loop
+    // For loop
     SquareOpAdaptor squareOpAdaptor(operands);
     // DEBUG_PRINT_NO_ARGS() ;
-    
-    int64_t lb = 0 ;
+
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    //for loop
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // for loop
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    
+
     // DEBUG_PRINT_NO_ARGS() ;
-    Value elemIn = rewriter.create<AffineLoadOp>(loc, squareOpAdaptor.getInput(), iv);   
-    Value square = rewriter.create<arith::MulFOp>(loc, elemIn , elemIn);
-    
-    //store the result
+    Value elemIn =
+        rewriter.create<AffineLoadOp>(loc, squareOpAdaptor.getInput(), iv);
+    Value square = rewriter.create<arith::MulFOp>(loc, elemIn, elemIn);
+
+    // store the result
     rewriter.create<AffineStoreOp>(loc, square, alloc, iv);
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-    //   affine.for %arg0 = 0 to 5 {
-    //   %0 = affine.load %alloc_6[%arg0] : memref<5xf64>
-    //   %1 = arith.mulf %0, %0 : f64
-    //   affine.store %1, %alloc_5[%arg0] : memref<5xf64>
-    // }
+    // debug
+    //  forOp1->dump();
+    //    affine.for %arg0 = 0 to 5 {
+    //    %0 = affine.load %alloc_6[%arg0] : memref<5xf64>
+    //    %1 = arith.mulf %0, %0 : f64
+    //    affine.store %1, %alloc_5[%arg0] : memref<5xf64>
+    //  }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: SumOp operations
 //===----------------------------------------------------------------------===//
@@ -4195,128 +4827,134 @@ struct SumOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //output = 0
-      //iterate for len = 0 to inputLen
-      //  output = load output
-      //  elem = a[i]
-      //  output = output + elem 
-      //  store output
+
+    // Pseudo-code:
+    // output = 0
+    // iterate for len = 0 to inputLen
+    //   output = load output
+    //   elem = a[i]
+    //   output = output + elem
+    //   store output
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
     SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    
-    //For loop
+    // For loop
     SumOpAdaptor sumOpAdaptor(operands);
     // DEBUG_PRINT_NO_ARGS() ;
-    auto inputType = llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType()); //op->getOperand(
-    // auto inputType = llvm::dyn_cast<RankedTensorType>(sumOpAdaptor.getInput().getType());
+    auto inputType = llvm::dyn_cast<RankedTensorType>(
+        op->getOperand(0).getType()); // op->getOperand(
+    // auto inputType =
+    // llvm::dyn_cast<RankedTensorType>(sumOpAdaptor.getInput().getType());
     // DEBUG_PRINT_NO_ARGS() ;
 
-    int64_t lb = 0 ;
+    int64_t lb = 0;
     int64_t ub = inputType.getShape()[0];
     int64_t step = 1;
 
-    //init 0 for output
+    // init 0 for output
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    // Value GetInputX0 = rewriter.create<AffineLoadOp>(loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0});
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    // Value elemIn = rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), iv);
-    // DEBUG_PRINT_NO_ARGS() ;
-    rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{constantIndx0});
-
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // Value GetInputX0 = rewriter.create<AffineLoadOp>(loc,
+    // lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0});
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    // Value elemIn = rewriter.create<AffineLoadOp>(loc,
+    // upsamplingAdaptor.getLhs(), iv); DEBUG_PRINT_NO_ARGS() ;
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+                                   ValueRange{constantIndx0});
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    
+
     // DEBUG_PRINT_NO_ARGS() ;
-    Value elemIn = rewriter.create<AffineLoadOp>(loc, sumOpAdaptor.getInput(), iv);
-    Value loadSum = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{constantIndx0});
-    
-    Value sum = rewriter.create<arith::AddFOp>(loc, elemIn , loadSum);
-    
-    //store the result
+    Value elemIn =
+        rewriter.create<AffineLoadOp>(loc, sumOpAdaptor.getInput(), iv);
+    Value loadSum =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{constantIndx0});
+
+    Value sum = rewriter.create<arith::AddFOp>(loc, elemIn, loadSum);
+
+    // store the result
     rewriter.create<AffineStoreOp>(loc, sum, alloc, ValueRange{constantIndx0});
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      //   %cont3 = arith.const 3 : f64
-      //   affine.for %arg0 = 0 to 8 {
-      //    %elem1 = affine.load input[%arg0]
-      //    #map1 = affine_map<(%arg0)[] : (%arg0 + 1)
-      //    #map2 = affine_map<(%arg0)[] : (%arg0 + 2)
-      //    %elem2 = affine.load input[#map1] <-- affine apply 
-      //    %elem3 = affine.load input[#map2]
-
-      //    %sum1 = arith.addf %elem1 , %elem2
-      //    %sum2 = arith.addf %sum1, %elem3
-      //    %res = arith.divf %sum2 , 
-      //    affine.store %sum2, out[%arg0]
-      // }
+    // debug
+    //  forOp1->dump();
+    //    %cont3 = arith.const 3 : f64
+    //    affine.for %arg0 = 0 to 8 {
+    //     %elem1 = affine.load input[%arg0]
+    //     #map1 = affine_map<(%arg0)[] : (%arg0 + 1)
+    //     #map2 = affine_map<(%arg0)[] : (%arg0 + 2)
+    //     %elem2 = affine.load input[#map1] <-- affine apply
+    //     %elem3 = affine.load input[#map2]
+
+    //    %sum1 = arith.addf %elem1 , %elem2
+    //    %sum2 = arith.addf %sum1, %elem3
+    //    %res = arith.divf %sum2 ,
+    //    affine.store %sum2, out[%arg0]
+    // }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FIRFilterResponse operations
 //===----------------------------------------------------------------------===//
-struct filterOpLowering: public ConversionPattern {
-      filterOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::filterOp::getOperationName(), 1 , ctx) {}
+struct filterOpLowering : public ConversionPattern {
+  filterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::filterOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.filterOp has 3 operands -- all of type tensor f64
 
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.filterOp has 3 operands -- both of type tensor f64 
+    // Pseudo-code:
+    //  y[i] = sum(b[j] * x[i-j] - a[j] * y[i-j] ) j=1 to i and i=1 to len(x)
+    //  also, y[0] = b[0] * x[0]
 
-    //Pseudo-code:
-      // y[i] = sum(b[j] * x(i-j) - a[j] *x[i-j] ) j=1 to i and  i=1 to len(x)
-      // also, y[0] = b[0] * x[0]
-     
     // 1) calculate y[0]
     // 2) iterate for indx=1 to input_len:
     //     load y[indx] = b[0] * x[indx]
-    //     3) iterate for j=1 to indx : 
+    //     3) iterate for j=1 to indx :
     //             load b[j] , x[i-j] , a[j] , y[i-j]
     //             y[indx] = y[indx] + b[j] * x[i-j] - a[j]*y[i-j]
 
     auto loc = op->getLoc();
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-        
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
     filterOpAdaptor filterOpAdaptor1(operands);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     // IR:
-    // ConstantIndx0 
+    // ConstantIndx0
     // b0 = affine.load(b, ConstantIndx0)
     // x0 = affine.load(x, ConstantIndx0)
     // tempY0 = arith.mulf(b0,x0)
 
-    // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() 
+    // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable()
     // forLoopY
     // xIvY = affine.load(x,ivY )
     // tempYIndx = affine.mulf(b0, xIvY)
@@ -4324,7 +4962,7 @@ struct filterOpLowering: public ConversionPattern {
 
     //     forloopJ , ivJ = forloopJ.inductionVariable()
     //         //optional get min ivY and len(b) -- iterate for this
-    //         load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , 
+    //         load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
     //         (y, map(ivY - ivJ) ), (y , ivJ)
 
     //         tempBxX = arith.mulf(b , x)
@@ -4333,83 +4971,95 @@ struct filterOpLowering: public ConversionPattern {
     //         sumY_A = arith.addf( Y , tempB_A )
     //         affine.store(sumY_A , y , ivY)
 
-    // ConstantIndx0 
+    // ConstantIndx0
     // b0 = affine.load(b, ConstantIndx0)
     // x0 = affine.load(x, ConstantIndx0)
     // tempY0 = arith.mulf(b0,x0)
 
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value b0 = rewriter.create<affine::AffineLoadOp>(loc, filterOpAdaptor1.getB() ,ValueRange{constantIndx0} );
-    Value x0 = rewriter.create<affine::AffineLoadOp>(loc, filterOpAdaptor1.getX() ,ValueRange{constantIndx0} );
+    Value b0 = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getB(), ValueRange{constantIndx0});
+    Value x0 = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), ValueRange{constantIndx0});
     Value tempY0 = rewriter.create<arith::MulFOp>(loc, b0, x0);
 
-    //store at Y0
-    rewriter.create<affine::AffineStoreOp>(loc, tempY0 , alloc,ValueRange{constantIndx0} );
+    // store at Y0
+    rewriter.create<affine::AffineStoreOp>(loc, tempY0, alloc,
+                                           ValueRange{constantIndx0});
 
-    //For loop -- iterate from 1 to last
-    // lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable() 
-    //     forLoopY
-    //     xIvY = affine.load(x,ivY )
-    //     tempYIndx = affine.mulf(b0, xIvY)
-    //     affine.store(tempYIndx, y, ivY)
+    // For loop -- iterate from 1 to last
+    //  lb = 1, ub = x.size() , ivY = forLoopY.inductionVariable()
+    //      forLoopY
+    //      xIvY = affine.load(x,ivY )
+    //      tempYIndx = arith.mulf(b0, xIvY)
+    //      affine.store(tempYIndx, y, ivY)
 
-    int64_t lb = 1 ;
+    int64_t lb = 1;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    Value xIvY = rewriter.create<affine::AffineLoadOp>(loc, filterOpAdaptor1.getX() , ivY);
+    Value xIvY = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), ivY);
     Value b0mulxIvY = rewriter.create<arith::MulFOp>(loc, b0, xIvY);
-    rewriter.create<affine::AffineStoreOp>(loc, b0mulxIvY , alloc,ivY );
+    rewriter.create<affine::AffineStoreOp>(loc, b0mulxIvY, alloc, ivY);
 
-    //loop for X-- 1 to upperIndx ie, ivY
-      // forloopJ , ivJ = forloopJ.inductionVariable()
-      // //optional get min ivY and len(b) -- iterate for this
-      // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , 
-      // (y, map(ivY - ivJ) ), (y , ivJ)
+    // loop for X-- 1 to upperIndx ie, ivY
+    //  forloopJ , ivJ = forloopJ.inductionVariable()
+    //  //optional get min ivY and len(b) -- iterate for this
+    //  load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
+    //  (y, map(ivY - ivJ) ), (y , ivJ)
 
-      // tempBxX = arith.mulf(b , x)
-      // tempAxY = arith.mulf(a , Y_i-j)
-      // tempB_A = arith.subf( tempBxX - tempAxY)
-      // sumY_A = arith.addf( Y , tempB_A )
-      // affine.store(sumY_A , y , ivY)
+    // tempBxX = arith.mulf(b , x)
+    // tempAxY = arith.mulf(a , Y_i-j)
+    // tempB_A = arith.subf( tempBxX - tempAxY)
+    // sumY_A = arith.addf( Y , tempB_A )
+    // affine.store(sumY_A , y , ivY)
 
-    //look for here
-    // DEBUG_PRINT_NO_ARGS() ;
-    //Future -- try to loop 
-    // Value forlb = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    // look for here
+    //  DEBUG_PRINT_NO_ARGS() ;
+    // Future -- try to loop
+    //  Value forlb = rewriter.create<arith::ConstantIndexOp>(loc, 1);
     AffineExpr expr0;
     bindDims(rewriter.getContext(), expr0);
     // AffineMap lbMap = AffineMap::get(1, 0, expr0);
 
-    // affine::AffineForOp forOpJ = rewriter.create<AffineForOp>(loc, lbMap, ValueRange{forlb} ,lbMap , ValueRange{ivY}, step);
-    affine::AffineForOp forOpJ = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // affine::AffineForOp forOpJ = rewriter.create<AffineForOp>(loc, lbMap,
+    // ValueRange{forlb} ,lbMap , ValueRange{ivY}, step);
+    affine::AffineForOp forOpJ =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
 
     auto ivJ = forOpJ.getInductionVar();
     rewriter.setInsertionPointToStart(forOpJ.getBody());
 
-    //load from X, & Y
-    // DCTOpAdaptor dctAdaptor(operands);
-    //For affine expression: #map1 = affine_map<(%ivY , ivJ)[] : (%ivY - ivJ)
+    // load from X, & Y
+    //  DCTOpAdaptor dctAdaptor(operands);
+    // For affine expression: #map1 = affine_map<(%ivY , ivJ)[] : (%ivY - ivJ)
     AffineExpr d0, d1, s0;
     bindDims(rewriter.getContext(), d0, d1);
-    // AffineExpr ExprForIndxYminusX = rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1); //d0 - d1; 
-    AffineExpr ExprForIndxYminusX = d0 - d1; 
+    // AffineExpr ExprForIndxYminusX = rewriter.getAffineDimExpr(0) -
+    // rewriter.getAffineDimExpr(1); //d0 - d1;
+    AffineExpr ExprForIndxYminusX = d0 - d1;
 
     AffineMap addMapForYminusX = AffineMap::get(2, 0, ExprForIndxYminusX);
 
-    // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) , 
+    // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
     // (y, map(ivY - ivJ) ), (y , ivJ)
-    Value inputX = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getX(),addMapForYminusX, ValueRange{ivY,ivJ});
-    Value inputB = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getB(), ValueRange{ivJ});
-    Value inputA = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getA(), ValueRange{ivJ});
-    Value inputPrevY = rewriter.create<AffineLoadOp>(loc, alloc,addMapForYminusX, ValueRange{ivY,ivJ});
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), addMapForYminusX, ValueRange{ivY, ivJ});
+    Value inputB = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getB(),
+                                                 ValueRange{ivJ});
+    Value inputA = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getA(),
+                                                 ValueRange{ivJ});
+    Value inputPrevY = rewriter.create<AffineLoadOp>(
+        loc, alloc, addMapForYminusX, ValueRange{ivY, ivJ});
     Value outY = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
 
     // tempBxX = arith.mulf(b , x)
@@ -4422,77 +5072,71 @@ struct filterOpLowering: public ConversionPattern {
     Value tempAxY = rewriter.create<arith::MulFOp>(loc, inputA, inputPrevY);
     Value tempBminusA = rewriter.create<arith::SubFOp>(loc, tempBxX, tempAxY);
     Value sumY_A = rewriter.create<arith::AddFOp>(loc, outY, tempBminusA);
-    rewriter.create<affine::AffineStoreOp>(loc, sumY_A , alloc,ivY );
+    rewriter.create<affine::AffineStoreOp>(loc, sumY_A, alloc, ivY);
 
- 
     rewriter.setInsertionPointAfter(forOpJ);
     rewriter.setInsertionPointAfter(forOpY);
     // forOpJ->dump();
-  
-    //debug
-    // forOpJ->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc[%y] : memref<4xf64>
-        //         //    dsp.print %alloc : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+
+    // debug
+    //  forOpJ->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc[%y] : memref<4xf64>
+    //         //    dsp.print %alloc : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
     // rewriter.replaceOp(op, ValueRange{alloc,alloc_img});
-    
-    return success();
-    }
-
 
+    return success();
+  }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: DCT operations
 //===----------------------------------------------------------------------===//
 
-
 struct DCTOpLowering : public ConversionPattern {
   DCTOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::DCTOp::getOperationName(), 1, ctx) {}
@@ -4501,187 +5145,201 @@ struct DCTOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) , 0<=n<=N-1 : 
-      // for y[0] , the answer will be multiplied by 1/sqrt(2)
-     
-      //init  output mem for y as 0 
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x[n] cos(pi * k * (n +0.5)/N) and sum and store them at y[k]
-          // 
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    //   y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) ,
+    //   0<=n<=N-1 :
+    //  for y[0] , the answer will be multiplied by 1/sqrt(2)
+
+    // init  output mem for y as 0
+    // iterate for output from k=0 to last
+    // iterate for all x from n=0 to last
+    // perform the calculations : ie x[n] cos(pi * k * (n +0.5)/N) and sum and
+    // store them at y[k]
+    //
+    // replace this DCT op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-        
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
     DCTOpAdaptor dctAdaptor(operands);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //constant values:
+    // constant values:
     const float sqrt2 = 1.41421356237;
     const float pi = 3.14159265358;
 
     // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-
+    //     affine.store %cst_3, %alloc[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 0 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
     rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivX = forOpX.getInductionVar();
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & Y
-    // DCTOpAdaptor dctAdaptor(operands);
-    Value inputX = rewriter.create<AffineLoadOp>(loc, dctAdaptor.getInput(), ValueRange{ivX});
-    Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
+    // load from X, & Y
+    //  DCTOpAdaptor dctAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(loc, dctAdaptor.getInput(),
+                                                 ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
 
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
 
-    //get pi * k * (i + 0.5) / N
-    Value constant0_5 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.5));
+    // get pi * k * (i + 0.5) / N
+    Value constant0_5 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.5));
 
     Value add_i_half = rewriter.create<arith::AddFOp>(loc, i, constant0_5);
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , add_i_half);
-    
-    Value constpi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(pi));
-    Value mulpiKI_half = rewriter.create<arith::MulFOp>(loc, constpi , muli_k);  
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, add_i_half);
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(pi));
+    Value mulpiKI_half = rewriter.create<arith::MulFOp>(loc, constpi, muli_k);
 
     // Get N
     // DEBUG_PRINT_NO_ARGS() ;
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mulpiKI_half, N )  ;     
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mulpiKI_half, N);
 
     // Get cos ( pi * k * (n +0.5)/N))
     // DEBUG_PRINT_NO_ARGS() ;
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
-    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX , GetCos);   
-    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal ,xMulCos) ;
-    rewriter.create<AffineStoreOp>(loc, realSum, alloc, ValueRange{ivY}); 
-    
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc, ValueRange{ivY});
+
     rewriter.setInsertionPointAfter(forOpX);
 
-    //multiply Y(k) with sqrt(2) / sqrt(N) 
-    // DEBUG_PRINT_NO_ARGS() ;
-    Value loadYReal1 = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
-    Value constSqrt2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(sqrt2));
+    // multiply Y(k) with sqrt(2) / sqrt(N)
+    //  DEBUG_PRINT_NO_ARGS() ;
+    Value loadYReal1 =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
+    Value constSqrt2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2));
     // Type floatType = rewriter.getF64Type();
-    Value N2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    Value N2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Define fast math flags
     // auto fastMathFlags = arith::FastMathFlagsAttr::get(
     //   rewriter.getContext(), arith::FastMathFlags::none);
-      // arith::FastMathFlags::ApproximateSqrt |
-      // arith::FastMathFlags::AllowReciprocal);
-    Value sqrtN = rewriter.create<math::RsqrtOp>(loc,  N2  );
-    // Value sqrtN = rewriter.create<math::RsqrtOp>(loc, TypeRange{ floatType } , N2 , fastMathFlags );
-
-    Value mulSqrt2ByN = rewriter.create<arith::MulFOp>(loc, constSqrt2 , sqrtN);
-    Value mulSqrt2ByNByY = rewriter.create<arith::MulFOp>(loc, mulSqrt2ByN , loadYReal1);
+    // arith::FastMathFlags::ApproximateSqrt |
+    // arith::FastMathFlags::AllowReciprocal);
+    Value sqrtN = rewriter.create<math::RsqrtOp>(loc, N2);
+    // Value sqrtN = rewriter.create<math::RsqrtOp>(loc, TypeRange{ floatType }
+    // , N2 , fastMathFlags );
+
+    Value mulSqrt2ByN = rewriter.create<arith::MulFOp>(loc, constSqrt2, sqrtN);
+    Value mulSqrt2ByNByY =
+        rewriter.create<arith::MulFOp>(loc, mulSqrt2ByN, loadYReal1);
     // DEBUG_PRINT_NO_ARGS() ;
-    rewriter.create<AffineStoreOp>(loc, mulSqrt2ByNByY, alloc, ValueRange{ivY}); 
+    rewriter.create<AffineStoreOp>(loc, mulSqrt2ByNByY, alloc, ValueRange{ivY});
     rewriter.setInsertionPointAfter(forOpY);
 
-    //get Y0 multiplied by sqrt(2)
+    // scale Y0 by 1/sqrt(2), i.e. divide y[0] by sqrt(2)
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value GetY0 = rewriter.create<AffineLoadOp>(loc, alloc, /* iv */ ValueRange{constantIndx0});
-    Value valSqrt2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(sqrt2));
+    Value GetY0 = rewriter.create<AffineLoadOp>(
+        loc, alloc, /* iv */ ValueRange{constantIndx0});
+    Value valSqrt2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2));
     Value Y0MulSqrt2 = rewriter.create<arith::DivFOp>(loc, GetY0, valSqrt2);
-    rewriter.create<AffineStoreOp>(loc, Y0MulSqrt2, alloc, ValueRange{constantIndx0});
-    
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc[%y] : memref<4xf64>
-        //         //    dsp.print %alloc : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    rewriter.create<AffineStoreOp>(loc, Y0MulSqrt2, alloc,
+                                   ValueRange{constantIndx0});
+
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc[%y] : memref<4xf64>
+    //         //    dsp.print %alloc : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
     // rewriter.replaceOp(op, ValueRange{alloc,alloc_img});
-    
+
     return success();
   }
 };
@@ -4698,95 +5356,98 @@ struct HammingWindowOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = 0.54 - 0.46 cos(2 *pi * k/N-1) , 0<=n<N 
+
+    // Pseudo-code:
+    //   y[k] = 0.54 - 0.46 * cos(2 * pi * k / (N - 1)) , 0 <= k < N
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    // llvm::errs() << "tensorType " << tensorType.get;  
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // llvm::errs() << "tensorType " << tensorType.get;
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
-    
-
-    //For loop -- iterate from 1 to last
-    DEBUG_PRINT_NO_ARGS() ;
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];   
-    int64_t step = 1;
 
-    DEBUG_PRINT_NO_ARGS() ;
-    //get constants -- 0.54 & 0.46
-    Value constant0_54 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.54));
-    Value constant0_46 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0.46));
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
+    // For loop -- iterate from 0 to last
+    DEBUG_PRINT_NO_ARGS();
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    DEBUG_PRINT_NO_ARGS();
+    // get constants -- 0.54 & 0.46
+    Value constant0_54 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54));
+    Value constant0_46 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46));
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
-
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
 
-    //get 2*pi * k / (N -1)  
-    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi , k);  
+    // get 2*pi * k / (N -1)
+    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi, k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub ;
-    Value NMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput - 1));
-    
-    Value divIndxByNMinus1 = rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1 )  ;     
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value NMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(LengthOfInput - 1));
+
+    Value divIndxByNMinus1 =
+        rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1);
 
     // get cos(2*pi * k/(N-1)
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByNMinus1);
-    Value MulCos0_46 = rewriter.create<arith::MulFOp>(loc, constant0_46 , GetCos);   
-    Value Sub0_54_Cos = rewriter.create<arith::SubFOp>(loc, constant0_54 ,MulCos0_46) ;
-    rewriter.create<AffineStoreOp>(loc, Sub0_54_Cos, alloc, ValueRange{ivY}); 
-    DEBUG_PRINT_NO_ARGS() ;
+    Value MulCos0_46 =
+        rewriter.create<arith::MulFOp>(loc, constant0_46, GetCos);
+    Value Sub0_54_Cos =
+        rewriter.create<arith::SubFOp>(loc, constant0_54, MulCos0_46);
+    rewriter.create<AffineStoreOp>(loc, Sub0_54_Cos, alloc, ValueRange{ivY});
+    DEBUG_PRINT_NO_ARGS();
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
+    // }
+    // }
     rewriter.replaceOp(op, alloc);
-    //rewriter.replaceOp(op, ValueRange{alloc,alloc_img});
-    
+    // rewriter.replaceOp(op, ValueRange{alloc,alloc_img});
+
     return success();
   }
 };
@@ -4803,187 +5464,194 @@ struct IFFT1DOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = y_real[k] + j *y_img[k] 
-      // y_real = sumOver_n(x[k]*cos[2*pi * k *n/N ] 
-      // y_img = sumOver_n(x[k]*sin[2*pi * k *n/N ] 
-      // here, x[k] is complex ie, x_real[k] + x_complex[k]
-      //so, y[k] = sumOver_n(x[k]e^(2*pi * k *n/N)) 
-    	// ==>   = x_real[k]cos(2*pi * k *n/N) - x_complex[k]sin(2*pi * k *n/N)
-
-      //init  output mem for y_real  
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x_real[k]cos(2*pi * k *n/N) - x_complex[k]sin(2*pi * k *n/N) and 
-    	  //sum and store them at y[k]
-          // 
- 
-    DEBUG_PRINT_NO_ARGS() ;
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-    // DEBUG_PRINT_NO_ARGS() ; 
-       
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j * y_img[k]
+    //   y_real[k] = (1/N) * sumOver_n(Re(x[n] * e^(j*2*pi*k*n/N)))
+    //   y_img[k]  = (1/N) * sumOver_n(Im(x[n] * e^(j*2*pi*k*n/N)))
+    //   here, x[n] is complex, i.e. x[n] = x_real[n] + j * x_img[n]
+    //   so the real part of each term x[n] * e^(j*2*pi*k*n/N) is
+    //     x_real[n] * cos(2*pi*k*n/N) - x_img[n] * sin(2*pi*k*n/N)
+
+    // init  output mem for y_real
+    // iterate for output from k=0 to last
+    // iterate for all x from n=0 to last
+    // perform the calculations : ie x_real[n]cos(2*pi * k *n/N) -
+    // x_img[n]sin(2*pi * k *n/N) and sum and store them at y[k]
+    //
+
+    DEBUG_PRINT_NO_ARGS();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  DEBUG_PRINT_NO_ARGS() ;
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
-    
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-
-    DEBUG_PRINT_NO_ARGS() ;
-    //For loop -- iterate from 0 to last
-    int64_t lb = 0 ;
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    DEBUG_PRINT_NO_ARGS();
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
     rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivX = forOpX.getInductionVar();
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
+    // load from X, & y1 & y2
     IFFT1DOpAdaptor ifft1DAdaptor(operands);
-    Value inputReal = rewriter.create<AffineLoadOp>(loc, ifft1DAdaptor.getReal(), ValueRange{ivX});
-    Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
-    
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
-
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
-
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
+    Value inputReal = rewriter.create<AffineLoadOp>(
+        loc, ifft1DAdaptor.getReal(), ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Value N = inputTensorType.getShape()[0];
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
 
-    // Real Cos part = x_real[i] * cos(div) 
+    // Real Cos part = x_real[i] * cos(div)
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
-    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputReal , GetCos);   
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputReal, GetCos);
 
-    // Real Sin part =  x_complex[i] * sin(div) 
-    Value inputImg = rewriter.create<AffineLoadOp>(loc, ifft1DAdaptor.getImg(), ValueRange{ivX});
+    // Real Sin part =  x_complex[i] * sin(div)
+    Value inputImg = rewriter.create<AffineLoadOp>(loc, ifft1DAdaptor.getImg(),
+                                                   ValueRange{ivX});
     Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
-    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputImg , GetSin);   
-
-    //Get real Ans = x_real[i] * cos(div) - x_complex[i] * sin(div)
-    //Then sum over real_Ans by loading YReal
-    Value realAns = rewriter.create<arith::SubFOp>(loc, xMulCos ,xMulSin) ;
-    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal ,realAns) ;
-    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY}); 
-    
-    //x[n-1]
-    DEBUG_PRINT_NO_ARGS() ;
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputImg, GetSin);
+
+    // Get real Ans = x_real[i] * cos(div) - x_complex[i] * sin(div)
+    // Then sum over real_Ans by loading YReal
+    Value realAns = rewriter.create<arith::SubFOp>(loc, xMulCos, xMulSin);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, realAns);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // x[n-1]
+    DEBUG_PRINT_NO_ARGS();
     // Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX ,PrevX );
 
     rewriter.setInsertionPointAfter(forOpX);
     // Calculate y[k] = 1/N * y[k]
-    Value loadY = rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value loadY =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
     // float LengthOfInput = (float) ub;
-    Value N1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
-    Value SumDivByN = rewriter.create<arith::DivFOp>(loc,loadY , N1 );
-    rewriter.create<AffineStoreOp>(loc, SumDivByN, alloc_real, ValueRange{ivY}); 
-
+    Value N1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    Value SumDivByN = rewriter.create<arith::DivFOp>(loc, loadY, N1);
+    rewriter.create<AffineStoreOp>(loc, SumDivByN, alloc_real, ValueRange{ivY});
 
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc_real[%y] : memref<4xf64>
-        //         //    dsp.print %alloc_real : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     rewriter.replaceOp(op, alloc_real);
     // rewriter.replaceOp(op, ValueRange{alloc_real,alloc_img});
-    
+
     return success();
   }
 };
 
-
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FFT1D operations
 //===----------------------------------------------------------------------===//
 
-
 struct FFT1DOpLowering : public ConversionPattern {
   FFT1DOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::FFT1DOp::getOperationName(), 1, ctx) {}
@@ -4992,173 +5660,187 @@ struct FFT1DOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //  y[k] = y_real[k] + j *y_img[k] 
-      // y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] 
-      // y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
-      //init  output mem for y_real & y_img as 0 
-      //iterate for output from k=0 to last 
-        //iterate for all x from n=0 to last
-          //perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and store them at y[k]
-          // 
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j * y_img[k]
+    //   y_real[k] = sumOver_n(x[n] * cos(2*pi * k * n / N))
+    //   y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi * k * n / N))
+    // init output mem for y_real & y_img as 0
+    // iterate for output from k=0 to last
+    //   iterate for all x from n=0 to last
+    //     perform the calculations: i.e. x[n] * cos(2*pi * k * n / N), sum,
+    //     and store them at y[k]
+    //
+    // replace this FFT1D op with the output buffers alloc_real and alloc_img
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
-    //iterate to result1 --not needed for now but for future reference  
-    // auto tensorType1 =  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
 
-    // DEBUG_PRINT_NO_ARGS() ; 
-    //tensorType.getShape()[0]
-    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0] << " func= " << __func__ << "\n"; 
-    
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     // auto memRefType2 = convertTensorToMemRef(tensorType1);
     auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
     auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
     // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(0));
-
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 0 ;
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
     rewriter.setInsertionPointToStart(forOp1.getBody());
     rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
     rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
     rewriter.setInsertionPointAfter(forOp1);
 
-    //loop for Y
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivY = forOpY.getInductionVar();
     rewriter.setInsertionPointToStart(forOpY.getBody());
 
-    //loop for X
-    affine::AffineForOp forOpX = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivX = forOpX.getInductionVar();
     rewriter.setInsertionPointToStart(forOpX.getBody());
 
-    //load from X, & y1 & y2
+    // load from X, & y1 & y2
     FFT1DOpAdaptor fft1DAdaptor(operands);
-    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DAdaptor.getInput(), ValueRange{ivX});
-    Value loadYReal = rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
-    Value loadYImg = rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
-
-    //convert index to f64
-    Value IndxY = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivY);
-    Value k = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
-
-    Value IndxX = rewriter.create<arith::IndexCastUIOp>(loc, rewriter.getIntegerType(32), ivX);
-    Value i = rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
-
-    //get 2*pi * k * i / N
-    Value muli_k =  rewriter.create<arith::MulFOp>(loc, k , i);
-    
-    Value const2pi = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(6.28318530718));
-    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi , muli_k);  
+    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DAdaptor.getInput(),
+                                                 ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
 
     // getOperand().getType()
-    // auto inputTensorType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
-    float LengthOfInput = (float) ub;
-    Value N = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
-                                                         rewriter.getF64FloatAttr(LengthOfInput));
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
     // Value N = inputTensorType.getShape()[0];
 
-    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N )  ;     
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
 
     // Real part = Sum(x[i] * cos(div) )
     Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
-    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX , GetCos);   
-    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal ,xMulCos) ;
-    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY}); 
-    
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
     // Img part = -1 * Sum(x[i] * sin(div) )
     Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
-    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX , GetSin);   
-    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg ,xMulSin) ;
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
 
-    // Value constMinus1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+    // Value constMinus1 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
     //                                                      rewriter.getF64FloatAttr(-1));
-    // Value NegImgSum = rewriter.create<arith::MulFOp>(loc, constMinus1 , imgSum);
-    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY}); 
-    //x[n-1]
-    // DEBUG_PRINT_NO_ARGS() ;
-    // Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX ,PrevX );
+    // Value NegImgSum = rewriter.create<arith::MulFOp>(loc, constMinus1 ,
+    // imgSum);
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
+    // x[n-1]
+    //  DEBUG_PRINT_NO_ARGS() ;
+    //  Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX ,PrevX );
 
     rewriter.setInsertionPointAfter(forOpX);
     // forOpX->dump();
     // rewriter.create<AffineYieldOp>(loc, ValueRange{alloc_real, alloc_img});
     rewriter.setInsertionPointAfter(forOpY);
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
-        // affine.for %y = 0 to 4 {
-        //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
-        //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
-        // }
-
-
-        // affine.for %y = 0 to 4 {
-        // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
-        // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
-        // affine.for %x = 0 to 4 {
-        //     // CAcluations
-        //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
-        //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
-        //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
-        //           // index cast for multiply 
-        //           %4 = arith.index_castui %y : index to i32
-        //           %k = arith.uitofp %4 : i32 to f64
-        //           %6 = arith.index_castui %x : index to i32
-        //           %i = arith.uitofp %6 : i32 to f64
-        //         //   %8 = arith.index_castui %arg3 : index to i32
-        //         //   %9 = arith.uitofp %8 : i32 to f64
-        //         //   %10 = arith.index_castui %arg4 : index to i32
-        //         //   %11 = arith.uitofp %10 : i32 to f64
-                
-        //           %mul_1 = arith.mulf %i, %k : f64
-        //           %mul = arith.mulf %mul_1, %cst_2pi : f64
-        //         //  ixk / N
-        //           %div = arith.divf %mul, %N : f64
-        //         //   cos of the above
-        //           %res_cos = math.cos %div : f64
-        //         //   %16 = arith.addf %14, %15 : f64
-        //         //   %res_sin = arith.mulf %16, %cst_0 : f64
-                 
-        //           %res_sin = math.sin %div : f64
-        //           %real_prod = arith.mulf %1, %res_cos : f64
-        //           %img_prod_1 = arith.mulf %1, %res_sin : f64
-        //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
-
-        //           %real = arith.addf %2, %real_prod : f64
-        //           %img = arith.addf %3, %img_prod : f64
-        //           affine.store %real, %alloc_real[%y] : memref<4xf64>
-        //         //    dsp.print %alloc_real : memref<4xf64>
-        //           affine.store %img, %alloc_img[%y] : memref<4xf64>
-
-        // }
-        // }
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // Calculations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
     // rewriter.replaceOp(op, alloc_real);
-    rewriter.replaceOp(op, ValueRange{alloc_real,alloc_img});
-    
+    rewriter.replaceOp(op, ValueRange{alloc_real, alloc_img});
+
     return success();
   }
 };
@@ -5167,7 +5849,6 @@ struct FFT1DOpLowering : public ConversionPattern {
 // ToyToAffine RewritePatterns: HighPassFilter operations
 //===----------------------------------------------------------------------===//
 
-
 struct HighPassFilterOpLowering : public ConversionPattern {
   HighPassFilterOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::HighPassFilterOp::getOperationName(), 1, ctx) {}
@@ -5176,83 +5857,88 @@ struct HighPassFilterOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //init first value of output with first value of input: y[0] = x[0]
-      //iterate for output from 1st to last 
-      //y[i] = x[i] - x[i -1 ]
-      // replace this upsampling op with the output_mem_allocation op
-
-    DEBUG_PRINT_NO_ARGS() ;
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+
+    // Pseudo-code:
+    //   init first value of output with first value of input: y[0] = x[0]
+    //   iterate for output from index 1 to last:
+    //     y[i] = x[i] - x[i - 1]
+    //   replace this high-pass filter op with the output_mem_allocation op
+
+    DEBUG_PRINT_NO_ARGS();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //Init y for the first index ie, index0
+    // Init y for the first index ie, index0
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
 
     HighPassFilterOpAdaptor highPassFilterAdaptor(operands);
-    Value GetInputX0 = rewriter.create<AffineLoadOp>(loc, highPassFilterAdaptor.getInput(), /* iv */ ValueRange{constantIndx0});
-    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc, ValueRange{constantIndx0});
-
-    //For loop -- iterate from 1 to last
-    int64_t lb = 1 ;
+    Value GetInputX0 =
+        rewriter.create<AffineLoadOp>(loc, highPassFilterAdaptor.getInput(),
+                                      /* iv */ ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc,
+                                   ValueRange{constantIndx0});
+
+    // For loop -- iterate from 1 to last
+    int64_t lb = 1;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
 
-    
-    
-    //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     AffineExpr ExprForPrevX = d0 - 1;
     AffineMap addMapForHighPassFilter = AffineMap::get(1, 0, ExprForPrevX);
 
-    //x[n-1]
-    DEBUG_PRINT_NO_ARGS() ;
-    Value PrevX = rewriter.create<AffineLoadOp>(loc, highPassFilterAdaptor.getInput(), addMapForHighPassFilter, 
-                  ValueRange{iv}); //memRefType
+    // x[n-1]
+    DEBUG_PRINT_NO_ARGS();
+    Value PrevX = rewriter.create<AffineLoadOp>(
+        loc, highPassFilterAdaptor.getInput(), addMapForHighPassFilter,
+        ValueRange{iv}); // memRefType
     // PrevX.dump();
-    Value inputX = rewriter.create<AffineLoadOp>(loc, highPassFilterAdaptor.getInput(), ValueRange{iv});
-    
-    //get y[i] = x[i] - x[i -1 ]
-    Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX ,PrevX );
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, highPassFilterAdaptor.getInput(), ValueRange{iv});
+
+    // get y[i] = x[i] - x[i -1 ]
+    Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX, PrevX);
     // Value cosRes = rewriter.create<math::CosOp>(loc, xMinusPrevX);
-    rewriter.create<AffineStoreOp>(loc, xMinusPrevX, alloc, ValueRange{iv}); //PrevX //AddmulAlphaXAndPreYAlphaMinus1
+    rewriter.create<AffineStoreOp>(
+        loc, xMinusPrevX, alloc,
+        ValueRange{iv}); // PrevX //AddmulAlphaXAndPreYAlphaMinus1
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      // init first value of output with first value of input: y[0] = x[0]
-      // iterate for output from 1st to last 
-      // y[i] = x[i] - x[i -1 ]
-      // replace this upsampling op with the output_mem_allocation op
-        //  %indx0 = arith.constantIndex 0 : index
-        // %0 = affine.load in[indx0 ] : f64
-        //  affine.store %0 ,out[indx0]
-        // affine.for %arg0 = 1 to len_y {
-        //    #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
-        //    %1 = affine.load in[#map1]     
-      //      %load_in = affine.load in[%arg0]
-      //      %2 = arith.subf %const1 , alpha
-      //      affine.store %2, out[%arg0]
-      // }
+    // debug
+    //  forOp1->dump();
+    //  init first value of output with first value of input: y[0] = x[0]
+    //  iterate for output from index 1 to last
+    //  y[i] = x[i] - x[i - 1]
+    //  replace this high-pass filter op with the output_mem_allocation op
+    //  %indx0 = arith.constant 0 : index
+    //  %0 = affine.load in[%indx0] : f64
+    //  affine.store %0, out[%indx0]
+    //  affine.for %arg0 = 1 to len_y {
+    //     #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    //     %1 = affine.load in[#map1]
+    //     %load_in = affine.load in[%arg0]
+    //     %2 = arith.subf %load_in, %1
+    //     affine.store %2, out[%arg0]
+    //  }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -5261,127 +5947,134 @@ struct HighPassFilterOpLowering : public ConversionPattern {
 // ToyToAffine RewritePatterns: LowPassFilter operations
 //===----------------------------------------------------------------------===//
 
-
 struct LowPassFilter1stOrderOpLowering : public ConversionPattern {
   LowPassFilter1stOrderOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::LowPassFilter1stOrderOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::LowPassFilter1stOrderOp::getOperationName(), 1,
+                          ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //init first value of output with first value of input: y[0] = x[0]
-      //iterate for output from 1st to last 
-      //y[i] = (1 - alpha) * y[i-1] + alpha * x[i]
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    // init first value of output with first value of input: y[0] = x[0]
+    // iterate for output from 1st to last
+    // y[i] = (1 - alpha) * y[i-1] + alpha * x[i]
+    //  replace this low-pass filter op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //Init y for the first index ie, index0
+    // Init y for the first index ie, index0
     Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
 
     LowPassFilter1stOrderOpAdaptor lowPassFilterAdaptor(operands);
-    Value GetInputX0 = rewriter.create<AffineLoadOp>(loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0});
-    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc, ValueRange{constantIndx0});
+    Value GetInputX0 = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getLhs(), /* iv */ ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc,
+                                   ValueRange{constantIndx0});
 
-    //For loop -- iterate from 1 to last
-    int64_t lb = 1 ;
+    // For loop -- iterate from 1 to last
+    int64_t lb = 1;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
 
-    
-    
-    //For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    // Affine map selecting y[i-1]: #map1 = affine_map<(d0) -> (d0 - 1)>
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     AffineExpr ExprForPrevY = d0 - 1;
     AffineMap addMapForLowPassFilter = AffineMap::get(1, 0, ExprForPrevY);
 
-    //y[n-1]
-    // DEBUG_PRINT_NO_ARGS() ;
-    // Value PrevY = rewriter.create<AffineLoadOp>(loc, lowPassFilterAdaptor.getLhs(), addMapForLowPassFilter, 
-    //               ValueRange{iv});
-    // Value PrevY = rewriter.create<AffineLoadOp>(loc, (*op->result_type_begin()), addMapForLowPassFilter, 
-    //               ValueRange{iv}); //memRefType
-    Value PrevY = rewriter.create<AffineLoadOp>(loc, alloc, addMapForLowPassFilter, 
-                  ValueRange{iv}); //memRefType
+    // y[n-1]
+    //  DEBUG_PRINT_NO_ARGS() ;
+    //  Value PrevY = rewriter.create<AffineLoadOp>(loc,
+    //  lowPassFilterAdaptor.getLhs(), addMapForLowPassFilter,
+    //                ValueRange{iv});
+    //  Value PrevY = rewriter.create<AffineLoadOp>(loc,
+    //  (*op->result_type_begin()), addMapForLowPassFilter,
+    //                ValueRange{iv}); //memRefType
+    Value PrevY = rewriter.create<AffineLoadOp>(
+        loc, alloc, addMapForLowPassFilter, ValueRange{iv}); // memRefType
     // PrevY.dump();
-    Value constant1 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
     // Value alpha = lowPassFilterAdaptor.getRhs(); //op->getOperand(1);
-    Value alpha = rewriter.create<AffineLoadOp>(loc, lowPassFilterAdaptor.getRhs(), /* iv */ ValueRange{});
-    //get y[n] = (1- alpha ) * y[n-1] + alpha * x[n]
-    Value oneMinusAlpha = rewriter.create<arith::SubFOp>(loc, constant1 ,alpha );
-    Value mulPrevYAlphaMinus1 = rewriter.create<arith::MulFOp>(loc, oneMinusAlpha ,PrevY);
-
-    Value inputX = rewriter.create<AffineLoadOp>(loc, lowPassFilterAdaptor.getLhs(), ValueRange{iv});
-    Value mulAlphaX = rewriter.create<arith::MulFOp>(loc, alpha ,inputX);
-
-    Value AddmulAlphaXAndPreYAlphaMinus1 = rewriter.create<arith::AddFOp>(loc, mulPrevYAlphaMinus1 ,mulAlphaX);
+    Value alpha = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getRhs(), /* iv */ ValueRange{});
+    // get y[n] = (1- alpha ) * y[n-1] + alpha * x[n]
+    Value oneMinusAlpha = rewriter.create<arith::SubFOp>(loc, constant1, alpha);
+    Value mulPrevYAlphaMinus1 =
+        rewriter.create<arith::MulFOp>(loc, oneMinusAlpha, PrevY);
+
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getLhs(), ValueRange{iv});
+    Value mulAlphaX = rewriter.create<arith::MulFOp>(loc, alpha, inputX);
+
+    Value AddmulAlphaXAndPreYAlphaMinus1 =
+        rewriter.create<arith::AddFOp>(loc, mulPrevYAlphaMinus1, mulAlphaX);
     // DEBUG_PRINT_NO_ARGS() ;
     // AddmulAlphaXAndPreYAlphaMinus1.dump();
     // forOp1->dump();
 
-    rewriter.create<AffineStoreOp>(loc, AddmulAlphaXAndPreYAlphaMinus1, alloc, ValueRange{iv}); //PrevY //AddmulAlphaXAndPreYAlphaMinus1
+    rewriter.create<AffineStoreOp>(
+        loc, AddmulAlphaXAndPreYAlphaMinus1, alloc,
+        ValueRange{iv}); // PrevY //AddmulAlphaXAndPreYAlphaMinus1
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      // init first value of output with first value of input: y[0] = x[0]
-      // iterate for output from 1st to last 
-      // y[i] = (1 - alpha) * y[i-1] + alpha * x[i]
-      // replace this upsampling op with the output_mem_allocation op
-        //  %indx0 = arith.constantIndex 0 : index
-        // %0 = affine.load in[indx0 ] : f64
-        //  affine.store %0 ,out[indx0]
-        // affine.for %arg0 = 1 to len_y {
-        //    #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
-        //    %1 = affine.load out[#map1]
-      //      %2 = arith.subf %const1 , alpha
-      //      %3 = arith.mulf %2 , %1
-     
-      //      %load_in = affine.load in[%arg0]
-      //      %4 = arith.mulf alpha, %load_in
-      //      %5 = arith.addf %4, %3 
-      //      affine.store %5, out[%arg0]
-      // }
-      //   %2ndOperand = arith.const 3 : f64
-      //   affine.for %arg0 = 0 to input_len {
-      //      %elem1 = affine.load input[%arg0] <-- affine apply
-      //      #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
-      //      
-      //      affine.store %elem1, out[#map1]
-      // }
+    // debug
+    //  forOp1->dump();
+    //  init first value of output with first value of input: y[0] = x[0]
+    //  iterate for output from 1st to last
+    //  y[i] = (1 - alpha) * y[i-1] + alpha * x[i]
+    //  replace this low-pass filter op with the output_mem_allocation op
+    //   %indx0 = arith.constantIndex 0 : index
+    //  %0 = affine.load in[indx0 ] : f64
+    //   affine.store %0 ,out[indx0]
+    //  affine.for %arg0 = 1 to len_y {
+    //     #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    //     %1 = affine.load out[#map1]
+    //      %2 = arith.subf %const1 , alpha
+    //      %3 = arith.mulf %2 , %1
+
+    //      %load_in = affine.load in[%arg0]
+    //      %4 = arith.mulf alpha, %load_in
+    //      %5 = arith.addf %4, %3
+    //      affine.store %5, out[%arg0]
+    // }
+    //   %2ndOperand = arith.const 3 : f64
+    //   affine.for %arg0 = 0 to input_len {
+    //      %elem1 = affine.load input[%arg0] <-- affine apply
+    //      #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
+    //
+    //      affine.store %elem1, out[#map1]
+    // }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: Upsampling operations
 //===----------------------------------------------------------------------===//
 
-
 struct UpSamplingOpLowering : public ConversionPattern {
   UpSamplingOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::UpsamplingOp::getOperationName(), 1, ctx) {}
@@ -5390,111 +6083,124 @@ struct UpSamplingOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //init all out values with 0 using affine loop
-      //Update certain y_values with corresponding x 
-      //iterate for input : i = 0 to len
-      //get the corresponding output mapping index = M * i
-      // store in y at that index
-      // replace this upsampling op with the output_mem_allocation op
+
+    // Pseudo-code:
+    // init all out values with 0 using affine loop
+    // Update certain y_values with corresponding x
+    // iterate for input : i = 0 to len
+    // get the corresponding output mapping index = M * i
+    //  store in y at that index
+    //  replace this upsampling op with the output_mem_allocation op
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //For loop
-    int64_t lb = 0 ;
+    // For loop
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    //init all the output mem location with 0
-    affine::AffineForOp forOpSetOut0Loop = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    // init all the output mem location with 0
+    affine::AffineForOp forOpSetOut0Loop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto ivforOpSetOut0Loop = forOpSetOut0Loop.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOpSetOut0Loop.getBody());
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
-    //store the result
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    // store the result
     rewriter.create<AffineStoreOp>(loc, constant0, alloc, ivforOpSetOut0Loop);
     rewriter.setInsertionPointAfter(forOpSetOut0Loop);
 
     Value upsampling2ndArg = op->getOperand(1);
     UpsamplingOpAdaptor upsamplingAdaptor(operands);
-    auto inputType = llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
-    int64_t ub2 =  inputType.getShape()[0]; // tensorType.getShape()[0];
-    //create another for loop for updating corresponding y with x
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub2, step);
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t ub2 = inputType.getShape()[0]; // tensorType.getShape()[0];
+    // create another for loop for updating corresponding y with x
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub2, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    //Load input elem
-    
-    Value elemIn = rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), iv);
+    // Load input elem
 
-    // Value elemIn = rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), addMapForUpSampling, 
+    Value elemIn =
+        rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), iv);
+
+    // Value elemIn = rewriter.create<AffineLoadOp>(loc,
+    // upsamplingAdaptor.getLhs(), addMapForUpSampling,
     //               ValueRange{iv,constantSamplingRateIndx});
 
-    
-    
-    //For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
+    // Affine map scaling the loop index by the sampling rate:
+    // #map1 = affine_map<(d0)[s0] -> (d0 * s0)>
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     bindSymbols(rewriter.getContext(), s0);
 
-    // AffineExpr ExprForUpSampling = rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0);
+    // AffineExpr ExprForUpSampling = rewriter.getAffineDimExpr(0) *
+    // rewriter.getAffineSymbolExpr(0);
     AffineExpr ExprForUpSampling = d0 * s0;
-    // Value constant3 = rewriter.create<arith::ConstantOp>(loc, rewriter.getI64Type(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3));
-    Value constant3 = rewriter.create<arith::ConstantIndexOp>(loc, 3); //working
+    // Value constant3 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getI64Type(),
+    // rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3));
+    Value constant3 =
+        rewriter.create<arith::ConstantIndexOp>(loc, 3); // working
     constant3.dump();
 
     int64_t SecondValueInt = 1;
-    
-    dsp::ConstantOp constantOp2ndArg = upsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+
+    dsp::ConstantOp constantOp2ndArg =
+        upsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
+    ;
     auto elements = constantRhsValue.getValues<FloatAttr>();
     float SecondValue = elements[0].getValueAsDouble();
-    SecondValueInt = (int64_t) SecondValue;
+    SecondValueInt = (int64_t)SecondValue;
 
-    // Value downSamplingRateAsIndex = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),UpsamplingRate);
-    Value constantSamplingRateIndx = rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
+    // Value downSamplingRateAsIndex = rewriter.create<arith::IndexCastOp>(loc,
+    // rewriter.getIndexType(),UpsamplingRate);
+    Value constantSamplingRateIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
     constantSamplingRateIndx.dump();
-    
+
     AffineMap addMapForUpSampling = AffineMap::get(1, 1, ExprForUpSampling);
 
     // DEBUG_PRINT_NO_ARGS() ;
-    // Value elem2 = rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), addMapForUpSampling, 
+    // Value elem2 = rewriter.create<AffineLoadOp>(loc,
+    // upsamplingAdaptor.getLhs(), addMapForUpSampling,
     //               ValueRange{iv,constantSamplingRateIndx});
     // elem2.dump();
-    //store the result
-    rewriter.create<AffineStoreOp>(loc, elemIn, alloc, addMapForUpSampling, ValueRange{iv,constantSamplingRateIndx});
+    // store the result
+    rewriter.create<AffineStoreOp>(loc, elemIn, alloc, addMapForUpSampling,
+                                   ValueRange{iv, constantSamplingRateIndx});
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      //   %0 = arith.const 0 : f64
-      //   affine.for %arg0 = 0 to out_y {
-      //      affine.store %0, out[%arg0]
-      // }
-      //   %2ndOperand = arith.const 3 : f64
-      //   affine.for %arg0 = 0 to input_len {
-      //      %elem1 = affine.load input[%arg0] <-- affine apply
-      //      #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
-      //      
-      //      affine.store %elem1, out[#map1]
-      // }
+    // debug
+    //  forOp1->dump();
+    //    %0 = arith.const 0 : f64
+    //    affine.for %arg0 = 0 to out_y {
+    //       affine.store %0, out[%arg0]
+    //  }
+    //    %2ndOperand = arith.const 3 : f64
+    //    affine.for %arg0 = 0 to input_len {
+    //       %elem1 = affine.load input[%arg0] <-- affine apply
+    //       #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
+    //
+    //       affine.store %elem1, out[#map1]
+    //  }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -5503,7 +6209,6 @@ struct UpSamplingOpLowering : public ConversionPattern {
 // ToyToAffine RewritePatterns: Downsampling operations
 //===----------------------------------------------------------------------===//
 
-
 struct DownSamplingOpLowering : public ConversionPattern {
   DownSamplingOpLowering(MLIRContext *ctx)
       : ConversionPattern(dsp::DownsamplingOp::getOperationName(), 1, ctx) {}
@@ -5512,83 +6217,162 @@ struct DownSamplingOpLowering : public ConversionPattern {
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //iterate for output len : i = 0 to len
-      //get the input elem using  input mapping index = M* i
-      // store in y 
-      // replace this op with the output_mem 
+
+    // Pseudo-code:
+    // iterate for output len : i = 0 to len
+    // get the input elem using input mapping index = M * i
+    //  store in y
+    //  replace this op with the output_mem
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //For loop
-    int64_t lb = 0 ;
+    // For loop
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
-    
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
     DownsamplingOpAdaptor downsamplingAdaptor(operands);
-    
-    //For affine expression: #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
+
+    // Affine map scaling the loop index by the sampling rate:
+    // #map1 = affine_map<(d0)[s0] -> (d0 * s0)>
     AffineExpr d0, s0;
     bindDims(rewriter.getContext(), d0);
     bindSymbols(rewriter.getContext(), s0);
 
-    // AffineExpr ExprForDownSampling = rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0);
+    // AffineExpr ExprForDownSampling = rewriter.getAffineDimExpr(0) *
+    // rewriter.getAffineSymbolExpr(0);
     AffineExpr ExprForDownSampling = d0 * s0;
-    // Value constant3 = rewriter.create<arith::ConstantOp>(loc, rewriter.getI64Type(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3));
-    Value constant3 = rewriter.create<arith::ConstantIndexOp>(loc, 3); //working
+    // Value constant3 = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getI64Type(),
+    // rewriter.getIntegerAttr(rewriter.getIntegerType(64), 3));
+    Value constant3 =
+        rewriter.create<arith::ConstantIndexOp>(loc, 3); // working
     constant3.dump();
 
     int64_t SecondValueInt = 1;
     Value downsampling2ndArg = op->getOperand(1);
-    dsp::ConstantOp constantOp2ndArg = downsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
-    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
+    dsp::ConstantOp constantOp2ndArg =
+        downsampling2ndArg.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();
+    ;
     auto elements = constantRhsValue.getValues<FloatAttr>();
     float SecondValue = elements[0].getValueAsDouble();
-    SecondValueInt = (int64_t) SecondValue;
+    SecondValueInt = (int64_t)SecondValue;
 
-    // Value downSamplingRateAsIndex = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),DownsamplingRate);
-    Value constantSamplingRateIndx = rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
+    // Value downSamplingRateAsIndex = rewriter.create<arith::IndexCastOp>(loc,
+    // rewriter.getIndexType(),DownsamplingRate);
+    Value constantSamplingRateIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, SecondValueInt);
     constantSamplingRateIndx.dump();
-    
+
     AffineMap addMapForDownSampling = AffineMap::get(1, 1, ExprForDownSampling);
-    // AffineMap addMapForDownSampling = AffineMap::get(1, 1, ValueRange{d0,s0 });
-    // AffineMap addMapForDownSampling = AffineMap::get(1, 1, ExprForDownSampling, rewriter.getContext());
-    // AffineMap addMapForDownSampling = AffineMap::get(1, 0, { d0}); //Working
+    // Commented-out alternatives for addMapForDownSampling:
+    //   AffineMap::get(1, 1, ValueRange{d0, s0});
+    //   AffineMap::get(1, 1, ExprForDownSampling, rewriter.getContext());
+    //   AffineMap::get(1, 0, {d0}); // Working
     // DEBUG_PRINT_NO_ARGS() ;
-    Value elem2 = rewriter.create<AffineLoadOp>(loc, downsamplingAdaptor.getLhs(), addMapForDownSampling, 
-                  ValueRange{iv,constantSamplingRateIndx});
+    Value elem2 = rewriter.create<AffineLoadOp>(
+        loc, downsamplingAdaptor.getLhs(), addMapForDownSampling,
+        ValueRange{iv, constantSamplingRateIndx});
     elem2.dump();
-    //store the result
+    // store the result
     rewriter.create<AffineStoreOp>(loc, elem2, alloc, iv);
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      //   %2ndOperand = arith.const 3 : f64
-      //   affine.for %arg0 = 0 to 10 {
-      //    #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
-      //    %elem1 = affine.load input[#map1] <-- affine apply  
-      //    affine.store %elem1, out[%arg0]
-      // }
+    // debug
+    //  forOp1->dump();
+    //    %2ndOperand = arith.const 3 : f64
+    //    affine.for %arg0 = 0 to 10 {
+    //     #map1 = affine_map<(%arg0)[2ndOperand] : (%arg0 * 2ndOperand)
+    //     %elem1 = affine.load input[#map1] <-- affine apply
+    //     affine.store %elem1, out[%arg0]
+    //  }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: MedianFilterOp operations
+//===----------------------------------------------------------------------===//
+
+struct MedianFilterOpLowering : public ConversionPattern {
+  MedianFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MedianFilterOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), 1);
+
+    // For loop
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    MedianFilterOpAdaptor medianFilterOpAdaptor(operands);
+
+    Value elem1 = rewriter.create<AffineLoadOp>(
+        loc, medianFilterOpAdaptor.getInput(), iv);
+    AffineExpr ExprForElem2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineExpr ExprForElem3 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2);
+    AffineMap addMapForElem2 = AffineMap::get(1, 0, ExprForElem2);
+    AffineMap addMapForElem3 = AffineMap::get(1, 0, ExprForElem3);
+    Value elem2 = rewriter.create<AffineLoadOp>(
+        loc, medianFilterOpAdaptor.getInput(), addMapForElem2, ValueRange{iv});
+    Value elem3 = rewriter.create<AffineLoadOp>(
+        loc, medianFilterOpAdaptor.getInput(), addMapForElem3, ValueRange{iv});
+
+    // sum
+    Value sum1 = rewriter.create<arith::AddFOp>(loc, elem1, elem2);
+    Value sum = rewriter.create<arith::AddFOp>(loc, sum1, elem3);
+
+    // min
+    Value minElem1Elem2 = rewriter.create<arith::MinimumFOp>(loc, elem1, elem2);
+    Value min = rewriter.create<arith::MinimumFOp>(loc, minElem1Elem2, elem3);
+
+    // max
+    Value maxElem1Elem2 = rewriter.create<arith::MaximumFOp>(loc, elem1, elem2);
+    Value max = rewriter.create<arith::MaximumFOp>(loc, maxElem1Elem2, elem3);
+
+    // median of three = sum - (min + max)
+    Value min_plus_max = rewriter.create<arith::AddFOp>(loc, min, max);
+    Value median = rewriter.create<arith::SubFOp>(loc, sum, min_plus_max);
+
+    // store in alloc
+    rewriter.create<AffineStoreOp>(loc, median, alloc, iv);
+    rewriter.setInsertionPointAfter(forOp1);
     rewriter.replaceOp(op, alloc);
-    
     return success();
   }
 };
@@ -5599,84 +6383,90 @@ struct DownSamplingOpLowering : public ConversionPattern {
 
 struct SlidingWindowAvgOpLowering : public ConversionPattern {
   SlidingWindowAvgOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::SlidingWindowAvgOp::getOperationName(), 1, ctx) {}
+      : ConversionPattern(dsp::SlidingWindowAvgOp::getOperationName(), 1, ctx) {
+  }
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
     auto loc = op->getLoc();
-    
-    //Pseudo-code:
-      //iterate for len = len - 2
-      //get 3 elements
-      //get the sum
-      //get the avg = sum / 3
-      // store the result to output_mem
-      // replace this op with the output_mem 
+
+    // Pseudo-code:
+    // iterate for len = len - 2
+    // get 3 elements
+    // get the sum
+    // get the avg = sum / 3
+    //  store the result to output_mem
+    //  replace this op with the output_mem
 
     // DEBUG_PRINT_NO_ARGS() ;
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
     SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    Value constant3 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3));
-    //For loop
-    int64_t lb = 0 ;
+    Value constant3 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3));
+    // For loop
+    int64_t lb = 0;
     int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
-    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     auto iv = forOp1.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
     SlidingWindowAvgOpAdaptor slidingWinAvgAdaptor(operands);
-    
-    Value elem1 = rewriter.create<AffineLoadOp>(loc, slidingWinAvgAdaptor.getInput(), iv);
 
-    //affine-maps for elem2 and elem3
-    AffineExpr ExprForElem2 = rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
-    AffineExpr ExprForElem3 = rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2);
+    Value elem1 =
+        rewriter.create<AffineLoadOp>(loc, slidingWinAvgAdaptor.getInput(), iv);
+
+    // affine-maps for elem2 and elem3
+    AffineExpr ExprForElem2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineExpr ExprForElem3 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(2);
 
     AffineMap addMapForElem2 = AffineMap::get(1, 0, ExprForElem2);
     AffineMap addMapForElem3 = AffineMap::get(1, 0, ExprForElem3);
-    Value elem2 = rewriter.create<AffineLoadOp>(loc, slidingWinAvgAdaptor.getInput(), addMapForElem2, 
-                  ValueRange{iv});
-    Value elem3 = rewriter.create<AffineLoadOp>(loc, slidingWinAvgAdaptor.getInput(), addMapForElem3, 
-                  ValueRange{iv});
+    Value elem2 = rewriter.create<AffineLoadOp>(
+        loc, slidingWinAvgAdaptor.getInput(), addMapForElem2, ValueRange{iv});
+    Value elem3 = rewriter.create<AffineLoadOp>(
+        loc, slidingWinAvgAdaptor.getInput(), addMapForElem3, ValueRange{iv});
 
-    Value sum1 = rewriter.create<arith::AddFOp>(loc, elem1 , elem2);
-    Value sum2 = rewriter.create<arith::AddFOp>(loc, sum1 , elem3);
+    Value sum1 = rewriter.create<arith::AddFOp>(loc, elem1, elem2);
+    Value sum2 = rewriter.create<arith::AddFOp>(loc, sum1, elem3);
     Value avg = rewriter.create<arith::DivFOp>(loc, sum2, constant3);
 
-    //store the result
+    // store the result
     rewriter.create<AffineStoreOp>(loc, avg, alloc, iv);
 
     rewriter.setInsertionPointAfter(forOp1);
-    //debug
-    // forOp1->dump();
-      //   %cont3 = arith.const 3 : f64
-      //   affine.for %arg0 = 0 to 8 {
-      //    %elem1 = affine.load input[%arg0]
-      //    #map1 = affine_map<(%arg0)[] : (%arg0 + 1)
-      //    #map2 = affine_map<(%arg0)[] : (%arg0 + 2)
-      //    %elem2 = affine.load input[#map1] <-- affine apply 
-      //    %elem3 = affine.load input[#map2]
-
-      //    %sum1 = arith.addf %elem1 , %elem2
-      //    %sum2 = arith.addf %sum1, %elem3
-      //    %res = arith.divf %sum2 , 
-      //    affine.store %sum2, out[%arg0]
-      // }
+    // debug
+    //  forOp1->dump();
+    //    %cont3 = arith.const 3 : f64
+    //    affine.for %arg0 = 0 to 8 {
+    //     %elem1 = affine.load input[%arg0]
+    //     #map1 = affine_map<(%arg0)[] : (%arg0 + 1)
+    //     #map2 = affine_map<(%arg0)[] : (%arg0 + 2)
+    //     %elem2 = affine.load input[#map1] <-- affine apply
+    //     %elem3 = affine.load input[#map2]
+
+    //    %sum1 = arith.addf %elem1 , %elem2
+    //    %sum2 = arith.addf %sum1, %elem3
+    //    %res = arith.divf %sum2, %cont3
+    //    affine.store %res, out[%arg0]
+    // }
     rewriter.replaceOp(op, alloc);
-    
+
     return success();
   }
 };
@@ -5684,146 +6474,157 @@ struct SlidingWindowAvgOpLowering : public ConversionPattern {
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: FIRFilterResponse operations
 //===----------------------------------------------------------------------===//
-struct FIRFilterResponseOpLowering: public ConversionPattern {
-      FIRFilterResponseOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::FIRFilterResponseOp::getOperationName(), 1 , ctx) {}
-
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.FIRFilterResponseOp has 2 operands -- both of type tensor f64 
-
-      //Get the location of FIRFilterResponseOp
-      auto loc = op->getLoc();
-      
-      //Pseudo-Code
-      // y[n] = sum( h[k] * x[n-k]) k = 0 to lenOfh 
-
-      //Range for each element of the output tensor -- i = %arg0
-      //  Create a tempValue = 0
-        //  Range for each of the elements of filter len -- k = %arg1
-        //  check for the condition that %arg0  - %arg1 >= 0 && < inputLen
-          //  get elem1 = filter[k] , elem2 = x[i-k]
-          // use affine-map expression for calculating i-k
-          //  tempValue = tempValue + elem1 * elem2
-      // y[i] = tempValue
-        
-        lowerOpToLoopsFIR(op, operands, rewriter, 
-            [loc, op ] (OpBuilder &builder, ValueRange memRefOperands,
-                  ValueRange loopIvs) {
-                  // ValueRange loopIvs) {
-                     
-                    // Generate an adaptor for the remapped operands of the
-                     // BinaryOp. This allows for using the nice named accessors
-                     // that are generated by the ODS.
-                    dsp::FIRFilterResponseOpAdaptor firFilterAdaptor(memRefOperands);
+struct FIRFilterResponseOpLowering : public ConversionPattern {
+  FIRFilterResponseOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterResponseOp::getOperationName(), 1,
+                          ctx) {}
 
-                    // Generate loads for the element of 'lhs' and 'rhs' at the
-                    // inner loop.
-                    // auto lhsTensor = delayAdaptor.getLhs();
-                    auto lhsTensor = builder.create<affine::AffineLoadOp>(
-                         loc, firFilterAdaptor.getLhs(), loopIvs);
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterResponseOp has 2 operands -- both of type tensor f64
+
+    // Get the location of FIRFilterResponseOp
+    auto loc = op->getLoc();
 
-                    // auto rhsScalar = op->getOperand(1);     
-                    auto rhsScalar = builder.create<affine::AffineLoadOp>(
-                         loc, firFilterAdaptor.getRhs(), loopIvs);
+    // Pseudo-Code
+    //  y[n] = sum( h[k] * x[n-k]) k = 0 to lenOfh
+
+    // Range for each element of the output tensor -- i = %arg0
+    //   Create a tempValue = 0
+    //   Range for each of the elements of filter len -- k = %arg1
+    //   check for the condition that %arg0  - %arg1 >= 0 && < inputLen
+    //   get elem1 = filter[k] , elem2 = x[i-k]
+    //  use affine-map expression for calculating i-k
+    //   tempValue = tempValue + elem1 * elem2
+    // y[i] = tempValue
+
+    lowerOpToLoopsFIR(
+        op, operands, rewriter,
+        [loc, op](OpBuilder &builder, ValueRange memRefOperands,
+                  ValueRange loopIvs) {
+          // ValueRange loopIvs) {
 
-                    auto resultMulOp = builder.create<arith::MulFOp>(loc, lhsTensor,
-                                                            rhsScalar);
+          // Generate an adaptor for the remapped operands of the
+          // FIRFilterResponseOp. This allows for using the nice named
+          // accessors that are generated by the ODS.
+          dsp::FIRFilterResponseOpAdaptor firFilterAdaptor(memRefOperands);
 
-                    return resultMulOp;
+          // Generate loads for the element of 'lhs' and 'rhs' at the
+          // inner loop.
+          // auto lhsTensor = delayAdaptor.getLhs();
+          auto lhsTensor = builder.create<affine::AffineLoadOp>(
+              loc, firFilterAdaptor.getLhs(), loopIvs);
 
-        });
+          // auto rhsScalar = op->getOperand(1);
+          auto rhsScalar = builder.create<affine::AffineLoadOp>(
+              loc, firFilterAdaptor.getRhs(), loopIvs);
 
-      return success();
-    }
+          auto resultMulOp =
+              builder.create<arith::MulFOp>(loc, lhsTensor, rhsScalar);
 
+          return resultMulOp;
+        });
 
+    return success();
+  }
 };
 
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: Delay operations
 //===----------------------------------------------------------------------===//
-struct DelayOpLowering: public ConversionPattern {
-      DelayOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::DelayOp::getOperationName(), 1 , ctx) {}
-
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.DelayOp has 2 operands -- both of type tensor f64
-
-      //Get the location of delayop
-      auto loc = op->getLoc();
-
-      //Pseudo-code
-      //2 affine loops -- 
-      //first from 0 to delay_2ndArg
-      //          here, inside AffineNest
-      //          create affine:load from the arith.const operation with value 0
-      //          use affine:store to store at result_op at indx
-      // 
-      //2nd from delay_2ndArg to lengthOfOperand0 of delayOp 
-      //          here, inside AffineNest
-      //          create affine:load from input memref & indx = indx - delay_2ndArg 
-      //          create affine:store at result_op indx
-
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));    
-    
-    //allocation & deallocation for the result of this operation
+struct DelayOpLowering : public ConversionPattern {
+  DelayOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DelayOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.DelayOp has 2 operands -- both of type tensor f64
+
+    // Get the location of delayop
+    auto loc = op->getLoc();
+
+    // Pseudo-code
+    // 2 affine loops --
+    // first from 0 to delay_2ndArg
+    //           here, inside AffineNest
+    //           create affine:load from the arith.const operation with value 0
+    //           use affine:store to store at result_op at indx
+    //
+    // 2nd from delay_2ndArg to lengthOfOperand0 of delayOp
+    //           here, inside AffineNest
+    //           create affine:load from input memref at indx - delay_2ndArg
+    //           create affine:store at result_op indx
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
     SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
 
-    //For loop
+    // For loop
     int64_t ub = tensorType.getShape()[0];
 
-    //Get 2nd Arg
+    // Get 2nd Arg
     DelayOpAdaptor delayOpAdaptor(operands);
 
-    Value constant0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
     DEBUG_PRINT_NO_ARGS();
     // Creating SSA values for the lower bound and upper bound
-    Value lowerBound = rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), 0));
+    Value lowerBound = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(),
+        rewriter.getIntegerAttr(rewriter.getIndexType(), 0));
     // Cast the f64 value directly to the index type
-    Value inputUnit = rewriter.create<AffineLoadOp>(loc, delayOpAdaptor.getRhs(), ValueRange{} );
-    Value i64UpperBound = rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), inputUnit);
+    Value inputUnit = rewriter.create<AffineLoadOp>(
+        loc, delayOpAdaptor.getRhs(), ValueRange{});
+    Value i64UpperBound =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), inputUnit);
     // Cast the i64 value to index type
-    Value delay2ndArg = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(), i64UpperBound);
-    // Value inputLen = rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), ub));
+    Value delay2ndArg = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), i64UpperBound);
+    // Value inputLen = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(),
+    // ub));
     DEBUG_PRINT_WITH_ARGS("print delay2ndArg.dump() for debugging");
-    
+
     DEBUG_PRINT_NO_ARGS();
     // Create an empty affine map list
     // SmallVector<AffineMap, 4> lbMaps, ubMaps;
     // Create identity affine maps for bounds
-    // AffineMap lbMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0, rewriter.getContext());
-    // AffineMap ubMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0, rewriter.getContext());
+    // AffineMap lbMap = AffineMap::get(/*dimCount=*/0, /*symbolCount=*/0,
+    // rewriter.getContext()); AffineMap ubMap = AffineMap::get(/*dimCount=*/0,
+    // /*symbolCount=*/0, rewriter.getContext());
 
     // Create an AffineForOp with SSA values for the bounds
     Value step1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
 
-    scf::ForOp forOp1 = rewriter.create<scf::ForOp>(loc, lowerBound, delay2ndArg, step1);
-    //Affine loop with non-int loop indices
-    // affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc, lowerBound, lbMap, inputLen, ubMap, 1);
+    scf::ForOp forOp1 =
+        rewriter.create<scf::ForOp>(loc, lowerBound, delay2ndArg, step1);
+    // Affine loop with non-int loop indices
+    //  affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(loc,
+    //  lowerBound, lbMap, inputLen, ubMap, 1);
     DEBUG_PRINT_NO_ARGS();
-    
+
     auto iv = forOp1.getInductionVar();
 
     rewriter.setInsertionPointToStart(forOp1.getBody());
-    //store the result
-    // rewriter.create<AffineStoreOp>(loc, constant0, alloc, iv);
+    // store the result
+    //  rewriter.create<AffineStoreOp>(loc, constant0, alloc, iv);
     rewriter.create<memref::StoreOp>(loc, constant0, alloc, iv);
 
     rewriter.setInsertionPointAfter(forOp1);
 
     // Create the constants for lb2, step1, and calculate ub2
     Value lb2 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value lenOfInput = rewriter.create<arith::ConstantIndexOp>(loc, /*length of input*/ub); // Replace with the actual length
+    Value lenOfInput = rewriter.create<arith::ConstantIndexOp>(
+        loc, /*length of input*/ ub); // Replace with the actual length
     Value ub2 = rewriter.create<arith::SubIOp>(loc, lenOfInput, delay2ndArg);
     Value step2 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
 
@@ -5835,7 +6636,8 @@ struct DelayOpLowering: public ConversionPattern {
     rewriter.setInsertionPointToStart(forOp2.getBody());
 
     // Load value from allocIP[iv2]
-    Value loadedVal = rewriter.create<memref::LoadOp>(loc, delayOpAdaptor.getLhs(), iv2);
+    Value loadedVal =
+        rewriter.create<memref::LoadOp>(loc, delayOpAdaptor.getLhs(), iv2);
 
     // Calculate the index iv2 + delaySecondArg
     Value newIndex = rewriter.create<arith::AddIOp>(loc, iv2, delay2ndArg);
@@ -5844,135 +6646,340 @@ struct DelayOpLowering: public ConversionPattern {
     rewriter.create<memref::StoreOp>(loc, loadedVal, alloc, newIndex);
     rewriter.setInsertionPointAfter(forOp2);
     DEBUG_PRINT_NO_ARGS();
-    //For 2nd loop -- 
-    //loop from 0 to lenOfInput - 2ndArg
-    // load from index 
-    // store at index + 2ndArg
+    // For 2nd loop --
+    // loop from 0 to lenOfInput - 2ndArg
+    //  load from index
+    //  store at index + 2ndArg
 
     // forOp1.dump();
-    //Expected MLIR-Affine
-      // %0 = affine.load %alloc_0[] : memref<f64>
-      // %1 = arith.fptosi %0 : f64 to i64
-      // %2 = arith.index_cast %1 : i64 to index
-      // %c1_15 = arith.constant 1 : index
-      // scf.for %arg0 = %c0_14 to %2 step %c1_15 {
-      //   memref.store %cst_13, %alloc[%arg0] : memref<10xf64>
-      // }
-      // %c0_16 = arith.constant 0 : index
-      // %c10 = arith.constant 10 : index
-      // %3 = arith.subi %c10, %2 : index
-      // %c1_17 = arith.constant 1 : index
-      // scf.for %arg0 = %c0_16 to %3 step %c1_17 {
-      //   %4 = memref.load %alloc_1[%arg0] : memref<10xf64>
-      //   %5 = arith.addi %arg0, %2 : index
-      //   memref.store %4, %alloc[%5] : memref<10xf64>
-      // }
-
- 
+    // Expected MLIR-Affine
+    // %0 = affine.load %alloc_0[] : memref<f64>
+    // %1 = arith.fptosi %0 : f64 to i64
+    // %2 = arith.index_cast %1 : i64 to index
+    // %c1_15 = arith.constant 1 : index
+    // scf.for %arg0 = %c0_14 to %2 step %c1_15 {
+    //   memref.store %cst_13, %alloc[%arg0] : memref<10xf64>
+    // }
+    // %c0_16 = arith.constant 0 : index
+    // %c10 = arith.constant 10 : index
+    // %3 = arith.subi %c10, %2 : index
+    // %c1_17 = arith.constant 1 : index
+    // scf.for %arg0 = %c0_16 to %3 step %c1_17 {
+    //   %4 = memref.load %alloc_1[%arg0] : memref<10xf64>
+    //   %5 = arith.addi %arg0, %2 : index
+    //   memref.store %4, %alloc[%5] : memref<10xf64>
+    // }
 
     rewriter.replaceOp(op, alloc);
     DEBUG_PRINT_NO_ARGS();
     return success();
-    }
-
-
+  }
 };
 
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: Gain operations
 //===----------------------------------------------------------------------===//
-struct GainOpLowering: public ConversionPattern {
-      GainOpLowering(MLIRContext *ctx)
-        : ConversionPattern(dsp::GainOp::getOperationName(), 1 , ctx) {}
+struct GainOpLowering : public ConversionPattern {
+  GainOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GainOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.GainOp has 2 operands -- both of type tensor f64 , 2ndOperand should
+    // have only 1 element
 
-    LogicalResult 
-    matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-              ConversionPatternRewriter &rewriter) const final {
-      //dsp.GainOp has 2 operands -- both of type tensor f64 , 2ndOperand should have only 1 element
+    // Get the location of GainOp
+    auto loc = op->getLoc();
 
-      //Get the location of GainOp
-      auto loc = op->getLoc();
-      
-      
-      //Pseudo-code:
-          //  y[i] = y[i] * gain for  0<=i<N
-          //  
+    // Pseudo-code:
+    //   y[i] = x[i] * gain for  0<=i<N
+    //
 
-    //output for result type
-    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));  
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
 
-    //allocation & deallocation for the result of this operation
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    //construct affine loops for the input
-    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/0);
-    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);    
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
     GainOpAdaptor gainOpOpAdaptor(operands);
     // Value GetValueAtIndx2ndArg = op->getOperand(1);
-    // dsp::ConstantOp constantOp2ndArg = GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>();
-    // DenseElementsAttr constantRhsValue = constantOp2ndArg.getValue();;
-    // auto elements = constantRhsValue.getValues<FloatAttr>();
-    // float gain = elements[0].getValueAsDouble();
+    // dsp::ConstantOp constantOp2ndArg =
+    // GetValueAtIndx2ndArg.getDefiningOp<dsp::ConstantOp>(); DenseElementsAttr
+    // constantRhsValue = constantOp2ndArg.getValue();; auto elements =
+    // constantRhsValue.getValues<FloatAttr>(); float gain =
+    // elements[0].getValueAsDouble();
 
     // Value gain = gainOpOpAdaptor.getRhs();
-    
+
     DEBUG_PRINT_NO_ARGS();
 
-    //first from 1 <= i < N
-    int64_t lb = 0 ;
-    int64_t ub = tensorType.getShape()[0];   
+    // loop bounds: 0 <= i < N
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
     int64_t step = 1;
 
     DEBUG_PRINT_NO_ARGS();
 
-    
-    //loop from 0 <= i < N
+    // loop from 0 <= i < N
 
-    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub, step);
-    auto ivY = forOpY.getInductionVar();
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
     rewriter.setInsertionPointToStart(forOpY.getBody());
+    auto ivY = forOpY.getInductionVar();
 
-    Value getLhs = rewriter.create<AffineLoadOp>(loc, gainOpOpAdaptor.getLhs() , ivY);
-    Value getRhs = rewriter.create<AffineLoadOp>(loc, gainOpOpAdaptor.getRhs() , ValueRange{});
-    Value mulProd = rewriter.create<arith::MulFOp>(loc, getLhs, getRhs );
-    rewriter.create<AffineStoreOp>(loc, mulProd, alloc, ValueRange{ivY}); 
+    Value getLhs =
+        rewriter.create<AffineLoadOp>(loc, gainOpOpAdaptor.getLhs(), ValueRange{ivY});
+    Value getRhs = rewriter.create<AffineLoadOp>(loc, gainOpOpAdaptor.getRhs(),
+                                                 ValueRange{});
+    Value mulProd = rewriter.create<arith::MulFOp>(loc, getLhs, getRhs);
+    rewriter.create<AffineStoreOp>(loc, mulProd, alloc, ValueRange{ivY});
     DEBUG_PRINT_NO_ARGS();
     rewriter.setInsertionPointAfter(forOpY);
 
+    // debug
+    //  forOpY->dump();
+    //  NOTE: the IR below does not match this lowering (no math.cos here).
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
 
-    //debug
-    // forOpX->dump();
-    // forOpY->dump();
+    // }
+    // }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: BitwiseAndOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.bitwiseand to an affine loop that, for every index of dim 0,
+/// converts both f64 operands to i64, ANDs them, and stores the result back
+/// as f64 in a newly allocated buffer that replaces the op.
+struct BitwiseAndOpLowering : public ConversionPattern {
+  BitwiseAndOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::BitwiseAndOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.bitwiseandop has 2 operands -- both of type tensor f64 , of the same
+    // size
+
+    // Get the location of BitwiseAndOp
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y[i] = bitwiseand(lhs[i], rhs[i]) for  0<=i<N
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    BitwiseAndOpAdaptor bitwiseandOpAdaptor(operands);
+
+    DEBUG_PRINT_NO_ARGS();
+
+    // loop bounds: 0 <= i < N, where N is dim 0 of the result type
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    DEBUG_PRINT_NO_ARGS();
+
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    Value getLhs =
+        rewriter.create<AffineLoadOp>(loc, bitwiseandOpAdaptor.getLhs(), ivY);
+    Value getRhs =
+        rewriter.create<AffineLoadOp>(loc, bitwiseandOpAdaptor.getRhs(), ivY);
+    // arith.andi only accepts integers, so round-trip through i64 (fptosi
+    // drops any fractional part of the operands).
+    Value lhsInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), getLhs);
+    Value rhsInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), getRhs);
+    Value andiResult = rewriter.create<arith::AndIOp>(loc, lhsInt, rhsInt);
+    Value resultFp = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), andiResult);
+
+    rewriter.create<AffineStoreOp>(loc, resultFp, alloc, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // debug
+    // forOpY->dump();
 
-        // %cst = arith.constant 6.2831853071800001 : f64
-        // %cst_0 = arith.constant 4.600000e-01 : f64
-        // %cst_1 = arith.constant 5.400000e-01 : f64
-        // %cst_2 = arith.constant 4.000000e+00 : f64
-        // %alloc = memref.alloc() : memref<4xf64>
-        // %alloc_3 = memref.alloc() : memref<f64>
-        // affine.store %cst_2, %alloc_3[] : memref<f64>
-        // affine.for %arg0 = 0 to 4 {
-        //   %0 = arith.index_castui %arg0 : index to i32
-        //   %1 = arith.uitofp %0 : i32 to f64
-        //   %2 = arith.mulf %1, %cst : f64
-        //   %3 = arith.divf %2, %cst_2 : f64
-        //   %4 = math.cos %3 : f64
-        //   %5 = arith.mulf %4, %cst_0 : f64
-        //   %6 = arith.subf %cst_1, %5 : f64
-        //   affine.store %6, %alloc[%arg0] : memref<4xf64>
-        // }
-
-
-        // }
-        // }
     rewriter.replaceOp(op, alloc);
-      
+
     return success();
-  }
+  };
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: zeroCrossCountOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.zeroCrossCount to an scf.for reduction.
+///
+/// The loop walks i = 1 .. N-1 (N is dim 0 of the op's result type), loads
+/// operand[i-1] and operand[i], and adds one to an i64 counter carried as an
+/// iter_arg whenever exactly one of the two samples is below zero. The final
+/// count is converted to f64 and stored in element 0 of a newly allocated
+/// 1-element memref, which replaces the op.
+struct zeroCrossCountOpLowering : public ConversionPattern {
+  zeroCrossCountOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::zeroCrossCountOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.zerocrosscount has 1 operand -- of type tensor f64
+
+    // Get the location of zeroCrossCountOp
+    auto loc = op->getLoc();
+
+    // Pseudo-code is based on the C++ implementation here:
+    // https://toto-share.com/2011/05/cc-zero-crossing-code/
+    //   for 1<=i<N
+    //      if sign of operand[i] is not equal to sign of operand[i-1]
+    //         increment zero-cross count
+
+    // Result type of the op; its dim 0 is used below as the loop upper bound.
+    // NOTE(review): this assumes the result type still carries the input
+    // length even though the buffer allocated below has a single element --
+    // confirm against the op's shape inference.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    Type integerType = rewriter.getI64Type();
+
+    // Allocation & deallocation for the result of this operation.
+    // Force the result to be a memref of size 1 (it only holds the count).
+    auto alloc = insertAllocAndDealloc(
+        MemRefType::get(ArrayRef<int64_t>(1), tensorType.getElementType()), loc,
+        rewriter);
+    zeroCrossCountOpAdaptor adaptor(operands);
+    DEBUG_PRINT_NO_ARGS();
+
+    // Define constants
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, integerType, rewriter.getIntegerAttr(integerType, 0));
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, integerType, rewriter.getIntegerAttr(integerType, 1));
+    // 0.0 is the threshold used by the sign test inside the loop.
+    Value zeroF64 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Define bounds: i runs over [1, N) so that element i-1 always exists.
+    Value lb = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value ub =
+        rewriter.create<arith::ConstantIndexOp>(loc, tensorType.getShape()[0]);
+    Value step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Set up the for loop; the running count is carried as an iter_arg.
+    auto forOpY =
+        rewriter.create<scf::ForOp>(loc, lb, ub, step, ValueRange{constant0});
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    auto countArg = forOpY.getRegionIterArgs()[0];
+
+    // Get the current and previous elements
+    Value ivYPrev = rewriter.create<arith::SubIOp>(loc, ivY, step);
+    Value getLhsPrev =
+        rewriter.create<memref::LoadOp>(loc, adaptor.getLhs(), ivYPrev);
+    Value getLhs = rewriter.create<memref::LoadOp>(loc, adaptor.getLhs(), ivY);
+
+    // Check whether the elements are less than zero. The comparison is done
+    // on the f64 values themselves: truncating to i64 first would turn every
+    // sample in (-1, 1) into 0 and lose its sign.
+    Value signLhsPrev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, getLhsPrev, zeroF64);
+    Value signLhs = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, getLhs, zeroF64);
+    Value equal = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
+                                                 signLhsPrev, signLhs);
+
+    // If the signs aren't the same, increment the zero cross counter.
+    // The scf.if produces the updated count, so each branch ends in a yield.
+    auto ifOp =
+        rewriter.create<scf::IfOp>(loc, TypeRange{integerType}, equal, true);
+
+    // If block
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countArg});
+
+    // Else block
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    auto countPlusOne =
+        rewriter.create<arith::AddIOp>(loc, countArg, constant1);
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countPlusOne});
+
+    rewriter.setInsertionPointAfter(ifOp);
+    auto countRes = ifOp.getResults()[0];
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countRes});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // debug
+    // forOpY->dump();
+    // Sketch of the intended IR (N = 3):
+    //   %count = scf.for %i = %c1 to %c3 step %c1
+    //       iter_args(%acc = %c0_i64) -> (i64) {
+    //     %im1 = arith.subi %i, %c1 : index
+    //     %prev = memref.load %in[%im1] : memref<3xf64>
+    //     %cur = memref.load %in[%i] : memref<3xf64>
+    //     %pneg = arith.cmpf olt, %prev, %zero : f64
+    //     %cneg = arith.cmpf olt, %cur, %zero : f64
+    //     %same = arith.cmpi eq, %pneg, %cneg : i1
+    //     %next = scf.if %same -> (i64) {
+    //       scf.yield %acc : i64
+    //     } else {
+    //       %inc = arith.addi %acc, %c1_i64 : i64
+    //       scf.yield %inc : i64
+    //     }
+    //     scf.yield %next : i64
+    //   }
+
+    // The loop result is the total count; convert it to f64 before storing.
+    auto finalCountArg = forOpY.getResults()[0];
+    Value finalCountArgFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), finalCountArg);
+
+    // Write the count to the single result element and replace the op.
+    rewriter.create<AffineStoreOp>(loc, finalCountArgFloat, alloc, Indx0);
+    rewriter.replaceOp(op, alloc);
 
+    return success();
+  };
 };
+
 //===----------------------------------------------------------------------===//
 // ToyToAffine RewritePatterns: Binary operations
 //===----------------------------------------------------------------------===//
@@ -6010,217 +7017,5057 @@ struct BinaryOpLowering : public ConversionPattern {
   }
 };
 
-
- //===----------------------------------------------------------------------===//
- // ToyToAffine RewritePatterns: Unary operations
- //===----------------------------------------------------------------------===//
-
- template <typename UnaryOp, typename LoweredUnaryOp>
- struct UnaryOpLowering : public ConversionPattern {
-   UnaryOpLowering(MLIRContext *ctx)
-       : ConversionPattern(UnaryOp::getOperationName(), 1, ctx) {}
-
-   LogicalResult
-   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-                   ConversionPatternRewriter &rewriter) const final {
-     auto loc = op->getLoc();
-     lowerOpToLoops(op, operands, rewriter,
-                    [loc](OpBuilder &builder, ValueRange memRefOperands,
-                          ValueRange loopIvs) {
-                      // Generate an adaptor for the remapped operands of the
-                      // UnaryOp. This allows for using the nice named accessors
-                      // that are generated by the ODS.
-                      typename UnaryOp::Adaptor unaryAdaptor(memRefOperands);
-
-                      // Generate loads for the element of 'lhs' and 'rhs' at the
-                      // inner loop.
-                      auto loadedInput = builder.create<affine::AffineLoadOp>(
-                          loc, unaryAdaptor.getInput(), loopIvs);
-
-                      // Create the unary operation performed on the loaded
-                      // values.
-                      return builder.create<LoweredUnaryOp>(loc, loadedInput);
-                    });
-     return success();
-   }
- };
-
-using AddOpLowering = BinaryOpLowering<dsp::AddOp, arith::AddFOp>;
-using SubOpLowering = BinaryOpLowering<dsp::SubOp, arith::SubFOp>;
-using MulOpLowering = BinaryOpLowering<dsp::MulOp, arith::MulFOp>;
-using DivOpLowering = BinaryOpLowering<dsp::DivOp, arith::DivFOp>;
-using SinOpLowering = UnaryOpLowering<dsp::SinOp, math::SinOp>;
-using CosOpLowering = UnaryOpLowering<dsp::CosOp, math::CosOp>;
 //===----------------------------------------------------------------------===//
-// ToyToAffine RewritePatterns: Constant operations
+// ToyToAffine AdditionalPatterns: Shift operations
 //===----------------------------------------------------------------------===//
 
-struct ConstantOpLowering : public OpRewritePattern<dsp::ConstantOp> {
-  using OpRewritePattern<dsp::ConstantOp>::OpRewritePattern;
+/// Lowers dsp::ShiftRightOp to an affine loop that converts each pair of
+/// loaded elements to i64, applies arith.shrsi and stores the result as f64.
+struct ShiftRightOpLowering : public ConversionPattern {
+  ShiftRightOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ShiftRightOp::getOperationName(), 1, ctx) {}
 
-  LogicalResult matchAndRewrite(dsp::ConstantOp op,
-                                PatternRewriter &rewriter) const final {
-    DenseElementsAttr constantValue = op.getValue();
-    Location loc = op.getLoc();
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
 
-    // When lowering the constant operation, we allocate and assign the constant
-    // values to a corresponding memref allocation.
-    auto tensorType = llvm::cast<RankedTensorType>(op.getType());
+    // Location of the op being lowered; reused for every generated op.
+    auto loc = op->getLoc();
+
+    // Ranked tensor type of the op's first result.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
     auto memRefType = convertTensorToMemRef(tensorType);
     auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
 
-    // We will be generating constant indices up-to the largest dimension.
-    // Create these constants up-front to avoid large amounts of redundant
-    // operations.
-    auto valueShape = memRefType.getShape();
-    SmallVector<Value, 8> constantIndices;
+    // Loop over 0 <= i < N, where N is the first dimension of the result.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
 
-    if (!valueShape.empty()) {
-      for (auto i : llvm::seq<int64_t>(
-               0, *std::max_element(valueShape.begin(), valueShape.end())))
-        constantIndices.push_back(
-            rewriter.create<arith::ConstantIndexOp>(loc, i));
-    } else {
-      // This is the case of a tensor of rank 0.
-      constantIndices.push_back(
-          rewriter.create<arith::ConstantIndexOp>(loc, 0));
-    }
+    typename dsp::ShiftRightOp::Adaptor binaryAdaptor(operands);
 
-    // The constant operation represents a multi-dimensional constant, so we
-    // will need to generate a store for each of the elements. The following
-    // functor recursively walks the dimensions of the constant shape,
-    // generating a store when the recursion hits the base case.
-    SmallVector<Value, 2> indices;
-    auto valueIt = constantValue.value_begin<FloatAttr>();
-    std::function<void(uint64_t)> storeElements = [&](uint64_t dimension) {
-      // The last dimension is the base case of the recursion, at this point
-      // we store the element at the given index.
-      if (dimension == valueShape.size()) {
-        rewriter.create<affine::AffineStoreOp>(
-            loc, rewriter.create<arith::ConstantOp>(loc, *valueIt++), alloc,
-            llvm::ArrayRef(indices));
-        return;
-      }
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
 
-      // Otherwise, iterate over the current dimension and add the indices to
-      // the list.
-      for (uint64_t i = 0, e = valueShape[dimension]; i != e; ++i) {
-        indices.push_back(constantIndices[i]);
-        storeElements(dimension + 1);
-        indices.pop_back();
-      }
-    };
+    auto loadedLhs =
+        rewriter.create<affine::AffineLoadOp>(loc, binaryAdaptor.getLhs(), ivY);
+    Value IntegerLhs =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), loadedLhs);
 
-    // Start the element storing recursion from the first dimension.
-    storeElements(/*dimension=*/0);
+    auto loadedRhs =
+        rewriter.create<affine::AffineLoadOp>(loc, binaryAdaptor.getRhs(), ivY);
+    Value IntegerRhs =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), loadedRhs);
 
-    // Replace this operation with the generated alloc.
+    auto LoweredOp =
+        rewriter.create<arith::ShRSIOp>(loc, IntegerLhs, IntegerRhs);
+    // Convert the shifted i64 value back to f64 before storing it.
+    Value FloatOp =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), LoweredOp);
+
+    rewriter.create<AffineStoreOp>(loc, FloatOp, alloc, ValueRange{ivY});
+
+    rewriter.setInsertionPointAfter(forOpY);
+    DEBUG_PRINT_NO_ARGS();
+
     rewriter.replaceOp(op, alloc);
+
     return success();
   }
 };
 
 //===----------------------------------------------------------------------===//
-// ToyToAffine RewritePatterns: Func operations
+// ToyToAffine AdditionalPatterns: Matmul operations
 //===----------------------------------------------------------------------===//
 
-struct FuncOpLowering : public OpConversionPattern<dsp::FuncOp> {
-  using OpConversionPattern<dsp::FuncOp>::OpConversionPattern;
+/// Lowers dsp::MatmulOp to a three-deep affine loop nest that accumulates
+/// lhs[y, k] * rhs[k, x] into a newly allocated result buffer.
+struct MatmulOpLowering : public ConversionPattern {
+  MatmulOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MatmulOp::getOperationName(), 1, ctx) {}
 
   LogicalResult
-  matchAndRewrite(dsp::FuncOp op, OpAdaptor adaptor,
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
-    // We only lower the main function as we expect that all other functions
-    // have been inlined.
-    if (op.getName() != "main")
-      return failure();
 
-    // Verify that the given main has no inputs and results.
-    if (op.getNumArguments() || op.getFunctionType().getNumResults()) {
-      return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) {
-        diag << "expected 'main' to have 0 inputs and 0 results";
-      });
-    }
+    // Generated loop nest:
+    //   affine.for y in [0, rows)
+    //     affine.for x in [0, cols)
+    //       result[y, x] = 0
+    //       affine.for k in [0, inner)
+    //         result[y, x] += lhs[y, k] * rhs[k, x]
+
+    // Location of the op being lowered; reused for every generated op.
+    auto loc = op->getLoc();
+
+    // Ranked tensor type of the result; it fixes the output buffer shape.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Named accessors (getLhs/getRhs) over the converted operands.
+    dsp::MatmulOp::Adaptor binaryAdaptor(operands);
+
+    // The type is dereferenced unconditionally below, so use cast<> (asserts)
+    // instead of dyn_cast<> (which would silently produce a null type).
+    auto lhsType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // Loop bounds for result[y, x] = sum_k lhs[y, k] * rhs[k, x]:
+    //   y (rows)    : 0 <= y < lhs.shape[0]
+    //   x (columns) : 0 <= x < result.shape[1]
+    //   k (reduce)  : 0 <= k < lhs.shape[1]
+    // The column bound must not reuse lhs.shape[1]: that is the reduction
+    // extent, and it only equals the number of result columns when the
+    // matrices happen to be square.
+    int64_t lb = 0;
+    int64_t ub_rows = lhsType.getShape()[0];
+    int64_t ub_cols = tensorType.getShape()[1];
+    int64_t ub_reduce = lhsType.getShape()[1];
+    int64_t step = 1;
+
+    // f64 zero used to initialise each result element.
+    Value constantZero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // NOTE: matrix [y, x] --> y means row, x means column
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub_rows, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub_cols, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Zero the accumulator cell before the reduction loop runs.
+    rewriter.create<AffineStoreOp>(loc, constantZero, alloc_output,
+                                   ValueRange{ivY, ivX});
+
+    // Reduction loop over the shared inner dimension.
+    affine::AffineForOp forOpIndex =
+        rewriter.create<AffineForOp>(loc, lb, ub_reduce, step);
+    auto ivIndex = forOpIndex.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpIndex.getBody());
+
+    // Multiply lhs[y, k] by rhs[k, x].
+    auto loadedLhs = rewriter.create<affine::AffineLoadOp>(
+        loc, binaryAdaptor.getLhs(), ValueRange{ivY, ivIndex});
+
+    auto loadedRhs = rewriter.create<affine::AffineLoadOp>(
+        loc, binaryAdaptor.getRhs(), ValueRange{ivIndex, ivX});
+
+    Value mulLhsRhs = rewriter.create<arith::MulFOp>(loc, loadedLhs, loadedRhs);
+
+    // result[y, x] += lhs[y, k] * rhs[k, x]
+    auto loadedResult = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_output, ValueRange{ivY, ivX});
+
+    Value addResultAndMul =
+        rewriter.create<arith::AddFOp>(loc, loadedResult, mulLhsRhs);
+
+    rewriter.create<AffineStoreOp>(loc, addResultAndMul, alloc_output,
+                                   ValueRange{ivY, ivX});
+
+    // Continue emitting after the outermost loop.
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // NOTE(review): debug macro defined outside this block; kept as-is.
+    DEBUG_PRINT_NO_ARGS();
+
+    // The op's result is replaced by the buffer holding the product.
+    rewriter.replaceOp(op, alloc_output);
 
-    // Create a new non-dsp function, with the same region.
-    auto func = rewriter.create<mlir::func::FuncOp>(op.getLoc(), op.getName(),
-                                                    op.getFunctionType());
-    rewriter.inlineRegionBefore(op.getRegion(), func.getBody(), func.end());
-    rewriter.eraseOp(op);
     return success();
   }
 };
 
 //===----------------------------------------------------------------------===//
-// ToyToAffine RewritePatterns: Print operations
+// ToyToAffine AdditionalPatterns: Find peaks operations
 //===----------------------------------------------------------------------===//
 
-struct PrintOpLowering : public OpConversionPattern<dsp::PrintOp> {
-  using OpConversionPattern<dsp::PrintOp>::OpConversionPattern;
+/// Lowers dsp::FindPeaksOp: records the indices of local maxima that satisfy
+/// the height and distance operands, and the peak count in the last slot.
+struct FindPeaksOpLowering : public ConversionPattern {
+  FindPeaksOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FindPeaksOp::getOperationName(), 1, ctx) {}
 
   LogicalResult
-  matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor,
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
-    // We don't lower "dsp.print" in this pass, but we need to update its
-    // operands.
-    rewriter.modifyOpInPlace(op,
-                               [&] { op->setOperands(adaptor.getOperands()); });
+
+    // Location of the op being lowered; reused for every generated op.
+    auto loc = op->getLoc();
+
+    // Ranked tensor type of the op's first result.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+    // Rank-0 index buffer counting the peaks recorded so far.
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    typename dsp::FindPeaksOp::Adaptor findPeaksOpAdaptor(operands);
+
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value constant_index_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(0));
+    Value constant_index_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(1));
+
+    rewriter.create<AffineStoreOp>(loc, constant_index_zero, alloc_peaks_count,
+                                   ValueRange{});
+    // A rank-0 height operand is loaded with no index, otherwise element 0.
+    auto heightArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int heightArgShape = heightArgType.getShape().size();
+
+    ValueRange heightValueRange;
+
+    if (heightArgShape == 0)
+      heightValueRange = ValueRange{};
+    else
+      heightValueRange = ValueRange{constant_index_zero};
+    // The same rule is applied to the distance operand.
+    auto distanceArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(2).getType());
+
+    int distanceArgShape = distanceArgType.getShape().size();
+
+    ValueRange distanceValueRange;
+
+    if (distanceArgShape == 0)
+      distanceValueRange = ValueRange{};
+    else
+      distanceValueRange = ValueRange{constant_index_zero};
+
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t lb = 1;
+    int64_t ub = signalType.getShape()[0] - 1;
+    int64_t step = 1;
+
+    // Load the distance operand (f64) and convert it to an index value.
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getDistance(), distanceValueRange);
+    // f64 to index
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getHeight(), heightValueRange);
+    // Pre-fill every output slot with -1.
+    affine::AffineForOp forOpInit =
+        rewriter.create<AffineForOp>(loc, 0, tensorType.getShape()[0], step);
+    auto init_iter = forOpInit.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpInit.getBody());
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{init_iter});
+
+    rewriter.setInsertionPointAfter(forOpInit);
+    // Visit interior samples only, so index-1 and index+1 stay in range.
+    affine::AffineForOp forOpSignal =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto current_index = forOpSignal.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpSignal.getBody());
+
+    // Loads emitted below, with i = %current_index:
+    //   %signal_prev    = affine.load %signal[i - 1]
+    //   %signal_current = affine.load %signal[i]
+    //   %signal_next    = affine.load %signal[i + 1]
+    // The i - 1 / i + 1 subscripts are expressed as affine maps over the
+    // induction variable.
+
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForNext =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext);
+
+    auto signal_prev =
+        rewriter.create<AffineLoadOp>(loc, findPeaksOpAdaptor.getSignal(),
+                                      addMapForPrev, ValueRange{current_index});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getSignal(), ValueRange{current_index});
+    auto signal_next =
+        rewriter.create<AffineLoadOp>(loc, findPeaksOpAdaptor.getSignal(),
+                                      addMapForNext, ValueRange{current_index});
+
+    //%cmp_current_prev = arith.cmpf ogt, %signal_current, %signal_prev : f64
+    //%cmp_current_next = arith.cmpf ogt, %signal_current, %signal_next : f64
+    //%cmp_current_height = arith.cmpf oge, %signal_current, %height : f64
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    //%and_two_cmps = arith.andi %cmp_current_prev, %cmp_current_next : i1
+    //%and_three_cmps = arith.andi %and_two_cmps, %cmp_current_height : i1
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    // scf.if %and_three_cmps {
+    auto firstIfOp =
+        rewriter.create<scf::IfOp>(loc, and_three_cmps, false /* no else */);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    //%peaks_count = affine.load %alloc_peaks_count[] : memref<index>
+    //%cmp_new_peak = arith.cmpi eq, %peaks_count, %cst_zero_index : index
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero);
+
+    // scf.if %cmp_new_peak {
+    //     // No peak recorded yet: accept this one unconditionally.
+    //     memref.store %current_index_f64, %alloc_output[%peaks_count]
+    //     %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index
+    //     affine.store %peaks_count_inc, %alloc_peaks_count[] : memref<index>
+    // }
+    auto secondIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_new_peak, true /* has else */);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    /*
+    else {
+        %last_peaks_count = arith.subi %peaks_count, %cst_one_index : index
+        %last_peak_index  = memref.load %alloc_output[%last_peaks_count]
+                            (stored as f64, converted back to index)
+        %gap = arith.subi %current_index, %last_peak_index : index
+        %cmp_sub_distance = arith.cmpi sge, %gap, %distance : index
+    */
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+    // NOTE(HWISOO): an AffineLoadOp with addMapForPrev over %peaks_count was
+    // tried here but is rejected with "error: 'affine.load' op index must be
+    // a valid dimension or symbol identifier", so the index is computed with
+    // arith.subi and read with memref.load instead.
+    Value last_peaks_count =
+        rewriter.create<arith::SubIOp>(loc, peaks_count, constant_index_one);
+    auto last_peak_index_fp = rewriter.create<memref::LoadOp>(
+        loc, alloc_output, ValueRange{last_peaks_count});
+    // f64 to index
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    /*
+        scf.if %cmp_sub_distance {
+            memref.store %current_index_f64, %alloc_output[%peaks_count]
+            %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index
+            affine.store %peaks_count_inc, %alloc_peaks_count[] : memref<index>
+        }
+    }
+    */
+    auto thirdIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_sub_distance, true /* has else */);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOpSignal);
+
+    /* Setting last element of the output as the count of peaks.
+    Note that last-last ([-2]) should be always -1. */
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index to f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+
+    Value result_size = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(),
+        rewriter.getIndexAttr(tensorType.getShape()[0]));
+    Value result_size_minusOne =
+        rewriter.create<arith::SubIOp>(loc, result_size, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_final_to_f64, alloc_output,
+                                   ValueRange{result_size_minusOne});
+
+    rewriter.replaceOp(op, alloc_output);
+
     return success();
   }
 };
 
-//===----------------------------------------------------------------------===//
-// ToyToAffine RewritePatterns: Return operations
-//===----------------------------------------------------------------------===//
+/// Lowers dsp::MaxOp to an affine loop that keeps a running maximum of the
+/// input elements in the result buffer (an empty input therefore yields -inf).
+struct MaxOpLowering : public ConversionPattern {
+  MaxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MaxOp::getOperationName(), 1, ctx) {}
 
-struct ReturnOpLowering : public OpRewritePattern<dsp::ReturnOp> {
-  using OpRewritePattern<dsp::ReturnOp>::OpRewritePattern;
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
 
-  LogicalResult matchAndRewrite(dsp::ReturnOp op,
-                                PatternRewriter &rewriter) const final {
-    // During this lowering, we expect that all function calls have been
-    // inlined.
-    if (op.hasOperand())
-      return failure();
+    auto loc = op->getLoc();
+
+    // allocation & deallocation for the result of this operation
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    dsp::MaxOp::Adaptor maxOpAdaptor(operands);
+
+    // Seed the running maximum with -inf so that an input whose elements are
+    // all negative still yields its true maximum. Seeding with 0.0 would
+    // report 0 for such an input, because no element compares greater.
+    const llvm::APFloat negInf =
+        llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(), /*Negative=*/true);
+    Value initialMax = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getFloatAttr(rewriter.getF64Type(), negInf));
+    rewriter.create<AffineStoreOp>(loc, initialMax, alloc_output,
+                                   ValueRange{});
+
+    // cast<> asserts on a non-ranked operand instead of yielding a null type.
+    auto inputType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // loop for 0 <= i < N
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    auto loadedInput = rewriter.create<affine::AffineLoadOp>(
+        loc, maxOpAdaptor.getInput(), ValueRange{idx});
+    auto loadedOutput =
+        rewriter.create<affine::AffineLoadOp>(loc, alloc_output, ValueRange{});
+    auto compare_input_output = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, loadedInput, loadedOutput);
+
+    // Overwrite the running maximum only when the element is larger.
+    auto ifOp = rewriter.create<scf::IfOp>(loc, compare_input_output, false);
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    rewriter.create<AffineStoreOp>(loc, loadedInput, alloc_output,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.replaceOp(op, alloc_output);
 
-    // We lower "dsp.return" directly to "func.return".
-    rewriter.replaceOpWithNewOp<func::ReturnOp>(op);
     return success();
   }
 };
 
-//===----------------------------------------------------------------------===//
-// ToyToAffine RewritePatterns: Transpose operations
-//===----------------------------------------------------------------------===//
+/// Lowers dsp::MeanOp: sums input[0 .. length) in an scf.for loop and divides
+/// the sum by the length operand; the result buffer is accessed with no index.
+struct MeanOpLowering : public ConversionPattern {
+  MeanOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MeanOp::getOperationName(), 1, ctx) {}
 
-struct TransposeOpLowering : public ConversionPattern {
-  TransposeOpLowering(MLIRContext *ctx)
-      : ConversionPattern(dsp::TransposeOp::getOperationName(), 1, ctx) {}
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Location of the op being lowered; reused for every generated op.
+    auto loc = op->getLoc();
+
+    // Ranked tensor type of the op's first result.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::MeanOp::Adaptor meanOpAdaptor(operands);
+
+    Value constantZero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    // Start the running sum at 0.
+    rewriter.create<AffineStoreOp>(loc, constantZero, alloc_output,
+                                   ValueRange{});
+    // A rank-0 length operand is loaded with no index, otherwise element 0.
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    int lengthArgShape = lengthArgType.getShape().size();
+
+    ValueRange lengthValueRange;
+
+    if (lengthArgShape == 0)
+      lengthValueRange = ValueRange{};
+    else
+      lengthValueRange = ValueRange{cst_idx_zero};
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, meanOpAdaptor.getLength(), lengthValueRange);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+
+    // loop for 0 <= i < length
+    // Note: we need to use scf.for and memref::LoadOp/StoreOp (can we use
+    // dynamic ub for affine.for?)
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, length_index, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    auto loadedInput = rewriter.create<memref::LoadOp>(
+        loc, meanOpAdaptor.getInput(), ValueRange{idx});
+    auto loadedOutput =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+    auto added_output =
+        rewriter.create<arith::AddFOp>(loc, loadedInput, loadedOutput);
+    rewriter.create<memref::StoreOp>(loc, added_output, alloc_output,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp);
+    // Divide the accumulated sum by the (floating-point) length operand.
+    auto loadedOutput2 =
+        rewriter.create<affine::AffineLoadOp>(loc, alloc_output, ValueRange{});
+    auto divided_output =
+        rewriter.create<arith::DivFOp>(loc, loadedOutput2, loadedLength);
+    rewriter.create<AffineStoreOp>(loc, divided_output, alloc_output,
+                                   ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+/// Lowers dsp::DiffOp to an `scf.for` loop that stores
+/// `input[i + 1] - input[i]` into the result buffer for `0 <= i < length - 1`.
+///
+/// The length operand is read at run time as a floating-point value, converted
+/// to an `index`, and used as the dynamic upper bound of the loop.
+struct DiffOpLowering : public ConversionPattern {
+  DiffOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DiffOp::getOperationName(), 1, ctx) {}
 
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
+
+    // Get the location of DiffOp
     auto loc = op->getLoc();
-    lowerOpToLoops(op, operands, rewriter,
-                   [loc](OpBuilder &builder, ValueRange memRefOperands,
-                         ValueRange loopIvs) {
-                     // Generate an adaptor for the remapped operands of the
-                     // TransposeOp. This allows for using the nice named
-                     // accessors that are generated by the ODS.
-                     dsp::TransposeOpAdaptor transposeAdaptor(memRefOperands);
-                     Value input = transposeAdaptor.getInput();
 
-                     // Transpose the elements by generating a load from the
-                     // reverse indices.
-                     SmallVector<Value, 2> reverseIvs(llvm::reverse(loopIvs));
-                     return builder.create<affine::AffineLoadOp>(loc, input,
-                                                                 reverseIvs);
-                   });
+    // Validate the length operand type before any IR is created, so a failed
+    // match leaves the IR untouched instead of dereferencing a null type.
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!lengthArgType)
+      return rewriter.notifyMatchFailure(op,
+                                         "expected a ranked length operand");
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::DiffOp::Adaptor diffOpAdaptor(operands);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value cst_idx_one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Subscripts used to read the length: none for a rank-0 operand, element 0
+    // otherwise. They are kept in an owning SmallVector because ValueRange is
+    // a non-owning view; assigning `ValueRange{value}` to a variable leaves it
+    // pointing at a temporary list that is destroyed at the end of that
+    // statement.
+    SmallVector<Value, 1> lengthIndices;
+    if (lengthArgType.getRank() != 0)
+      lengthIndices.push_back(cst_idx_zero);
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, diffOpAdaptor.getLength(), lengthIndices);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+    Value length_index_minus =
+        rewriter.create<arith::SubIOp>(loc, length_index, cst_idx_one);
+
+    // loop for 0 <= i < N-1
+    // Note: scf.for with memref.load/memref.store is used here because the
+    // upper bound is a run-time value (open question from the original author:
+    // can affine.for take a dynamic upper bound here?).
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, length_index_minus, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    // idx_next = idx + 1
+    Value constant_index_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(1));
+    Value idx_next =
+        rewriter.create<arith::AddIOp>(loc, idx, constant_index_one);
+
+    auto input_current = rewriter.create<memref::LoadOp>(
+        loc, diffOpAdaptor.getInput(), ValueRange{idx});
+    auto input_next = rewriter.create<memref::LoadOp>(
+        loc, diffOpAdaptor.getInput(), ValueRange{idx_next});
+
+    // out[idx] = input[idx + 1] - input[idx]
+    auto diff_input =
+        rewriter.create<arith::SubFOp>(loc, input_next, input_current);
+    rewriter.create<memref::StoreOp>(loc, diff_input, alloc_output,
+                                     ValueRange{idx});
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc_output);
+
     return success();
   }
 };
 
-} // namespace
+/// Lowers dsp::GetSingleElemAtIdxOp: reads the floating-point index operand,
+/// converts it to an `index`, loads `input[index]`, and stores that element
+/// into a newly allocated rank-0 f64 memref that replaces the op.
+struct GetSingleElemAtIdxOpLowering : public ConversionPattern {
+  GetSingleElemAtIdxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GetSingleElemAtIdxOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Validate the index operand type before any IR is created, so a failed
+    // match leaves the IR untouched instead of dereferencing a null type.
+    auto indxArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!indxArgType)
+      return rewriter.notifyMatchFailure(op,
+                                         "expected a ranked index operand");
+
+    // The result buffer is always a rank-0 f64 memref; it is not derived from
+    // the op's declared result type.
+    auto memRefType = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::GetSingleElemAtIdxOp::Adaptor getSingleElemAtIdxAdaptor(
+        operands);
+
+    // Subscripts used to read the index operand: none for a rank-0 operand,
+    // element 0 otherwise. They are kept in an owning SmallVector because
+    // ValueRange is a non-owning view; a ValueRange assigned from a braced
+    // list (or from a Value local to an inner scope) dangles once that
+    // temporary or scope ends.
+    SmallVector<Value, 1> indexSubscripts;
+    if (indxArgType.getRank() != 0) {
+      Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      indexSubscripts.push_back(cst_idx_zero);
+    }
+
+    Value loadedIndx = rewriter.create<AffineLoadOp>(
+        loc, getSingleElemAtIdxAdaptor.getIndx(), indexSubscripts);
+
+    // f64 to index
+    Value indx_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedIndx);
+    Value indx_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), indx_ui);
+
+    // Fetch input[index] and write it into the scalar result buffer.
+    Value loadedElement = rewriter.create<AffineLoadOp>(
+        loc, getSingleElemAtIdxAdaptor.getInput(), ValueRange{indx_index});
+
+    rewriter.create<AffineStoreOp>(loc, loadedElement, alloc, ValueRange{});
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+/// Lowers dsp::Diff2MeanOptimizedOp without a loop: the result is
+/// `(input[length] - input[0]) / length`, where `length` is the run-time value
+/// of the length operand.
+struct Diff2MeanOptimizedOpLowering : public ConversionPattern {
+  Diff2MeanOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Diff2MeanOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    // Get the location of Diff2MeanOptimizedOp
+    auto loc = op->getLoc();
+
+    // Validate the length operand type before any IR is created, so a failed
+    // match leaves the IR untouched instead of dereferencing a null type.
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!lengthArgType)
+      return rewriter.notifyMatchFailure(op,
+                                         "expected a ranked length operand");
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::Diff2MeanOptimizedOp::Adaptor diff2MeanOptimizedOpAdaptor(
+        operands);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Subscripts used to read the length: none for a rank-0 operand, element 0
+    // otherwise. They are kept in an owning SmallVector because ValueRange is
+    // a non-owning view; assigning `ValueRange{value}` to a variable leaves it
+    // pointing at a temporary list that is destroyed at the end of that
+    // statement.
+    SmallVector<Value, 1> lengthIndices;
+    if (lengthArgType.getRank() != 0)
+      lengthIndices.push_back(cst_idx_zero);
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getLength(), lengthIndices);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+
+    // Only the two end points of the input are needed.
+    // NOTE(review): the last element is read at index `length`, so `length`
+    // must be a valid subscript of the input - confirm against the caller.
+    auto input_first = rewriter.create<memref::LoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{cst_idx_zero});
+    auto input_last = rewriter.create<memref::LoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{length_index});
+
+    auto diff_input =
+        rewriter.create<arith::SubFOp>(loc, input_last, input_first);
+
+    auto div_input =
+        rewriter.create<arith::DivFOp>(loc, diff_input, loadedLength);
+
+    rewriter.create<memref::StoreOp>(loc, div_input, alloc_output,
+                                     ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+/// Lowers dsp::FindPeaks2Diff2MeanOptimizedOp to a single pass over the
+/// signal.
+///
+/// Generated code, in pseudo-code:
+///
+///   first = -1; last = -1; count = 0
+///   for i in [1, N - 1):
+///     if signal[i] > signal[i-1] && signal[i] > signal[i+1] &&
+///        signal[i] >= height:
+///       if count == 0:
+///         first = i; last = i; count = count + 1
+///       else if i - last >= distance:
+///         last = i; count = count + 1
+///   result = (last - first) / (count - 1)
+///
+/// `first` lives in the result buffer, `last` and `count` in scratch buffers.
+/// Peak indices are stored as f64; the peak list itself is never stored.
+struct FindPeaks2Diff2MeanOptimizedOpLowering : public ConversionPattern {
+  FindPeaks2Diff2MeanOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::FindPeaks2Diff2MeanOptimizedOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    // Get the location of FindPeaks2Diff2MeanOptimizedOp
+    auto loc = op->getLoc();
+
+    // Validate operand types before any IR is created, so a failed match
+    // leaves the IR untouched instead of dereferencing a null type.
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    auto heightArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    auto distanceArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(2).getType());
+    if (!signalType || !heightArgType || !distanceArgType)
+      return rewriter.notifyMatchFailure(op,
+                                         "expected ranked tensor operands");
+    if (signalType.getRank() == 0)
+      return rewriter.notifyMatchFailure(op, "expected a non-scalar signal");
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    // alloc_output holds the first peak index during the loop and the final
+    // result afterwards; alloc_output_last holds the last accepted peak index.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto alloc_output_last = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Scalar counter of accepted peaks.
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    typename dsp::FindPeaks2Diff2MeanOptimizedOp::Adaptor
+        findPeaks2Diff2MeanOptOpAdaptor(operands);
+
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value constant_index_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(0));
+    Value constant_index_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(1));
+
+    // count = 0
+    rewriter.create<AffineStoreOp>(loc, constant_index_zero, alloc_peaks_count,
+                                   ValueRange{});
+
+    // Subscripts used to read the height and distance operands: none for a
+    // rank-0 operand, element 0 otherwise. They are kept in owning
+    // SmallVectors because ValueRange is a non-owning view; assigning
+    // `ValueRange{value}` to a variable leaves it pointing at a temporary list
+    // that is destroyed at the end of that statement.
+    SmallVector<Value, 1> heightIndices;
+    if (heightArgType.getRank() != 0)
+      heightIndices.push_back(constant_index_zero);
+
+    SmallVector<Value, 1> distanceIndices;
+    if (distanceArgType.getRank() != 0)
+      distanceIndices.push_back(constant_index_zero);
+
+    // Loop over interior samples only, so that i-1 and i+1 stay in bounds.
+    int64_t lb = 1;
+    int64_t ub = signalType.getShape()[0] - 1;
+    int64_t step = 1;
+
+    // Load the minimum peak distance and convert it from f64 to index.
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaks2Diff2MeanOptOpAdaptor.getDistance(), distanceIndices);
+    // f64 to index
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaks2Diff2MeanOptOpAdaptor.getHeight(), heightIndices);
+
+    // first = -1; last = -1
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{});
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output_last,
+                                   ValueRange{});
+
+    affine::AffineForOp forOpSignal =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto current_index = forOpSignal.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpSignal.getBody());
+
+    // Affine maps (d0) -> (d0 - 1) and (d0) -> (d0 + 1) let the neighbouring
+    // samples be read with affine.load directly from the induction variable.
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForNext =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext);
+
+    auto signal_prev = rewriter.create<AffineLoadOp>(
+        loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(), addMapForPrev,
+        ValueRange{current_index});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(),
+        ValueRange{current_index});
+    auto signal_next = rewriter.create<AffineLoadOp>(
+        loc, findPeaks2Diff2MeanOptOpAdaptor.getSignal(), addMapForNext,
+        ValueRange{current_index});
+
+    // %cmp_current_prev   = arith.cmpf ogt, %signal_current, %signal_prev
+    // %cmp_current_next   = arith.cmpf ogt, %signal_current, %signal_next
+    // %cmp_current_height = arith.cmpf oge, %signal_current, %height
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    // A sample is a peak candidate only if all three comparisons hold.
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    // scf.if %and_three_cmps { ... }   (no else region)
+    auto firstIfOp = rewriter.create<scf::IfOp>(loc, and_three_cmps,
+                                                /*withElseRegion=*/false);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    // %peaks_count  = affine.load %alloc_peaks_count[]
+    // %cmp_new_peak = arith.cmpi eq, %peaks_count, %c0
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero);
+
+    // scf.if %cmp_new_peak {
+    //   first = i; last = i; count = count + 1
+    // } else { ... }
+    auto secondIfOp = rewriter.create<scf::IfOp>(loc, cmp_new_peak,
+                                                 /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{});
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64,
+                                     alloc_output_last, ValueRange{});
+
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    // else branch: a peak has already been recorded, so accept the current
+    // sample only if it is at least `distance` samples after the last peak.
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+    // memref.load is used here instead of affine.load: an earlier attempt to
+    // use affine.load with a computed subscript was rejected with
+    // "'affine.load' op index must be a valid dimension or symbol identifier".
+    auto last_peak_index_fp =
+        rewriter.create<memref::LoadOp>(loc, alloc_output_last, ValueRange{});
+    // f64 to index
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    // scf.if %cmp_sub_distance {
+    //   last = i; count = count + 1
+    // }
+    // The else region is requested but left empty.
+    auto thirdIfOp = rewriter.create<scf::IfOp>(loc, cmp_sub_distance,
+                                                /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2,
+                                     alloc_output_last, ValueRange{});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    // Back outside the loop: result = (last - first) / (count - 1).
+    rewriter.setInsertionPointAfter(forOpSignal);
+
+    auto final_loaded_peak_first =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+
+    auto final_loaded_peak_last =
+        rewriter.create<memref::LoadOp>(loc, alloc_output_last, ValueRange{});
+    Value difference = rewriter.create<arith::SubFOp>(
+        loc, final_loaded_peak_last, final_loaded_peak_first);
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index to f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+    // count - 1, computed as count + (-1.0)
+    Value peaks_count_minus = rewriter.create<arith::AddFOp>(
+        loc, peaks_count_final_to_f64, constant_minus_one);
+
+    Value final_output =
+        rewriter.create<arith::DivFOp>(loc, difference, peaks_count_minus);
+
+    // Overwrite the "first peak" slot with the final result.
+    rewriter.create<AffineStoreOp>(loc, final_output, alloc_output,
+                                   ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+struct LMS2FindPeaksOptimizedOpLowering : public ConversionPattern {
+  LMS2FindPeaksOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMS2FindPeaksOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //  for (int n = 0; n < NUM_SAMPLES; n++) {
+    //      // Calculate the filter output y[n]
+    //      y[n] = 0;
+    //      for (int i = 0; i < FILTER_LENGTH; i++) {
+    //          if (n - i >= 0) { // affine if
+    //              y[n] = y[n] + (w[i] * x[n - i]);
+    //          }
+    //      }
+
+    //     // Calculate the error e[n]
+    //     e[n] = d[n] - y[n];
+
+    //     // Update the filter weights w[i]
+    //     for (int i = 0; i < FILTER_LENGTH; i++) {
+    //         if (n - i >= 0) {
+    //             w[i] +=  MU * e[n] * x[n - i];
+    //         }
+    //     }
+    // }
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto lhsType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+
+    ArrayRef<int64_t> lhsShape = lhsType.getShape();
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = MemRefType::get(lhsShape, rewriter.getF64Type());
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto memRefTypeOutput = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefTypeOutput, loc, rewriter);
+
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(lhsType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(lhsType.getRank(), /*Value=*/1);
+
+    typename dsp::LMS2FindPeaksOptimizedOp::Adaptor lfr2fpAdaptor(operands);
+
+    // Value alpha = rewriter.create<arith::ConstantOp>(loc,
+    // rewriter.getF64Type(),
+    //                                                      rewriter.getF64FloatAttr(1));
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value mu = rewriter.create<AffineLoadOp>(loc, lfr2fpAdaptor.getMu());
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value cst_idx_one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    // initialization for findPeaks
+    rewriter.create<AffineStoreOp>(loc, cst_idx_zero, alloc_peaks_count,
+                                   ValueRange{});
+
+    auto heightArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(4).getType());
+
+    int heightArgShape = heightArgType.getShape().size();
+
+    ValueRange heightValueRange;
+
+    if (heightArgShape == 0)
+      heightValueRange = ValueRange{};
+    else
+      heightValueRange = ValueRange{cst_idx_zero};
+
+    auto distanceArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(5).getType());
+
+    int distanceArgShape = distanceArgType.getShape().size();
+
+    ValueRange distanceValueRange;
+
+    if (distanceArgShape == 0)
+      distanceValueRange = ValueRange{};
+    else
+      distanceValueRange = ValueRange{cst_idx_zero};
+
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, lfr2fpAdaptor.getDistance(), distanceValueRange);
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, lfr2fpAdaptor.getHeight(), heightValueRange);
+
+    affine::AffineForOp forOpInit =
+        rewriter.create<AffineForOp>(loc, 0, tensorType.getShape()[0], 1);
+    auto init_iter = forOpInit.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpInit.getBody());
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{init_iter});
+
+    rewriter.setInsertionPointAfter(forOpInit);
+
+    // unrolled two iterations.
+    int64_t lb = 0;
+    int64_t step = 1;
+
+    Value GetFilterLOp = op->getOperand(3);
+    dsp::ConstantOp constantOp3rdArg =
+        GetFilterLOp.getDefiningOp<dsp::ConstantOp>();
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
+
+    auto elements1 = constant3rdValue.getValues<FloatAttr>();
+    float filterlenval = elements1[0].getValueAsDouble();
+    auto FilterLength = (uint64_t)filterlenval;
+
+    int64_t numSamples = lhsType.getShape()[0];
+
+    auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    // auto wAlloc = rewriter.create<memref::AllocOp>(loc, yMemRefType);
+    auto wAlloc = insertAllocAndDealloc(yMemRefType, loc, rewriter);
+
+    // For affine expression: #map1 = affine_map<(%arg0)[] : (%arg0 - 1)
+    AffineExpr d0, d1, s0;
+    bindDims(rewriter.getContext(), d0, d1);
+    // AffineExpr ExprForXSlice = rewriter.getAffineDimExpr(0) -
+    // rewriter.getAffineDimExpr(1); //d0 - d1;
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    {
+
+      // w[n] = 0;
+      // y[n] = 0;
+      // rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+      // Allocate and initialize array for y
+      // Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc,
+                                     ValueRange{cst_idx_zero});
+      rewriter.create<AffineStoreOp>(loc, zeroval, alloc,
+                                     ValueRange{cst_idx_zero});
+
+      affine::AffineForOp forOp2 =
+          rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+      auto iv2 = forOp2.getInductionVar();
+
+      rewriter.setInsertionPointToStart(forOp2.getBody());
+
+      auto ifOp = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{cst_idx_zero, iv2}, false /*no else*/);
+      rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+      Value inputX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{cst_idx_zero, iv2});
+      Value w = rewriter.create<AffineLoadOp>(loc, wAlloc,
+                                              ValueRange{iv2}); // memRefType
+
+      Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+      Value ybefore =
+          rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{cst_idx_zero});
+      Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+      rewriter.create<AffineStoreOp>(loc, sumNext, alloc,
+                                     ValueRange{cst_idx_zero});
+      rewriter.setInsertionPointAfter(ifOp);
+      rewriter.setInsertionPointAfter(forOp2);
+
+      //  get e[n] = d[n] - y[n]
+
+      Value desiredX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getRhs(), ValueRange{cst_idx_zero});
+      Value ynew =
+          rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{cst_idx_zero});
+
+      Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+      affine::AffineForOp forOp3 =
+          rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+      auto iv3 = forOp3.getInductionVar();
+
+      rewriter.setInsertionPointToStart(forOp3.getBody());
+
+      auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{cst_idx_zero, iv3}, false /*no else*/);
+      rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+      Value inputX2 = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{cst_idx_zero, iv3});
+
+      Value Prevw2 =
+          rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+      // f(u(n),e(n),μ)=μe(n)u∗(n)
+      Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+      Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+      // FInal w[n]
+      Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+      rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+
+      rewriter.setInsertionPointAfter(ifOp2);
+      rewriter.setInsertionPointAfter(forOp3);
+    }
+
+    {
+      // w[n] = 0;
+      // y[n] = 0;
+      // rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+      // Allocate and initialize array for y
+      // Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc,
+                                     ValueRange{cst_idx_one});
+      rewriter.create<AffineStoreOp>(loc, zeroval, alloc,
+                                     ValueRange{cst_idx_one});
+
+      affine::AffineForOp forOp2 =
+          rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+      auto iv2 = forOp2.getInductionVar();
+
+      rewriter.setInsertionPointToStart(forOp2.getBody());
+
+      auto ifOp = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{cst_idx_one, iv2}, false /*no else*/);
+      rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+      Value inputX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{cst_idx_one, iv2});
+      Value w = rewriter.create<AffineLoadOp>(loc, wAlloc,
+                                              ValueRange{iv2}); // memRefType
+
+      Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+      Value ybefore =
+          rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{cst_idx_one});
+      Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+      rewriter.create<AffineStoreOp>(loc, sumNext, alloc,
+                                     ValueRange{cst_idx_one});
+      rewriter.setInsertionPointAfter(ifOp);
+      rewriter.setInsertionPointAfter(forOp2);
+
+      //  get e[n] = d[n] - y[n]
+
+      Value desiredX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getRhs(), ValueRange{cst_idx_one});
+      Value ynew =
+          rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{cst_idx_one});
+
+      Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+      affine::AffineForOp forOp3 =
+          rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+      auto iv3 = forOp3.getInductionVar();
+
+      rewriter.setInsertionPointToStart(forOp3.getBody());
+
+      auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{cst_idx_one, iv3}, false /*no else*/);
+      rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+      Value inputX2 = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{cst_idx_one, iv3});
+
+      Value Prevw2 =
+          rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+      // f(u(n),e(n),μ)=μe(n)u∗(n)
+      Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+      Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+      // FInal w[n]
+      Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+      rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+
+      rewriter.setInsertionPointAfter(ifOp2);
+      rewriter.setInsertionPointAfter(forOp3);
+    }
+
+    // Outer for loop -- iterate from 2 to last
+    int64_t lb_outer = 2;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb_outer, numSamples, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    // w[n] = 0;
+    // y[n] = 0;
+    // rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+    // Allocate and initialize array for y
+    // Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc,
+                                            ValueRange{iv2}); // memRefType
+
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+    Value ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    //  get e[n] = d[n] - y[n]
+
+    Value desiredX = rewriter.create<AffineLoadOp>(loc, lfr2fpAdaptor.getRhs(),
+                                                   ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 = rewriter.create<AffineLoadOp>(
+        loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter, ValueRange{iv, iv3});
+
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+    // f(u(n),e(n),μ)=μe(n)u∗(n)
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // FInal w[n]
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    // HERE WE SHOULD INSERT FIND_PEAKS FOR FUSING LOOP
+
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(2);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForCurrent =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForCurrent = AffineMap::get(1, 0, ExprForCurrent);
+
+    auto signal_prev = rewriter.create<AffineLoadOp>(loc, alloc, addMapForPrev,
+                                                     ValueRange{iv});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc, addMapForCurrent, ValueRange{iv});
+    auto signal_next =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    auto firstIfOp =
+        rewriter.create<scf::IfOp>(loc, and_three_cmps, false /* else=1 */);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, cst_idx_zero);
+
+    auto current_index = rewriter.create<arith::SubIOp>(loc, iv, cst_idx_one);
+
+    auto secondIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_new_peak, true /* else=1 */);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, cst_idx_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+
+    Value last_peaks_count =
+        rewriter.create<arith::SubIOp>(loc, peaks_count, cst_idx_one);
+    auto last_peak_index_fp = rewriter.create<memref::LoadOp>(
+        loc, alloc_output, ValueRange{last_peaks_count});
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    auto thirdIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_sub_distance, true /* else=1 */);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, cst_idx_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp1);
+    // debug
+    //  forOp1->dump();
+
+    /* Setting last element of the output as the count of peaks. */
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index to f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+
+    Value result_size = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(),
+        rewriter.getIndexAttr(tensorType.getShape()[0]));
+
+    rewriter.create<AffineStoreOp>(loc, peaks_count_final_to_f64, alloc_output,
+                                   addMapForCurrent, ValueRange{result_size});
+
+    // auto testValue = rewriter.create<affine::AffineLoadOp>(
+    // loc, alloc, ValueRange{cst_idx_zero});
+
+    // rewriter.create<AffineStoreOp>(loc, testValue, alloc_output,
+    // addMapForCurrent, ValueRange{result_size});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Unary operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers a one-operand DSP op by loading the input element addressed by the
+/// loop indices and applying `LoweredUnaryOp` to it.
+template <typename UnaryOp, typename LoweredUnaryOp>
+struct UnaryOpLowering : public ConversionPattern {
+  UnaryOpLowering(MLIRContext *ctx)
+      : ConversionPattern(UnaryOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Location opLoc = op->getLoc();
+
+    // Per-element body handed to lowerOpToLoops.
+    auto emitElement = [opLoc](OpBuilder &nested, ValueRange remapped,
+                               ValueRange ivs) {
+      // The ODS-generated adaptor gives named access to the remapped operand.
+      typename UnaryOp::Adaptor adaptor(remapped);
+
+      // Read the single input element for the current loop indices.
+      auto element = nested.create<affine::AffineLoadOp>(
+          opLoc, adaptor.getInput(), ivs);
+
+      // Apply the target unary op to that element.
+      return nested.create<LoweredUnaryOp>(opLoc, element);
+    };
+    lowerOpToLoops(op, operands, rewriter, emitElement);
+    return success();
+  }
+};
+// Element-wise patterns: each alias pairs a dsp op with its lowered op.
+using AddOpLowering = BinaryOpLowering<dsp::AddOp, arith::AddFOp>;
+using ModuloOpLowering = BinaryOpLowering<dsp::ModuloOp, arith::RemFOp>;
+using SubOpLowering = BinaryOpLowering<dsp::SubOp, arith::SubFOp>;
+using MulOpLowering = BinaryOpLowering<dsp::MulOp, arith::MulFOp>;
+using DivOpLowering = BinaryOpLowering<dsp::DivOp, arith::DivFOp>;
+using AbsOpLowering = UnaryOpLowering<dsp::AbsOp, math::AbsFOp>;
+using SinOpLowering = UnaryOpLowering<dsp::SinOp, math::SinOp>;
+using CosOpLowering = UnaryOpLowering<dsp::CosOp, math::CosOp>;
+using SqrtOpLowering = UnaryOpLowering<dsp::SqrtOp, math::SqrtOp>;
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Constant operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.constant to an explicit buffer.
+///
+/// The constant's memref is allocated via insertAllocAndDealloc and then filled
+/// with one affine.store per element, using index constants created up front.
+struct ConstantOpLowering : public OpRewritePattern<dsp::ConstantOp> {
+  using OpRewritePattern<dsp::ConstantOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(dsp::ConstantOp op,
+                                PatternRewriter &rewriter) const final {
+    // Source location and payload of the constant being lowered.
+    Location loc = op.getLoc();
+    DenseElementsAttr data = op.getValue();
+
+    // Allocate the buffer that will stand in for the constant tensor.
+    auto resultTensor = llvm::cast<RankedTensorType>(op.getType());
+    auto bufferType = convertTensorToMemRef(resultTensor);
+    auto buffer = insertAllocAndDealloc(bufferType, loc, rewriter);
+
+    // Build every index constant once, up to the largest extent, so the stores
+    // below can share them.
+    auto shape = bufferType.getShape();
+    SmallVector<Value, 8> indexPool;
+
+    if (shape.empty()) {
+      // Rank-0 tensor: a single index constant is created.
+      indexPool.push_back(rewriter.create<arith::ConstantIndexOp>(loc, 0));
+    } else {
+      // One index constant per position of the largest dimension.
+      int64_t largestDim = *std::max_element(shape.begin(), shape.end());
+      for (int64_t idx : llvm::seq<int64_t>(0, largestDim))
+        indexPool.push_back(rewriter.create<arith::ConstantIndexOp>(loc, idx));
+    }
+
+    // Walk the shape dimension by dimension; once every dimension has an
+    // index, store the next attribute value at that position.
+    SmallVector<Value, 2> position;
+    // Attribute values are consumed in iteration order as stores are emitted.
+    auto nextValue = data.value_begin<FloatAttr>();
+
+    std::function<void(uint64_t)> emitStores = [&](uint64_t dim) {
+      if (dim != shape.size()) {
+        // Still choosing indices: try each index of this dimension in order.
+        for (uint64_t i = 0, extent = shape[dim]; i != extent; ++i) {
+          position.push_back(indexPool[i]);
+          emitStores(dim + 1);
+          position.pop_back();
+        }
+        return;
+      }
+
+      // All dimensions fixed: emit the constant and its store.
+      Value scalar = rewriter.create<arith::ConstantOp>(loc, *nextValue++);
+      rewriter.create<affine::AffineStoreOp>(loc, scalar, buffer,
+                                             llvm::ArrayRef(position));
+    };
+
+    // Kick off the recursion at the outermost dimension.
+    emitStores(/*dim=*/0);
+
+    // The allocation now replaces the constant op.
+    rewriter.replaceOp(op, buffer);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Func operations
+//===----------------------------------------------------------------------===//
+
+/// Converts the dsp `main` function into a func.func that takes over its body.
+struct FuncOpLowering : public OpConversionPattern<dsp::FuncOp> {
+  using OpConversionPattern<dsp::FuncOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(dsp::FuncOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Only `main` is lowered; other functions are expected to be inlined.
+    if (op.getName() != "main")
+      return failure();
+
+    const bool takesArgs = op.getNumArguments() != 0;
+    const bool yieldsResults = op.getFunctionType().getNumResults() != 0;
+    if (takesArgs || yieldsResults)
+      return rewriter.notifyMatchFailure(op, [](Diagnostic &diag) {
+        diag << "expected 'main' to have 0 inputs and 0 results";
+      });
+
+    // Build the func.func, move the region over, then drop the dsp op.
+    auto fn = rewriter.create<mlir::func::FuncOp>(op.getLoc(), op.getName(),
+                                                  op.getFunctionType());
+    rewriter.inlineRegionBefore(op.getRegion(), fn.getBody(), fn.end());
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Print operations
+//===----------------------------------------------------------------------===//
+
+/// Rewires dsp.print to its converted operands; the op itself is kept.
+struct PrintOpLowering : public OpConversionPattern<dsp::PrintOp> {
+  using OpConversionPattern<dsp::PrintOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // "dsp.print" is not lowered in this pass; only its operands are updated.
+    ValueRange converted = adaptor.getOperands();
+    rewriter.modifyOpInPlace(op, [&] { op->setOperands(converted); });
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Return operations
+//===----------------------------------------------------------------------===//
+
+/// Replaces an operand-less dsp.return with func.return.
+struct ReturnOpLowering : public OpRewritePattern<dsp::ReturnOp> {
+  using OpRewritePattern<dsp::ReturnOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(dsp::ReturnOp op,
+                                PatternRewriter &rewriter) const final {
+    // Calls should be inlined by now; a value-carrying return is rejected.
+    const bool returnsValue = op.hasOperand();
+    if (returnsValue)
+      return failure();
+
+    rewriter.replaceOpWithNewOp<func::ReturnOp>(op);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Transpose operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.transpose by reading the input at the reversed loop indices.
+struct TransposeOpLowering : public ConversionPattern {
+  TransposeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::TransposeOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Location opLoc = op->getLoc();
+
+    // Per-element body handed to lowerOpToLoops.
+    auto loadTransposed = [opLoc](OpBuilder &nested, ValueRange remapped,
+                                  ValueRange ivs) {
+      // Named access to the remapped operand through the ODS adaptor.
+      dsp::TransposeOpAdaptor adaptor(remapped);
+      Value source = adaptor.getInput();
+
+      // Loading at the reversed indices performs the transpose.
+      SmallVector<Value, 2> swappedIvs(llvm::reverse(ivs));
+      return nested.create<affine::AffineLoadOp>(opLoc, source, swappedIvs);
+    };
+
+    lowerOpToLoops(op, operands, rewriter, loadTransposed);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Conv2D operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.conv2d to an affine loop nest over output and kernel positions
+/// that accumulates input * kernel products into a newly allocated buffer.
+/// Fails to match unless input, kernel and result are ranked 2-D tensors.
+struct Conv2DOpLowering : public ConversionPattern {
+  Conv2DOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Conv2DOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // dyn_cast returns null for anything that is not a ranked tensor, and the
+    // code below indexes dimensions 0 and 1 of every shape. Reject all other
+    // cases up front, before any IR has been created.
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    auto kernelType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!output || !inputType || !kernelType)
+      return rewriter.notifyMatchFailure(
+          op, "expected ranked tensor operands and result");
+    if (output.getRank() != 2 || inputType.getRank() != 2 ||
+        kernelType.getRank() != 2)
+      return rewriter.notifyMatchFailure(
+          op, "expected 2-D input, kernel and result");
+
+    // Buffer that receives the convolution result.
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    Conv2DOpAdaptor conv2dAdaptor(operands);
+    Value input = conv2dAdaptor.getInput();
+    Value kernel = conv2dAdaptor.getKernel();
+
+    // Extents as (height, width) for input (I), kernel (K) and output (O).
+    int64_t IH = inputType.getShape()[0], IW = inputType.getShape()[1];
+    int64_t KH = kernelType.getShape()[0], KW = kernelType.getShape()[1];
+    int64_t OH = output.getShape()[0], OW = output.getShape()[1];
+
+    // Affine dims d0..d3 stand for the loop indices i, j (output position) and
+    // p, q (kernel position).
+    AffineExpr d0, d1, d2, d3;
+    bindDims(rewriter.getContext(), d0, d1, d2, d3);
+
+    // input[i + p][j + q]
+    AffineMap inputMap = AffineMap::get(
+        4, 0, ArrayRef<AffineExpr>{d0 + d2, d1 + d3}, rewriter.getContext());
+    // kernel[p][q]
+    AffineMap kernelMap = AffineMap::get(4, 0, ArrayRef<AffineExpr>{d2, d3},
+                                         rewriter.getContext());
+
+    int64_t lb = 0, step = 1;
+    /* looping i*/
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, OH, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+
+    /* looping j*/
+    AffineForOp forOpJ = rewriter.create<AffineForOp>(loc, lb, OW, step);
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+    auto ivJ = forOpJ.getInductionVar();
+
+    // Zero output[i][j] before the kernel loops accumulate into it.
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineStoreOp>(loc, zeroVal, alloc, ValueRange{ivI, ivJ});
+
+    /* looping p*/
+    AffineForOp forOpP = rewriter.create<AffineForOp>(loc, lb, KH, step);
+    rewriter.setInsertionPointToStart(forOpP.getBody());
+    auto ivP = forOpP.getInductionVar();
+
+    /* looping q*/
+    AffineForOp forOpQ = rewriter.create<AffineForOp>(loc, lb, KW, step);
+    rewriter.setInsertionPointToStart(forOpQ.getBody());
+    auto ivQ = forOpQ.getInductionVar();
+
+    // Runtime check that (i + p, j + q) lies below the input extents.
+    Value inputRow = rewriter.create<AffineApplyOp>(
+        loc, inputMap.getSubMap(0), ValueRange{ivI, ivJ, ivP, ivQ});
+    Value inputCol = rewriter.create<AffineApplyOp>(
+        loc, inputMap.getSubMap(1), ValueRange{ivI, ivJ, ivP, ivQ});
+    Value rowUB = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::slt, inputRow,
+        rewriter.create<arith::ConstantIndexOp>(loc, IH));
+    Value colUB = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::slt, inputCol,
+        rewriter.create<arith::ConstantIndexOp>(loc, IW));
+    Value bound = rewriter.create<arith::AndIOp>(loc, rowUB, colUB);
+
+    // output[i][j] += input[i + p][j + q] * kernel[p][q] when in bounds.
+    rewriter.create<scf::IfOp>(
+        loc, bound, [&](OpBuilder &builder, Location loc) {
+          // load input
+          Value inputVal = builder.create<AffineLoadOp>(
+              loc, input, inputMap, ValueRange{ivI, ivJ, ivP, ivQ});
+          Value kernelVal = builder.create<AffineLoadOp>(
+              loc, kernel, kernelMap, ValueRange{ivI, ivJ, ivP, ivQ});
+          // mul
+          Value prod = builder.create<arith::MulFOp>(loc, inputVal, kernelVal);
+          Value outputVal =
+              builder.create<AffineLoadOp>(loc, alloc, ValueRange{ivI, ivJ});
+          Value sum = builder.create<arith::AddFOp>(loc, prod, outputVal);
+
+          // store the computed output
+          builder.create<AffineStoreOp>(loc, sum, alloc, ValueRange{ivI, ivJ});
+
+          builder.create<scf::YieldOp>(loc);
+        });
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+}; // conv2d
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: ThresholdUpOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.thresholdUp to a single affine loop over the result.
+///
+/// For every index i of the result's first dimension the loop stores
+///   y[i] = a[i] >= threshold ? (returnOriginal == 1.0 ? a[i] : 1.0) : 0.0
+/// where `threshold` and `returnOriginal` are each loaded, with an empty index
+/// list, from their own operand.
+///
+/// `returnOriginal` therefore selects between passing samples through
+/// unchanged (== 1.0) and replacing them with 1.0.
+struct ThresholdUpOpLowering : public ConversionPattern {
+  ThresholdUpOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ThresholdUpOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Location loc = op->getLoc();
+
+    // Remapped operands: the signal, the threshold and the return-original
+    // flag.
+    ThresholdUpOpAdaptor adaptor(operands);
+    auto signal = adaptor.getInput();
+    auto thresholdBuf = adaptor.getThreshold();
+    auto returnOriginalBuf = adaptor.getReturnoriginal();
+
+    // Result buffer, derived from the op's ranked result type.
+    auto resultType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto bufferType = convertTensorToMemRef(resultType);
+    auto resultBuf = insertAllocAndDealloc(bufferType, loc, rewriter);
+
+    // Literal outputs: 0.0 for elements below the threshold, 1.0 otherwise
+    // (unless the original sample is kept). 1.0 also serves as the flag value
+    // that `returnOriginal` is compared against.
+    Value zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // Loop bounds: every index of the result's first dimension, step 1.
+    const int64_t lower = 0;
+    const int64_t upper = resultType.getShape()[0];
+    const int64_t stride = 1;
+
+    // Everything from here up to the store is emitted inside the loop body.
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, lower, upper, stride);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    auto idx = loop.getInductionVar();
+
+    // a[i]
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, signal, ValueRange{idx});
+
+    // Threshold and flag, loaded inside the loop body with no indices.
+    auto threshold =
+        rewriter.create<AffineLoadOp>(loc, thresholdBuf, ValueRange{});
+    auto returnOriginal =
+        rewriter.create<AffineLoadOp>(loc, returnOriginalBuf, ValueRange{});
+
+    // a[i] >= threshold (ordered compare).
+    auto passes = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, sample, threshold);
+
+    // returnOriginal == 1.0 (ordered compare).
+    auto keepSample = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, one, returnOriginal);
+
+    // Value for a passing element: the sample itself or the literal 1.0.
+    auto passValue =
+        rewriter.create<arith::SelectOp>(loc, keepSample, sample, one);
+
+    // Elements that do not pass become 0.0.
+    auto outValue =
+        rewriter.create<arith::SelectOp>(loc, passes, passValue, zero);
+
+    // y[i] = selected value.
+    rewriter.create<AffineStoreOp>(loc, outValue, resultBuf, ValueRange{idx});
+
+    // Leave the loop body before replacing the op with the result buffer.
+    rewriter.setInsertionPointAfter(loop);
+
+    rewriter.replaceOp(op, resultBuf);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GenerateDTMFOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.generateDTMF to an scf.for loop that fills the result buffer with
+///   y[i] = 10 * (sin(2*pi*f1*i/fs) + sin(2*pi*f2*i/fs)),
+/// where (f1, f2) is the table entry selected by the digit (operand 0) and fs
+/// is the value of operand 2.
+///
+/// Operands 0 and 2 must be produced by dsp.constant ops and the digit must
+/// truncate to 0..9; otherwise the match fails before any IR is created.
+/// Operand 1 (duration) is not read here: the trip count is the first
+/// dimension of the result type.
+struct GenerateDTMFOpLowering : public ConversionPattern {
+  GenerateDTMFOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GenerateDTMFOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Frequency pair (f1, f2) for each digit, indexed by the digit itself.
+    static constexpr double kDigitTones[10][2] = {
+        {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477}, {770, 1209},
+        {770, 1336}, {770, 1477}, {852, 1209}, {852, 1336}, {852, 1477}};
+
+    // Reads element 0 of the dsp.constant that defines operand `idx`. Fails if
+    // the operand has another producer or the constant is empty.
+    auto readScalar = [&](unsigned idx) -> FailureOr<double> {
+      auto cst = op->getOperand(idx).getDefiningOp<dsp::ConstantOp>();
+      if (!cst || cst.getValue().empty())
+        return failure();
+      return cst.getValue().getValues<FloatAttr>()[0].getValueAsDouble();
+    };
+
+    FailureOr<double> digit = readScalar(0);
+    FailureOr<double> sampleRate = readScalar(2);
+    if (failed(digit) || failed(sampleRate))
+      return rewriter.notifyMatchFailure(
+          op, "expected digit and frequency operands to be dsp.constant ops");
+
+    // The range test runs before the cast so that NaN and out-of-range values
+    // never reach the table; the cast itself truncates toward zero.
+    if (!(*digit > -1.0 && *digit < 10.0))
+      return rewriter.notifyMatchFailure(op, "DTMF digit must be in 0..9");
+    const double *tones = kDigitTones[static_cast<int64_t>(*digit)];
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Loop-invariant f64 constants. fs stays in double precision.
+    auto const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    auto const10 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(10));
+    auto constFs = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*sampleRate));
+    auto constF1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(tones[0]));
+    auto constF2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(tones[1]));
+
+    // Bounds as separate statements: fixed creation order in the IR.
+    Value lower = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value upper = rewriter.create<arith::ConstantIndexOp>(
+        loc, tensorType.getShape()[0]);
+    Value unit = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lower, upper, unit);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+
+    // t = i / fs
+    auto indexToI64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto indexToFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), indexToI64);
+    auto time = rewriter.create<arith::DivFOp>(loc, indexToFloat, constFs);
+
+    // sin(2*pi*f1*t)
+    auto mulFreqTime1 = rewriter.create<arith::MulFOp>(loc, constF1, time);
+    auto mul2Pi1 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime1);
+    auto sine1 = rewriter.create<math::SinOp>(loc, mul2Pi1);
+
+    // sin(2*pi*f2*t)
+    auto mulFreqTime2 = rewriter.create<arith::MulFOp>(loc, constF2, time);
+    auto mul2Pi2 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime2);
+    auto sine2 = rewriter.create<math::SinOp>(loc, mul2Pi2);
+
+    // y[i] = 10 * (sine1 + sine2)
+    auto sumSines = rewriter.create<arith::AddFOp>(loc, sine1, sine2);
+    auto scaledSum = rewriter.create<arith::MulFOp>(loc, const10, sumSines);
+    rewriter.create<memref::StoreOp>(loc, scaledSum, alloc, iv);
+
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTFreqOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.fftfreq to an scf.for loop storing, per result index i,
+///   i / (n * d)        if i <= (n - 1) / 2,
+///   (i - n) / (n * d)  otherwise.
+/// n (operand 0) and d (operand 1) are read as doubles from their defining
+/// dsp.constant ops; the match fails when either has another producer.
+struct FFTFreqOpLowering : public ConversionPattern {
+  FFTFreqOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTFreqOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Element 0 of the dsp.constant defining operand `idx`, or failure.
+    auto readScalar = [&](unsigned idx) -> FailureOr<double> {
+      auto cst = op->getOperand(idx).getDefiningOp<dsp::ConstantOp>();
+      if (!cst || cst.getValue().empty())
+        return failure();
+      return cst.getValue().getValues<FloatAttr>()[0].getValueAsDouble();
+    };
+
+    FailureOr<double> n = readScalar(0);
+    FailureOr<double> d = readScalar(1);
+    if (failed(n) || failed(d))
+      return rewriter.notifyMatchFailure(
+          op, "expected n and d operands to be dsp.constant ops");
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto constN = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*n));
+    auto constD = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*d));
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub =
+        rewriter.create<arith::ConstantIndexOp>(loc, tensorType.getShape()[0]);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto NtimesD = rewriter.create<arith::MulFOp>(loc, constN, constD);
+    auto half = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.5),
+                                                        rewriter.getF64Type());
+    auto one = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(1.0),
+                                                       rewriter.getF64Type());
+    // (n - 1) / 2: indices up to this bound take the `i / (n * d)` branch.
+    auto nMinusOne = rewriter.create<arith::SubFOp>(loc, constN, one);
+    auto halfNMinusOne = rewriter.create<arith::MulFOp>(loc, nMinusOne, half);
+
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+    auto ivInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto ivFloat =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), ivInt);
+    auto ifCondition = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLE, ivFloat, halfNMinusOne);
+    auto ifOp = rewriter.create<scf::IfOp>(
+        loc, TypeRange{rewriter.getF64Type()}, ifCondition, true);
+
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    auto freq = rewriter.create<arith::DivFOp>(loc, ivFloat, NtimesD);
+    rewriter.create<memref::StoreOp>(loc, freq, alloc, ValueRange{iv});
+    rewriter.create<scf::YieldOp>(loc, ValueRange{freq});
+
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    auto ivminusN = rewriter.create<arith::SubFOp>(loc, ivFloat, constN);
+    auto negfreq = rewriter.create<arith::DivFOp>(loc, ivminusN, NtimesD);
+    rewriter.create<memref::StoreOp>(loc, negfreq, alloc, ValueRange{iv});
+    rewriter.create<scf::YieldOp>(loc, ValueRange{negfreq});
+
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FindDominantPeaksOpLowering operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp::FindDominantPeaksOp to an scf.for reduction over memrefs.
+//
+// The generated loop scans indices [0, N/2) of the `frequencies` and
+// `magnitudes` operands, where N is the static element count of operand 0.
+// Entries with a negative frequency are skipped. The loop carries the two
+// largest magnitudes seen so far together with their frequencies; after the
+// loop the two frequencies are stored to result[0] and result[1] in ascending
+// order.
+struct FindDominantPeaksOpLowering : public ConversionPattern {
+  FindDominantPeaksOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FindDominantPeaksOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Check the input type before emitting any IR: the trip count below is
+    // derived from its static element count.
+    auto frequenciesType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!frequenciesType || !frequenciesType.hasStaticShape())
+      return rewriter.notifyMatchFailure(
+          op, "expected a statically shaped ranked tensor of frequencies");
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    FindDominantPeaksOpAdaptor findDominantPeaksOpAdaptor(operands);
+    auto frequencies = findDominantPeaksOpAdaptor.getFrequencies();
+    auto magnitudes = findDominantPeaksOpAdaptor.getMagnitudes();
+
+    // Initial loop-carried state, in this order: {max1, max2, freq1, freq2}.
+    // All four start at 0.0.
+    auto max1 = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto max2 = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto freq1 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    auto freq2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+
+    // Only the first half of the input is scanned. The element count is
+    // static, so the bound floor(N / 2) is folded here instead of being
+    // computed with runtime float arithmetic.
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(
+        loc, frequenciesType.getNumElements() / 2);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto forOp = rewriter.create<scf::ForOp>(
+        loc, lb, ub, step, ValueRange{max1, max2, freq1, freq2});
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+
+    // State carried in from the previous iteration.
+    auto prevMax1 = forOp.getRegionIterArgs()[0];
+    auto prevMax2 = forOp.getRegionIterArgs()[1];
+    auto prevFreq1 = forOp.getRegionIterArgs()[2];
+
+    // Load current frequency and magnitude
+    auto currentFreq =
+        rewriter.create<memref::LoadOp>(loc, frequencies, ValueRange{iv});
+    auto currentMag =
+        rewriter.create<memref::LoadOp>(loc, magnitudes, ValueRange{iv});
+
+    // Only entries with frequency >= 0 may update the state.
+    auto zero = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto isPositive = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, currentFreq, zero);
+
+    // The scf.if ops below return values, so each region gets an explicit
+    // scf.yield.
+    auto ifOp = rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(),
+                                           isPositive, true);
+    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
+
+    // Case 1: the current magnitude beats max1. The old max1/freq1 are
+    // demoted to max2/freq2 and the current entry becomes the new maximum.
+    auto cmpMax1 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, currentMag, prevMax1);
+    auto ifMax1 =
+        rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(), cmpMax1, true);
+
+    rewriter.setInsertionPointToStart(&ifMax1.getThenRegion().front());
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange({currentMag, prevMax1, currentFreq, prevFreq1}));
+
+    // Case 2: the current magnitude only beats max2. max1/freq1 are kept and
+    // the current entry replaces max2/freq2.
+    rewriter.setInsertionPointToStart(&ifMax1.getElseRegion().front());
+    auto cmpMax2 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, currentMag, prevMax2);
+    auto ifMax2 =
+        rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(), cmpMax2, true);
+
+    rewriter.setInsertionPointToStart(&ifMax2.getThenRegion().front());
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{prevMax1, currentMag, prevFreq1, currentFreq});
+
+    // Case 3: no update, yield the carried values unchanged.
+    rewriter.setInsertionPointToStart(&ifMax2.getElseRegion().front());
+    rewriter.create<scf::YieldOp>(loc, forOp.getRegionIterArgs());
+
+    // Propagate the innermost result outwards, one nesting level at a time.
+    rewriter.setInsertionPointAfter(ifMax2);
+    rewriter.create<scf::YieldOp>(loc, ifMax2.getResults());
+
+    rewriter.setInsertionPointAfter(ifMax1);
+    rewriter.create<scf::YieldOp>(loc, ifMax1.getResults());
+
+    rewriter.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    // No update for negative frequencies, yield original values
+    rewriter.create<scf::YieldOp>(loc, forOp.getRegionIterArgs());
+
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.create<scf::YieldOp>(loc, ifOp.getResults());
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    // Order the two peak frequencies so that the smaller one comes first.
+    auto cmpFreq = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, forOp.getResult(2), forOp.getResult(3));
+
+    auto ifFreq = rewriter.create<scf::IfOp>(
+        loc, TypeRange{rewriter.getF64Type(), rewriter.getF64Type()}, cmpFreq,
+        true);
+
+    rewriter.setInsertionPointToStart(&ifFreq.getThenRegion().front());
+    // freq1 < freq2, so keep the order
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{forOp.getResult(2), forOp.getResult(3)});
+
+    rewriter.setInsertionPointToStart(&ifFreq.getElseRegion().front());
+    // freq1 >= freq2, so swap the order
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{forOp.getResult(3), forOp.getResult(2)});
+
+    rewriter.setInsertionPointAfter(ifFreq);
+
+    // Store the two peak frequencies in the result memref, now in the
+    // correct order.
+    rewriter.create<memref::StoreOp>(
+        loc, ifFreq.getResult(0), alloc,
+        ValueRange{rewriter.create<arith::ConstantIndexOp>(loc, 0)});
+    rewriter.create<memref::StoreOp>(
+        loc, ifFreq.getResult(1), alloc,
+        ValueRange{rewriter.create<arith::ConstantIndexOp>(loc, 1)});
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: RecoverDTMFDigitOpLowering operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp::RecoverDTMFDigitOp to an scf.for search over a table of
+// frequency pairs.
+//
+// The two values in `frequencies` are compared against rows 0..9 of
+// `freqPairs`. A row matches when both of its columns lie within 3.0 of the
+// corresponding entry of `frequencies`. The index of the last matching row
+// (or -1 when no row matches) is converted to f64 and stored in result[0].
+struct RecoverDTMFDigitOpLowering : public ConversionPattern {
+  RecoverDTMFDigitOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::RecoverDTMFDigitOp::getOperationName(), 1, ctx) {
+  }
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Small emitters shared by the code below.
+    auto indexConst = [&](int64_t value) -> Value {
+      return rewriter.create<arith::ConstantIndexOp>(loc, value);
+    };
+    auto withinTolerance = [&](Value distance, Value limit) -> Value {
+      return rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLE,
+                                            distance, limit);
+    };
+
+    // Result buffer, plus a rank-0 index buffer that holds the running match.
+    auto resultTensorType =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorType), loc, rewriter);
+    auto matchSlot = insertAllocAndDealloc(
+        MemRefType::get({}, rewriter.getIndexType()), loc, rewriter);
+
+    RecoverDTMFDigitOpAdaptor adaptor(operands);
+    auto detected = adaptor.getFrequencies();
+    auto table = adaptor.getFreqPairs();
+
+    // Column 0 / column 1 of both the detected pair and every table row.
+    Value firstColumn = indexConst(0);
+    Value secondColumn = indexConst(1);
+
+    Value detectedFirst =
+        rewriter.create<memref::LoadOp>(loc, detected, ValueRange{firstColumn});
+    Value detectedSecond = rewriter.create<memref::LoadOp>(
+        loc, detected, ValueRange{secondColumn});
+
+    // Start from "no match".
+    Value noMatch = indexConst(-1);
+    rewriter.create<AffineStoreOp>(loc, noMatch, matchSlot, ValueRange{});
+
+    Value maxDistance = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(3.0), rewriter.getF64Type());
+
+    Value firstRow = indexConst(0);
+    Value rowLimit = indexConst(10);
+    Value rowStep = indexConst(1);
+
+    auto rowLoop = rewriter.create<scf::ForOp>(loc, firstRow, rowLimit, rowStep);
+    rewriter.setInsertionPointToStart(rowLoop.getBody());
+    Value row = rowLoop.getInductionVar();
+
+    Value runningMatch =
+        rewriter.create<memref::LoadOp>(loc, matchSlot, ValueRange{});
+
+    Value tableFirst = rewriter.create<memref::LoadOp>(
+        loc, table, ValueRange{row, firstColumn});
+    Value tableSecond = rewriter.create<memref::LoadOp>(
+        loc, table, ValueRange{row, secondColumn});
+
+    // |table - detected| for both columns.
+    Value firstDelta =
+        rewriter.create<arith::SubFOp>(loc, tableFirst, detectedFirst);
+    Value secondDelta =
+        rewriter.create<arith::SubFOp>(loc, tableSecond, detectedSecond);
+
+    Value firstDistance = rewriter.create<math::AbsFOp>(loc, firstDelta);
+    Value secondDistance = rewriter.create<math::AbsFOp>(loc, secondDelta);
+
+    Value firstOk = withinTolerance(firstDistance, maxDistance);
+    Value secondOk = withinTolerance(secondDistance, maxDistance);
+    Value rowMatches = rewriter.create<arith::AndIOp>(loc, firstOk, secondOk);
+
+    // A matching row overwrites the running match; otherwise it is kept.
+    Value updatedMatch =
+        rewriter.create<arith::SelectOp>(loc, rowMatches, row, runningMatch);
+    rewriter.create<memref::StoreOp>(loc, updatedMatch, matchSlot,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(rowLoop);
+
+    // index -> i64 -> f64, then write the digit to result[0].
+    Value matchedRow =
+        rewriter.create<memref::LoadOp>(loc, matchSlot, ValueRange{});
+    Value matchedRowI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), matchedRow);
+    Value matchedRowF64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), matchedRowI64);
+
+    Value resultSlot = indexConst(0);
+    rewriter.create<memref::StoreOp>(loc, matchedRowF64, resultBuffer,
+                                     ValueRange{resultSlot});
+
+    rewriter.replaceOp(op, resultBuffer);
+
+    return success();
+  }
+};
+
+// Store finalMatchIndexF64 into alloc
+// auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+// rewriter.create<memref::StoreOp>(loc, finalMatchIndexF64, alloc,
+// ValueRange{zero});
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GenerateVoiceSignatureOpLowering operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp::GenerateVoiceSignatureOp to an scf.for loop that fills the
+// result with the sum of two sine waves:
+//
+//   out[i] = sin(2*pi * f1 * i / fs) + sin(2*pi * f2 * i / fs)
+//
+// f1, f2 and fs are read at compile time from element 0 of the
+// dsp::ConstantOp ops that define operands 0, 1 and 3. Operand 2 (the
+// duration) is not read: the number of generated samples is taken from the
+// static result shape.
+struct GenerateVoiceSignatureOpLowering : public ConversionPattern {
+  GenerateVoiceSignatureOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GenerateVoiceSignatureOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Reads element 0 of the dsp::ConstantOp defining operand `idx` into
+    // `out`. Returns false when the operand is not produced by a
+    // dsp::ConstantOp or the constant holds no elements. The value is kept as
+    // double so it is re-emitted as an f64 attribute without being rounded to
+    // single precision first.
+    auto readConstant = [&](unsigned idx, double &out) -> bool {
+      auto constantOp = op->getOperand(idx).getDefiningOp<dsp::ConstantOp>();
+      if (!constantOp)
+        return false;
+      auto constantValue = constantOp.getValue();
+      if (constantValue.getNumElements() == 0)
+        return false;
+      auto elements = constantValue.getValues<FloatAttr>();
+      out = elements[0].getValueAsDouble();
+      return true;
+    };
+
+    // Validate all compile-time inputs before emitting any IR.
+    double f1 = 0.0;
+    double f2 = 0.0;
+    double freq = 0.0;
+    if (!readConstant(0, f1) || !readConstant(1, f2) || !readConstant(3, freq))
+      return rewriter.notifyMatchFailure(
+          op, "f1, f2 and the sampling frequency must be defined by non-empty "
+              "dsp::ConstantOp operations");
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub =
+        rewriter.create<arith::ConstantIndexOp>(loc, tensorType.getShape()[0]);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Create constants
+    auto const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    auto constFs = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(freq));
+    auto constF1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f1));
+    auto constF2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f2));
+
+    // Create a loop to generate the two-tone signal
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    // Get the loop induction variable
+    auto iv = forOp.getInductionVar();
+
+    // Convert loop index to time: t = i / fs
+    auto indexToI64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto indexToFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), indexToI64);
+    auto time = rewriter.create<arith::DivFOp>(loc, indexToFloat, constFs);
+
+    // Generate sine wave for f1
+    auto mulFreqTime1 = rewriter.create<arith::MulFOp>(loc, constF1, time);
+    auto mul2Pi1 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime1);
+    auto sine1 = rewriter.create<math::SinOp>(loc, mul2Pi1);
+
+    // Generate sine wave for f2
+    auto mulFreqTime2 = rewriter.create<arith::MulFOp>(loc, constF2, time);
+    auto mul2Pi2 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime2);
+    auto sine2 = rewriter.create<math::SinOp>(loc, mul2Pi2);
+
+    // Combine the two sine waves. The sum is stored unscaled; no 0.5
+    // amplitude factor is applied.
+    auto sumSines = rewriter.create<arith::AddFOp>(loc, sine1, sine2);
+
+    // Store the result in the allocated memref
+    rewriter.create<memref::StoreOp>(loc, sumSines, alloc, iv);
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTCombineOpLowering operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp::FFTCombineOp to an scf.for loop that writes the element-wise
+// magnitude of a complex signal given as separate real and imaginary parts:
+//
+//   out[i] = sqrt(real[i] * real[i] + imag[i] * imag[i])
+//
+// The trip count is the first dimension of the result type.
+struct FFTCombineOpLowering : public ConversionPattern {
+  FFTCombineOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTCombineOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the op's tensor result.
+    auto resultTensorType =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorType), loc, rewriter);
+
+    FFTCombineOpAdaptor adaptor(operands);
+    auto realPart = adaptor.getReal();
+    auto imagPart = adaptor.getImag();
+
+    // Loop bounds: [0, result.shape[0]) with unit step.
+    Value first = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value limit = rewriter.create<arith::ConstantIndexOp>(
+        loc, resultTensorType.getShape()[0]);
+    Value unit = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto loop = rewriter.create<scf::ForOp>(loc, first, limit, unit);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    Value i = loop.getInductionVar();
+
+    // Load both components, square them, add, and take the square root.
+    Value re = rewriter.create<memref::LoadOp>(loc, realPart, ValueRange{i});
+    Value im = rewriter.create<memref::LoadOp>(loc, imagPart, ValueRange{i});
+    Value reSquared = rewriter.create<arith::MulFOp>(loc, re, re);
+    Value imSquared = rewriter.create<arith::MulFOp>(loc, im, im);
+    Value squaredNorm =
+        rewriter.create<arith::AddFOp>(loc, reSquared, imSquared);
+    Value magnitude = rewriter.create<math::SqrtOp>(loc, squaredNorm);
+
+    rewriter.create<memref::StoreOp>(loc, magnitude, resultBuffer,
+                                     ValueRange{i});
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuffer);
+
+    return success();
+  }
+};
+
+// Store finalMatchIndexF64 into alloc
+// auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+// rewriter.create<memref::StoreOp>(loc, finalMatchIndexF64, alloc,
+// ValueRange{zero});
+
+// Lowers dsp::QamModulateRealOp to an affine loop over the output.
+//
+// Output element i is derived from the even-indexed input element:
+//
+//   out[i] = (signal[2 * i] == 0.0) ? -1.0 : 1.0
+//
+// The trip count is the first dimension of the result type.
+struct QamModulateRealOpLowering : public ConversionPattern {
+  QamModulateRealOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamModulateRealOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Emits an f64 arith.constant.
+    auto f64Const = [&](double value) -> Value {
+      return rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(value));
+    };
+
+    // Buffer that replaces the op's tensor result.
+    auto resultTensorType =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorType), loc, rewriter);
+
+    QamModulateRealOpAdaptor adaptor(operands);
+    Value input = adaptor.getSignal();
+
+    Value minusOne = f64Const(-1);
+    Value zero = f64Const(0);
+    Value plusOne = f64Const(1);
+
+    // Input access map: i -> 2 * i (the even-indexed elements).
+    AffineMap evenElementMap =
+        AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) * 2);
+
+    // for i in [0, result.shape[0])
+    const int64_t tripCount = resultTensorType.getShape()[0];
+    AffineForOp loop = rewriter.create<AffineForOp>(loc, 0, tripCount, 1);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    Value i = loop.getInductionVar();
+
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, input, evenElementMap, ValueRange{i});
+
+    // An input equal to zero maps to -1, anything else maps to +1.
+    Value isZero = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, sample, zero);
+    Value symbol =
+        rewriter.create<arith::SelectOp>(loc, isZero, minusOne, plusOne);
+
+    rewriter.create<AffineStoreOp>(loc, symbol, resultBuffer, ValueRange{i});
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuffer);
+
+    return success();
+  }
+};
+
+// Lowers dsp::QamModulateImgOp to an affine loop over the output.
+//
+// Output element i is derived from the odd-indexed input element:
+//
+//   out[i] = (signal[2 * i + 1] == 0.0) ? -1.0 : 1.0
+//
+// The trip count is the first dimension of the result type.
+struct QamModulateImgOpLowering : public ConversionPattern {
+  QamModulateImgOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamModulateImgOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Emits an f64 arith.constant.
+    auto f64Const = [&](double value) -> Value {
+      return rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(value));
+    };
+
+    // Buffer that replaces the op's tensor result.
+    auto resultTensorType =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorType), loc, rewriter);
+
+    QamModulateImgOpAdaptor adaptor(operands);
+    Value input = adaptor.getSignal();
+
+    Value minusOne = f64Const(-1);
+    Value zero = f64Const(0);
+    Value plusOne = f64Const(1);
+
+    // Input access map: i -> 2 * i + 1 (the odd-indexed elements).
+    AffineMap oddElementMap =
+        AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) * 2 + 1);
+
+    // for i in [0, result.shape[0])
+    const int64_t tripCount = resultTensorType.getShape()[0];
+    AffineForOp loop = rewriter.create<AffineForOp>(loc, 0, tripCount, 1);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    Value i = loop.getInductionVar();
+
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, input, oddElementMap, ValueRange{i});
+
+    // An input equal to zero maps to -1, anything else maps to +1.
+    Value isZero = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, sample, zero);
+    Value symbol =
+        rewriter.create<arith::SelectOp>(loc, isZero, minusOne, plusOne);
+
+    rewriter.create<AffineStoreOp>(loc, symbol, resultBuffer, ValueRange{i});
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuffer);
+
+    return success();
+  }
+};
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: QAM demodulate operations
+//===----------------------------------------------------------------------===//
+// #define DUMP(x) llvm::errs() << x << "\n";
+
+// Lowers dsp::QamDemodulateOp to an affine loop that interleaves the decoded
+// real and imaginary symbols into the output. For every even i:
+//
+//   out[i]     = (real[i / 2] == -1.0) ? 0.0 : 1.0
+//   out[i + 1] = (imag[i / 2] == -1.0) ? 0.0 : 1.0
+//
+// The loop runs over [0, result.shape[0]) with step 2, so the output length
+// must be even; otherwise the second store would write past the end of the
+// result buffer and the pattern reports a match failure instead.
+struct QamDemodulateOpLowering : public ConversionPattern {
+  QamDemodulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamDemodulateOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    auto loc = op->getLoc();
+    auto output = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Each iteration writes out[i] and out[i + 1]; reject odd lengths before
+    // emitting any IR.
+    const int64_t outputLength = output.getShape()[0];
+    if (outputLength % 2 != 0)
+      return rewriter.notifyMatchFailure(
+          op, "output length must be even: every (real, imaginary) pair "
+              "produces two output values");
+
+    // output mem alloc and dealloc
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    QamDemodulateOpAdaptor qamDemodulateAdaptor(operands);
+    Value realVal = qamDemodulateAdaptor.getReal();
+    Value imgVal = qamDemodulateAdaptor.getImagine();
+
+    // constant vals;
+    Value negOneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // Input access: i -> i floordiv 2. Second output slot: i -> i + 1.
+    AffineExpr signalExpr = rewriter.getAffineDimExpr(0).floorDiv(2);
+    AffineExpr outputExpr =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+
+    AffineMap signalMap = AffineMap::get(1, 0, signalExpr);
+    AffineMap outputMap = AffineMap::get(1, 0, outputExpr);
+
+    // for i in [0, outputLength) step 2
+    int64_t lb = 0, step = 2, ub = outputLength;
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+
+    // Load the real and imaginary symbol that belong to this output pair.
+    Value realNum =
+        rewriter.create<AffineLoadOp>(loc, realVal, signalMap, ValueRange{ivI});
+    Value imgNum =
+        rewriter.create<AffineLoadOp>(loc, imgVal, signalMap, ValueRange{ivI});
+
+    // A symbol equal to -1 decodes to 0, anything else decodes to 1.
+    Value negReal = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, realNum, negOneVal);
+    Value negImagine = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, imgNum, negOneVal);
+
+    Value out1 =
+        rewriter.create<arith::SelectOp>(loc, negReal, zeroVal, oneVal);
+    Value out2 =
+        rewriter.create<arith::SelectOp>(loc, negImagine, zeroVal, oneVal);
+
+    // out[i] <- decoded real, out[i + 1] <- decoded imaginary.
+    rewriter.create<AffineStoreOp>(loc, out1, alloc, ValueRange{ivI});
+    rewriter.create<AffineStoreOp>(loc, out2, alloc, outputMap, ValueRange{ivI});
+
+    rewriter.setInsertionPointAfter(forOpI);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+}; // qam_demodulate op
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: BeamForm operations
+//===----------------------------------------------------------------------===//
+
+// Lowers dsp::BeamFormOp to two affine loop nests.
+//
+// Nest 1 fills a scratch buffer with one phase-shifted sine per antenna:
+//
+//   signal[a][t] = sin(2*pi*freq * time[t] + a * pi/4)
+//
+// Nest 2 reduces over the antennas for every time sample:
+//
+//   out[t] = sum_a signal[a][t] * weights[a]
+//
+// The number of antennas and the frequency come from the op's accessors; the
+// number of time samples is the first dimension of the result type.
+struct BeamFormOpLowering : public ConversionPattern {
+  BeamFormOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::BeamFormOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto beamFormOp = llvm::cast<mlir::dsp::BeamFormOp>(op);
+
+    // allocating space for output
+    auto output = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMemRefType = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMemRefType, loc, rewriter);
+
+    BeamFormOpAdaptor beamFormAdaptor(operands);
+    auto time = beamFormAdaptor.getTime();
+    auto weights = beamFormAdaptor.getWeights();
+
+    // allocating space for internal generated signals
+    int64_t timeDim = output.getShape()[0];
+    int64_t antennas = beamFormOp.getAntennas();
+    int64_t frequency = beamFormOp.getFreq();
+
+    // Scratch buffer of shape [antennas, timeDim] with the output's element
+    // type.
+    llvm::SmallVector<int64_t, 2> signalShapeVec{antennas, timeDim};
+    llvm::ArrayRef<int64_t> signalShape(signalShapeVec);
+
+    auto signalType = output.clone(signalShape, output.getElementType());
+    auto signalMemRefType = convertTensorToMemRef(signalType);
+    auto allocSignal = insertAllocAndDealloc(signalMemRefType, loc, rewriter);
+
+    AffineExpr d0, d1; // the two loop indices of a nest, outer then inner
+    bindDims(rewriter.getContext(), d0, d1);
+
+    // (outer, inner) -> [inner, outer]: transposed access into the scratch
+    // buffer, used by the reduction nest.
+    AffineMap genInputMap =
+        AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef<AffineExpr>{d1, d0},
+                       rewriter.getContext());
+    // (outer, inner) -> [inner]: 1-D access indexed by the inner loop only.
+    AffineMap timeMap =
+        AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef<AffineExpr>{d1},
+                       rewriter.getContext());
+
+    auto pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.1415926));
+    auto zero = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                   rewriter.getF64FloatAttr(0));
+    auto one = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                  rewriter.getF64FloatAttr(1));
+    auto two = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                  rewriter.getF64FloatAttr(2));
+    auto four = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                   rewriter.getF64FloatAttr(4));
+    auto two_pi = rewriter.create<arith::MulFOp>(loc, pi, two); // 2 * pi
+    auto freq_val = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(frequency));
+    auto phase_var =
+        rewriter.create<arith::MulFOp>(loc, two_pi, freq_val); // 2*pi*freq
+
+    // Nest 1, outer loop: i over antennas. The loop carries a float copy of
+    // i (starting at 0, incremented by 1 per iteration).
+    int64_t lb = 0, ub = antennas, step = 1;
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{zero});
+    auto ivI = forOpI.getInductionVar(); // i : antenna
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+
+    // Block argument 0 is the induction variable; argument 1 is the carried
+    // float counter.
+    auto floatI = forOpI.getBody()->getArgument(1);
+
+    auto iter_tmp = rewriter.create<arith::MulFOp>(loc, floatI, pi); // i * pi
+    auto iter_args =
+        rewriter.create<arith::DivFOp>(loc, iter_tmp, four); // i*pi/4
+
+    // Nest 1, inner loop: j over time samples.
+    ub = timeDim;
+    affine::AffineForOp forOpJ =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivJ = forOpJ.getInductionVar(); // j : time sample
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+
+    // signal[i][j] = sin(time[j] * 2*pi*freq + i*pi/4)
+    auto time_var =
+        rewriter.create<AffineLoadOp>(loc, time, timeMap, ValueRange{ivI, ivJ});
+    auto mul_var = rewriter.create<arith::MulFOp>(loc, time_var, phase_var);
+    auto sin_body = rewriter.create<arith::AddFOp>(loc, mul_var, iter_args);
+    auto result = rewriter.create<math::SinOp>(loc, sin_body);
+    rewriter.create<AffineStoreOp>(loc, result, allocSignal,
+                                   ValueRange{ivI, ivJ});
+
+    rewriter.setInsertionPointAfter(forOpJ); // end for loop: j
+
+    auto increFloatI = rewriter.create<arith::AddFOp>(loc, floatI, one);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{increFloatI});
+
+    rewriter.setInsertionPointAfter(forOpI); // end for loop: i
+
+    // Nest 2, outer loop: i over time samples.
+    ub = timeDim;
+    affine::AffineForOp forOpIOut =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivIoutput = forOpIOut.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpIOut.getBody());
+
+    // Nest 2, inner loop: j over antennas, carrying the running sum.
+    ub = antennas;
+    affine::AffineForOp forOpJOut =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{zero});
+    auto ivJoutput = forOpJOut.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpJOut.getBody());
+
+    // signal[j][i] * weights[j]
+    auto signalInput = rewriter.create<AffineLoadOp>(
+        loc, allocSignal, genInputMap, ValueRange{ivIoutput, ivJoutput});
+    auto weight = rewriter.create<AffineLoadOp>(
+        loc, weights, timeMap, ValueRange{ivIoutput, ivJoutput});
+    auto intermediateVal =
+        rewriter.create<arith::MulFOp>(loc, signalInput, weight);
+
+    // Accumulate into the carried sum (block argument 1).
+    auto sumVal = forOpJOut.getBody()->getArgument(1);
+    auto beamOut = rewriter.create<arith::AddFOp>(loc, intermediateVal, sumVal);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{beamOut});
+
+    // Write the finished sum once per time sample, after the reduction loop.
+    // This avoids one store per antenna and also gives out[i] a defined value
+    // (0) when there are no antennas.
+    rewriter.setInsertionPointAfter(forOpJOut);
+    rewriter.create<AffineStoreOp>(loc, forOpJOut.getResult(0), alloc,
+                                   ValueRange{ivIoutput});
+
+    rewriter.setInsertionPointAfter(forOpIOut);
+
+    rewriter.replaceOp(op, alloc);
+
+    return mlir::success();
+  }
+};
+
+/// Lowers dsp::SpaceModulateOp to a single affine loop over the first
+/// dimension of the input: every sample that compares (ordered) equal to 1.0
+/// is written out as +1.0, every other sample as -1.0.
+struct SpaceModulateOpLowering : public ConversionPattern {
+  SpaceModulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceModulateOp::getOperationName(), 1, ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto opLoc = op->getLoc();
+
+    // Buffer that replaces the op's tensor result.
+    auto resultTensorTy =
+        llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorTy), opLoc, rewriter);
+
+    // Already-converted input value and the static length of the original
+    // tensor operand, which fixes the loop trip count.
+    SpaceModulateOpAdaptor adaptor(operands);
+    Value inputBits = adaptor.getSignal();
+    auto inputTensorTy =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    llvm::ArrayRef<int64_t> inputDims = inputTensorTy.getShape();
+
+    // The two output symbols.
+    Value minusOne = rewriter.create<arith::ConstantOp>(
+        opLoc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value plusOne = rewriter.create<arith::ConstantOp>(
+        opLoc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // for n in [0, len): out[n] = (in[n] == 1.0) ? +1.0 : -1.0
+    int64_t lowerBound = 0;
+    int64_t upperBound = inputDims[0];
+    int64_t stride = 1;
+    AffineForOp sampleLoop =
+        rewriter.create<AffineForOp>(opLoc, lowerBound, upperBound, stride);
+    rewriter.setInsertionPointToStart(sampleLoop.getBody());
+    auto n = sampleLoop.getInductionVar();
+
+    Value sample =
+        rewriter.create<AffineLoadOp>(opLoc, inputBits, ValueRange{n});
+    Value sampleIsOne = rewriter.create<arith::CmpFOp>(
+        opLoc, arith::CmpFPredicate::OEQ, sample, plusOne);
+    auto symbol = rewriter.create<arith::SelectOp>(opLoc, sampleIsOne, plusOne,
+                                                   minusOne);
+    rewriter.create<AffineStoreOp>(opLoc, symbol, resultBuffer, ValueRange{n});
+
+    rewriter.setInsertionPointAfter(sampleLoop);
+
+    rewriter.replaceOp(op, resultBuffer);
+    return mlir::success();
+  }
+}; // space modulate
+
+/// Lowers dsp::SpaceDemodulateOp to a single affine loop over the first
+/// dimension of the input: every sample that is (ordered) greater than or
+/// equal to 1.0 is written out as 1.0, every other sample as 0.0.
+struct SpaceDemodulateOpLowering : public ConversionPattern {
+  SpaceDemodulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceDemodulateOp::getOperationName(), 1, ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto opLoc = op->getLoc();
+
+    // Buffer that replaces the op's tensor result.
+    auto resultTensorTy =
+        llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorTy), opLoc, rewriter);
+
+    // Already-converted input value and the static length of the original
+    // tensor operand, which fixes the loop trip count.
+    SpaceDemodulateOpAdaptor adaptor(operands);
+    Value symbols = adaptor.getBinary();
+    auto inputTensorTy =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    llvm::ArrayRef<int64_t> inputDims = inputTensorTy.getShape();
+
+    // The two output bit values.
+    Value bitZero = rewriter.create<arith::ConstantOp>(
+        opLoc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value bitOne = rewriter.create<arith::ConstantOp>(
+        opLoc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // for n in [0, len): out[n] = (in[n] >= 1.0) ? 1.0 : 0.0
+    int64_t lowerBound = 0;
+    int64_t upperBound = inputDims[0];
+    int64_t stride = 1;
+    AffineForOp sampleLoop =
+        rewriter.create<AffineForOp>(opLoc, lowerBound, upperBound, stride);
+    rewriter.setInsertionPointToStart(sampleLoop.getBody());
+    auto n = sampleLoop.getInductionVar();
+
+    Value sample = rewriter.create<AffineLoadOp>(opLoc, symbols, ValueRange{n});
+    Value atLeastOne = rewriter.create<arith::CmpFOp>(
+        opLoc, arith::CmpFPredicate::OGE, sample, bitOne);
+    auto decoded =
+        rewriter.create<arith::SelectOp>(opLoc, atLeastOne, bitOne, bitZero);
+    rewriter.create<AffineStoreOp>(opLoc, decoded, resultBuffer,
+                                   ValueRange{n});
+
+    rewriter.setInsertionPointAfter(sampleLoop);
+    rewriter.replaceOp(op, resultBuffer);
+    return mlir::success();
+  }
+}; // space demodulate
+
+/// Lowers dsp::SpaceErrCorrectionOp to two nested affine loops that walk the
+/// input in groups of 8 samples. For a group starting at index i the emitted
+/// code:
+///   * copies signal[i+1 .. i+7] to the output unchanged,
+///   * accumulates the sum of all 8 samples of the group in output[i],
+///   * finally overwrites output[i] with 0.0 when (sum mod 2) == 1.0 and with
+///     signal[i] otherwise.
+// NOTE(review): the inner loop reads signal[i + 1 .. i + 7] for every group
+// start i < signalShape[0]; this looks like it assumes the input length is a
+// multiple of 8 — confirm against the op's verifier / callers.
+struct SpaceErrCorrectionOpLowering : public ConversionPattern {
+  SpaceErrCorrectionOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceErrCorrectionOp::getOperationName(), 1,
+                          ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the op's tensor result.
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    // Converted input value; the loop bound is taken from the static shape of
+    // the original (tensor-typed) operand.
+    SpaceErrCorrectionOpAdaptor adaptor(operands);
+    Value signal = adaptor.getSignal();
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    llvm::ArrayRef<int64_t> signalShape = signalType.getShape();
+
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value twoVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(2));
+
+    // Two-dimensional maps over (i, j): `first` selects the group start i,
+    // `index` selects element i + j inside the group.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineMap first =
+        AffineMap::get(2, 0, ArrayRef<AffineExpr>{d0}, rewriter.getContext());
+    AffineMap index = AffineMap::get(2, 0, ArrayRef<AffineExpr>{d0 + d1},
+                                     rewriter.getContext());
+
+    // Outer loop: i = 0, 8, 16, ... (one iteration per group of 8 samples).
+    int64_t lb = 0, ub = signalShape[0], step = 8;
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+
+    // Seed the running sum: alloc[i] = signal[i].
+    auto firstVal = rewriter.create<AffineLoadOp>(
+        loc, signal, ValueRange{ivI}); // signal[i]
+    rewriter.create<AffineStoreOp>(
+        loc, firstVal, alloc, ValueRange{ivI}); // store signal[i] to alloc[i]
+
+    // Inner loop: j = 1 .. 7, the remaining samples of the group.
+    int64_t inner_lb = 1, inner_ub = 8, inner_step = 1;
+    AffineForOp forOpJ =
+        rewriter.create<AffineForOp>(loc, inner_lb, inner_ub, inner_step);
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+    auto ivJ = forOpJ.getInductionVar();
+
+    auto stored = rewriter.create<AffineLoadOp>(
+        loc, alloc, first, ValueRange{ivI, ivJ}); // load running sum alloc[i]
+    auto loaded = rewriter.create<AffineLoadOp>(
+        loc, signal, index, ValueRange{ivI, ivJ}); // load signal[i + j]
+
+    auto added = rewriter.create<arith::AddFOp>(loc, stored, loaded); // add
+    rewriter.create<AffineStoreOp>(loc, added, alloc,
+                                   ValueRange{ivI}); // alloc[i] = running sum
+    rewriter.create<AffineStoreOp>(
+        loc, loaded, alloc, index,
+        ValueRange{ivI, ivJ}); // alloc[i + j] = signal[i + j] (pass-through)
+
+    rewriter.setInsertionPointAfter(forOpJ);
+
+    // After the inner loop alloc[i] holds the sum of the 8 samples of the
+    // group; decide the final value of alloc[i] from its remainder modulo 2.
+    auto initVal = rewriter.create<AffineLoadOp>(
+        loc, signal, ValueRange{ivI}); // load signal[i]
+    auto oneCount = rewriter.create<AffineLoadOp>(
+        loc, alloc, ValueRange{ivI}); // load group sum from alloc[i]
+    auto parityCheck = rewriter.create<arith::RemFOp>(
+        loc, oneCount,
+        twoVal); // floating-point remainder of the group sum divided by 2
+
+    auto oddParity =
+        rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, oneVal,
+                                       parityCheck); // remainder == 1.0
+    auto valToAlloc = rewriter.create<arith::SelectOp>(
+        loc, oddParity, zeroVal, initVal); // odd: 0.0, otherwise signal[i]
+
+    rewriter.create<AffineStoreOp>(
+        loc, valToAlloc, alloc, ValueRange{ivI}); // store the value to alloc[i]
+
+    rewriter.setInsertionPointAfter(forOpI);
+
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+
+/// Lowers dsp::ArgMaxOp for a one-dimensional input.
+///
+/// The emitted code keeps two buffers of the result type: `alloc` holds the
+/// index of the largest element seen so far (stored as f64) and `allocEle`
+/// holds that element's value. One affine loop scans the input; an element
+/// replaces the current maximum only when it is strictly greater (OGT), so
+/// the first occurrence of the maximum wins.
+///
+/// The running maximum starts at -infinity and the running index at 0, so
+/// the index buffer is always initialised before it is read and inputs
+/// without any element above zero still produce a valid index.
+///
+/// The op's `axis` attribute is not consulted by this lowering.
+struct ArgMaxOpLowering : public ConversionPattern {
+  ArgMaxOpLowering(MLIRContext *context)
+      : ConversionPattern(dsp::ArgMaxOp::getOperationName(), 1, context) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    auto zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    auto oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // Seed for the running maximum: every ordered f64 compares greater than
+    // or equal to it.
+    auto negInfVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getFloatAttr(
+            rewriter.getF64Type(),
+            llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                  /*Negative=*/true)));
+
+    // Converted input value; the loop bound comes from the static shape of
+    // the original (tensor-typed) operand.
+    ArgMaxOpAdaptor adaptor(operands);
+    auto input = adaptor.getInput();
+    auto inputType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // Output allocation.
+    auto output = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMemRef = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMemRef, loc,
+                                       rewriter); // index of the max element
+    auto allocEle =
+        insertAllocAndDealloc(outputMemRef, loc, rewriter); // max element
+
+    // The rank of the result is compared against 1 in the generated code.
+    auto outputShape = output.getShape();
+    auto outputSizeOp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(outputShape.size()));
+    auto sizeSwitch = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, outputSizeOp, oneVal); // rank == 1
+
+    // Map (d0) -> (0): addresses slot 0 of the scratch buffers regardless of
+    // the loop index passed as operand.
+    AffineExpr d0;
+    bindDims(rewriter.getContext(), d0);
+    AffineMap zeroIdx = AffineMap::get(1, 0, ArrayRef<AffineExpr>{d0 - d0},
+                                       rewriter.getContext());
+
+    auto ifOp = rewriter.create<scf::IfOp>(
+        loc, sizeSwitch,
+        true); // FIXME: else condition for 2 dimensional tensor input
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+
+    // Rank-1 result: a single pass over the input recording the maximum
+    // value and its index.
+    Value iv0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    rewriter.create<AffineStoreOp>(loc, negInfVal, allocEle, ValueRange{iv0});
+    rewriter.create<AffineStoreOp>(loc, zeroVal, alloc, ValueRange{iv0});
+
+    // The loop carries the current index as an f64 iter_arg so that it can be
+    // stored directly into the f64 result buffer.
+    int64_t lb = 0, ub = inputType.getShape()[0], step = 1;
+    auto forOp = rewriter.create<AffineForOp>(loc, lb, ub, step,
+                                              ValueRange{zeroVal.getResult()});
+    auto ivI = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    auto floatI = forOp.getBody()->getArgument(1);
+
+    auto curMax =
+        rewriter.create<AffineLoadOp>(loc, allocEle, zeroIdx, ValueRange{ivI});
+    auto curMaxIdx =
+        rewriter.create<AffineLoadOp>(loc, alloc, zeroIdx, ValueRange{ivI});
+    auto curEle = rewriter.create<AffineLoadOp>(loc, input, ivI);
+    auto cmpOp = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT,
+                                                curEle, curMax);
+    // If the element is greater than the maximum, take its value ...
+    auto maxOp = rewriter.create<arith::SelectOp>(loc, cmpOp, curEle, curMax);
+    // ... and its index; otherwise keep the previous ones.
+    auto idxOp =
+        rewriter.create<arith::SelectOp>(loc, cmpOp, floatI, curMaxIdx);
+
+    rewriter.create<AffineStoreOp>(loc, maxOp, allocEle, zeroIdx,
+                                   ValueRange{ivI});
+    rewriter.create<AffineStoreOp>(loc, idxOp, alloc, zeroIdx, ValueRange{ivI});
+
+    auto increFloatI = rewriter.create<arith::AddFOp>(loc, floatI, oneVal);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{increFloatI});
+
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.setInsertionPointAfter(ifOp);
+
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Power operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::PowOp to an affine loop computing
+/// `result[i] = lhs[i] ^ rhs` with math::PowFOp, where `rhs` is read with an
+/// empty index list (i.e. from a rank-0 buffer).
+struct PowOpLowering : public ConversionPattern {
+  PowOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::PowOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    dsp::PowOpAdaptor powerAdaptor(operands);
+    Value lhs = powerAdaptor.getLhs();
+    Value rhs = powerAdaptor.getRhs();
+
+    // `lhs` is the adapted operand and is loaded from with AffineLoadOp
+    // below, so it is a memref by the time this pattern can succeed. Query
+    // its shape through ShapedType, which covers both tensor and memref
+    // types, instead of casting to RankedTensorType.
+    auto inputType = llvm::cast<ShapedType>(lhs.getType());
+    auto resultType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocate space for the result.
+    auto memRefType = convertTensorToMemRef(resultType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // The exponent does not depend on the loop index; load it once up front.
+    Value loadRHS = rewriter.create<AffineLoadOp>(loc, rhs, ValueRange{});
+
+    // One affine loop over the first dimension of the input.
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    Value loadLHS = rewriter.create<AffineLoadOp>(loc, lhs, ValueRange{iv});
+    Value power = rewriter.create<math::PowFOp>(loc, loadLHS, loadRHS);
+
+    // Store the result.
+    rewriter.create<AffineStoreOp>(loc, power, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp);
+
+    // Replace the op with the filled buffer.
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Normalize operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::NormalizeOp to two affine loops over the first dimension:
+///   1. a reduction that finds the minimum and maximum of the signal,
+///   2. an element-wise pass storing `(x - min) / (max - min)`.
+///
+/// The reduction is seeded with +infinity / -infinity so that it is correct
+/// for every finite f64 sample, including magnitudes beyond the int64 range.
+// NOTE(review): when every sample has the same value `max - min` is 0 and the
+// division produces NaN; no special handling is emitted for that case.
+struct NormalizeOpLowering : public ConversionPattern {
+  NormalizeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::NormalizeOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the op's tensor result.
+    auto tensorType = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto shape = tensorType.getShape()[0];
+
+    dsp::NormalizeOpAdaptor adaptor(operands);
+    Value signal = adaptor.getSignal();
+
+    // Reduction seeds: any ordered sample is below +inf and above -inf.
+    auto f64Type = rewriter.getF64Type();
+    Value min = rewriter.create<arith::ConstantOp>(
+        loc, f64Type,
+        rewriter.getFloatAttr(
+            f64Type, llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                           /*Negative=*/false)));
+    Value max = rewriter.create<arith::ConstantOp>(
+        loc, f64Type,
+        rewriter.getFloatAttr(
+            f64Type, llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                           /*Negative=*/true)));
+
+    int64_t lb = 0, ub = shape, step = 1;
+
+    // Pass 1: carry the running (min, max) pair as loop iter_args.
+    affine::AffineForOp forOp =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{min, max});
+    auto iv = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    auto minVal = forOp.getBody()->getArgument(1);
+    auto maxVal = forOp.getBody()->getArgument(2);
+
+    auto cmpVal = rewriter.create<AffineLoadOp>(loc, signal, ValueRange{iv});
+    Value isMin = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLT,
+                                                 cmpVal, minVal);
+    Value isMax = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT,
+                                                 cmpVal, maxVal);
+
+    auto minOut = rewriter.create<arith::SelectOp>(loc, isMin, cmpVal, minVal);
+    auto maxOut = rewriter.create<arith::SelectOp>(loc, isMax, cmpVal, maxVal);
+
+    rewriter.create<AffineYieldOp>(
+        loc, ValueRange{minOut.getResult(), maxOut.getResult()});
+    rewriter.setInsertionPointAfter(forOp);
+
+    auto minSignal = forOp.getResults()[0];
+    auto maxSignal = forOp.getResults()[1];
+
+    // The range is computed once, outside the second loop.
+    auto divisor = rewriter.create<arith::SubFOp>(loc, maxSignal, minSignal);
+
+    // Pass 2: element-wise normalisation.
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivI = forOpI.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+
+    auto loadedVal =
+        rewriter.create<AffineLoadOp>(loc, signal, ValueRange{ivI});
+    auto subVal = rewriter.create<arith::SubFOp>(loc, loadedVal, minSignal);
+    auto resultVal = rewriter.create<arith::DivFOp>(loc, subVal, divisor);
+
+    rewriter.create<AffineStoreOp>(loc, resultVal, alloc, ValueRange{ivI});
+    rewriter.setInsertionPointAfter(forOpI);
+
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: NormLMSFilterResponseOptimizeOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::NormLMSFilterResponseOptimizeOp: an LMS filter whose response
+/// is min-max normalised, emitted as one fused sample loop plus a final
+/// normalisation loop.
+///
+/// For every output sample n the generated code:
+///   1. computes y[n] = sum_k w[k] * x[n - k] for 0 <= k < L with n - k >= 0,
+///   2. folds y[n] into a running (min, max) pair carried as loop iter_args,
+///   3. forms e[n] = d[n] - y[n] and updates w[k] += mu * e[n] * x[n - k].
+/// A second loop then rewrites y[n] as (y[n] - min) / (max - min).
+///
+/// Operand 3 (the filter length L) must be produced by a dsp::ConstantOp; the
+/// pattern reports a match failure otherwise, before any IR is created.
+///
+/// The weight buffer has L elements, is zero-filled before the sample loop
+/// (the loop reads w[k] before its first store to it) and is released through
+/// insertAllocAndDealloc like the result buffer.
+// NOTE(review): zero is assumed to be the intended starting value for the
+// weights — confirm against the op's specification.
+// NOTE(review): operands are accessed through LMSFilterOpAdaptor; presumably
+// this op shares that op's operand layout (lhs, rhs, mu, ...) — confirm.
+struct NormLMSFilterResponseOptimizeOpLowering : public ConversionPattern {
+  NormLMSFilterResponseOptimizeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::NormLMSFilterResponseOptimizeOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Resolve the compile-time filter length first so that the pattern can
+    // bail out without having modified the IR.
+    dsp::ConstantOp filterLenConst =
+        op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenConst)
+      return rewriter.notifyMatchFailure(
+          op, "filter length operand must be defined by a dsp constant");
+    DenseElementsAttr filterLenAttr = filterLenConst.getValue();
+    auto filterLenValues = filterLenAttr.getValues<FloatAttr>();
+    const double filterLenValue = filterLenValues[0].getValueAsDouble();
+    if (filterLenValue < 0)
+      return rewriter.notifyMatchFailure(op,
+                                         "filter length must be non-negative");
+    const int64_t FilterLength = static_cast<int64_t>(filterLenValue);
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    LMSFilterOpAdaptor lmsFilterAdaptor(operands);
+
+    auto f64Type = rewriter.getF64Type();
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, f64Type, rewriter.getF64FloatAttr(0));
+    // Step size, read with an empty index list.
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+
+    int64_t lb = 0;
+    int64_t numSamples = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Filter weights w[0 .. L-1], zero-initialised.
+    auto wMemRefType = MemRefType::get({FilterLength}, f64Type);
+    auto wAlloc = insertAllocAndDealloc(wMemRefType, loc, rewriter);
+    affine::AffineForOp initLoop =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    rewriter.setInsertionPointToStart(initLoop.getBody());
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc,
+                                   ValueRange{initLoop.getInductionVar()});
+    rewriter.setInsertionPointAfter(initLoop);
+
+    // Seeds of the (min, max) reduction: any ordered value is below +inf and
+    // above -inf.
+    Value min = rewriter.create<arith::ConstantOp>(
+        loc, f64Type,
+        rewriter.getFloatAttr(
+            f64Type, llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                           /*Negative=*/false)));
+    Value max = rewriter.create<arith::ConstantOp>(
+        loc, f64Type,
+        rewriter.getFloatAttr(
+            f64Type, llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                           /*Negative=*/true)));
+
+    // Sample loop: n = 0 .. numSamples-1, carrying (min, max) of y.
+    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(
+        loc, lb, numSamples, step, ValueRange{min, max});
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // (n, k) -> n - k is used both as the x index and, as the inequality
+    // n - k >= 0, as the guard that keeps that index in range from below.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // y[n] = 0
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+
+    // y[n] += w[k] * x[n - k]
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv2});
+
+    auto wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+    auto ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    auto sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // Fold the finished y[n] into the running min / max.
+    auto cmpVal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value minVal = forOp1.getBody()->getArgument(1);
+    Value maxVal = forOp1.getBody()->getArgument(2);
+
+    auto minOut = rewriter.create<arith::MinNumFOp>(loc, cmpVal, minVal);
+    auto maxOut = rewriter.create<arith::MaxNumFOp>(loc, cmpVal, maxVal);
+
+    // e[n] = d[n] - y[n]
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    // w[k] += mu * e[n] * x[n - k]
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+    // f(u(n), e(n), mu) = mu * e(n) * u(n)
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // Updated weight w[k].
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    rewriter.create<AffineYieldOp>(
+        loc, ValueRange{minOut.getResult(), maxOut.getResult()});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    Value minSignal = forOp1.getResults()[0];
+    Value maxSignal = forOp1.getResults()[1];
+
+    Value divisor = rewriter.create<arith::SubFOp>(loc, maxSignal, minSignal);
+
+    // Element-wise normalisation of the filter response, in place.
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto ivI = forOpI.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+
+    auto loadedVal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivI});
+    auto subVal = rewriter.create<arith::SubFOp>(loc, loadedVal, minSignal);
+    auto resultVal = rewriter.create<arith::DivFOp>(loc, subVal, divisor);
+
+    rewriter.create<AffineStoreOp>(loc, resultVal, alloc, ValueRange{ivI});
+    rewriter.setInsertionPointAfter(forOpI);
+
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+/// Lowers dsp::Median2SlidingOptimizedOp to one affine loop. For every output
+/// index i the body loads input[i .. i+4], derives the median of each of the
+/// three overlapping 3-element windows as `sum - (min + max)`, and stores the
+/// mean of those three medians at output[i]. Partial sums, minima and maxima
+/// of neighbouring windows are shared between the windows.
+struct Median2SlidingOptimizedOpLowering : public ConversionPattern {
+  Median2SlidingOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Median2SlidingOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto opLoc = op->getLoc();
+    auto resultTensorTy =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuffer = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorTy), opLoc, rewriter);
+
+    // Loop over the first dimension of the result.
+    int64_t lowerBound = 0;
+    int64_t upperBound = resultTensorTy.getShape()[0];
+    int64_t stride = 1;
+
+    // Divisor used to average the three window medians.
+    Value three = rewriter.create<arith::ConstantOp>(
+        opLoc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3));
+
+    affine::AffineForOp windowLoop =
+        rewriter.create<AffineForOp>(opLoc, lowerBound, upperBound, stride);
+    auto i = windowLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(windowLoop.getBody());
+
+    typename dsp::Median2SlidingOptimizedOp::Adaptor adaptor(operands);
+    Value source = adaptor.getInput();
+
+    // Small builders for the repeated op shapes used below.
+    auto loadShifted = [&](int64_t offset) -> Value {
+      AffineExpr shifted = rewriter.getAffineDimExpr(0) +
+                           rewriter.getAffineConstantExpr(offset);
+      return rewriter.create<AffineLoadOp>(
+          opLoc, source, AffineMap::get(1, 0, shifted), ValueRange{i});
+    };
+    auto add = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::AddFOp>(opLoc, a, b);
+    };
+    auto sub = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::SubFOp>(opLoc, a, b);
+    };
+    auto minOf = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::MinimumFOp>(opLoc, a, b);
+    };
+    auto maxOf = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::MaximumFOp>(opLoc, a, b);
+    };
+
+    // The five samples input[i], input[i+1], ..., input[i+4].
+    Value x0 = rewriter.create<AffineLoadOp>(opLoc, source, ValueRange{i});
+    Value x1 = loadShifted(1);
+    Value x2 = loadShifted(2);
+    Value x3 = loadShifted(3);
+    Value x4 = loadShifted(4);
+
+    // Window sums, built from the shared pair sums.
+    Value sumX1X2 = add(x1, x2);
+    Value sumX2X3 = add(x2, x3);
+    Value sumWin0 = add(x0, sumX1X2);
+    Value sumWin1 = add(sumX1X2, x3);
+    Value sumWin2 = add(sumX2X3, x4);
+
+    // Window minima.
+    Value minX1X2 = minOf(x1, x2);
+    Value minX2X3 = minOf(x2, x3);
+    Value minWin0 = minOf(x0, minX1X2);
+    Value minWin1 = minOf(minX1X2, x3);
+    Value minWin2 = minOf(minX2X3, x4);
+
+    // Window maxima.
+    Value maxX1X2 = maxOf(x1, x2);
+    Value maxX2X3 = maxOf(x2, x3);
+    Value maxWin0 = maxOf(x0, maxX1X2);
+    Value maxWin1 = maxOf(maxX1X2, x3);
+    Value maxWin2 = maxOf(maxX2X3, x4);
+
+    // Median of three values = sum - (min + max).
+    Value extremesWin0 = add(minWin0, maxWin0);
+    Value extremesWin1 = add(minWin1, maxWin1);
+    Value extremesWin2 = add(minWin2, maxWin2);
+
+    Value medianWin0 = sub(sumWin0, extremesWin0);
+    Value medianWin1 = sub(sumWin1, extremesWin1);
+    Value medianWin2 = sub(sumWin2, extremesWin2);
+
+    // Mean of the three medians.
+    Value firstTwo = add(medianWin0, medianWin1);
+    Value allThree = add(firstTwo, medianWin2);
+    Value meanOfMedians =
+        rewriter.create<arith::DivFOp>(opLoc, allThree, three);
+
+    rewriter.create<AffineStoreOp>(opLoc, meanOfMedians, resultBuffer,
+                                   ValueRange{i});
+    rewriter.setInsertionPointAfter(windowLoop);
+    rewriter.replaceOp(op, resultBuffer);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterResSymmThresholdUpOptimizedOp
+// operations
+//===----------------------------------------------------------------------===//
+struct FIRFilterResSymmThresholdUpOptimizedOpLowering
+    : public ConversionPattern {
+  FIRFilterResSymmThresholdUpOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::FIRFilterResSymmThresholdUpOptimizedOp::getOperationName(), 1,
+            ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterResSymmThresholdUpOptimizedOp has 2 operands -- both of type
+    // tensor f64
+
+    // Get the location of FIRFilterResSymmThresholdUpOptimizedOp
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    // Pseudo-Code
+    // y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0
+    // to L-1/2
+    //  N = lenY , M = lenX ,  L = lenH
+    // for n=0 to N
+    //  sum = 0, temp =0
+    //  for k = 0 to L-1/2
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    // middle-one
+    //  if 0 <= n - (L-1)/2 < M
+    //  sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)]
+    // y[n] = sum2
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+    DEBUG_PRINT_NO_ARGS();
+    affine::AffineForOp forOp1 =
+        rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    auto iv = forOp1.getInductionVar();
+
+    // for n=0 to N
+    //  sum = 0, temp =0
+    // get filter len
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+    auto operandIt = op->operand_type_begin();
+    auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForInput = tensorTypeInput.getShape()[0];
+    // get second operand
+    operandIt = operandIt + 1;
+
+    // auto tensorTypeFilter =
+    // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+    auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForFilter = tensorTypeFilter.getShape()[0];
+    DEBUG_PRINT_NO_ARGS();
+    // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
+    // create a constant for sum
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+        loc, lb, ubForFilter / 2, step, ValueRange{constant0});
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    auto iv2 = forOp2.getInductionVar();
+
+    auto getIterArg =
+        forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
+    DEBUG_PRINT_NO_ARGS();
+    FIRFilterResSymmThresholdUpOptimizedOpAdaptor
+        firFilterResSymmThresholdUpOpAdaptor(operands);
+
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // For n-k
+    // if 0 <= n-k < M or, 0 <= n-k <= M -1
+    AffineExpr d0, d1, s0, s1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprNMinusK = d0 - d1;
+    AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK);
+    // n-k <= M -1 or, n-k-(M-1) <= 0
+    bindSymbols(rewriter.getContext(), s0, s1);
+    Value constantMMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1;
+    IntegerSet setForIf = IntegerSet::get(
+        2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false});
+    DEBUG_PRINT_NO_ARGS();
+
+    // if 0 <= n-k <= M -1
+    // use typeRange too:
+    Type floatType = rewriter.getF64Type();
+    //  if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf,
+        ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    // val1 = x[n-k] else, val1 = 0
+    // load x[n-k]
+    DEBUG_PRINT_NO_ARGS();
+    Value loadInput = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), mapNMinusK,
+        ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+    Value const0ForElse = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
+    rewriter.setInsertionPointAfter(ifOp);
+
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // val2 lower bound
+    //  AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    //  AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    // Val2 LowerBound: n+k - (L-1) >= 0
+    AffineExpr ExprLowerBoundVal2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) -
+        rewriter.getAffineConstantExpr(ubForFilter - 1);
+    // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie,
+    // (M+L-2) - k -n >= 0
+    //  AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 +
+    //  M -1
+    AffineExpr ExprUpperBoundVal2 =
+        rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) -
+        rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0);
+    // s0 = L -1
+    //  Value s0LMin1Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForFilter - 1); s1 = M + L -2 for val2 upperBound Value
+    //  s1MPlusLPlus2Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForInput + ubForFilter - 2); Value s1MMin1Indx =
+    //  rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    IntegerSet setForIf2 = IntegerSet::get(
+        2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false});
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2},
+        true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2);
+    // load x[n+k-(L-1)]
+    DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal2 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap2,
+        ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal2});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp2.getElseBlock());
+    Value const0ForElse2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse2});
+    rewriter.setInsertionPointAfter(ifOp2);
+
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    Value Val1Plus2 = rewriter.create<arith::AddFOp>(loc, ifOp.getResult(0),
+                                                     ifOp2.getResult(0));
+
+    // load filter and then mult and then sum
+    Value loadFilter = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), iv2);
+
+    Value filterMulInput =
+        rewriter.create<arith::MulFOp>(loc, Val1Plus2, loadFilter);
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+    // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+    rewriter.setInsertionPointAfter(forOp2);
+    DEBUG_PRINT_NO_ARGS();
+    // Middle - point
+    // if 0 <= n - (L-1)/2 < M
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    // y[n] = sum2
+
+    // if 0 <= n - (L-1)/2 < M
+    // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2
+    // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2
+    int64_t midFilterLen = (ubForFilter - 1) / 2;
+    AffineExpr ExprLowerBoundVal3 =
+        rewriter.getAffineDimExpr(0) -
+        rewriter.getAffineConstantExpr(midFilterLen);
+    // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n
+    AffineExpr ExprUpperBoundVal3 =
+        rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) -
+        rewriter.getAffineDimExpr(0);
+
+    AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3);
+
+    IntegerSet setForIf3 = IntegerSet::get(
+        1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false});
+
+    auto ifOp3 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp3.getThenBlock());
+
+    // val3 = x[n-(L-1)/2)] else, val3 = 0
+    // load x[n-(L-1)/2)]
+    DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal3 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap3,
+        ValueRange{iv});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal3});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp3.getElseBlock());
+    Value const0ForElse3 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse3});
+    rewriter.setInsertionPointAfter(ifOp3);
+
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    //  y[n] = sum2
+    // load filter and then mult and then sum
+    Value midFilterLenIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, midFilterLen);
+
+    Value loadFilterMid = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), midFilterLenIndx);
+    Value filterMulInput2 =
+        rewriter.create<arith::MulFOp>(loc, ifOp3.getResult(0), loadFilterMid);
+    Value sum2 = rewriter.create<arith::AddFOp>(loc, filterMulInput2,
+                                                forOp2.getResult(0));
+    // rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
+
+    // Optimize here, compare with threshold, then if returnoriginal then store
+    // same value else 1
+
+    auto thresholdMemRef = firFilterResSymmThresholdUpOpAdaptor.getThreshold();
+    auto returnOriginalMemRef =
+        firFilterResSymmThresholdUpOpAdaptor.getReturnoriginal();
+
+    auto threshold =
+        rewriter.create<AffineLoadOp>(loc, thresholdMemRef, ValueRange{});
+    auto returnOriginal =
+        rewriter.create<AffineLoadOp>(loc, returnOriginalMemRef, ValueRange{});
+    Value constant00 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    Value constant11 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // Compare a[i] >= threshold
+    auto cmp1 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
+                                               sum2, threshold);
+    // Compare if return original is true or false and return 1 or original
+    // value
+    auto cmpro = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ,
+                                                constant11, returnOriginal);
+
+    // Use select to choose between inputX and 1
+    auto selectreturn =
+        rewriter.create<arith::SelectOp>(loc, cmpro, sum2, constant11);
+
+    // Use select to choose between 0 and selectreturn
+    auto selectOp =
+        rewriter.create<arith::SelectOp>(loc, cmp1, selectreturn, constant00);
+
+    // Store the result
+    rewriter.create<AffineStoreOp>(loc, selectOp, alloc, iv);
+
+    // rewriter.create<AffineStoreOp>(loc, sum2, alloc, iv);
+    rewriter.setInsertionPointAfter(forOp1);
+    DEBUG_PRINT_NO_ARGS();
+    // ifOp->dump();
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTOp operations
+//===----------------------------------------------------------------------===//
+
+struct FFTOpLowering : public ConversionPattern {
+  FFTOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTOp::getOperationName(), 1, ctx) {}
+
+  // Lowers dsp::FFTOp to scf loops over two memrefs holding the real and the
+  // imaginary parts: a bit-reversal permutation of the (real) input followed
+  // by in-place radix-2 butterflies. The op is replaced by {real, imag}.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // The butterflies stay within [0, N) only when N is a power of two, so
+    // reject other (or dynamic) lengths before any IR is created.
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(op, "FFT length not a power of two");
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+    // NOTE(review): this pattern matches dsp::FFTOp but unpacks its operand
+    // with FFTRealOpAdaptor; confirm that is intentional.
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Working buffers: updated in place and returned as the two results.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Number of index bits to reverse: log2(N), evaluated in the emitted IR.
+    auto ubInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), ub);
+    auto ubFloat =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), ubInt);
+    auto bitsNeededFloat = rewriter.create<math::Log2Op>(loc, ubFloat);
+    auto bitsNeededInt = rewriter.create<arith::FPToSIOp>(
+        loc, rewriter.getI64Type(), bitsNeededFloat);
+    auto bitsNeeded = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), bitsNeededInt);
+
+    // Bit-reversal permutation: input[i] goes to index bitreverse(i).
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // Build the reversed index one bit per iteration, carried as an iter_arg.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, bitsNeeded, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, bitsNeeded, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // If bit j of i is set, set bit (bitsNeeded - 1 - j) of the result.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // Yield the updated value to carry it forward
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // Scatter the real input into bit-reversed order; the imaginary part is 0.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Cooley-Tukey FFT implementation
+    auto stages = static_cast<int64_t>(std::log2(N));
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Constants for complex arithmetic
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Calculate indices for even and odd elements
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor: angle = -2 * pi * k / full_size
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Complex multiply: t = odd * (cos + i * sin)
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly operation: even' = even + t, odd' = even - t
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // replace the operation with the final value
+    rewriter.replaceOp(op,
+                       ValueRange{alloc_reversed_real, alloc_reversed_imag});
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTAbsOp operations
+//===----------------------------------------------------------------------===//
+
+struct FFTAbsOpLowering : public ConversionPattern {
+  FFTAbsOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTAbsOp::getOperationName(), 1, ctx) {}
+
+  // Lowers dsp::FFTAbsOp to scf loops: a bit-reversal permutation of the
+  // (real) input, in-place radix-2 butterflies on separate real/imaginary
+  // buffers, and a final pass that stores the magnitude of every bin. The
+  // op is replaced by the magnitude buffer.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // The butterflies stay within [0, N) only when N is a power of two.
+    // Reject other (or dynamic) lengths before any IR is created, instead of
+    // emitting out-of-bounds loads and stores.
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(
+          op, "FFT length must be a static power of two");
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+
+    FFTAbsOpAdaptor fftAbsOpAdaptor(operands);
+
+    auto input = fftAbsOpAdaptor.getInput();
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Working buffers (real/imag, updated in place) and the result buffer.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_amplitude = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Number of index bits to reverse: log2(N), evaluated in the emitted IR.
+    auto ubInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), ub);
+    auto ubFloat =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), ubInt);
+    auto bitsNeededFloat = rewriter.create<math::Log2Op>(loc, ubFloat);
+    auto bitsNeededInt = rewriter.create<arith::FPToSIOp>(
+        loc, rewriter.getI64Type(), bitsNeededFloat);
+    auto bitsNeeded = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), bitsNeededInt);
+
+    // Bit-reversal permutation: input[i] goes to index bitreverse(i).
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // Build the reversed index one bit per iteration, carried as an iter_arg.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, bitsNeeded, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, bitsNeeded, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // If bit j of i is set, set bit (bitsNeeded - 1 - j) of the result.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // Yield the updated value to carry it forward
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // Scatter the real input into bit-reversed order; the imaginary part is 0.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Cooley-Tukey FFT implementation
+    auto stages = static_cast<int64_t>(std::log2(N));
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Constants for complex arithmetic
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Calculate indices for even and odd elements
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor: angle = -2 * pi * k / full_size
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Complex multiply: t = odd * (cos + i * sin)
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly operation: even' = even + t, odd' = even - t
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // Magnitude |X[k]| = sqrt(re^2 + im^2). It is computed once, after the
+    // last stage, instead of being recomputed and overwritten in every
+    // stage; this also covers N == 1, where no butterfly runs at all.
+    rewriter.setInsertionPointAfter(fftLoop);
+    auto magLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(magLoop.getBody());
+    auto bin = magLoop.getInductionVar();
+    auto bin_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{bin});
+    auto bin_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{bin});
+    auto real_squared = rewriter.create<arith::MulFOp>(loc, bin_real, bin_real);
+    auto imag_squared = rewriter.create<arith::MulFOp>(loc, bin_imag, bin_imag);
+    auto sum = rewriter.create<arith::AddFOp>(loc, real_squared, imag_squared);
+    auto magnitude = rewriter.create<math::SqrtOp>(loc, sum);
+    rewriter.create<memref::StoreOp>(loc, magnitude, alloc_amplitude,
+                                     ValueRange{bin});
+
+    // replace the operation with the final value
+    rewriter.replaceOp(op, alloc_amplitude);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: DFTAbsOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::DFTAbsOp to affine loops that compute the magnitude spectrum of
+/// a direct, O(N^2) discrete Fourier transform:
+///
+///   real[k] =  sum_n x[n] * cos(2*pi*k*n / N)
+///   imag[k] = -sum_n x[n] * sin(2*pi*k*n / N)
+///   out[k]  =  sqrt(real[k]^2 + imag[k]^2)
+///
+/// N is the leading dimension of the result type; it bounds both the bin loop
+/// (k) and the sample loop (n). The op is replaced by the buffer holding out[].
+struct DFTAbsOpLowering : public ConversionPattern {
+  DFTAbsOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DFTAbsOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The (first) result type fixes the buffer shape and every loop bound.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+
+    // Scratch accumulators for the real/imaginary sums plus the result buffer.
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_mag = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    const int64_t lb = 0;
+    const int64_t ub = tensorType.getShape()[0];
+    const int64_t step = 1;
+
+    // affine.for %k = 0 to N { real[k] = imag[k] = out[k] = 0.0 }
+    affine::AffineForOp initLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivInit = initLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(initLoop.getBody());
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_real, ValueRange{ivInit});
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_img, ValueRange{ivInit});
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_mag, ValueRange{ivInit});
+    rewriter.setInsertionPointAfter(initLoop);
+
+    // Outer loop over output bins k.
+    affine::AffineForOp binLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivK = binLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(binLoop.getBody());
+
+    // Inner loop over input samples n.
+    // NOTE(review): the input is indexed over [0, N) taken from the result
+    // type, so this assumes the input has at least N elements -- confirm.
+    affine::AffineForOp sampleLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivN = sampleLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(sampleLoop.getBody());
+
+    DFTAbsOpAdaptor adaptor(operands);
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), ValueRange{ivN});
+    Value accReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivK});
+    Value accImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivK});
+
+    // Convert both induction variables index -> i32 -> f64.
+    Value kAsInt = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivK);
+    Value kAsFloat =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), kAsInt);
+
+    Value nAsInt = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivN);
+    Value nAsFloat =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), nAsInt);
+
+    // angle = 2*pi * k * n / N
+    Value kTimesN = rewriter.create<arith::MulFOp>(loc, kAsFloat, nAsFloat);
+    Value twoPi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value twoPiKN = rewriter.create<arith::MulFOp>(loc, twoPi, kTimesN);
+
+    // Materialise N directly as a double: going through `float` first would
+    // round lengths that do not fit in a 24-bit mantissa.
+    Value lengthN = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(static_cast<double>(ub)));
+    Value angle = rewriter.create<arith::DivFOp>(loc, twoPiKN, lengthN);
+
+    // real[k] += x[n] * cos(angle)
+    Value cosAngle = rewriter.create<math::CosOp>(loc, angle);
+    Value realTerm = rewriter.create<arith::MulFOp>(loc, sample, cosAngle);
+    Value newReal = rewriter.create<arith::AddFOp>(loc, accReal, realTerm);
+    rewriter.create<AffineStoreOp>(loc, newReal, alloc_real, ValueRange{ivK});
+
+    // imag[k] -= x[n] * sin(angle)
+    Value sinAngle = rewriter.create<math::SinOp>(loc, angle);
+    Value imgTerm = rewriter.create<arith::MulFOp>(loc, sample, sinAngle);
+    Value newImg = rewriter.create<arith::SubFOp>(loc, accImg, imgTerm);
+    rewriter.create<AffineStoreOp>(loc, newImg, alloc_img, ValueRange{ivK});
+
+    // Back in the bin loop, once all samples have been accumulated.
+    rewriter.setInsertionPointAfter(sampleLoop);
+    Value finalReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivK});
+    Value finalImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivK});
+
+    // out[k] = sqrt(real^2 + imag^2)
+    auto realSquared =
+        rewriter.create<arith::MulFOp>(loc, finalReal, finalReal);
+    auto imgSquared = rewriter.create<arith::MulFOp>(loc, finalImg, finalImg);
+    auto powerSum =
+        rewriter.create<arith::AddFOp>(loc, realSquared, imgSquared);
+    auto magnitude = rewriter.create<math::SqrtOp>(loc, powerSum);
+    rewriter.create<AffineStoreOp>(loc, magnitude, alloc_mag, ValueRange{ivK});
+
+    // Leave the loop nest and hand the magnitude buffer to the op's users.
+    rewriter.setInsertionPointAfter(binLoop);
+    rewriter.replaceOp(op, alloc_mag);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: DFTAbsThresholdUpOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::DFTAbsThresholdUpOp: a direct O(N^2) DFT magnitude followed by
+/// an "at or above threshold" gate, emitted as a single loop nest.
+///
+/// For every bin k in [0, N), N being the leading result dimension:
+///   amp    = sqrt(real[k]^2 + imag[k]^2)        (see the sums in the body)
+///   out[k] = amp >= threshold
+///              ? (returnoriginal == 1.0 ? amp : 1.0)
+///              : 0.0
+/// `threshold` and `returnoriginal` are read with no indices, i.e. as rank-0
+/// buffers. The op is replaced by the buffer holding out[].
+struct DFTAbsThresholdUpOpLowering : public ConversionPattern {
+  DFTAbsThresholdUpOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DFTAbsThresholdUpOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type fixes the buffer shape and every loop bound.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+
+    // Scratch accumulators for the real/imaginary sums plus the result buffer.
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_mag = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    const int64_t lb = 0;
+    const int64_t ub = tensorType.getShape()[0];
+    const int64_t step = 1;
+
+    // affine.for %k = 0 to N { real[k] = imag[k] = out[k] = 0.0 }
+    affine::AffineForOp initLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivInit = initLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(initLoop.getBody());
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_real, ValueRange{ivInit});
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_img, ValueRange{ivInit});
+    rewriter.create<AffineStoreOp>(loc, zero, alloc_mag, ValueRange{ivInit});
+    rewriter.setInsertionPointAfter(initLoop);
+
+    // Outer loop over output bins k.
+    affine::AffineForOp binLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivK = binLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(binLoop.getBody());
+
+    // Inner loop over input samples n.
+    // NOTE(review): the input is indexed over [0, N) taken from the result
+    // type, so this assumes the input has at least N elements -- confirm.
+    affine::AffineForOp sampleLoop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivN = sampleLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(sampleLoop.getBody());
+
+    DFTAbsThresholdUpOpAdaptor adaptor(operands);
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), ValueRange{ivN});
+    Value accReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivK});
+    Value accImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivK});
+
+    // Convert both induction variables index -> i32 -> f64.
+    Value kAsInt = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivK);
+    Value kAsFloat =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), kAsInt);
+
+    Value nAsInt = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivN);
+    Value nAsFloat =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), nAsInt);
+
+    // angle = 2*pi * k * n / N
+    Value kTimesN = rewriter.create<arith::MulFOp>(loc, kAsFloat, nAsFloat);
+    Value twoPi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value twoPiKN = rewriter.create<arith::MulFOp>(loc, twoPi, kTimesN);
+
+    // Materialise N directly as a double: going through `float` first would
+    // round lengths that do not fit in a 24-bit mantissa.
+    Value lengthN = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(static_cast<double>(ub)));
+    Value angle = rewriter.create<arith::DivFOp>(loc, twoPiKN, lengthN);
+
+    // real[k] += x[n] * cos(angle)
+    Value cosAngle = rewriter.create<math::CosOp>(loc, angle);
+    Value realTerm = rewriter.create<arith::MulFOp>(loc, sample, cosAngle);
+    Value newReal = rewriter.create<arith::AddFOp>(loc, accReal, realTerm);
+    rewriter.create<AffineStoreOp>(loc, newReal, alloc_real, ValueRange{ivK});
+
+    // imag[k] -= x[n] * sin(angle)
+    Value sinAngle = rewriter.create<math::SinOp>(loc, angle);
+    Value imgTerm = rewriter.create<arith::MulFOp>(loc, sample, sinAngle);
+    Value newImg = rewriter.create<arith::SubFOp>(loc, accImg, imgTerm);
+    rewriter.create<AffineStoreOp>(loc, newImg, alloc_img, ValueRange{ivK});
+
+    // Back in the bin loop, once all samples have been accumulated.
+    rewriter.setInsertionPointAfter(sampleLoop);
+    Value finalReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivK});
+    Value finalImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivK});
+
+    // amp = sqrt(real^2 + imag^2)
+    auto realSquared =
+        rewriter.create<arith::MulFOp>(loc, finalReal, finalReal);
+    auto imgSquared = rewriter.create<arith::MulFOp>(loc, finalImg, finalImg);
+    auto powerSum =
+        rewriter.create<arith::AddFOp>(loc, realSquared, imgSquared);
+    auto amplitude = rewriter.create<math::SqrtOp>(loc, powerSum);
+
+    // Scalar controls, read once per bin from their rank-0 buffers.
+    auto threshold = rewriter.create<AffineLoadOp>(loc, adaptor.getThreshold(),
+                                                   ValueRange{});
+    auto returnOriginal = rewriter.create<AffineLoadOp>(
+        loc, adaptor.getReturnoriginal(), ValueRange{});
+    Value gateOff = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value gateOn = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // amp >= threshold (ordered compare: false if either side is NaN).
+    auto passesThreshold = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, amplitude, threshold);
+    // returnoriginal == 1.0 selects the amplitude itself instead of the flag.
+    auto wantsOriginal = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, gateOn, returnOriginal);
+
+    // Value produced when the threshold is met: the amplitude or 1.0.
+    auto passValue =
+        rewriter.create<arith::SelectOp>(loc, wantsOriginal, amplitude, gateOn);
+    // Final value: passValue when the threshold is met, otherwise 0.0.
+    auto gated = rewriter.create<arith::SelectOp>(loc, passesThreshold,
+                                                  passValue, gateOff);
+
+    rewriter.create<AffineStoreOp>(loc, gated, alloc_mag, ValueRange{ivK});
+
+    // Leave the loop nest and hand the gated buffer to the op's users.
+    rewriter.setInsertionPointAfter(binLoop);
+    rewriter.replaceOp(op, alloc_mag);
+    return success();
+  }
+};
+
+
+/// Lowers dsp::CorrelateOp to scf loops computing the full cross-correlation
+/// of `lhs` and `rhs`, with N taken from the leading dimension of operand 0:
+///
+///   for k in [0, N):       out[k] = sum_{j=0..k}       lhs[j] * rhs[N-1-k+j]
+///   for k in [N, 2N-1):    out[k] = sum_{j=0..2N-2-k}  lhs[k-N+1+j] * rhs[j]
+///
+/// Each sum is accumulated in a rank-0 f64 scratch buffer and then copied to
+/// out[k]. The op is replaced by the `out` buffer.
+/// NOTE(review): both operands are indexed up to N-1 and `out` up to 2N-2, so
+/// this assumes rhs has at least N elements and the result at least 2N-1 --
+/// confirm against the op verifier.
+struct CorrelateOpLowering : public ConversionPattern {
+  CorrelateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::CorrelateOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // N becomes a constant loop bound, so the first operand must be a ranked
+    // tensor with a static leading dimension. Reject anything else before any
+    // IR is emitted instead of dereferencing a null type.
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType || inputType.getRank() < 1 || inputType.isDynamicDim(0))
+      return rewriter.notifyMatchFailure(
+          op, "expected a ranked input with a static leading dimension");
+    const int64_t N = inputType.getShape()[0];
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    dsp::CorrelateOp::Adaptor adaptor(operands);
+
+    // Loop bounds shared by both halves of the output.
+    auto idxZero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto idxN = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto idxStep = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value idxNMinusOne = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(N - 1));
+
+    // Rank-0 scratch cell holding the running sum for the current lag.
+    auto scalarF64Type = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc_iter_sum = insertAllocAndDealloc(scalarF64Type, loc, rewriter);
+
+    Value f64Zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Emits `sum += lhs[lhsIdx] * rhs[rhsIdx]` at the current insertion point.
+    auto emitMultiplyAccumulate = [&](Value lhsIdx, Value rhsIdx) {
+      Value lhsElem = rewriter.create<memref::LoadOp>(loc, adaptor.getLhs(),
+                                                      ValueRange{lhsIdx});
+      Value rhsElem = rewriter.create<memref::LoadOp>(loc, adaptor.getRhs(),
+                                                      ValueRange{rhsIdx});
+      Value product = rewriter.create<arith::MulFOp>(loc, lhsElem, rhsElem);
+      Value runningSum =
+          rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+      Value updatedSum =
+          rewriter.create<arith::AddFOp>(loc, runningSum, product);
+      rewriter.create<memref::StoreOp>(loc, updatedSum, alloc_iter_sum,
+                                       ValueRange{});
+    };
+
+    // Emits `out[outIdx] = sum` at the current insertion point.
+    auto emitStoreLag = [&](Value outIdx) {
+      auto lagSum =
+          rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+      rewriter.create<memref::StoreOp>(loc, lagSum, alloc_output,
+                                       ValueRange{outIdx});
+    };
+
+    // ---- First half: k in [0, N) -------------------------------------------
+    auto firstHalf = rewriter.create<scf::ForOp>(loc, idxZero, idxN, idxStep);
+    auto k1 = firstHalf.getInductionVar();
+    rewriter.setInsertionPointToStart(firstHalf.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, f64Zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // rhs index runs over [N-1-k, N); the lhs index is rebased to start at 0.
+    Value rhsStart = rewriter.create<arith::SubIOp>(loc, idxNMinusOne, k1);
+    auto firstInner = rewriter.create<scf::ForOp>(loc, rhsStart, idxN, idxStep);
+    auto rhsIdx1 = firstInner.getInductionVar();
+    rewriter.setInsertionPointToStart(firstInner.getBody());
+
+    Value lhsIdx1 = rewriter.create<arith::SubIOp>(loc, rhsIdx1, rhsStart);
+    emitMultiplyAccumulate(lhsIdx1, rhsIdx1);
+
+    rewriter.setInsertionPointAfter(firstInner);
+    emitStoreLag(k1);
+    rewriter.setInsertionPointAfter(firstHalf);
+
+    // ---- Second half: k in [N, 2N-1) ---------------------------------------
+    auto idxTwoNMinusOne =
+        rewriter.create<arith::ConstantIndexOp>(loc, 2 * N - 1);
+    auto secondHalf =
+        rewriter.create<scf::ForOp>(loc, idxN, idxTwoNMinusOne, idxStep);
+    auto k2 = secondHalf.getInductionVar();
+    rewriter.setInsertionPointToStart(secondHalf.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, f64Zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // lhs index runs over [k-(N-1), N); the rhs index is rebased to start at 0.
+    Value lhsStart = rewriter.create<arith::SubIOp>(loc, k2, idxNMinusOne);
+    auto secondInner =
+        rewriter.create<scf::ForOp>(loc, lhsStart, idxN, idxStep);
+    auto lhsIdx2 = secondInner.getInductionVar();
+    rewriter.setInsertionPointToStart(secondInner.getBody());
+
+    Value rhsIdx2 = rewriter.create<arith::SubIOp>(loc, lhsIdx2, lhsStart);
+    emitMultiplyAccumulate(lhsIdx2, rhsIdx2);
+
+    rewriter.setInsertionPointAfter(secondInner);
+    emitStoreLag(k2);
+    rewriter.setInsertionPointAfter(secondHalf);
+
+    rewriter.replaceOp(op, alloc_output);
+    return success();
+  }
+};
+
+
+/// Lowers dsp::SetSingleElemAtIdxOp: loads a single index and a single value
+/// and stores the value into the (converted) input buffer at that index.
+///
+/// The index arrives as f64, so it is converted f64 -> ui32 -> index before
+/// being used as the store subscript. When the index operand has rank 0 both
+/// `indx` and `val` are loaded with no subscripts; otherwise element 0 of each
+/// is loaded.
+///
+/// NOTE(review): the store targets the input buffer, while the op's result is
+/// replaced by a freshly allocated buffer that this pattern never writes --
+/// confirm that the result is only meant as a placeholder.
+/// NOTE(review): the subscripts used for `val` are derived from the rank of
+/// `indx`, not of `val` -- confirm both always have the same rank.
+struct SetSingleElemAtIdxOpLowering : public ConversionPattern {
+  SetSingleElemAtIdxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SetSingleElemAtIdxOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The index operand must be ranked so we know how to subscript it. Check
+    // before emitting any IR rather than dereferencing a null type.
+    auto indxArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!indxArgType)
+      return rewriter.notifyMatchFailure(op,
+                                         "expected a ranked index operand");
+    const bool indexIsScalar = indxArgType.getRank() == 0;
+
+    SetSingleElemAtIdxOpAdaptor adaptor(operands);
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Subscripts for the one-element operands. These are kept in an owning
+    // SmallVector: ValueRange is a non-owning view, and assigning
+    // `ValueRange{v}` to a variable leaves it pointing at a temporary
+    // initializer_list that is gone by the time the load is built.
+    SmallVector<Value, 1> elemSubscripts;
+    if (!indexIsScalar)
+      elemSubscripts.push_back(cst_idx_zero);
+
+    Value loadedIndx =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getIndx(), elemSubscripts);
+
+    // f64 -> ui32 -> index
+    Value indxAsInt = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedIndx);
+    Value indxAsIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), indxAsInt);
+
+    Value loadedVal =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getVal(), elemSubscripts);
+
+    // input[indx] = val
+    rewriter.create<AffineStoreOp>(loc, loadedVal, adaptor.getInput(),
+                                   ValueRange{indxAsIndex});
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+
+
+/// Lowers dsp::Correl2MaxOptimizedOp: computes every lag of the full
+/// cross-correlation of `lhs` and `rhs` and keeps only the largest one, so the
+/// 2N-1 element correlation is never materialised.
+///
+/// With N taken from the leading dimension of operand 0:
+///   for k in [0, N):       s = sum_{j=0..k}       lhs[j] * rhs[N-1-k+j]
+///   for k in [N, 2N-1):    s = sum_{j=0..2N-2-k}  lhs[k-N+1+j] * rhs[j]
+///   after each lag:        if (s > out) out = s
+///
+/// `out` is accessed with no subscripts (rank-0 buffer) and replaces the op.
+/// NOTE(review): `out` starts at 0.0, so if every lag is negative the result
+/// is 0.0 rather than the true maximum -- confirm this is intended.
+struct Correl2MaxOptimizedOpLowering : public ConversionPattern {
+  Correl2MaxOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Correl2MaxOptimizedOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // N becomes a constant loop bound, so the first operand must be a ranked
+    // tensor with a static leading dimension. Reject anything else before any
+    // IR is emitted instead of dereferencing a null type.
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType || inputType.getRank() < 1 || inputType.isDynamicDim(0))
+      return rewriter.notifyMatchFailure(
+          op, "expected a ranked input with a static leading dimension");
+    const int64_t N = inputType.getShape()[0];
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    dsp::Correl2MaxOptimizedOp::Adaptor adaptor(operands);
+
+    // Loop bounds shared by both halves of the lag range.
+    auto idxZero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto idxN = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto idxStep = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value idxNMinusOne = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(N - 1));
+
+    // Rank-0 scratch cell holding the running sum for the current lag.
+    auto scalarF64Type = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc_iter_sum = insertAllocAndDealloc(scalarF64Type, loc, rewriter);
+
+    Value f64Zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Running maximum starts at 0.0.
+    rewriter.create<memref::StoreOp>(loc, f64Zero, alloc_output, ValueRange{});
+
+    // Emits `sum += lhs[lhsIdx] * rhs[rhsIdx]` at the current insertion point.
+    auto emitMultiplyAccumulate = [&](Value lhsIdx, Value rhsIdx) {
+      Value lhsElem = rewriter.create<memref::LoadOp>(loc, adaptor.getLhs(),
+                                                      ValueRange{lhsIdx});
+      Value rhsElem = rewriter.create<memref::LoadOp>(loc, adaptor.getRhs(),
+                                                      ValueRange{rhsIdx});
+      Value product = rewriter.create<arith::MulFOp>(loc, lhsElem, rhsElem);
+      Value runningSum =
+          rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+      Value updatedSum =
+          rewriter.create<arith::AddFOp>(loc, runningSum, product);
+      rewriter.create<memref::StoreOp>(loc, updatedSum, alloc_iter_sum,
+                                       ValueRange{});
+    };
+
+    // Emits `if (sum > out) out = sum` at the current insertion point. The
+    // insertion point is left inside the `then` block; callers reset it.
+    auto emitUpdateMax = [&]() {
+      auto lagSum =
+          rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+      auto currentMax =
+          rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+      auto isLarger = rewriter.create<arith::CmpFOp>(
+          loc, arith::CmpFPredicate::OGT, lagSum, currentMax);
+      auto ifLarger =
+          rewriter.create<scf::IfOp>(loc, isLarger, /*withElseRegion=*/false);
+      rewriter.setInsertionPointToStart(ifLarger.thenBlock());
+      rewriter.create<memref::StoreOp>(loc, lagSum, alloc_output,
+                                       ValueRange{});
+    };
+
+    // ---- First half: k in [0, N) -------------------------------------------
+    auto firstHalf = rewriter.create<scf::ForOp>(loc, idxZero, idxN, idxStep);
+    auto k1 = firstHalf.getInductionVar();
+    rewriter.setInsertionPointToStart(firstHalf.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, f64Zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // rhs index runs over [N-1-k, N); the lhs index is rebased to start at 0.
+    Value rhsStart = rewriter.create<arith::SubIOp>(loc, idxNMinusOne, k1);
+    auto firstInner = rewriter.create<scf::ForOp>(loc, rhsStart, idxN, idxStep);
+    auto rhsIdx1 = firstInner.getInductionVar();
+    rewriter.setInsertionPointToStart(firstInner.getBody());
+
+    Value lhsIdx1 = rewriter.create<arith::SubIOp>(loc, rhsIdx1, rhsStart);
+    emitMultiplyAccumulate(lhsIdx1, rhsIdx1);
+
+    rewriter.setInsertionPointAfter(firstInner);
+    emitUpdateMax();
+    rewriter.setInsertionPointAfter(firstHalf);
+
+    // ---- Second half: k in [N, 2N-1) ---------------------------------------
+    auto idxTwoNMinusOne =
+        rewriter.create<arith::ConstantIndexOp>(loc, 2 * N - 1);
+    auto secondHalf =
+        rewriter.create<scf::ForOp>(loc, idxN, idxTwoNMinusOne, idxStep);
+    auto k2 = secondHalf.getInductionVar();
+    rewriter.setInsertionPointToStart(secondHalf.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, f64Zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // lhs index runs over [k-(N-1), N); the rhs index is rebased to start at 0.
+    Value lhsStart = rewriter.create<arith::SubIOp>(loc, k2, idxNMinusOne);
+    auto secondInner =
+        rewriter.create<scf::ForOp>(loc, lhsStart, idxN, idxStep);
+    auto lhsIdx2 = secondInner.getInductionVar();
+    rewriter.setInsertionPointToStart(secondInner.getBody());
+
+    Value rhsIdx2 = rewriter.create<arith::SubIOp>(loc, lhsIdx2, lhsStart);
+    emitMultiplyAccumulate(lhsIdx2, rhsIdx2);
+
+    rewriter.setInsertionPointAfter(secondInner);
+    emitUpdateMax();
+    rewriter.setInsertionPointAfter(secondHalf);
+
+    rewriter.replaceOp(op, alloc_output);
+    return success();
+  }
+};
+
+
+
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: lmsFilterResponse operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::LMSFilterResponse2GainOp: an LMS adaptive filter whose output
+/// is scaled by `gain` inside the same loop nest.
+///
+/// Emitted computation (x = lhs, d = rhs, L = filter length, mu/gain scalars):
+///   for n in [0, numSamples):
+///     w[n] = 0; y[n] = 0
+///     for i in [0, L): if (n - i >= 0) y[n] += w[i] * x[n - i]
+///     e    = d[n] - y[n]            // error uses the un-scaled output
+///     y[n] = y[n] * gain
+///     for i in [0, L): if (n - i >= 0) w[i] += mu * e * x[n - i]
+///
+/// numSamples is the leading dimension of the result type. L is read from the
+/// dsp::ConstantOp that defines operand 3 because it becomes a static affine
+/// loop bound. The weight buffer has numSamples elements; taps are only touched
+/// when i <= n, so w[n] is zeroed in iteration n before its first use.
+struct LMSFilterResponse2GainOpLowering : public ConversionPattern {
+  LMSFilterResponse2GainOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMSFilterResponse2GainOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Resolve the filter length first, before any IR is emitted: it must come
+    // from a constant, otherwise there is no static loop bound to use.
+    auto filterLenConst = op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenConst)
+      return rewriter.notifyMatchFailure(
+          op, "filter length operand must be defined by a dsp::ConstantOp");
+    DenseElementsAttr filterLenAttr = filterLenConst.getValue();
+    if (filterLenAttr.empty())
+      return rewriter.notifyMatchFailure(op, "filter length constant is empty");
+    const double filterLenValue =
+        filterLenAttr.getValues<FloatAttr>()[0].getValueAsDouble();
+    const auto filterLength = static_cast<int64_t>(filterLenValue);
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    LMSFilterResponse2GainOpAdaptor adaptor(operands);
+
+    Value f64Zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    // Scalar step size and output gain, loaded once ahead of the sample loop.
+    Value mu = rewriter.create<AffineLoadOp>(loc, adaptor.getMu());
+    Value gain = rewriter.create<AffineLoadOp>(loc, adaptor.getGain());
+
+    const int64_t lb = 0;
+    const int64_t numSamples = tensorType.getShape()[0];
+    const int64_t step = 1;
+
+    // Weight buffer. Allocated through insertAllocAndDealloc like every other
+    // buffer here so that it is freed at the end of the block instead of
+    // leaking.
+    auto weightsType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto wAlloc = insertAllocAndDealloc(weightsType, loc, rewriter);
+
+    // Outer loop over samples n.
+    affine::AffineForOp sampleLoop =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto ivN = sampleLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(sampleLoop.getBody());
+
+    // (n, i) -> n - i is used both as the x subscript and, as the constraint
+    // n - i >= 0, to skip taps that would read before the start of x.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr nMinusI = d0 - d1;
+    AffineMap delayedSampleMap = AffineMap::get(2, 0, nMinusI);
+    IntegerSet tapInRange = IntegerSet::get(2, 0, {nMinusI}, {false});
+
+    // w[n] = 0; y[n] = 0
+    rewriter.create<AffineStoreOp>(loc, f64Zero, wAlloc, ValueRange{ivN});
+    rewriter.create<AffineStoreOp>(loc, f64Zero, alloc, ValueRange{ivN});
+
+    // ---- Filter output: y[n] += w[i] * x[n - i] ------------------------------
+    affine::AffineForOp outputTapLoop =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto ivTapOut = outputTapLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(outputTapLoop.getBody());
+
+    auto outputGuard = rewriter.create<affine::AffineIfOp>(
+        loc, tapInRange, ValueRange{ivN, ivTapOut}, false /*no else*/);
+    rewriter.setInsertionPointToStart(outputGuard.getThenBlock());
+
+    Value delayedX = rewriter.create<AffineLoadOp>(
+        loc, adaptor.getLhs(), delayedSampleMap, ValueRange{ivN, ivTapOut});
+    Value tapWeight =
+        rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{ivTapOut});
+
+    Value weighted = rewriter.create<arith::MulFOp>(loc, delayedX, tapWeight);
+    Value yPartial = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivN});
+    Value yUpdated = rewriter.create<arith::AddFOp>(loc, weighted, yPartial);
+    rewriter.create<AffineStoreOp>(loc, yUpdated, alloc, ValueRange{ivN});
+    rewriter.setInsertionPointAfter(outputTapLoop);
+
+    // ---- Error and gain: e = d[n] - y[n]; y[n] *= gain -----------------------
+    Value desired =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getRhs(), ValueRange{ivN});
+    Value yFinal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivN});
+    Value err = rewriter.create<arith::SubFOp>(loc, desired, yFinal);
+
+    // The gain is applied after the error is taken, so the weight update below
+    // still adapts against the un-scaled filter output.
+    Value yScaled = rewriter.create<arith::MulFOp>(loc, yFinal, gain);
+    rewriter.create<AffineStoreOp>(loc, yScaled, alloc, ValueRange{ivN});
+
+    // ---- Weight update: w[i] += mu * e * x[n - i] ----------------------------
+    affine::AffineForOp updateTapLoop =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto ivTapUpd = updateTapLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(updateTapLoop.getBody());
+
+    auto updateGuard = rewriter.create<affine::AffineIfOp>(
+        loc, tapInRange, ValueRange{ivN, ivTapUpd}, false /*no else*/);
+    rewriter.setInsertionPointToStart(updateGuard.getThenBlock());
+
+    Value delayedX2 = rewriter.create<AffineLoadOp>(
+        loc, adaptor.getLhs(), delayedSampleMap, ValueRange{ivN, ivTapUpd});
+    Value prevWeight =
+        rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{ivTapUpd});
+
+    Value errTimesX = rewriter.create<arith::MulFOp>(loc, err, delayedX2);
+    Value weightDelta = rewriter.create<arith::MulFOp>(loc, mu, errTimesX);
+    Value newWeight =
+        rewriter.create<arith::AddFOp>(loc, prevWeight, weightDelta);
+    rewriter.create<AffineStoreOp>(loc, newWeight, wAlloc,
+                                   ValueRange{ivTapUpd});
+
+    // Leave the whole loop nest and hand the output buffer to the op's users.
+    rewriter.setInsertionPointAfter(sampleLoop);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+
+// namespace
 
 //===----------------------------------------------------------------------===//
 // ToyToAffineLoweringPass
@@ -6235,9 +12082,9 @@ struct ToyToAffineLoweringPass
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ToyToAffineLoweringPass)
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<affine::AffineDialect, func::FuncDialect,
-                    memref::MemRefDialect, math::MathDialect,
-                    scf::SCFDialect>();
+    registry
+        .insert<affine::AffineDialect, func::FuncDialect, memref::MemRefDialect,
+                math::MathDialect, scf::SCFDialect>();
   }
   void runOnOperation() final;
 };
@@ -6271,23 +12118,42 @@ void ToyToAffineLoweringPass::runOnOperation() {
   // Now that the conversion target has been defined, we just need to provide
   // the set of patterns that will lower the Toy operations.
   RewritePatternSet patterns(&getContext());
-  patterns.add<AddOpLowering, ConstantOpLowering, FuncOpLowering, MulOpLowering, 
-               PrintOpLowering, ReturnOpLowering, TransposeOpLowering ,
-               DelayOpLowering, GainOpLowering, SubOpLowering, FIRFilterResponseOpLowering, 
-               SlidingWindowAvgOpLowering, DownSamplingOpLowering, 
-               UpSamplingOpLowering, LowPassFilter1stOrderOpLowering, 
-               HighPassFilterOpLowering, FFT1DOpLowering, IFFT1DOpLowering,
-               HammingWindowOpLowering, DCTOpLowering, filterOpLowering, DivOpLowering,
-               SumOpLowering, SinOpLowering, CosOpLowering, SquareOpLowering,
-               FFT1DRealOpLowering, FFT1DImgOpLowering, SincOpLowering, GetElemAtIndxOpLowering,
-               SetElemAtIndxOpLowering ,LowPassFIRFilterOpLowering, HighPassFIRFilterOpLowering,
-               GetRangeOfVectorOpLowering, FIRFilterHammingOptimizedOpLowering, HighPassFIRHammingOptimizedOpLowering, 
-               LMSFilterOpLowering ,ThresholdOpLowering, QuantizationOpLowering, LMSFilterResponseOpLowering,
-               RunLenEncodingOpLowering, FIRFilterResSymmOptimizedOpLowering,
-               LengthOpLowering, ReverseInputOpLowering, PaddingOpLowering,
-               FIRFilterYSymmOptimizedOpLowering , FFT1DRealSymmOpLowering,
-               FFT1DImgConjSymmOpLowering >(
-      &getContext());
+  patterns.add<
+      AddOpLowering, ModuloOpLowering, ConstantOpLowering, FuncOpLowering,
+      MulOpLowering, PrintOpLowering, ReturnOpLowering, TransposeOpLowering,
+      DelayOpLowering, GainOpLowering, SubOpLowering,
+      FIRFilterResponseOpLowering, SlidingWindowAvgOpLowering,
+      DownSamplingOpLowering, UpSamplingOpLowering,
+      LowPassFilter1stOrderOpLowering, HighPassFilterOpLowering,
+      FFT1DOpLowering, IFFT1DOpLowering, HammingWindowOpLowering, DCTOpLowering,
+      filterOpLowering, DivOpLowering, BitwiseAndOpLowering, PowOpLowering,
+      zeroCrossCountOpLowering, SumOpLowering, SinOpLowering, CosOpLowering,
+      SquareOpLowering, FFT1DRealOpLowering, FFT1DImgOpLowering, SincOpLowering,
+      GetElemAtIndxOpLowering, SetElemAtIndxOpLowering,
+      LowPassFIRFilterOpLowering, HighPassFIRFilterOpLowering,
+      GetRangeOfVectorOpLowering, FIRFilterHammingOptimizedOpLowering,
+      HighPassFIRHammingOptimizedOpLowering, LMSFilterOpLowering,
+      ThresholdOpLowering, QuantizationOpLowering, LMSFilterResponseOpLowering,
+      RunLenEncodingOpLowering, FIRFilterResSymmOptimizedOpLowering,
+      LengthOpLowering, ReverseInputOpLowering, PaddingOpLowering,
+      FIRFilterYSymmOptimizedOpLowering, FFT1DRealSymmOpLowering,
+      FFT1DImgConjSymmOpLowering, FFTRealOpLowering, FFTImagOpLowering,
+      Conv2DOpLowering, ShiftRightOpLowering, MatmulOpLowering,
+      ThresholdUpOpLowering, QamModulateRealOpLowering,
+      QamModulateImgOpLowering, QamDemodulateOpLowering, FindPeaksOpLowering,
+      BeamFormOpLowering, SpaceModulateOpLowering, SpaceDemodulateOpLowering,
+      SpaceErrCorrectionOpLowering, MaxOpLowering,
+      MeanOpLowering, DiffOpLowering, GetSingleElemAtIdxOpLowering,
+      Diff2MeanOptimizedOpLowering, Median2SlidingOptimizedOpLowering,
+      NormalizeOpLowering, AbsOpLowering, MedianFilterOpLowering,
+      LMS2FindPeaksOptimizedOpLowering, FindPeaks2Diff2MeanOptimizedOpLowering,
+      NormLMSFilterResponseOptimizeOpLowering,
+      FIRFilterResSymmThresholdUpOptimizedOpLowering, FFTCombineOpLowering,
+      GenerateDTMFOpLowering, GenerateVoiceSignatureOpLowering, SqrtOpLowering,
+      FFTFreqOpLowering, FindDominantPeaksOpLowering,
+      RecoverDTMFDigitOpLowering, FFTOpLowering, FFTAbsOpLowering,
+      DFTAbsOpLowering, DFTAbsThresholdUpOpLowering, ArgMaxOpLowering, CorrelateOpLowering,
+      SetSingleElemAtIdxOpLowering, Correl2MaxOptimizedOpLowering, LMSFilterResponse2GainOpLowering>(&getContext());
 
   // With the target and rewrite patterns defined, we can now attempt the
   // conversion. The conversion will signal failure if any of our `illegal`
diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp
index 24017a99f81e..98a43ca2cb1b 100644
--- a/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/mlir/MLIRGen.cpp
@@ -15,25 +15,25 @@
 #include "toy/AST.h"
 #include "toy/Dialect.h"
 
-
-#include "mlir/IR/Block.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/Value.h"
-#include "mlir/Support/LogicalResult.h"
 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/Block.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Value.h"
 #include "mlir/IR/Verifier.h"
+#include "mlir/Support/LogicalResult.h"
 #include "toy/Lexer.h"
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+#include <bitset>
 #include <cassert>
 #include <cstdint>
 #include <functional>
@@ -218,6 +218,8 @@ class MLIRGenImpl {
       return builder.create<DivOp>(location, lhs, rhs);
     case '-':
       return builder.create<SubOp>(location, lhs, rhs);
+    case '^':
+      return builder.create<PowOp>(location, lhs, rhs);
     }
 
     emitError(location, "invalid binary operator '") << binop.getOp() << "'";
@@ -332,6 +334,16 @@ class MLIRGenImpl {
 
     // Builtin calls have their custom operation, meaning this is a
     // straightforward emission.
+
+    if (callee == "bitwiseand") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.bitwiseand "
+                            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<BitwiseAndOp>(location, operands[0], operands[1]);
+    }
+
     if (callee == "transpose") {
       if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.transpose "
@@ -342,99 +354,263 @@ class MLIRGenImpl {
     }
 
     //
-    if(callee == "delay"){
-      if(call.getArgs().size() != 2){
+    if (callee == "delay") {
+      if (call.getArgs().size() != 2) {
         emitError(location, "MLIR codegen encountered an error: dsp.delay "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<DelayOp>(location, operands[0] , operands[1]);
+      return builder.create<DelayOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "gain"){
-      if(call.getArgs().size() != 2){
+    if (callee == "gain") {
+      if (call.getArgs().size() != 2) {
         emitError(location, "MLIR codegen encountered an error: dsp.gain "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<GainOp>(location, operands[0] , operands[1]);
+      return builder.create<GainOp>(location, operands[0], operands[1]);
     }
 
     // Sub Op
-    if(callee == "sub"){
+    if (callee == "sub") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.sub "
+                            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<SubOp>(location, operands[0], operands[1]);
+    }
+    if(callee == "pow"){
        if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.sub "
+         emitError(location, "MLIR codegen encountered an error: dsp.pow "
                              "accepts only 2 arguments");
          return nullptr;
        }
-       return builder.create<SubOp>(location, operands[0], operands[1]);
+       return builder.create<PowOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "zeroCrossCount"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.zeroCrossCount "
-                            "accepts only 1 arguments");
+
+    // Modulo Op
+    if (callee == "modulo") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.modulo "
+                            "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<zeroCrossCountOp>(location, operands[0]);
+      return builder.create<ModuloOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "FIRFilterResponse"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterResponse "
-                            "accepts only 2 arguments");
+    if (callee == "fftReal") {
+      if (call.getArgs().size() != 1) { // fftReal takes a single operand
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.fftReal "
+                  "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<FIRFilterResponseOp>(location, operands[0] , operands[1]);
+      return builder.create<FFTRealOp>(location, operands[0]);
     }
 
-    if(callee == "slidingWindowAvg"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.slidingWindowAvg "
-                            "accepts only 1 arguments");
+    if (callee == "fftImag") {
+      if (call.getArgs().size() != 1) { // fftImag takes a single operand
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.fftImag "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<FFTImagOp>(location, operands[0]);
+    }
+
+    // FindPeaks Op
+    if (callee == "find_peaks") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.find_peaks "
+                  "accepts only 3 arguments: signal, height, and distance");
+        return nullptr;
+      }
+      return builder.create<FindPeaksOp>(location, operands[0], operands[1],
+                                         operands[2]);
+    }
+
+    // Max Op
+    if (callee == "max") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.max "
+                            "accepts only 1 argument.");
+        return nullptr;
+      }
+      return builder.create<MaxOp>(location, operands[0]);
+    }
+
+    // Mean Op
+    if (callee == "mean") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.mean "
+                            "accepts only 2 arguments: input tensor, length");
+        return nullptr;
+      }
+      return builder.create<MeanOp>(location, operands[0], operands[1]);
+    }
+
+    // Diff Op: builtin "diff" takes exactly 2 operands and emits a DiffOp.
+    if (callee == "diff") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.diff "
+                            "accepts only 2 arguments: input tensor, length");
+        return nullptr;
+      }
+      return builder.create<DiffOp>(location, operands[0], operands[1]);
+    }
+       
+    // Abs Op
+    if(callee == "abs") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.abs "
+                            "accepts only 1 arguments: input tensor.");
+        return nullptr;
+      }
+      return builder.create<AbsOp>(location, operands[0]);
+    }
+
+    // ArgMax Op: the axis is read from the ConstantOp defining operands[1].
+    if(callee == "argmax") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.argmax "
+                            "accepts only 2 arguments: input tensor, axis.");
         return nullptr;
       }
-      return builder.create<SlidingWindowAvgOp>(location, operands[0] );
+      // NOTE(review): unchecked; null if axis isn't defined by a ConstantOp.
+      auto axisOp = operands[1].getDefiningOp<mlir::dsp::ConstantOp>();
+      auto axisVal = axisOp.getValue().getValues<mlir::FloatAttr>();
+      double axis = axisVal[0].getValueAsDouble();
+
+      return builder.create<ArgMaxOp>(location, operands[0], axis);
     }
 
-    if(callee == "downsampling"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.downsampling "
+    // Normalize Op
+    if (callee == "normalize") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.normalize "
+                            "accepts only 1 arguments: input tensor");
+        return nullptr;
+      }
+      return builder.create<NormalizeOp>(location, operands[0]);
+    }
+   
+    // Normalize LMS filter Op
+    if (callee == "norm_LMSFilterResponse_opt") {
+      if (call.getArgs().size() != 4) {
+        emitError(location, "MLIR codegen encountered an error: dsp.norm_LMSFilterResponse_opt "
+                            "accepts 4 arguments ");
+        return nullptr;
+      }
+      return builder.create<NormLMSFilterResponseOptimizeOp>(location, operands[0], operands[1], operands[2], operands[3]);
+    }
+
+    // Shift right Op
+    if (callee == "shiftRight") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.shiftRight "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<DownsamplingOp>(location, operands[0] , operands[1]);
+      return builder.create<ShiftRightOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "upsampling"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.upsampling "
+    // Matmul Op
+    if (callee == "matmul") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.matmul "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<UpsamplingOp>(location, operands[0] , operands[1]);
+      return builder.create<MatmulOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "lowPassFilter"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.lowPassFilter "
+    if (callee == "zeroCrossCount") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.zeroCrossCount "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<zeroCrossCountOp>(location, operands[0]);
+    }
+
+    if (callee == "FIRFilterResponse") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.FIRFilterResponse "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FIRFilterResponseOp>(location, operands[0],
+                                                 operands[1]);
+    }
+
+    if (callee == "medianFilter") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.medianFilter "
+                  "accepts only 1 argument");
+        return nullptr;
+      }
+      return builder.create<MedianFilterOp>(location, operands[0]);
+    }
+
+    if (callee == "slidingWindowAvg") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.slidingWindowAvg "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<SlidingWindowAvgOp>(location, operands[0]);
+    }
+
+    if (callee == "downsampling") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.downsampling "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<DownsamplingOp>(location, operands[0], operands[1]);
+    }
+
+    if (callee == "upsampling") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.upsampling "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<LowPassFilter1stOrderOp>(location, operands[0] , operands[1]);
+      return builder.create<UpsamplingOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "highPassFilter"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.highPassFilter "
-                            "accepts only 1 arguments");
+    if (callee == "lowPassFilter") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.lowPassFilter "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<LowPassFilter1stOrderOp>(location, operands[0],
+                                                     operands[1]);
+    }
+
+    if (callee == "highPassFilter") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.highPassFilter "
+                  "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<HighPassFilterOp>(location, operands[0] );
+      return builder.create<HighPassFilterOp>(location, operands[0]);
     }
 
-    if(callee == "fft1d"){
-      if(call.getArgs().size() != 1){
+    if (callee == "fft1d") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.fft1d "
                             "accepts only 1 arguments");
         return nullptr;
@@ -442,26 +618,26 @@ class MLIRGenImpl {
       // return builder.create<FFT1DOp>(location, operands[0] );
     }
 
-    if(callee == "fft1dreal"){
-      if(call.getArgs().size() != 1){
+    if (callee == "fft1dreal") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.fft1dreal "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<FFT1DRealOp>(location, operands[0] );
+      return builder.create<FFT1DRealOp>(location, operands[0]);
     }
 
-    if(callee == "fft1dimg"){
-      if(call.getArgs().size() != 1){
+    if (callee == "fft1dimg") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.fft1dimg "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<FFT1DImgOp>(location, operands[0] );
+      return builder.create<FFT1DImgOp>(location, operands[0]);
     }
 
-    if(callee == "ifft1d"){
-      if(call.getArgs().size() != 2){
+    if (callee == "ifft1d") {
+      if (call.getArgs().size() != 2) {
         emitError(location, "MLIR codegen encountered an error: dsp.ifft1d "
                             "accepts only 1 arguments");
         return nullptr;
@@ -469,264 +645,578 @@ class MLIRGenImpl {
       return builder.create<IFFT1DOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "hamming"){
-      if(call.getArgs().size() != 1){
+    if (callee == "hamming") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.hamming "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<HammingWindowOp>(location, operands[0] );
+      return builder.create<HammingWindowOp>(location, operands[0]);
     }
 
-    if(callee == "dct"){
-      if(call.getArgs().size() != 1){
+    if (callee == "dct") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.dct "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<DCTOp>(location, operands[0] );
+      return builder.create<DCTOp>(location, operands[0]);
     }
 
-    if(callee == "filter"){
-      if(call.getArgs().size() != 3){
+    if (callee == "filter") {
+      if (call.getArgs().size() != 3) {
         emitError(location, "MLIR codegen encountered an error: dsp.filter "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<filterOp>(location, operands[0],operands[1], operands[2] );
+      return builder.create<filterOp>(location, operands[0], operands[1],
+                                      operands[2]);
     }
 
-    if(callee == "div"){
-      if(call.getArgs().size() != 2){
+    if (callee == "div") {
+      if (call.getArgs().size() != 2) {
         emitError(location, "MLIR codegen encountered an error: dsp.div "
                             "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<DivOp>(location, operands[0] , operands[1]);
+      return builder.create<DivOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "sum"){
-      if(call.getArgs().size() != 1){
+    if (callee == "sum") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.sum "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<SumOp>(location, operands[0] );
+      return builder.create<SumOp>(location, operands[0]);
     }
 
-    if(callee == "sin"){
-       if(call.getArgs().size() != 1){
-         emitError(location, "MLIR codegen encountered an error: dsp.sin "
-                             "accepts only 1 arguments");
-         return nullptr;
-       }
-       return builder.create<SinOp>(location, operands[0] );
-     }
+    if (callee == "sin") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.sin "
+                            "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<SinOp>(location, operands[0]);
+    }
 
-     if(callee == "cos"){
-       if(call.getArgs().size() != 1){
-         emitError(location, "MLIR codegen encountered an error: dsp.cos "
-                             "accepts only 1 arguments");
-         return nullptr;
-       }
-       return builder.create<CosOp>(location, operands[0] );
-     }
+    if (callee == "cos") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.cos "
+                            "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<CosOp>(location, operands[0]);
+    }
 
-     if(callee == "square"){
-      if(call.getArgs().size() != 1){
+    if (callee == "square") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.square "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<SquareOp>(location, operands[0] );
+      return builder.create<SquareOp>(location, operands[0]);
     }
 
     // Sinc Op
-    if(callee == "sinc"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.sinc "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<SincOp>(location, operands[0], operands[1]);
+    if (callee == "sinc") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.sinc "
+                            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<SincOp>(location, operands[0], operands[1]);
     }
 
     // Get Elem At Op
-    if(callee == "getElemAtIndx"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.getElemAtIndx "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<GetElemAtIndxOp>(location, operands[0], operands[1]);
+    if (callee == "getElemAtIndx") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.getElemAtIndx "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<GetElemAtIndxOp>(location, operands[0],
+                                             operands[1]);
+    }
+
+    // Get Single Element At Op
+    if (callee == "getSingleElemAtIndx") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.getSingleElemAtIndx "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<GetSingleElemAtIdxOp>(location, operands[0],
+                                                  operands[1]);
+    }
+
+    // Diff2MeanOptimized Op
+    if (callee == "diff2meanOpt") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.diff2meanOpt "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<Diff2MeanOptimizedOp>(location, operands[0],
+                                                  operands[1]);
+    }
+	
+    // FindPeaks2Diff2MeanOptimized Op: requires exactly 3 operands.
+    if (callee == "findpeaks2diff2meanOpt") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.findpeaks2diff2meanOpt "
+                  "accepts only 3 arguments.");
+        return nullptr;
+      }
+      return builder.create<FindPeaks2Diff2MeanOptimizedOp>(location, operands[0], operands[1], operands[2]);
+    }
+
+    // LMS2FindPeaksOptimizedOp Op
+    if (callee == "lms2findPeaks") {
+      if (call.getArgs().size() != 6) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.lmsFilterResponse2findPeaks "
+                  "accepts only 6 arguments");
+        return nullptr;
+      }
+      return builder.create<LMS2FindPeaksOptimizedOp>(location, operands[0],
+                                                  operands[1], operands[2], operands[3], operands[4], operands[5]);
     }
 
+    // Median2SlidingOptimized Op: requires exactly 1 operand.
+    if (callee == "median2slidingOp") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.median2slidingOp "
+                  "accepts only 1 argument.");
+        return nullptr;
+      }
+      return builder.create<Median2SlidingOptimizedOp>(location, operands[0]);
+    }
+
+
     // Set Elem At Indx
-    if(callee == "setElemAtIndx"){
-       if(call.getArgs().size() != 3){
-         emitError(location, "MLIR codegen encountered an error: dsp.setElemAtIndx "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<SetElemAtIndxOp>(location, operands[0], operands[1], operands[2]);
+    if (callee == "setElemAtIndx") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.setElemAtIndx "
+                  "accepts only 3 arguments");
+        return nullptr;
+      }
+      return builder.create<SetElemAtIndxOp>(location, operands[0], operands[1],
+                                             operands[2]);
     }
 
     // lowPassFilter Op
-    if(callee == "lowPassFIRFilter"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.lowPassFilter "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<LowPassFIRFilterOp>(location, operands[0], operands[1]);
+    if (callee == "lowPassFIRFilter") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.lowPassFIRFilter "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<LowPassFIRFilterOp>(location, operands[0],
+                                                operands[1]);
     }
 
     // highPassFilter Op
-    if(callee == "highPassFIRFilter"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.highPassFilter "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<HighPassFIRFilterOp>(location, operands[0], operands[1]);
+    if (callee == "highPassFIRFilter") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.highPassFIRFilter "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<HighPassFIRFilterOp>(location, operands[0],
+                                                 operands[1]);
     }
 
-    if(callee == "getRangeOfVector"){
-      if(call.getArgs().size() != 3){
-        emitError(location, "MLIR codegen encountered an error: dsp.getRangeOfVector "
-                            "accepts only 3 arguments");
+    if (callee == "getRangeOfVector") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.getRangeOfVector "
+                  "accepts only 3 arguments");
         return nullptr;
       }
-      return builder.create<GetRangeOfVectorOp>(location, operands[0],operands[1], operands[2] );
+      return builder.create<GetRangeOfVectorOp>(location, operands[0],
+                                                operands[1], operands[2]);
     }
 
     // FIRHammingOptimizedOp
-    if(callee == "FIRFilterHammingOptimized"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterHammingOptimized "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<FIRFilterHammingOptimizedOp>(location, operands[0], operands[1]);
+    if (callee == "FIRFilterHammingOptimized") {
+      if (call.getArgs().size() != 2) {
+        emitError(
+            location,
+            "MLIR codegen encountered an error: dsp.FIRFilterHammingOptimized "
+            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FIRFilterHammingOptimizedOp>(location, operands[0],
+                                                         operands[1]);
     }
 
     // HighPassFIRHammingOptimizedOp
-    if(callee == "highPassFIRHammingOptimized"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.HighPassFIRHammingOptimizedOp "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<HighPassFIRHammingOptimizedOp>(location, operands[0], operands[1]);
+    if (callee == "highPassFIRHammingOptimized") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: "
+                            "dsp.HighPassFIRHammingOptimizedOp "
+                            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<HighPassFIRHammingOptimizedOp>(
+          location, operands[0], operands[1]);
     }
 
     // LMS FILTER
-    if(callee == "lmsFilter"){
-      if(call.getArgs().size() != 5){
+    if (callee == "lmsFilter") {
+      if (call.getArgs().size() != 5) {
         emitError(location, "MLIR codegen encountered an error: dsp.lmsFilter"
                             "accepts only 5 arguments");
         return nullptr;
       }
-      return builder.create<LMSFilterOp>(location, operands[0] , operands[1], operands[2], operands[3],operands[4] );
+      return builder.create<LMSFilterOp>(location, operands[0], operands[1],
+                                         operands[2], operands[3], operands[4]);
     }
 
-    if(callee == "threshold"){
-       if(call.getArgs().size() != 2){
-         emitError(location, "MLIR codegen encountered an error: dsp.ThresholdOp "
-                             "accepts only 2 arguments");
-         return nullptr;
-       }
-       return builder.create<ThresholdOp>(location, operands[0], operands[1]);
+    if (callee == "threshold") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.ThresholdOp "
+                  "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<ThresholdOp>(location, operands[0], operands[1]);
     }
 
-    if(callee == "quantization"){
-       if(call.getArgs().size() != 4){
-         emitError(location, "MLIR codegen encountered an error: dsp.quantization "
-                             "accepts only 4 arguments");
-         return nullptr;
-       }
-       return builder.create<QuantizationOp>(location, operands[0], operands[1],operands[2], operands[3]);
+    if (callee == "quantization") {
+      if (call.getArgs().size() != 4) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.quantization "
+                  "accepts only 4 arguments");
+        return nullptr;
+      }
+      return builder.create<QuantizationOp>(location, operands[0], operands[1],
+                                            operands[2], operands[3]);
     }
 
-    if(callee == "lmsFilterResponse"){
-      if(call.getArgs().size() != 4){
+    if (callee == "lmsFilterResponse") {
+      if (call.getArgs().size() != 4) {
         emitError(location, "MLIR codegen encountered an error: dsp.lmsFilter"
                             "accepts only 4 arguments");
         return nullptr;
       }
-      return builder.create<LMSFilterResponseOp>(location, operands[0] , operands[1], operands[2], operands[3]);
+      return builder.create<LMSFilterResponseOp>(
+          location, operands[0], operands[1], operands[2], operands[3]);
     }
 
-    if(callee == "runLenEncoding"){
-       if(call.getArgs().size() != 1){
-         emitError(location, "MLIR codegen encountered an error: dsp.runLenEncoding "
-                             "accepts only 1 arguments");
-         return nullptr;
-       }
-       return builder.create<RunLenEncodingOp>(location, operands[0]);
+    if (callee == "runLenEncoding") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.runLenEncoding "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<RunLenEncodingOp>(location, operands[0]);
     }
 
-    if(callee == "FIRFilterResSymmOptimized"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterResSymmOptimized "
-                            "accepts only 2 arguments");
+    if (callee == "FIRFilterResSymmOptimized") {
+      if (call.getArgs().size() != 2) {
+        emitError(
+            location,
+            "MLIR codegen encountered an error: dsp.FIRFilterResSymmOptimized "
+            "accepts only 2 arguments");
         return nullptr;
       }
-      return builder.create<FIRFilterResSymmOptimizedOp>(location, operands[0] , operands[1]);
+      return builder.create<FIRFilterResSymmOptimizedOp>(location, operands[0],
+                                                         operands[1]);
     }
 
-    if(callee == "len"){
-      if(call.getArgs().size() != 1){
+    if (callee == "len") {
+      if (call.getArgs().size() != 1) {
         emitError(location, "MLIR codegen encountered an error: dsp.len "
                             "accepts only 1 arguments");
         return nullptr;
       }
-      return builder.create<LengthOp>(location, operands[0] );
+      return builder.create<LengthOp>(location, operands[0]);
     }
 
+    if (callee == "reverseInput") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.reverseInput "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<ReverseInputOp>(location, operands[0]);
+    }
 
-    if(callee == "reverseInput"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.reverseInput "
-                            "accepts only 1 arguments");
+    if (callee == "padding") {
+      if (call.getArgs().size() != 3) {
+        emitError(location, "MLIR codegen encountered an error: dsp.padding "
+                            "accepts only 3 arguments");
         return nullptr;
       }
-      return builder.create<ReverseInputOp>(location, operands[0] );
+      return builder.create<PaddingOp>(location, operands[0], operands[1],
+                                       operands[2]);
     }
 
-   if(callee == "padding"){
-       if(call.getArgs().size() != 3){
-         emitError(location, "MLIR codegen encountered an error: dsp.padding "
-                             "accepts only 3 arguments");
-         return nullptr;
-       }
-       return builder.create<PaddingOp>(location, operands[0], operands[1], operands[2]);
+    if (callee == "FIRFilterYSymmOptimized") {
+      if (call.getArgs().size() != 2) {
+        emitError(
+            location,
+            "MLIR codegen encountered an error: dsp.FIRFilterYSymmOptimizedOp "
+            "accepts only 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FIRFilterYSymmOptimizedOp>(location, operands[0],
+                                                       operands[1]);
+    }
+    if (callee == "fft1DRealSymm") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.FFT1DRealSymmOp "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<FFT1DRealSymmOp>(location, operands[0]);
+    } // FFT1DImgConjSymmOpLowering
+    if (callee == "fft1DimgConjSymm") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.FFT1DImgConjSymmOp "
+                  "accepts only 1 arguments");
+        return nullptr;
+      }
+      return builder.create<FFT1DImgConjSymmOp>(location, operands[0]);
     }
 
-    if(callee == "FIRFilterYSymmOptimized"){
-      if(call.getArgs().size() != 2){
-        emitError(location, "MLIR codegen encountered an error: dsp.FIRFilterYSymmOptimizedOp "
-                            "accepts only 2 arguments");
+    if (callee == "conv2d") {
+      if (call.getArgs().size() != 3) {
+        emitError(location, "MLIR codegen encountered an error: dsp.Conv2DOp "
+                            "accepts 3 arguments");
         return nullptr;
       }
-      return builder.create<FIRFilterYSymmOptimizedOp>(location, operands[0] , operands[1]);
+      return builder.create<Conv2DOp>(location, operands[0], operands[1],
+                                      operands[2]);
     }
-   if(callee == "fft1DRealSymm"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.FFT1DRealSymmOp "
-                            "accepts only 1 arguments");
+
+    if (callee == "thresholdUp") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.thresholdUp "
+                  "accepts 3 arguments");
         return nullptr;
       }
-      return builder.create<FFT1DRealSymmOp>(location, operands[0] );
-    } //FFT1DImgConjSymmOpLowering
-   if(callee == "fft1DimgConjSymm"){
-      if(call.getArgs().size() != 1){
-        emitError(location, "MLIR codegen encountered an error: dsp.FFT1DImgConjSymmOp "
-                            "accepts only 1 arguments");
+      return builder.create<ThresholdUpOp>(location, operands[0], operands[1],
+                                           operands[2]);
+    }
+
+    if (callee == "generateDtmf") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.GenerateDTMFOp "
+                  "accepts 3 arguments");
+        return nullptr;
+      }
+      return builder.create<GenerateDTMFOp>(location, operands[0], operands[1],
+                                            operands[2]);
+    }
+
+    // fftfreq(arg0, arg1): emitted as a dsp FFTFreqOp.
+    if (callee == "fftfreq") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.FFTFreqOp "
+                            "accepts 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FFTFreqOp>(location, operands[0], operands[1]);
+    }
+
+    if (callee == "findDominantPeaks") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.FindDominantPeaksOp "
+                  "accepts 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FindDominantPeaksOp>(location, operands[0], operands[1]);
+    }
+
+    if (callee == "recoverDtmfDigit") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.RecoverDTMFDigitOp "
+                  "accepts 2 arguments");
+        return nullptr;
+      }
+      return builder.create<RecoverDTMFDigitOp>(location, operands[0], operands[1]);
+    }
+
+    if (callee == "fftCombine") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.FFTCombineOp "
+                  "accepts 2 arguments");
+        return nullptr;
+      }
+      return builder.create<FFTCombineOp>(location, operands[0], operands[1]);
+    }
+
+    // sqrt(arg0): emitted as a dsp SqrtOp.
+    if (callee == "sqrt") {
+      if (call.getArgs().size() != 1) {
+        emitError(location, "MLIR codegen encountered an error: dsp.SqrtOp "
+                            "accepts only 1 argument.");
+        return nullptr;
+      }
+      return builder.create<SqrtOp>(location, operands[0]);
+    }
+
+    if (callee == "generateVoiceSignature") {
+      if (call.getArgs().size() != 4) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.GenerateVoiceSignatureOp "
+                  "accepts 4 arguments");
         return nullptr;
       }
-      return builder.create<FFT1DImgConjSymmOp>(location, operands[0] );
+      return builder.create<GenerateVoiceSignatureOp>(location, operands[0], operands[1],
+                                            operands[2], operands[3]);
     }
+    // beam form: antenna/frequency are read from constant ops at codegen time.
+    if (callee == "beam_form") {
+      if (call.getArgs().size() != 4) {
+        emitError(location, "MLIR codegen encountered an error: dsp.BeamFormOp "
+                            "accepts 4 arguments");
+        return nullptr;
+      }
+      auto antennaConst = operands[0].getDefiningOp<mlir::dsp::ConstantOp>();
+      auto freqConst = operands[1].getDefiningOp<mlir::dsp::ConstantOp>();
+      if (!antennaConst || !freqConst) {
+        emitError(location, "MLIR codegen encountered an error: dsp.BeamFormOp "
+                            "requires constant antenna and frequency");
+        return nullptr;
+      }
+      auto antennaVal = antennaConst.getValue().getValues<mlir::FloatAttr>();
+      auto freqVal = freqConst.getValue().getValues<mlir::FloatAttr>();
+      return builder.create<BeamFormOp>(
+          location, antennaVal[0].getValueAsDouble(),
+          freqVal[0].getValueAsDouble(), operands[2], operands[3]);
+    }
+    // qam modulate op
+    if (callee == "qam_modulate_real") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.QamModulateRealOp "
+                  "accepts 1 arguments");
+        return nullptr;
+      }
+
+      return builder.create<QamModulateRealOp>(location, operands[0]);
+    }
+
+    // qam_modulate_imagine(arg0): emitted as a dsp QamModulateImgOp.
+    if (callee == "qam_modulate_imagine") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.QamModulateImgOp "
+                  "accepts 1 arguments");
+        return nullptr;
+      }
+      return builder.create<QamModulateImgOp>(location, operands[0]);
+    }
+    // qam_demodulate(arg0, arg1): emitted as a dsp QamDemodulateOp.
+    if (callee == "qam_demodulate") {
+      if (call.getArgs().size() != 2) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.QamDemodulateOp "
+                  "accepts 2 arguments");
+        return nullptr;
+      }
+      return builder.create<QamDemodulateOp>(location, operands[0],
+                                             operands[1]);
+    }
+    // space_demodulate(arg0): emitted as a dsp SpaceDemodulateOp.
+    if (callee == "space_demodulate") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.SpaceDemodulateOp "
+                  "accepts 1 arguments");
+        return nullptr;
+      }
+      return builder.create<SpaceDemodulateOp>(location, operands[0]);
+    }
+    // space_modulate(arg0): emitted as a dsp SpaceModulateOp.
+    if (callee == "space_modulate") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.SpaceModulateOp "
+                  "accepts 1 arguments");
+        return nullptr;
+      }
+      return builder.create<SpaceModulateOp>(location, operands[0]);
+    }
+    // space_err_correction(arg0): emitted as a dsp SpaceErrCorrectionOp.
+    if (callee == "space_err_correction") {
+      if (call.getArgs().size() != 1) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.SpaceErrCorrectionOp "
+                  "accepts 1 arguments");
+        return nullptr;
+      }
+      return builder.create<SpaceErrCorrectionOp>(location, operands[0]);
+    }
+	
+	// Correlate Op
+    if (callee == "correlate") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.correlate "
+                            "accepts only 2 arguments: lhs rhs");
+        return nullptr;
+      }
+      return builder.create<CorrelateOp>(location, operands[0], operands[1]);
+    }
+	
+    // Set Single Element At Op
+    if (callee == "setSingleElemAtIndx") {
+      if (call.getArgs().size() != 3) {
+        emitError(location,
+                  "MLIR codegen encountered an error: dsp.setSingleElemAtIndx "
+                  "accepts only 3 arguments");
+        return nullptr;
+      }
+      return builder.create<SetSingleElemAtIdxOp>(location, operands[0],
+                                                  operands[1], operands[2]);
+    }
+
+    // Correl2MaxOptimizedOp: consumes two operands, so require two arguments.
+    if (callee == "correl2max") {
+      if (call.getArgs().size() != 2) {
+        emitError(location, "MLIR codegen encountered an error: dsp.correl2max "
+                            "accepts only 2 arguments.");
+        return nullptr;
+      }
+      return builder.create<Correl2MaxOptimizedOp>(location, operands[0], operands[1]);
+    }
+
+
+    //LMSFilterResponse2GainOp
+    if (callee == "lmsFilterResponse2gain") {
+      if (call.getArgs().size() != 5) {
+        emitError(location, "MLIR codegen encountered an error: dsp.lmsFilterResponse2gain "
+                            "accepts only 5 argument.");
+        return nullptr;
+      }
+      return builder.create<LMSFilterResponse2GainOp>(location, operands[0], operands[1], operands[2], operands[3], operands[4]);
+    }
+	
+
+
+
+	
     // Builtin calls have their custom operation, meaning this is a
     // straightforward emission.
     // if(callee == "delay"){
@@ -760,6 +1250,31 @@ class MLIRGenImpl {
     return builder.create<ConstantOp>(loc(num.loc()), num.getValue());
   }
 
+  /// Emit a string expression as a rank-1 f64 constant: every character
+  /// contributes eight 0/1 entries, most significant bit first.
+  mlir::Value mlirGen(StringExprAST &expr) {
+    constexpr int kBitsPerChar = 8;
+    auto text = expr.getStringVal();
+
+    // Expand each character into its bits, walking from bit 7 down to bit 0.
+    std::vector<double> bitValues;
+    for (char ch : text) {
+      const auto byte = static_cast<unsigned char>(ch);
+      for (int pos = kBitsPerChar - 1; pos >= 0; --pos)
+        bitValues.push_back((byte >> pos) & 1);
+    }
+
+    // Dense f64 attribute with one element per emitted bit.
+    mlir::Type f64Type = builder.getF64Type();
+    auto attrType = mlir::RankedTensorType::get(bitValues.size(), f64Type);
+    auto bitsAttr =
+        mlir::DenseElementsAttr::get(attrType, llvm::ArrayRef(bitValues));
+
+    // The op's result type is obtained from getType for the same length.
+    auto resultType = getType(bitValues.size());
+    return builder.create<ConstantOp>(loc(expr.loc()), resultType, bitsAttr);
+  }
+
   /// Dispatch codegen for the right expression subclass using RTTI.
   mlir::Value mlirGen(ExprAST &expr) {
     switch (expr.getKind()) {
@@ -773,6 +1288,8 @@ class MLIRGenImpl {
       return mlirGen(cast<CallExprAST>(expr));
     case dsp::ExprAST::Expr_Num:
       return mlirGen(cast<NumberExprAST>(expr));
+    case dsp::ExprAST::Expr_String:
+      return mlirGen(cast<StringExprAST>(expr));
     default:
       emitError(loc(expr.loc()))
           << "MLIR codegen encountered an unhandled expr kind '"
@@ -793,7 +1310,9 @@ class MLIRGenImpl {
       return nullptr;
     }
 
-    mlir::Value value = mlirGen(*init);
+    mlir::Value value;
+    // Register the value in the symbol table.
+    value = mlirGen(*init);
     if (!value)
       return nullptr;
 
@@ -804,8 +1323,6 @@ class MLIRGenImpl {
       value = builder.create<ReshapeOp>(loc(vardecl.loc()),
                                         getType(vardecl.getType()), value);
     }
-
-    // Register the value in the symbol table.
     if (failed(declare(vardecl.getName(), value)))
       return nullptr;
     return value;
diff --git a/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp b/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp
index e2c461afa434..ea0ab6abbc9b 100644
--- a/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/mlir/ToyCombine.cpp
@@ -11,15 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/Value.h"
 #include "mlir/Support/LogicalResult.h"
-#include "toy/Dialect.h"
 #include "toy/DebugConfig.h"
+#include "toy/Dialect.h"
 #include <numeric>
 using namespace mlir;
 using namespace dsp;
+using namespace std;
 
 namespace {
 /// Include the patterns defined in the Declarative Rewrite framework.
@@ -58,16 +63,15 @@ struct SimplifyRedundantTranspose : public mlir::OpRewritePattern<TransposeOp> {
   }
 };
 
-
-
-//Pseudo-Code
-//Find back to back gain operation
-    // result1 = upsampling(input1, rate1)
-    // result2 = downsampling(result1, rate2)
+// Pseudo-Code
+// Find an upsampling whose result feeds directly into a downsampling
+//  result1 = upsampling(input1, rate1)
+//  result2 = downsampling(result1, rate2)
 // if rate1 == rate2 then result2 = input1
-  // result2 will be now delay(input1, gain1 + gain2)
-  // replaceOp 
-struct SimplifyUpsamplingDownsampling : public mlir::OpRewritePattern<DownsamplingOp> {
+// if rate2 divides a larger rate1, result2 = upsampling(input1, rate1 / rate2)
+// replaceOp
+struct SimplifyUpsamplingDownsampling
+    : public mlir::OpRewritePattern<DownsamplingOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
   /// The "benefit" is used by the framework to order the patterns and process
   /// them in order of profitability.
@@ -83,152 +87,300 @@ struct SimplifyUpsamplingDownsampling : public mlir::OpRewritePattern<Downsampli
     // Look through the input of the current downsampling.
     mlir::Value downsamplingOperand1_Rate = op.getOperand(1);
     mlir::Value downsamplingOperand0_input = op.getOperand(0);
-    dsp::UpsamplingOp prev_UpSamplingOp = downsamplingOperand0_input.getDefiningOp<UpsamplingOp>();
+    dsp::UpsamplingOp prev_UpSamplingOp =
+        downsamplingOperand0_input.getDefiningOp<UpsamplingOp>();
 
     // Input defined by another downsampling? If not, no match.
     if (!prev_UpSamplingOp)
       return failure();
 
-    //Get operands for UpSamplingOp
+    // Get operands for UpSamplingOp
     mlir::Value UpsamplingOperand1_Rate = prev_UpSamplingOp.getOperand(1);
     mlir::Value UpsamplingOperand0_input = prev_UpSamplingOp.getOperand(0);
 
-    //get constant value from the downsamplingOp -- operand1
-    dsp::ConstantOp constant_Op1_downsamplingOp = downsamplingOperand1_Rate.getDefiningOp<dsp::ConstantOp>();
-  	// DEBUG_PRINT_NO_ARGS();
-    DenseElementsAttr DenseValueFrmDownsampling = constant_Op1_downsamplingOp.getValue();
-  	// DEBUG_PRINT_NO_ARGS();
+    // get constant value from the downsamplingOp -- operand1
+    dsp::ConstantOp constant_Op1_downsamplingOp =
+        downsamplingOperand1_Rate.getDefiningOp<dsp::ConstantOp>();
+    // DEBUG_PRINT_NO_ARGS();
+    DenseElementsAttr DenseValueFrmDownsampling =
+        constant_Op1_downsamplingOp.getValue();
+    // DEBUG_PRINT_NO_ARGS();
     auto elements = DenseValueFrmDownsampling.getValues<FloatAttr>();
     float FirstValue = elements[0].getValueAsDouble();
-    int64_t DownsamplingRate = (int64_t) FirstValue;
+    int64_t DownsamplingRate = (int64_t)FirstValue;
 
-    //Get constant value from upsampling: -- operand1
-    dsp::ConstantOp constant_Op1_upSamplingOp = UpsamplingOperand1_Rate.getDefiningOp<dsp::ConstantOp>();
-  	// DEBUG_PRINT_NO_ARGS();
-    DenseElementsAttr DenseValueFrmUpsampling = constant_Op1_upSamplingOp.getValue();
-  	// DEBUG_PRINT_NO_ARGS();
+    // Get constant value from upsampling: -- operand1
+    dsp::ConstantOp constant_Op1_upSamplingOp =
+        UpsamplingOperand1_Rate.getDefiningOp<dsp::ConstantOp>();
+    // DEBUG_PRINT_NO_ARGS();
+    DenseElementsAttr DenseValueFrmUpsampling =
+        constant_Op1_upSamplingOp.getValue();
+    // DEBUG_PRINT_NO_ARGS();
     elements = DenseValueFrmUpsampling.getValues<FloatAttr>();
     FirstValue = elements[0].getValueAsDouble();
-    int64_t UpsamplingRate = (int64_t) FirstValue;
-
-    llvm::errs() << "DownsamplingRate = " << DownsamplingRate << " UpsamplingRate" << UpsamplingRate << "\n";
-    if(DownsamplingRate == UpsamplingRate)
-    {
-	    // Otherwise, we have a redundant downsampling. Use the rewriter.
-	    // rewriter.replaceOp(op, {downsamplingInputOp.getOperand()}); //downsamplingOperand0_input
+    int64_t UpsamplingRate = (int64_t)FirstValue;
+
+    llvm::errs() << "DownsamplingRate = " << DownsamplingRate
+                 << " UpsamplingRate" << UpsamplingRate << "\n";
+    if (DownsamplingRate == UpsamplingRate) {
+      // Otherwise, we have a redundant downsampling. Use the rewriter.
+      // rewriter.replaceOp(op, {downsamplingInputOp.getOperand()});
+      // //downsamplingOperand0_input
       llvm::errs() << "Going for Downsampling pass\n";
       rewriter.replaceOp(op, UpsamplingOperand0_input);
-	    return success();
+      return success();
 
-    }
-    else if(UpsamplingRate > DownsamplingRate)
-    {
-      //check if UpSamplingRate is a multiple of DownsamplingRate
-      //if yes, final result should be UpSampling with SamplingRate as division 
-      if(UpsamplingRate % DownsamplingRate != 0)
-      {
+    } else if (UpsamplingRate > DownsamplingRate) {
+      // check if UpSamplingRate is a multiple of DownsamplingRate
+      // if yes, final result should be UpSampling with SamplingRate as division
+      if (UpsamplingRate % DownsamplingRate != 0) {
         return failure();
       }
 
       //
-      if(DownsamplingRate == 0)
-      {
-        llvm::errs() << "DownSamplingRate= 0 Not allowed" << "\n"; 
+      if (DownsamplingRate == 0) {
+        llvm::errs() << "DownSamplingRate= 0 Not allowed" << "\n";
         return failure();
       }
-      double finalUpSamplingRate = (double) UpsamplingRate / DownsamplingRate;
+      double finalUpSamplingRate = (double)UpsamplingRate / DownsamplingRate;
 
-      auto constOp_finalSamplingRate = rewriter.create<ConstantOp>(op.getLoc(), finalUpSamplingRate);
+      auto constOp_finalSamplingRate =
+          rewriter.create<ConstantOp>(op.getLoc(), finalUpSamplingRate);
 
-      auto finalUpSamplingOp = rewriter.create<UpsamplingOp>(op.getLoc(),
-                          UpsamplingOperand0_input , constOp_finalSamplingRate);
+      auto finalUpSamplingOp = rewriter.create<UpsamplingOp>(
+          op.getLoc(), UpsamplingOperand0_input, constOp_finalSamplingRate);
 
       llvm::errs() << "Going for Downsampling pass\n";
       rewriter.replaceOp(op, finalUpSamplingOp);
-
     }
     return failure();
-
   }
 };
 
-//Pseudo-Code
-//Find back to back gain operation
-    // result1 = gain(input1, gain1)
-    // result2 = gain(result1, gain2)
+// Pseudo-Code
+// Find back to back gain operation
+//  result1 = gain(input1, gain1)
+//  result2 = gain(result1, gain2)
 // if result1 is coming from another delay operation
-  // result2 will be now delay(input1, gain1 + gain2)
-  // replaceOp 
-struct SimplifyBack2BackGain: public mlir::OpRewritePattern<GainOp>{
+// result2 will be now gain(input1, gain1 * gain2)
+// replaceOp
+struct SimplifyBack2BackGain : public mlir::OpRewritePattern<GainOp> {
   //
-  SimplifyBack2BackGain(mlir::MLIRContext *context) 
-    : OpRewritePattern<GainOp>(context, 1) {}
-
-    mlir::LogicalResult matchAndRewrite(GainOp op, 
-                        mlir::PatternRewriter &rewriter) const override {
-     
-     //
-     mlir::Value gainOp_operand0 = op.getOperand(0);
-     
-     //check if this is coming from another gain operation
-     GainOp prev_gainOp = gainOp_operand0.getDefiningOp<GainOp>();
-
-     if(!prev_gainOp)
-        return failure();
+  SimplifyBack2BackGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
 
-     mlir::Value gainOp_operand1 = op.getOperand(1);
-     mlir::Value prev_gainOp_operand0 = prev_gainOp.getOperand(0);
-     mlir::Value prev_gainOp_operand1 = prev_gainOp.getOperand(1);
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+
+    //
+    mlir::Value gainOp_operand0 = op.getOperand(0);
+
+    // check if this is coming from another gain operation
+    GainOp prev_gainOp = gainOp_operand0.getDefiningOp<GainOp>();
+
+    if (!prev_gainOp)
+      return failure();
+
+    mlir::Value gainOp_operand1 = op.getOperand(1);
+    mlir::Value prev_gainOp_operand0 = prev_gainOp.getOperand(0);
+    mlir::Value prev_gainOp_operand1 = prev_gainOp.getOperand(1);
 
-     //create add op 
-     auto addOp = rewriter.create<MulOp>(op.getLoc(), prev_gainOp_operand1, gainOp_operand1);
-     auto newGainOp = rewriter.create<GainOp>(op.getLoc(),
-                          prev_gainOp_operand0 , addOp.getResult());
-    
-    //Repalce the use of original gain operation with this newGainOp
+    // Multiply the two gain factors (addOp is actually a MulOp)
+    auto addOp = rewriter.create<MulOp>(op.getLoc(), prev_gainOp_operand1,
+                                        gainOp_operand1);
+    auto newGainOp = rewriter.create<GainOp>(op.getLoc(), prev_gainOp_operand0,
+                                             addOp.getResult());
+
+    // Replace the use of original gain operation with this newGainOp
     rewriter.replaceOp(op, newGainOp.getResult());
     return mlir::success();
+  }
+};
 
-    }
+// Pseudo-Code
+//  Mean of diff is equal to (input[-1] - input[0])/(len(input) - 1).
+//  For example, for array (a, b, c, d, e)
+//  diff(array) = (b-a, c-b, d-c, e-d)
+//  mean(diff(array)) = ((b-a) + (c-b) + (d-c) + (e-d))/4 = (e-a)/4
+//  result1 = diff(input1, diff_length)  // NOTE: len(result1) is virtually
+//            diff_length-1 (the tensor size is fixed as len(input)-1).
+//  result2 = mean(result1, mean_length)
+// if mean_length <= (diff_length-1),
+// result2 will be now (input1[mean_length] - input1[0])/mean_length
+// replaceOp
+struct SimplifyDiff2Mean : public mlir::OpRewritePattern<MeanOp> {
+  /// Register the pattern on MeanOp with a benefit of 1.
+  SimplifyDiff2Mean(mlir::MLIRContext *context)
+      : OpRewritePattern<MeanOp>(context, 1) {}
+
+  /// Rewrite mean(diff(x), n) as a single Diff2MeanOptimizedOp(x, n).
+  /// Returns failure, leaving the IR untouched, when the first operand of the
+  /// mean is not produced by a DiffOp.
+  mlir::LogicalResult
+  matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override {
+    // Only a mean whose input is produced by a diff can be fused.
+    auto diffProducer = op.getOperand(0).getDefiningOp<DiffOp>();
+    if (!diffProducer)
+      return failure();
+
+    // The fused op reads the diff's own input together with the mean's
+    // second operand.
+    mlir::Value diffSource = diffProducer.getOperand(0);
+    mlir::Value meanLength = op.getOperand(1);
+    auto fusedOp = rewriter.create<dsp::Diff2MeanOptimizedOp>(
+        op.getLoc(), diffSource, meanLength);
+
+    // Replace the mean op: every use of its result now refers to the fused
+    // op's result.
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
 };
 
+struct SimplifyLMS2FindPeaks : public mlir::OpRewritePattern<FindPeaksOp> {
+  //
+  SimplifyLMS2FindPeaks(mlir::MLIRContext *context)
+      : OpRewritePattern<FindPeaksOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(FindPeaksOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    //
+    mlir::Value findPeaksOp_operand0 = op.getOperand(0);
+
+    // check if this is coming from an LMSFilterResponse operation.
+    LMSFilterResponseOp prev_lmsFilterResponseOp =
+        findPeaksOp_operand0.getDefiningOp<LMSFilterResponseOp>();
+
+    if (!prev_lmsFilterResponseOp)
+      return failure();
 
-struct SimplifyBack2BackDelay: public mlir::OpRewritePattern<DelayOp>{
+    mlir::Value findPeaksOp_operand1 = op.getOperand(1);
+    mlir::Value findPeaksOp_operand2 = op.getOperand(2);
+    mlir::Value prev_lmsFilterResponseOp_operand0 =
+        prev_lmsFilterResponseOp.getOperand(0);
+    mlir::Value prev_lmsFilterResponseOp_operand1 =
+        prev_lmsFilterResponseOp.getOperand(1);
+    mlir::Value prev_lmsFilterResponseOp_operand2 =
+        prev_lmsFilterResponseOp.getOperand(2);
+    mlir::Value prev_lmsFilterResponseOp_operand3 =
+        prev_lmsFilterResponseOp.getOperand(3);
+
+    auto optimizedOp = rewriter.create<dsp::LMS2FindPeaksOptimizedOp>(
+        op.getLoc(), prev_lmsFilterResponseOp_operand0,
+        prev_lmsFilterResponseOp_operand1, prev_lmsFilterResponseOp_operand2,
+        prev_lmsFilterResponseOp_operand3, findPeaksOp_operand1,
+        findPeaksOp_operand2);
+
+    // Replace the original findPeaks operation with the optimized operation
+    rewriter.replaceOp(op, optimizedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyFindPeaks2Diff2Mean : public mlir::OpRewritePattern<MeanOp> {
   //
-  SimplifyBack2BackDelay(mlir::MLIRContext *context) 
-    : OpRewritePattern<DelayOp>(context, 1) {}
-
-    mlir::LogicalResult matchAndRewrite(DelayOp op, 
-                        mlir::PatternRewriter &rewriter) const override {
-     
-     //
-     mlir::Value delayOp_operand0 = op.getOperand(0);
-     
-     //check if this is coming from another delay operation
-     DelayOp prev_delayOp = delayOp_operand0.getDefiningOp<DelayOp>();
-
-     if(!prev_delayOp)
-        return failure();
+  SimplifyFindPeaks2Diff2Mean(mlir::MLIRContext *context)
+      : OpRewritePattern<MeanOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override {
 
-     mlir::Value delayOp_operand1 = op.getOperand(1);
-     mlir::Value prev_delayOp_operand0 = prev_delayOp.getOperand(0);
-     mlir::Value prev_delayOp_operand1 = prev_delayOp.getOperand(1);
+    // Rewrites mean(diff(findPeaks(x, p, q))) into a single fused op.
+    mlir::Value meanOp_operand0 = op.getOperand(0);
 
-     //create add op 
-     auto addOp = rewriter.create<AddOp>(op.getLoc(), prev_delayOp_operand1, delayOp_operand1);
-     auto newDelayOp = rewriter.create<DelayOp>(op.getLoc(),
-                          prev_delayOp_operand0 , addOp.getResult());
-    
-    //Repalce the use of original delay operation with this newDelayOp
-    rewriter.replaceOp(op, newDelayOp.getResult());
+    // check if this is coming from diff operation.
+    DiffOp prev_diffOp = meanOp_operand0.getDefiningOp<DiffOp>();
+
+    if (!prev_diffOp)
+      return failure();
+
+    mlir::Value prev_diffOp_operand0 = prev_diffOp.getOperand(0);
+    FindPeaksOp prev_findPeaksOp =
+        prev_diffOp_operand0.getDefiningOp<FindPeaksOp>();
+
+    if (!prev_findPeaksOp)
+      return failure();
+
+    mlir::Value prev_findPeaksOp_operand0 = prev_findPeaksOp.getOperand(0);
+    mlir::Value prev_findPeaksOp_operand1 = prev_findPeaksOp.getOperand(1);
+    mlir::Value prev_findPeaksOp_operand2 = prev_findPeaksOp.getOperand(2);
+
+    auto optimizedOp = rewriter.create<dsp::FindPeaks2Diff2MeanOptimizedOp>(
+        op.getLoc(), prev_findPeaksOp_operand0, prev_findPeaksOp_operand1,
+        prev_findPeaksOp_operand2);
+
+    // Replace the original mean operation with the fused operation
+    rewriter.replaceOp(op, optimizedOp.getResult());
     return mlir::success();
+  }
+};
 
-    }
+struct SimplifyMedian2Sliding
+    : public mlir::OpRewritePattern<SlidingWindowAvgOp> {
+  // Matches every SlidingWindowAvgOp; registered with pattern benefit 1.
+  SimplifyMedian2Sliding(mlir::MLIRContext *context)
+      : OpRewritePattern<SlidingWindowAvgOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(SlidingWindowAvgOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Rewrites slidingWindowAvg(medianFilter(x)) into a single fused op.
+    mlir::Value slidingOp_operand0 = op.getOperand();
+
+    // check if this is coming from medianFilter operation.
+    MedianFilterOp prev_medianFilterOp =
+        slidingOp_operand0.getDefiningOp<MedianFilterOp>();
+
+    if (!prev_medianFilterOp)
+      return failure();
+
+    mlir::Value prev_medianFilterOp_operand0 = prev_medianFilterOp.getOperand();
+
+    auto optimizedOp = rewriter.create<dsp::Median2SlidingOptimizedOp>(
+        op.getLoc(), prev_medianFilterOp_operand0);
+    // Replace the sliding-window operation with the fused operation
+    rewriter.replaceOp(op, optimizedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyBack2BackDelay : public mlir::OpRewritePattern<DelayOp> {
+  // Matches every DelayOp; registered with pattern benefit 1.
+  SimplifyBack2BackDelay(mlir::MLIRContext *context)
+      : OpRewritePattern<DelayOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(DelayOp op, mlir::PatternRewriter &rewriter) const override {
+
+    // Rewrites delay(delay(x, d1), d2) into delay(x, d1 + d2).
+    mlir::Value delayOp_operand0 = op.getOperand(0);
+
+    // check if this is coming from another delay operation
+    DelayOp prev_delayOp = delayOp_operand0.getDefiningOp<DelayOp>();
+
+    if (!prev_delayOp)
+      return failure();
+
+    mlir::Value delayOp_operand1 = op.getOperand(1);
+    mlir::Value prev_delayOp_operand0 = prev_delayOp.getOperand(0);
+    mlir::Value prev_delayOp_operand1 = prev_delayOp.getOperand(1);
+
+    // create add op summing the two delay amounts (d1 + d2)
+    auto addOp = rewriter.create<AddOp>(op.getLoc(), prev_delayOp_operand1,
+                                        delayOp_operand1);
+    auto newDelayOp = rewriter.create<DelayOp>(
+        op.getLoc(), prev_delayOp_operand0, addOp.getResult());
+
+    // Replace the use of original delay operation with this newDelayOp
+    rewriter.replaceOp(op, newDelayOp.getResult());
+    return mlir::success();
+  }
 };
 
 // Pseudo-code
-// if operand of square is coming from real part of fft1d 
-// replace fft1d with fft1dreal 
+// if operand of square is coming from real part of fft1d
+// replace fft1d with fft1dreal
 // still squareOp will remain same
 struct SimplifyFFTSquare : public mlir::OpRewritePattern<SquareOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
@@ -241,8 +393,7 @@ struct SimplifyFFTSquare : public mlir::OpRewritePattern<SquareOp> {
   /// argument is the orchestrator of the sequence of rewrites. The pattern is
   /// expected to interact with it to perform any changes to the IR from here.
   mlir::LogicalResult
-  matchAndRewrite(SquareOp op,
-                  mlir::PatternRewriter &rewriter) const override {
+  matchAndRewrite(SquareOp op, mlir::PatternRewriter &rewriter) const override {
     // Look through the input of the current downsampling.
     // mlir::Value squareOperand1_Rate = op.getOperand(1);
     mlir::Value squareOperand0_input = op.getInput();
@@ -252,63 +403,64 @@ struct SimplifyFFTSquare : public mlir::OpRewritePattern<SquareOp> {
     if (!prev_FFT1DOp)
       return failure();
 
-    //Replace fft1d with fft1dreal
-    DEBUG_PRINT_WITH_ARGS( squareOperand0_input) ;
-    DEBUG_PRINT_WITH_ARGS( "Going fr some") ;
-    DEBUG_PRINT_NO_ARGS() ;
-  	mlir::Value prev_FFT1DOp_Operand = prev_FFT1DOp.getInput();
-  	auto fft1drealOp1 = rewriter.create<FFT1DRealOp>(op.getLoc(),
-                          prev_FFT1DOp_Operand );
+    // Replace fft1d with fft1dreal
+    DEBUG_PRINT_WITH_ARGS(squareOperand0_input);
+    DEBUG_PRINT_WITH_ARGS("Going fr some");
+    DEBUG_PRINT_NO_ARGS();
+    mlir::Value prev_FFT1DOp_Operand = prev_FFT1DOp.getInput();
+    auto fft1drealOp1 =
+        rewriter.create<FFT1DRealOp>(op.getLoc(), prev_FFT1DOp_Operand);
     // DEBUG_PRINT_NO_ARGS();
-  	auto SquareOp1 = rewriter.create<SquareOp>(op.getLoc(), fft1drealOp1);
+    auto SquareOp1 = rewriter.create<SquareOp>(op.getLoc(), fft1drealOp1);
 
     rewriter.replaceOp(op, SquareOp1);
     return mlir::success();
   }
 };
 
-struct SimplifyGainwZero: public mlir::OpRewritePattern<GainOp>{
-  SimplifyGainwZero(mlir::MLIRContext *context) 
-    : OpRewritePattern<GainOp>(context, 1) {}
-
-    mlir::LogicalResult matchAndRewrite(GainOp op, 
-                        mlir::PatternRewriter &rewriter) const override {
-     
-     //
-     mlir::Value gainOp_operand1 = op.getOperand(1);
-     
-     //check if the value is zero
-     DEBUG_PRINT_NO_ARGS();
-     dsp::ConstantOp constant_Op1 = gainOp_operand1.getDefiningOp<dsp::ConstantOp>();
+struct SimplifyGainwZero : public mlir::OpRewritePattern<GainOp> {
+  SimplifyGainwZero(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+    // Folds gain(x, 0) into an all-zero constant. Both operands must be
+    // produced by dsp::ConstantOp, otherwise the pattern does not apply.
+    DEBUG_PRINT_NO_ARGS();
+    mlir::Value gainOp_operand1 = op.getOperand(1);
+    auto constant_Op1 = gainOp_operand1.getDefiningOp<dsp::ConstantOp>();
+    if (!constant_Op1)
+      return failure();
+
     DenseElementsAttr DenseValueFrmgainOp = constant_Op1.getValue();
     auto elements = DenseValueFrmgainOp.getValues<FloatAttr>();
     float FirstValue = elements[0].getValueAsDouble();
-    int64_t GainRate = (int64_t) FirstValue;
+    const bool gainIsZero = FirstValue == 0.0f; // no int cast: 0.5 is not 0
 
-     if(!GainRate==0)
-        return failure();
+    if (!gainIsZero)
+      return failure();
 
     mlir::Value gainOp_operand0 = op.getOperand(0);
-    dsp::ConstantOp constant_Op0 = gainOp_operand0.getDefiningOp<dsp::ConstantOp>();
+    auto constant_Op0 = gainOp_operand0.getDefiningOp<dsp::ConstantOp>();
+    if (!constant_Op0) return failure(); // size is read from a constant
     DenseElementsAttr InputValueFrmgainOp = constant_Op0.getValue();
     int64_t inputSize = InputValueFrmgainOp.size();
 
-  // Define the type of the tensor (tensor<f64>).
-  RankedTensorType tensorType = RankedTensorType::get({inputSize}, rewriter.getF64Type());
-
-  // Create a constant operation with the specified value and type.
-  DenseElementsAttr zerovalue = DenseElementsAttr::get(tensorType, 0.0);
-  Operation* constantOp = rewriter.create<ConstantOp>(op.getLoc(), zerovalue);
+    // Define the type of the tensor (1-D f64, one element per input element).
+    RankedTensorType tensorType =
+        RankedTensorType::get({inputSize}, rewriter.getF64Type());
 
+    // Create a constant operation with the specified value and type.
+    DenseElementsAttr zerovalue = DenseElementsAttr::get(tensorType, 0.0);
+    Operation *constantOp = rewriter.create<ConstantOp>(op.getLoc(), zerovalue);
 
     rewriter.replaceOp(op, constantOp);
     return mlir::success();
-
-    }
+  }
 };
 
 // Pseudo-code
-// if operands of MulOp are coming from lowPassFIRFilter & hamming 
+// if operands of MulOp are coming from lowPassFIRFilter & hamming
 // then replace the MulOp with the symmetrical operation
 struct SimplifyFilterMulHamming : public mlir::OpRewritePattern<MulOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
@@ -321,8 +473,7 @@ struct SimplifyFilterMulHamming : public mlir::OpRewritePattern<MulOp> {
   /// argument is the orchestrator of the sequence of rewrites. The pattern is
   /// expected to interact with it to perform any changes to the IR from here.
   mlir::LogicalResult
-  matchAndRewrite(MulOp op,
-                  mlir::PatternRewriter &rewriter) const override {
+  matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override {
     // Get the operands operation from MulFOp
     // check if op0 is Low/HighPassFIRFilterOp & op1 is HammingWindowOp
     // if this true then get the operands of op0 ie, Low/HighPassFIRFilterOp
@@ -330,32 +481,35 @@ struct SimplifyFilterMulHamming : public mlir::OpRewritePattern<MulOp> {
     // mlir::Value squareOperand1_Rate = op.getOperand(1);
     mlir::Value mulOperand0_Lhs = op.getLhs();
     mlir::Value mulOperand1_Rhs = op.getRhs();
-    dsp::LowPassFIRFilterOp op_LowPassFIRFilterOp = mulOperand0_Lhs.getDefiningOp<LowPassFIRFilterOp>();
-    dsp::HammingWindowOp op_HammingWindowOp = mulOperand1_Rhs.getDefiningOp<HammingWindowOp>();
+    dsp::LowPassFIRFilterOp op_LowPassFIRFilterOp =
+        mulOperand0_Lhs.getDefiningOp<LowPassFIRFilterOp>();
+    dsp::HammingWindowOp op_HammingWindowOp =
+        mulOperand1_Rhs.getDefiningOp<HammingWindowOp>();
 
     DEBUG_PRINT_NO_ARGS();
     // Inputs are LowPassFIRFilterOp && HammingWindowOp => If not, no match.
     if (!op_LowPassFIRFilterOp || !op_HammingWindowOp)
       return failure();
 
-    //Replace fft1d with fft1dreal
-    DEBUG_PRINT_WITH_ARGS( mulOperand0_Lhs) ;
-    DEBUG_PRINT_WITH_ARGS( "SimplifyFilterMulHamming - ConditionMet") ;
-    DEBUG_PRINT_NO_ARGS() ;
+    // Fuse the low-pass FIR filter and the Hamming window into one op
+    DEBUG_PRINT_WITH_ARGS(mulOperand0_Lhs);
+    DEBUG_PRINT_WITH_ARGS("SimplifyFilterMulHamming - ConditionMet");
+    DEBUG_PRINT_NO_ARGS();
     mlir::Value LowPassFIRFilterOperand_wc = op_LowPassFIRFilterOp.getWc();
     mlir::Value LowPassFIRFilterOperand_N = op_LowPassFIRFilterOp.getN();
 
-    auto firFilterHammingOptimized = rewriter.create<FIRFilterHammingOptimizedOp>(op.getLoc(),
-                          LowPassFIRFilterOperand_wc, LowPassFIRFilterOperand_N );
+    auto firFilterHammingOptimized =
+        rewriter.create<FIRFilterHammingOptimizedOp>(
+            op.getLoc(), LowPassFIRFilterOperand_wc, LowPassFIRFilterOperand_N);
     DEBUG_PRINT_NO_ARGS();
-    
+
     rewriter.replaceOp(op, firFilterHammingOptimized);
     return mlir::success();
   }
 };
 
 // Pseudo-code
-// if operands of MulOp are coming from highPassFIRFilter & hamming 
+// if operands of MulOp are coming from highPassFIRFilter & hamming
 // then replace the MulOp with the symmetrical operation
 struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern<MulOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
@@ -368,8 +522,7 @@ struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern<MulOp> {
   /// argument is the orchestrator of the sequence of rewrites. The pattern is
   /// expected to interact with it to perform any changes to the IR from here.
   mlir::LogicalResult
-  matchAndRewrite(MulOp op,
-                  mlir::PatternRewriter &rewriter) const override {
+  matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override {
     // Get the operands operation from MulFOp
     // check if op0 is Low/HighPassFIRFilterOp & op1 is HammingWindowOp
     // if this true then get the operands of op0 ie, Low/HighPassFIRFilterOp
@@ -377,38 +530,43 @@ struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern<MulOp> {
     // mlir::Value squareOperand1_Rate = op.getOperand(1);
     mlir::Value mulOperand0_Lhs = op.getLhs();
     mlir::Value mulOperand1_Rhs = op.getRhs();
-    dsp::HighPassFIRFilterOp op_HighPassFIRFilterOp = mulOperand0_Lhs.getDefiningOp<HighPassFIRFilterOp>();
-    dsp::HammingWindowOp op_HammingWindowOp = mulOperand1_Rhs.getDefiningOp<HammingWindowOp>();
+    dsp::HighPassFIRFilterOp op_HighPassFIRFilterOp =
+        mulOperand0_Lhs.getDefiningOp<HighPassFIRFilterOp>();
+    dsp::HammingWindowOp op_HammingWindowOp =
+        mulOperand1_Rhs.getDefiningOp<HammingWindowOp>();
 
     DEBUG_PRINT_NO_ARGS();
     // Inputs are HighPassFIRFilterOp && HammingWindowOp => If not, no match.
     if (!op_HighPassFIRFilterOp || !op_HammingWindowOp)
       return failure();
 
-    //Replace fft1d with fft1dreal
-    DEBUG_PRINT_WITH_ARGS( mulOperand0_Lhs) ;
-    DEBUG_PRINT_WITH_ARGS( "SimplifyHighPassFIRHamming - ConditionMet") ;
-    DEBUG_PRINT_NO_ARGS() ;
+    // Fuse the high-pass FIR filter and the Hamming window into one op
+    DEBUG_PRINT_WITH_ARGS(mulOperand0_Lhs);
+    DEBUG_PRINT_WITH_ARGS("SimplifyHighPassFIRHamming - ConditionMet");
+    DEBUG_PRINT_NO_ARGS();
     mlir::Value HighPassFIRFilterOperand_wc = op_HighPassFIRFilterOp.getWc();
     mlir::Value HighPassFIRFilterOperand_N = op_HighPassFIRFilterOp.getN();
 
-    auto highPassFIRHammingOptimized = rewriter.create<HighPassFIRHammingOptimizedOp>(op.getLoc(),
-                          HighPassFIRFilterOperand_wc, HighPassFIRFilterOperand_N );
+    auto highPassFIRHammingOptimized =
+        rewriter.create<HighPassFIRHammingOptimizedOp>(
+            op.getLoc(), HighPassFIRFilterOperand_wc,
+            HighPassFIRFilterOperand_N);
     DEBUG_PRINT_NO_ARGS();
-    
+
     rewriter.replaceOp(op, highPassFIRHammingOptimized);
     return mlir::success();
   }
 };
 
-//Pseudo-Code
-//Find FIRFilterResponse & FIRFilterHammingOptimized &  operation
-    // result1 = dsp.FIRFilterHammingOptimized(input1, rate1) //filter and hamming
-    // result2 = dsp.FIRFilterResponse(result1, rate2) //FilterResponse
-// For above pattern , replace dsp.FIRFilterResponse with FIRFilterResSymmOptimized
-  // result1 = dsp.FIRFilterHammingOptimized(input1, rate1)
-  // result2 = dsp.FIRFilterResSymmOptimized(result1, rate2) 
-struct SimplifyFIRFilterRespnseWithSymmFilter : public mlir::OpRewritePattern<FIRFilterResponseOp> {
+// Pseudo-Code
+// Find FIRFilterResponse & FIRFilterHammingOptimized operation
+//  result1 = dsp.FIRFilterHammingOptimized(input1, rate1) //filter and hamming
+//  result2 = dsp.FIRFilterResponse(result1, rate2) //FilterResponse
+// For above pattern, replace dsp.FIRFilterResponse with
+// FIRFilterResSymmOptimized:
+//  result2 = dsp.FIRFilterResSymmOptimized(result1, rate2)
+struct SimplifyFIRFilterRespnseWithSymmFilter
+    : public mlir::OpRewritePattern<FIRFilterResponseOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
   /// The "benefit" is used by the framework to order the patterns and process
   /// them in order of profitability.
@@ -422,37 +580,42 @@ struct SimplifyFIRFilterRespnseWithSymmFilter : public mlir::OpRewritePattern<FI
   matchAndRewrite(FIRFilterResponseOp op,
                   mlir::PatternRewriter &rewriter) const override {
     // Look through the input of the current downsampling.
-    //if 1 of the operands is FIRFilterHammingOptimized then go for rewrite
-    //ie, if 
+    // if 1 of the operands is FIRFilterHammingOptimized then go for rewrite
+    // ie, if
     mlir::Value Operand1_forFIRFilterResp = op.getOperand(1);
     mlir::Value Operand0_forFIRFilterResp = op.getOperand(0);
-    dsp::FIRFilterHammingOptimizedOp prev_FIRFilterSymmOp = Operand1_forFIRFilterResp.getDefiningOp<FIRFilterHammingOptimizedOp>();
+    dsp::FIRFilterHammingOptimizedOp prev_FIRFilterSymmOp =
+        Operand1_forFIRFilterResp.getDefiningOp<FIRFilterHammingOptimizedOp>();
 
     // Input defined by another downsampling? If not, no match.
-    if (!prev_FIRFilterSymmOp){
+    if (!prev_FIRFilterSymmOp) {
       return failure();
     }
 
     // create FIRFilterHammingOptimizedOp with current operands
-    DEBUG_PRINT_WITH_ARGS("Going for FIRFilterresponse Opt when the operand1 is a symmetric filter");
-    
-    auto firFilterResSymmOptimizedOp = rewriter.create<FIRFilterResSymmOptimizedOp>(op.getLoc(),
-                          Operand0_forFIRFilterResp , Operand1_forFIRFilterResp);
+    DEBUG_PRINT_WITH_ARGS("Going for FIRFilterresponse Opt when the operand1 "
+                          "is a symmetric filter");
 
-    DEBUG_PRINT_NO_ARGS() ;
+    auto firFilterResSymmOptimizedOp =
+        rewriter.create<FIRFilterResSymmOptimizedOp>(
+            op.getLoc(), Operand0_forFIRFilterResp, Operand1_forFIRFilterResp);
+
+    DEBUG_PRINT_NO_ARGS();
     rewriter.replaceOp(op, firFilterResSymmOptimizedOp);
 
     return mlir::success();
   }
 };
 
-//Pseudo code: 
-// if the FFT1DRealOp & FFT1DImgOp has same input then replace them with single 
-// %4 = "dsp.fft1dreal"(%3) : (tensor<10xf64>) -> tensor<10xf64>
-// %5 = "dsp.fft1dimg"(%3) : (tensor<10xf64>) -> tensor<10xf64>
-// replace with %4, %5 = "dsp.fft1d"(%3) : (tensor<10xf64>) -> (tensor<10xf64 , tensor<10xf64)>
+// label: pass 1st
+// Pseudo code:
+//  if the FFT1DRealOp & FFT1DImgOp has same input then replace them with single
+//  %4 = "dsp.fft1dreal"(%3) : (tensor<10xf64>) -> tensor<10xf64>
+//  %5 = "dsp.fft1dimg"(%3) : (tensor<10xf64>) -> tensor<10xf64>
+//  replace with %4, %5 = "dsp.fft1d"(%3) : (tensor<10xf64>) ->
+//  (tensor<10xf64>, tensor<10xf64>)
 //
-// Define the canonicalization pattern.
+//  Define the canonicalization pattern.
 struct SimplifyFFTRealAndImg : public OpRewritePattern<FFT1DRealOp> {
   SimplifyFFTRealAndImg(MLIRContext *context)
       : OpRewritePattern<FFT1DRealOp>(context, /*benefit=*/1) {}
@@ -464,14 +627,15 @@ struct SimplifyFFTRealAndImg : public OpRewritePattern<FFT1DRealOp> {
     if (!nextOp || !isa<FFT1DImgOp>(nextOp))
       return failure();
 
-    DEBUG_PRINT_NO_ARGS() ;
+    DEBUG_PRINT_NO_ARGS();
     auto imgOp = cast<FFT1DImgOp>(nextOp);
     if (realOp.getInput() != imgOp.getInput())
       return failure();
 
     // Replace the two operations with the combined FFT1D operation.
-    DEBUG_PRINT_NO_ARGS() ;
-    auto combinedOp = rewriter.create<FFT1DOp>(realOp.getLoc(), realOp.getInput());
+    DEBUG_PRINT_NO_ARGS();
+    auto combinedOp =
+        rewriter.create<FFT1DOp>(realOp.getLoc(), realOp.getInput());
     rewriter.replaceOp(realOp, combinedOp.getResult(0));
     rewriter.replaceOp(imgOp, combinedOp.getResult(1));
 
@@ -479,15 +643,16 @@ struct SimplifyFFTRealAndImg : public OpRewritePattern<FFT1DRealOp> {
   }
 };
 
-
-//Pseudo-Code
-//Find FIRFilterResponse & reverseInput
-   // %1 = "dsp.reverseInput"(%0) : (tensor<4xf64>) -> tensor<*xf64>
-   // %2 = "dsp.FIRFilterResponse"(%0, %1) : (tensor<4xf64>, tensor<*xf64>) -> tensor<*xf64>
-// For above pattern , replace dsp.FIRFilterResponse with FIRFilterYSymmOptimized
-  // %1 = "dsp.reverseInput"(%0)
-  // result2 = dsp.FIRFilterYSymmOptimized(result1, rate2) 
-struct SimplifyFilterRespX_ReverseXYSymmFilter : public mlir::OpRewritePattern<FIRFilterResponseOp> {
+// Pseudo-Code
+// Find FIRFilterResponse & reverseInput
+//  %1 = "dsp.reverseInput"(%0) : (tensor<4xf64>) -> tensor<*xf64>
+//  %2 = "dsp.FIRFilterResponse"(%0, %1) : (tensor<4xf64>, tensor<*xf64>) ->
+//  tensor<*xf64>
+// For above pattern, replace dsp.FIRFilterResponse with FIRFilterYSymmOptimized
+//  %1 = "dsp.reverseInput"(%0)
+//  result2 = dsp.FIRFilterYSymmOptimized(result1, rate2)
+struct SimplifyFilterRespX_ReverseXYSymmFilter
+    : public mlir::OpRewritePattern<FIRFilterResponseOp> {
   /// We register this pattern to match every dsp.downsampling in the IR.
   /// The "benefit" is used by the framework to order the patterns and process
   /// them in order of profitability.
@@ -501,33 +666,36 @@ struct SimplifyFilterRespX_ReverseXYSymmFilter : public mlir::OpRewritePattern<F
   matchAndRewrite(FIRFilterResponseOp op,
                   mlir::PatternRewriter &rewriter) const override {
     // Look through the input of the current downsampling.
-    //if 1 of the operands is FIRFilterHammingOptimized then go for rewrite
-    //ie, if 
+    // if 1 of the operands is FIRFilterHammingOptimized then go for rewrite
+    // ie, if
     mlir::Value Operand1_forFIRFilterResp = op.getOperand(1);
     mlir::Value Operand0_forFIRFilterResp = op.getOperand(0);
-    dsp::ReverseInputOp prev_ReverseOp = Operand1_forFIRFilterResp.getDefiningOp<ReverseInputOp>();
+    dsp::ReverseInputOp prev_ReverseOp =
+        Operand1_forFIRFilterResp.getDefiningOp<ReverseInputOp>();
 
     // Operand1 defined by another ReverseOp? If not, no match.
-    if (!prev_ReverseOp){
+    if (!prev_ReverseOp) {
       return failure();
     }
 
     // create FIRFilterYSymmOptimizedOp with current operands
-    DEBUG_PRINT_WITH_ARGS("Going for FIRFilterResponse Opt when the operand1 is a ReverseInputOp");
-    
-    auto firFilterResYSymmOptimizedOp = rewriter.create<FIRFilterYSymmOptimizedOp>(op.getLoc(),
-                          Operand0_forFIRFilterResp , Operand1_forFIRFilterResp);
+    DEBUG_PRINT_WITH_ARGS("Going for FIRFilterResponse Opt when the operand1 "
+                          "is a ReverseInputOp");
 
-    DEBUG_PRINT_NO_ARGS() ;
+    auto firFilterResYSymmOptimizedOp =
+        rewriter.create<FIRFilterYSymmOptimizedOp>(
+            op.getLoc(), Operand0_forFIRFilterResp, Operand1_forFIRFilterResp);
+
+    DEBUG_PRINT_NO_ARGS();
     rewriter.replaceOp(op, firFilterResYSymmOptimizedOp);
 
     return mlir::success();
   }
 };
 
-//Pseudo code: 
-// if the  input of FFT1DRealOp = FIRFilterYSymmOptimizedOp then replace it with FFT1DRealSymmOp 
-// Define the canonicalization pattern.
+// Pseudo code:
+//  if the input of FFT1DRealOp = FIRFilterYSymmOptimizedOp then replace it
+//  with FFT1DRealSymmOp. Define the canonicalization pattern.
 struct SimplifyFFTRealAtInputRealSymm : public OpRewritePattern<FFT1DRealOp> {
   SimplifyFFTRealAtInputRealSymm(MLIRContext *context)
       : OpRewritePattern<FFT1DRealOp>(context, /*benefit=*/1) {}
@@ -536,27 +704,28 @@ struct SimplifyFFTRealAtInputRealSymm : public OpRewritePattern<FFT1DRealOp> {
                                 PatternRewriter &rewriter) const override {
     // Check if there is a corresponding FFT1DImgOp with the same input.
     mlir::Value fftOperand_input = Op.getInput();
-    dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = fftOperand_input.getDefiningOp<FIRFilterYSymmOptimizedOp>();
-    
+    dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp =
+        fftOperand_input.getDefiningOp<FIRFilterYSymmOptimizedOp>();
+
     if (!op_FIRFilterYSymmOptimizedOp)
       return failure();
 
-    DEBUG_PRINT_NO_ARGS() ;
-
+    DEBUG_PRINT_NO_ARGS();
 
     // Replace the two operations with the combined FFT1D operation.
-    auto fft1dRealSymmOp = rewriter.create<FFT1DRealSymmOp>(Op.getLoc(), Op.getInput());
-    DEBUG_PRINT_NO_ARGS() ;
-    rewriter.replaceOp(Op, fft1dRealSymmOp.getResult());
-    // rewriter.replaceOp(Op, fft1dRealSymmOp);
-    DEBUG_PRINT_NO_ARGS() ;
+    auto fft1dRealSymmOp =
+        rewriter.create<FFT1DRealSymmOp>(Op.getLoc(), Op.getInput());
+    DEBUG_PRINT_NO_ARGS();
+    // rewriter.replaceOp(Op, fft1dRealSymmOp.getResult());
+    rewriter.replaceOp(Op, fft1dRealSymmOp);
+    DEBUG_PRINT_NO_ARGS();
     return success();
   }
 };
 
-//Pseudo code: 
-// if the  input of FFT1DImgOp = FIRFilterYSymmOptimizedOp then replace it with FFT1DImgConjSymmOp 
-// Define the canonicalization pattern.
+// Pseudo code:
+//  if the input of FFT1DImgOp = FIRFilterYSymmOptimizedOp then replace it with
+//  FFT1DImgConjSymmOp. Define the canonicalization pattern.
 struct SimplifyFFTImgAtInputRealSymm : public OpRewritePattern<FFT1DImgOp> {
   SimplifyFFTImgAtInputRealSymm(MLIRContext *context)
       : OpRewritePattern<FFT1DImgOp>(context, /*benefit=*/1) {}
@@ -565,64 +734,696 @@ struct SimplifyFFTImgAtInputRealSymm : public OpRewritePattern<FFT1DImgOp> {
                                 PatternRewriter &rewriter) const override {
     // Check if there is a corresponding FFT1DImgOp with the same input.
     mlir::Value fftOperand_input = Op.getInput();
-    dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp = fftOperand_input.getDefiningOp<FIRFilterYSymmOptimizedOp>();
-    
+    dsp::FIRFilterYSymmOptimizedOp op_FIRFilterYSymmOptimizedOp =
+        fftOperand_input.getDefiningOp<FIRFilterYSymmOptimizedOp>();
+
     if (!op_FIRFilterYSymmOptimizedOp)
       return failure();
 
-    DEBUG_PRINT_NO_ARGS() ;
-
+    DEBUG_PRINT_NO_ARGS();
 
     // Replace the two operations with the combined FFT1D operation.
-    
-    auto fft1dImgConjSymmOp = rewriter.create<FFT1DImgConjSymmOp>(Op.getLoc(), Op.getInput());
-    DEBUG_PRINT_NO_ARGS() ;
+
+    auto fft1dImgConjSymmOp =
+        rewriter.create<FFT1DImgConjSymmOp>(Op.getLoc(), Op.getInput());
+    DEBUG_PRINT_NO_ARGS();
     // rewriter.replaceOp(Op, fft1dImgConjSymmOp.getResult());
     rewriter.replaceOp(Op, fft1dImgConjSymmOp);
-    DEBUG_PRINT_NO_ARGS() ;
+    DEBUG_PRINT_NO_ARGS();
     return success();
   }
 };
 
+// Pseudo-Code
+// Find lmsFilter followed by a gain operation
+//  result1 = lmsFilter(noisy_sig, clean_sig, mu, filterSize, iter);
+//  result2 = gain(result1, G1)
+// result2 will be now lmsFilter(noisy_sig, clean_sig, mu*G1,
+// filterSize, iter); replaceOp
+struct SimplifyLMSFilterwithGain
+    : public mlir::OpRewritePattern<GainOp> {
+  SimplifyLMSFilterwithGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+
+    mlir::Value gainOp_operand0 = op.getOperand(0);
+
+    LMSFilterOp prev_LMSFilterOp =
+        gainOp_operand0.getDefiningOp<LMSFilterOp>();
+
+    if (!prev_LMSFilterOp)
+      return failure();
+
+    mlir::Value gainOp_operand1 = op.getOperand(1);
+    mlir::Value prev_LMSFilterOp_0 =
+        prev_LMSFilterOp.getOperand(0);
+    mlir::Value prev_LMSFilterOp_1 =
+        prev_LMSFilterOp.getOperand(1);
+    mlir::Value prev_LMSFilterOp_mu =
+        prev_LMSFilterOp.getOperand(2);
+    mlir::Value prev_LMSFilterOp_3 =
+        prev_LMSFilterOp.getOperand(3);
+    mlir::Value prev_LMSFilterOp_4 =
+        prev_LMSFilterOp.getOperand(4);
+
+    // create mul op: scale operand 2 (mu) of the filter by the gain
+    auto mulOp = rewriter.create<MulOp>(
+        op.getLoc(), prev_LMSFilterOp_mu, gainOp_operand1);
+    auto newLMSFilterOp = rewriter.create<LMSFilterOp>(
+        op.getLoc(), prev_LMSFilterOp_0, prev_LMSFilterOp_1,
+        mulOp.getResult(), prev_LMSFilterOp_3, prev_LMSFilterOp_4);
+
+    // Replace the use of original gain operation with this newLMSFilterOp
+    rewriter.replaceOp(op, newLMSFilterOp.getResult());
+    return mlir::success();
+  }
+};
+
+
+
+
+
+
+
+
+// Pseudo-Code
+// Find lmsFilterResponse followed by a gain operation
+//  result1 = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+//  result2 = gain(result1, G1)
+// result2 will be now LMSFilterResponse2Gain(noisy_sig, clean_sig, mu,
+// filterSize, G1); replaceOp
+struct SimplifyLMSFilterResponsewithGain
+    : public mlir::OpRewritePattern<GainOp> {
+  SimplifyLMSFilterResponsewithGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+
+    mlir::Value gainOp_operand0 = op.getOperand(0);
+
+    LMSFilterResponseOp prev_LMSFilterResponseOp =
+        gainOp_operand0.getDefiningOp<LMSFilterResponseOp>();
+
+    if (!prev_LMSFilterResponseOp)
+      return failure();
+
+    mlir::Value gainOp_operand1 = op.getOperand(1);
+    mlir::Value prev_LMSFilterResponseOp_0 =
+        prev_LMSFilterResponseOp.getOperand(0);
+    mlir::Value prev_LMSFilterResponseOp_1 =
+        prev_LMSFilterResponseOp.getOperand(1);
+    mlir::Value prev_LMSFilterResponseOp_2 =
+        prev_LMSFilterResponseOp.getOperand(2);
+    mlir::Value prev_LMSFilterResponseOp_3 =
+        prev_LMSFilterResponseOp.getOperand(3);
+
+    auto OptimizedOp = rewriter.create<LMSFilterResponse2GainOp>(
+        op.getLoc(), prev_LMSFilterResponseOp_0, prev_LMSFilterResponseOp_1,
+        prev_LMSFilterResponseOp_2, prev_LMSFilterResponseOp_3, gainOp_operand1);
+
+    // Replace the use of original gain operation with this OptimizedOp
+    rewriter.replaceOp(op, OptimizedOp.getResult());
+    return mlir::success();
+  }
+};
 
-//Pseudo-Code
-//Find lmsFIlter with gain operation
-    // result1 = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
-    // result2 = gain(result1, G1)
-  // result2 will be now lmsFilterResponse(noisy_sig, clean_sig, mu*g1, filterSize);
-  // replaceOp 
-struct SimplifyLMSFilterResponsewithGain: public mlir::OpRewritePattern<GainOp>{
-  SimplifyLMSFilterResponsewithGain(mlir::MLIRContext *context) 
-    : OpRewritePattern<GainOp>(context, 1) {}
-
-    mlir::LogicalResult matchAndRewrite(GainOp op, 
-                        mlir::PatternRewriter &rewriter) const override {
-     
-     mlir::Value gainOp_operand0 = op.getOperand(0);
-     
-     LMSFilterResponseOp prev_LMSFilterResponseOp = gainOp_operand0.getDefiningOp<LMSFilterResponseOp>();
-
-     if(!prev_LMSFilterResponseOp)
-        return failure();
 
-     mlir::Value gainOp_operand1 = op.getOperand(1);
-     mlir::Value prev_LMSFilterResponseOp_0 = prev_LMSFilterResponseOp.getOperand(0);
-     mlir::Value prev_LMSFilterResponseOp_1 = prev_LMSFilterResponseOp.getOperand(1);
-     mlir::Value prev_LMSFilterResponseOp_mu = prev_LMSFilterResponseOp.getOperand(2);
-     mlir::Value prev_LMSFilterResponseOp_3 = prev_LMSFilterResponseOp.getOperand(3);
-
-     //create mul op 
-     auto mulOp = rewriter.create<MulOp>(op.getLoc(), prev_LMSFilterResponseOp_mu, gainOp_operand1);
-     auto newLMSFilterResponseOp = rewriter.create<LMSFilterResponseOp>(op.getLoc(),
-                          prev_LMSFilterResponseOp_0, prev_LMSFilterResponseOp_1, mulOp.getResult(), prev_LMSFilterResponseOp_3);
-    
-    //Repalce the use of original gain operation with this newGainOp
-    rewriter.replaceOp(op, newLMSFilterResponseOp.getResult());
+
+
+
+
+
+
+
+
+struct SimplifySpaceModDemodulate
+    : public mlir::OpRewritePattern<SpaceDemodulateOp> {
+  SimplifySpaceModDemodulate(mlir::MLIRContext *context)
+      : OpRewritePattern<SpaceDemodulateOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(SpaceDemodulateOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Cancels a demodulate against the SpaceModulateOp feeding it: walk up the
+    // chain of first operands from the demodulate op until the modulate op.
+    // NOTE(review): ops between the two are skipped, i.e. assumed not to
+    // change the decoded data -- confirm.
+    SpaceModulateOp prev_mod;
+    mlir::Value iter = op.getOperand();
+    while (mlir::Operation *pred = iter.getDefiningOp()) {
+      // llvm::errs() << pred->getName().getStringRef() << "\n";
+      prev_mod = llvm::dyn_cast<SpaceModulateOp>(pred);
+      // Stop at the modulate op, or at an op with no operand to follow.
+      if (prev_mod || pred->getNumOperands() == 0)
+        break;
+      iter = pred->getOperand(0);
+    }
+
+    if (!prev_mod)
+      return failure();
+
+    // Forward the modulator's input value itself; unlike its defining op, the
+    // value is also valid when it is a block argument.
+    rewriter.replaceOp(op, {prev_mod.getOperand()});
     return mlir::success();
+  }
+};
+
+struct SimplifyNormLMSFilterResponse
+    : public mlir::OpRewritePattern<NormalizeOp> {
+  SimplifyNormLMSFilterResponse(mlir::MLIRContext *ctx)
+      : OpRewritePattern<NormalizeOp>(ctx, 1) {}
+  /// Rewrites normalize(lmsFilterResponse(...)) into one fused optimized op.
+  mlir::LogicalResult
+  matchAndRewrite(NormalizeOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Applies only when the normalized value is an LMSFilterResponseOp result.
+    Value signal = op.getOperand();
+    Operation *filterOp = signal.getDefiningOp<LMSFilterResponseOp>();
+
+    if (!filterOp)
+      return failure();
+    // The four filter operands are forwarded unchanged to the fused op.
+    Value filterOp_operand0 = filterOp->getOperand(0);
+    Value filterOp_operand1 = filterOp->getOperand(1);
+    Value filterOp_operand2 = filterOp->getOperand(2);
+    Value filterOp_operand3 = filterOp->getOperand(3);
 
+    auto normLMSfilterOpt = rewriter.create<NormLMSFilterResponseOptimizeOp>(
+        op.getLoc(), filterOp_operand0, filterOp_operand1, filterOp_operand2,
+        filterOp_operand3);
+    // Drop the original filter op once the normalize op was its last user.
+    rewriter.replaceOp(op, normLMSfilterOpt);
+    if (filterOp->use_empty()) {
+      rewriter.eraseOp(filterOp);
     }
+
+    return mlir::success();
+  }
+};
+
+/// Rewrites div(sum(square(fft1dreal(x)) + square(fft1dreal(x))), len(x))
+/// into sum(square(x)). The divisor must be the length of the same input x.
+struct SimplifyDSSDPass : public mlir::OpRewritePattern<DivOp> {
+  SimplifyDSSDPass(mlir::MLIRContext *ctx) : OpRewritePattern<DivOp>(ctx, 1) {}
+  mlir::LogicalResult
+  matchAndRewrite(DivOp op, mlir::PatternRewriter &rewriter) const override {
+#define CHECK(x)                                                               \
+  if (!x)                                                                      \
+    return failure();
+#define REMOVE(x)                                                              \
+  if (x->use_empty())                                                          \
+    rewriter.eraseOp(x);
+#define DEBUG(x)                                                               \
+  { llvm::errs() << "check for " << x << "\n"; }
+#define PASS llvm::errs() << "pass\n";
+
+    auto loc = op.getLoc();
+    // Dividend chain: sum(add(square(.), square(.))).
+    Operation *sumOp = op.getOperand(0).getDefiningOp<SumOp>();
+    CHECK(sumOp);
+    Operation *addOp = sumOp->getOperand(0).getDefiningOp<AddOp>();
+    CHECK(addOp);
+    Operation *sqrtOp0 = addOp->getOperand(0).getDefiningOp<SquareOp>();
+    CHECK(sqrtOp0);
+    Operation *sqrtOp1 = addOp->getOperand(1).getDefiningOp<SquareOp>();
+    CHECK(sqrtOp1);
+    Operation *fftRealOp = sqrtOp0->getOperand(0).getDefiningOp<FFT1DRealOp>();
+    CHECK(fftRealOp);
+
+    // Nominally the imaginary part; an earlier pass rewrites it to fft1dreal.
+    Operation *fftImgOp = sqrtOp1->getOperand(0).getDefiningOp<FFT1DRealOp>();
+    CHECK(fftImgOp);
+
+    // Both transforms must read the same input signal.
+    Value input1 = fftRealOp->getOperand(0);
+    Value input2 = fftImgOp->getOperand(0);
+    CHECK((input1 == input2));
+
+    // The divisor must be len(input); otherwise the rewrite changes the value.
+    Operation *lenOp = op.getOperand(1).getDefiningOp<LengthOp>();
+    CHECK(lenOp);
+    CHECK((lenOp->getOperand(0) == input1));
+
+    auto newSqrt = rewriter.create<SquareOp>(loc, input1);
+    auto newResult = rewriter.create<SumOp>(loc, newSqrt);
+    rewriter.replaceOp(op, newResult);
+
+    // Erase consumers first so producers become use-free; skip aliased ops.
+    REMOVE(sumOp);
+    REMOVE(addOp);
+    REMOVE(sqrtOp0);
+    if (sqrtOp1 != sqrtOp0)
+      REMOVE(sqrtOp1);
+    REMOVE(fftRealOp);
+    if (fftImgOp != fftRealOp)
+      REMOVE(fftImgOp);
+    return mlir::success();
+  }
 };
 
+/// Fuses thresholdUp(FIRFilterResSymmOptimized(a, b), t1, t2) into a single
+/// FIRFilterResSymmThresholdUpOptimizedOp.
+struct SimplifyFIRFilterHammingThreholdUpOptimized
+    : public mlir::OpRewritePattern<ThresholdUpOp> {
+  SimplifyFIRFilterHammingThreholdUpOptimized(mlir::MLIRContext *context)
+      : OpRewritePattern<ThresholdUpOp>(context, /*benefit=*/1) {}
+
+  /// Matches a ThresholdUpOp whose operand 0 is produced by a
+  /// FIRFilterResSymmOptimizedOp and replaces it with the fused op.
+  mlir::LogicalResult
+  matchAndRewrite(ThresholdUpOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Value thresholdOperand0 = op.getOperand(0);
+    mlir::Value thresholdOperand1 = op.getOperand(1);
+    mlir::Value thresholdOperand2 = op.getOperand(2);
+
+    auto firOp =
+        thresholdOperand0.getDefiningOp<FIRFilterResSymmOptimizedOp>();
+    if (!firOp)
+      return failure();
+
+    // Fused op operands: the filter's two operands, then the threshold's.
+    mlir::Value firOperand0 = firOp->getOperand(0);
+    mlir::Value firOperand1 = firOp->getOperand(1);
+    auto fusedOp = rewriter.create<FIRFilterResSymmThresholdUpOptimizedOp>(
+        op.getLoc(), firOperand0, firOperand1, thresholdOperand1,
+        thresholdOperand2);
+
+    DEBUG_PRINT_NO_ARGS();
+    rewriter.replaceOp(op, fusedOp);
+    return mlir::success();
+  }
+};
+
+//  Define the canonicalization pattern.
+/// Collapses the consecutive ops fftreal(x), fftimag(x), square, square, add,
+/// sqrt into a single FFTAbsOp on x.
+struct SimplifyFFTAbs : public OpRewritePattern<FFTRealOp> {
+  SimplifyFFTAbs(MLIRContext *context)
+      : OpRewritePattern<FFTRealOp>(context, 1) {}
+
+  LogicalResult matchAndRewrite(FFTRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // The next op must be an FFTImagOp reading the same input as realOp.
+    Operation *nextofFFTRealOp = realOp->getNextNode();
+    if (!nextofFFTRealOp || !isa<FFTImagOp>(nextofFFTRealOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto fftImagOp = cast<FFTImagOp>(nextofFFTRealOp);
+    if (realOp.getLhs() != fftImagOp.getLhs())
+      return failure();
+
+    Operation *nextofFFTImagOp = fftImagOp->getNextNode();
+    if (!nextofFFTImagOp || !isa<SquareOp>(nextofFFTImagOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto square1Op = cast<SquareOp>(nextofFFTImagOp);
+    if (realOp.getResult() != square1Op.getInput())
+      return failure();
+
+    Operation *nextofSquare1Op = square1Op->getNextNode();
+    if (!nextofSquare1Op || !isa<SquareOp>(nextofSquare1Op))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto square2Op = cast<SquareOp>(nextofSquare1Op);
+    if (fftImagOp.getResult() != square2Op.getInput())
+      return failure();
+
+    Operation *nextofSquare2Op = square2Op->getNextNode();
+    if (!nextofSquare2Op || !isa<AddOp>(nextofSquare2Op))
+      return failure();
+    // The add may take the two squares in either operand order.
+    DEBUG_PRINT_NO_ARGS();
+    auto addOp = cast<AddOp>(nextofSquare2Op);
+    if ((addOp.getLhs() != square1Op.getResult() ||
+         addOp.getRhs() != square2Op.getResult()) &&
+        (addOp.getRhs() != square1Op.getResult() ||
+         addOp.getLhs() != square2Op.getResult()))
+      return failure();
+
+    Operation *nextofAddOp = addOp->getNextNode();
+    if (!nextofAddOp || !isa<SqrtOp>(nextofAddOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto sqrtOp = cast<SqrtOp>(nextofAddOp);
+    if (sqrtOp.getInput() != addOp.getResult())
+      return failure();
+
+    // Every intermediate is erased below, so none may have another user.
+    if (!realOp.getResult().hasOneUse() ||
+        !fftImagOp.getResult().hasOneUse() ||
+        !square1Op.getResult().hasOneUse() ||
+        !square2Op.getResult().hasOneUse() || !addOp.getResult().hasOneUse())
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto combinedOp =
+        rewriter.create<FFTAbsOp>(realOp.getLoc(), realOp.getLhs());
+    rewriter.replaceOp(sqrtOp, combinedOp.getAmplitude());
+    rewriter.eraseOp(addOp);
+    rewriter.eraseOp(square2Op);
+    rewriter.eraseOp(square1Op);
+    rewriter.eraseOp(fftImagOp);
+    rewriter.eraseOp(realOp);
+    return success();
+  }
+};
+
+/// Collapses the consecutive ops fft1dreal(x), fft1dimg(x), square, square,
+/// add, sqrt into a single DFTAbsOp on x.
+struct SimplifyDFTAbs : public OpRewritePattern<FFT1DRealOp> {
+  SimplifyDFTAbs(MLIRContext *context)
+      : OpRewritePattern<FFT1DRealOp>(context, 1) {}
+
+  LogicalResult matchAndRewrite(FFT1DRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // The next op must be an FFT1DImgOp reading the same input as realOp.
+    Operation *nextofFFTRealOp = realOp->getNextNode();
+    if (!nextofFFTRealOp || !isa<FFT1DImgOp>(nextofFFTRealOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto fftImagOp = cast<FFT1DImgOp>(nextofFFTRealOp);
+    if (realOp.getInput() != fftImagOp.getInput())
+      return failure();
+
+    Operation *nextofFFTImagOp = fftImagOp->getNextNode();
+    if (!nextofFFTImagOp || !isa<SquareOp>(nextofFFTImagOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto square1Op = cast<SquareOp>(nextofFFTImagOp);
+    if (realOp.getResult() != square1Op.getInput())
+      return failure();
+
+    Operation *nextofSquare1Op = square1Op->getNextNode();
+    if (!nextofSquare1Op || !isa<SquareOp>(nextofSquare1Op))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto square2Op = cast<SquareOp>(nextofSquare1Op);
+    if (fftImagOp.getResult() != square2Op.getInput())
+      return failure();
+
+    Operation *nextofSquare2Op = square2Op->getNextNode();
+    if (!nextofSquare2Op || !isa<AddOp>(nextofSquare2Op))
+      return failure();
+    // The add may take the two squares in either operand order.
+    DEBUG_PRINT_NO_ARGS();
+    auto addOp = cast<AddOp>(nextofSquare2Op);
+    if ((addOp.getLhs() != square1Op.getResult() ||
+         addOp.getRhs() != square2Op.getResult()) &&
+        (addOp.getRhs() != square1Op.getResult() ||
+         addOp.getLhs() != square2Op.getResult()))
+      return failure();
+
+    Operation *nextofAddOp = addOp->getNextNode();
+    if (!nextofAddOp || !isa<SqrtOp>(nextofAddOp))
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto sqrtOp = cast<SqrtOp>(nextofAddOp);
+    if (sqrtOp.getInput() != addOp.getResult())
+      return failure();
+
+    // Every intermediate is erased below, so none may have another user.
+    if (!realOp.getResult().hasOneUse() ||
+        !fftImagOp.getResult().hasOneUse() ||
+        !square1Op.getResult().hasOneUse() ||
+        !square2Op.getResult().hasOneUse() || !addOp.getResult().hasOneUse())
+      return failure();
+    DEBUG_PRINT_NO_ARGS();
+    auto combinedOp =
+        rewriter.create<DFTAbsOp>(realOp.getLoc(), realOp.getInput());
+    rewriter.replaceOp(sqrtOp, combinedOp.getAmplitude());
+    rewriter.eraseOp(addOp);
+    rewriter.eraseOp(square2Op);
+    rewriter.eraseOp(square1Op);
+    rewriter.eraseOp(fftImagOp);
+    rewriter.eraseOp(realOp);
+    return success();
+  }
+};
+
+/// Fuses thresholdUp(dftAbs(x), t1, t2) into a single DFTAbsThresholdUpOp.
+struct SimplifyDFTAbsThreshold : public mlir::OpRewritePattern<ThresholdUpOp> {
+  SimplifyDFTAbsThreshold(mlir::MLIRContext *context)
+      : OpRewritePattern<ThresholdUpOp>(context, /*benefit=*/1) {}
+
+  /// Matches a ThresholdUpOp whose operand 0 is defined by a DFTAbsOp and
+  /// replaces it with the fused op, built from the DFTAbsOp's operand 0 and
+  /// the threshold op's operands 1 and 2.
+  mlir::LogicalResult
+  matchAndRewrite(ThresholdUpOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Value thresholdOperand0 = op.getOperand(0);
+    mlir::Value thresholdOperand1 = op.getOperand(1);
+    mlir::Value thresholdOperand2 = op.getOperand(2);
+
+    auto dftAbsOp = thresholdOperand0.getDefiningOp<DFTAbsOp>();
+    if (!dftAbsOp)
+      return failure();
+
+    mlir::Value dftInput = dftAbsOp->getOperand(0);
+    auto fusedOp = rewriter.create<DFTAbsThresholdUpOp>(
+        op.getLoc(), dftInput, thresholdOperand1, thresholdOperand2);
+
+    DEBUG_PRINT_NO_ARGS();
+    rewriter.replaceOp(op, fusedOp);
+
+    return mlir::success();
+  }
+};
+
+//  Define the canonicalization pattern.
+struct SimplifyFFTRealAndImagToFFT : public OpRewritePattern<FFTRealOp> {
+  SimplifyFFTRealAndImagToFFT(MLIRContext *context)
+      : OpRewritePattern<FFTRealOp>(context, /*benefit=*/1) {}
+
+  /// Merges an FFTRealOp and the FFTImagOp right after it, when both read the
+  /// same input, into one FFTOp whose results 0 and 1 replace them.
+  LogicalResult matchAndRewrite(FFTRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // The imaginary-part op must be the very next operation.
+    auto imagOp = dyn_cast_or_null<FFTImagOp>(realOp->getNextNode());
+    if (!imagOp)
+      return failure();
+
+    DEBUG_PRINT_NO_ARGS();
+    Value fftInput = realOp.getLhs();
+    if (fftInput != imagOp.getLhs())
+      return failure();
+
+    DEBUG_PRINT_NO_ARGS();
+    auto fftOp = rewriter.create<FFTOp>(realOp.getLoc(), fftInput);
+    rewriter.replaceOp(realOp, fftOp.getResult(0));
+    rewriter.replaceOp(imagOp, fftOp.getResult(1));
+    return success();
+  }
+};
+
+
+/// Fuses max(correlate(a, b)) into a single Correl2MaxOptimizedOp on (a, b).
+struct SimplifyCorrel2Max : public mlir::OpRewritePattern<MaxOp> {
+  SimplifyCorrel2Max(mlir::MLIRContext *context)
+      : OpRewritePattern<MaxOp>(context, 1) {}
+
+  /// Fails unless the MaxOp operand is defined by a CorrelateOp; otherwise
+  /// replaces the MaxOp with the fused op built from the correlate operands.
+  mlir::LogicalResult
+  matchAndRewrite(MaxOp op, mlir::PatternRewriter &rewriter) const override {
+    // Look through the max operand for the producing correlate op.
+    auto correlateOp = op.getOperand().getDefiningOp<CorrelateOp>();
+    if (!correlateOp)
+      return failure();
+
+    mlir::Value lhs = correlateOp.getOperand(0);
+    mlir::Value rhs = correlateOp.getOperand(1);
+    auto fusedOp =
+        rewriter.create<dsp::Correl2MaxOptimizedOp>(op.getLoc(), lhs, rhs);
+
+    // The fused op's result takes over every use of the original max op.
+    rewriter.replaceOp(op, fusedOp.getResult());
+
+    return mlir::success();
+  }
+};
+
+// Pseudo-Code
+// Find pattern on DivOp
+//  %3 = "dsp.getRangeOfVector"(%0, %1, %2) : (tensor<f64>, tensor<f64>, tensor<f64>) -> tensor<*xf64>
+//  %4 = "dsp.fft1dreal"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %5 = "dsp.fft1dimg"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %6 = dsp.square(%4 : tensor<*xf64>) to tensor<*xf64>
+//  %7 = dsp.square(%5 : tensor<*xf64>) to tensor<*xf64>
+//  %8 = dsp.add %6, %7 : tensor<*xf64>
+//  %9 = dsp.sum(%8 : tensor<*xf64>) to tensor<*xf64>
+//  %10 = "dsp.len"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %11 = dsp.div %9, %10 : tensor<*xf64> 
+//  fft_real = fft1dreal(input)
+//  sq1 = square(fft_real)
+//  sq_abs = AddOp(sq1, square(fft1dimg(input))) // AddOp is the '+' operator
+//  result1 = sum(sq_abs)
+//  len1 = len(input)
+//  result2 = DivOp(result1, len1)
+//
+// If result2 comes from a DivOp matching the chain above, it is rewritten to:
+//   sq = square(input)
+//   ans = sum(sq)
+
+/// Rewrites div(sum(square(fft1dreal(x)) + square(fft1dimg(x))), len(x)) into
+/// sum(square(x)). All three leaves must read the same signal x.
+struct SimplifyEnergyOfSignal : public mlir::OpRewritePattern<DivOp> {
+  SimplifyEnergyOfSignal(mlir::MLIRContext *context)
+      : OpRewritePattern<DivOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(DivOp op, mlir::PatternRewriter &rewriter) const override {
+    // Operands of the DivOp: dividend and divisor.
+    mlir::Value divOp_operand0 = op.getOperand(0);
+    mlir::Value divOp_operand1 = op.getOperand(1);
+
+    // The dividend must come from a SumOp and the divisor from a LengthOp.
+    SumOp prev_SumOp = divOp_operand0.getDefiningOp<SumOp>();
+    LengthOp prev_LenOp = divOp_operand1.getDefiningOp<LengthOp>();
+    if (!prev_SumOp || !prev_LenOp)
+      return failure();
+
+    // The summed value must come from an AddOp.
+    mlir::Value sumOp_operand0 = prev_SumOp.getOperand();
+    AddOp prev_AddOp = sumOp_operand0.getDefiningOp<AddOp>();
+    if (!prev_AddOp)
+      return failure();
+
+    // Both addends must be produced by SquareOps.
+    mlir::Value addOp_operand0 = prev_AddOp.getOperand(0);
+    mlir::Value addOp_operand1 = prev_AddOp.getOperand(1);
+    SquareOp prev_SqOp0 = addOp_operand0.getDefiningOp<SquareOp>();
+    SquareOp prev_SqOp1 = addOp_operand1.getDefiningOp<SquareOp>();
+    if (!prev_SqOp0 || !prev_SqOp1)
+      return failure();
+
+    // The first square takes fft1dreal, the second takes fft1dimg.
+    mlir::Value sqOp_operand0 = prev_SqOp0.getOperand();
+    mlir::Value sqOp_operand1 = prev_SqOp1.getOperand();
+    FFT1DRealOp prev_fftRealOp = sqOp_operand0.getDefiningOp<FFT1DRealOp>();
+    FFT1DImgOp prev_fftImgOp = sqOp_operand1.getDefiningOp<FFT1DImgOp>();
+    if (!prev_fftRealOp || !prev_fftImgOp)
+      return failure();
+
+    // Real part, imaginary part and length must all be taken from the same
+    // signal; otherwise the rewritten value differs from the original one.
+    mlir::Value input = prev_fftRealOp.getOperand();
+    if (prev_fftImgOp->getOperand(0) != input ||
+        prev_LenOp->getOperand(0) != input)
+      return failure();
+
+    // Replacement: ans = sum(square(input)).
+    auto ansSqOp = rewriter.create<SquareOp>(op.getLoc(), input);
+    auto ansSumOp = rewriter.create<SumOp>(op.getLoc(), ansSqOp.getResult());
+
+    rewriter.replaceOp(op, ansSumOp.getResult());
+    return mlir::success();
+  }
+};
+
+/// Rewrites ifft1d(sub(mul, mul), add(mul, mul)) over two FFT1DOps of padded
+/// inputs into a single FIRFilterResponseOp on the two unpadded inputs.
+struct SimplifyConvolutionThm : public mlir::OpRewritePattern<IFFT1DOp> {
+  SimplifyConvolutionThm(mlir::MLIRContext *context)
+      : OpRewritePattern<IFFT1DOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(IFFT1DOp op, mlir::PatternRewriter &rewriter) const override {
+#define CHECK(x)                                                               \
+  if (!x)                                                                      \
+    return failure();
+#define REMOVE(x)                                                              \
+  if (x->use_empty())                                                          \
+    rewriter.eraseOp(x);
+#define DEBUG(x)                                                               \
+  { llvm::errs() << "check for " << x << "\n"; }
+#define PASS llvm::errs() << "pass\n";
+// ADD records each op only once so the cleanup never revisits an erased op.
+#define ADD(x)                                                                 \
+  { if (!llvm::is_contained(ops, x)) ops.push_back(x); }
+
+    auto loc = op.getLoc();
+    llvm::SmallVector<mlir::Operation* , 4> ops;
+
+    // Root op is ifft1d; each CHECK() bails out when the pattern is absent.
+    Operation *subOp = op.getOperand(0).getDefiningOp<SubOp>();
+    CHECK(subOp);
+    ADD(subOp);
+
+    Operation *addOp = op->getOperand(1).getDefiningOp<AddOp>();
+    CHECK(addOp);
+    ADD(addOp);
+
+    Operation *mulLreal = subOp->getOperand(0).getDefiningOp<MulOp>();
+    CHECK(mulLreal);
+    ADD(mulLreal);
+
+    Operation *mulRreal = subOp->getOperand(1).getDefiningOp<MulOp>();
+    CHECK(mulRreal);
+    ADD(mulRreal);
+
+    Operation *mulLImg = addOp->getOperand(0).getDefiningOp<MulOp>();
+    CHECK(mulLImg);
+    ADD(mulLImg);
+
+    Operation *mulRImg = addOp->getOperand(1).getDefiningOp<MulOp>();
+    CHECK(mulRImg);
+    ADD(mulRImg);
+
+    // NOTE(review): operands of mulLImg/mulRImg are not verified any further.
+    // 1st fft1dreal/fft1dimg pair was merged into fft1d by another pattern.
+    Operation *fft1DOp_a1 = mulLreal->getOperand(0).getDefiningOp<FFT1DOp>(); // real1
+    CHECK(fft1DOp_a1);
+    ADD(fft1DOp_a1);
+
+    Operation *fft1DOp_a2 = mulRreal->getOperand(0).getDefiningOp<FFT1DOp>(); // real2
+    CHECK(fft1DOp_a2);
+    ADD(fft1DOp_a2);
+    CHECK((fft1DOp_a1 == fft1DOp_a2));
+
+    // 2nd fft1dreal/fft1dimg pair was merged into fft1d by another pattern.
+    Operation *fft1DOp_b1 = mulLreal->getOperand(1).getDefiningOp<FFT1DOp>(); // img1
+    CHECK(fft1DOp_b1);
+    ADD(fft1DOp_b1);
+
+    Operation *fft1DOp_b2 = mulRreal->getOperand(1).getDefiningOp<FFT1DOp>(); // img2
+    CHECK(fft1DOp_b2);
+    ADD(fft1DOp_b2);
+    CHECK((fft1DOp_b1 == fft1DOp_b2));
+
+    Operation *padOp_1 = fft1DOp_a1->getOperand(0).getDefiningOp<PaddingOp>();
+    CHECK(padOp_1);
+    ADD(padOp_1);
+
+    Operation *padOp_2 = fft1DOp_b1->getOperand(0).getDefiningOp<PaddingOp>();
+    CHECK(padOp_2);
+    ADD(padOp_2);
+
+    // The unpadded signals become the operands of the replacement op.
+    Value input1 = padOp_1->getOperand(0);
+    CHECK(input1);
+    Value input2 = padOp_2->getOperand(0);
+    CHECK(input2);
+
+    auto newResult = rewriter.create<FIRFilterResponseOp>(loc, input1, input2);
+    rewriter.replaceOp(op, newResult.getResult());
+
+    // `ops` is ordered consumers-first, so walking it front to back lets each
+    // producer lose its users before its own use_empty() test.
+    for (mlir::Operation *dead : ops) {
+      REMOVE(dead);
+    }
+
+    return mlir::success();
+  }
+};
+
+
 // ===================================
 // ===================================
 // ===================================
@@ -636,26 +1437,25 @@ struct SimplifyLMSFilterResponsewithGain: public mlir::OpRewritePattern<GainOp>{
 /// Register our patterns as "canonicalization" patterns on the TransposeOp so
 /// that they can be picked up by the Canonicalization framework.
 void FFT1DImgOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                        MLIRContext *context) {
+                                             MLIRContext *context) {
   if (getEnableCanonicalOpt()) {
-    results.add<//SimplifyFFTRealAndImg, 
-                SimplifyFFTImgAtInputRealSymm>(context);
+    results.add<SimplifyFFTImgAtInputRealSymm>(context);
   }
 }
 
 void FFT1DRealOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                        MLIRContext *context) {
+                                              MLIRContext *context) {
   if (getEnableCanonicalOpt()) {
-    results.add<//SimplifyFFTRealAndImg, 
+    results.add<SimplifyDFTAbs, SimplifyFFTRealAndImg,
                 SimplifyFFTRealAtInputRealSymm>(context);
   }
 }
 
-void FIRFilterResponseOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                        MLIRContext *context) {
+void FIRFilterResponseOp::getCanonicalizationPatterns(
+    RewritePatternSet &results, MLIRContext *context) {
   if (getEnableCanonicalOpt()) {
-    results.add<SimplifyFIRFilterRespnseWithSymmFilter , 
-    SimplifyFilterRespX_ReverseXYSymmFilter>(context);
+    results.add<SimplifyFIRFilterRespnseWithSymmFilter,
+                SimplifyFilterRespX_ReverseXYSymmFilter>(context);
   }
 }
 
@@ -673,7 +1473,6 @@ void SquareOp::getCanonicalizationPatterns(RewritePatternSet &results,
   }
 }
 
-
 /// Register our patterns as "canonicalization" patterns on the TransposeOp so
 /// that they can be picked up by the Canonicalization framework.
 
@@ -701,11 +1500,34 @@ void DelayOp::getCanonicalizationPatterns(RewritePatternSet &results,
   }
 }
 
-void GainOp::getCanonicalizationPatterns(RewritePatternSet &results, 
-                                              MLIRContext *context) {
+void GainOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
   // results.add<SimplifyBack2BackGain, SimplifyGainwZero>(context);
   if (getEnableCanonicalOpt()) {
-    results.add<SimplifyBack2BackGain, SimplifyLMSFilterResponsewithGain>(context);
+    results.add<SimplifyBack2BackGain, SimplifyLMSFilterwithGain, SimplifyLMSFilterResponsewithGain>(
+        context);
+  }
+}
+
+void MeanOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    // results.add<SimplifyDiff2Mean>(context);
+    results.add<SimplifyFindPeaks2Diff2Mean>(context);
+  }
+}
+
+void FindPeaksOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyLMS2FindPeaks>(context);
+  }
+}
+
+void SlidingWindowAvgOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                     MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyMedian2Sliding>(context);
   }
 }
 
@@ -718,3 +1540,55 @@ void ReshapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                 FoldConstantReshapeOptPattern>(context);
   }
 }
+
+void SpaceDemodulateOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                    MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifySpaceModDemodulate>(context); // demod(..mod(x)..) -> x
+  }
+}
+
+void NormalizeOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *ctx) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyNormLMSFilterResponse>(ctx); // normalize(lms) fusion
+  }
+}
+
+void DivOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                        MLIRContext *ctx) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyDSSDPass
+    // SimplifyEnergyOfSignal is defined but currently not registered.
+    >(ctx);
+  }
+}
+
+void ThresholdUpOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                MLIRContext *ctx) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyFIRFilterHammingThreholdUpOptimized,
+                SimplifyDFTAbsThreshold>(ctx); // both fuse into the threshold
+  }
+}
+
+void FFTRealOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                            MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyFFTAbs, SimplifyFFTRealAndImagToFFT>(context);
+  }
+}
+
+void MaxOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyCorrel2Max>(context); // max(correlate(a, b)) fusion
+  }
+}
+
+void IFFT1DOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  if (getEnableCanonicalOpt()) { // nothing is registered when this is false
+    results.add<SimplifyConvolutionThm>(context); // ifft(fft*fft) -> FIR op
+  }
+}
diff --git a/mlir/examples/dsp/SimpleBlocks/parser/AST.cpp b/mlir/examples/dsp/SimpleBlocks/parser/AST.cpp
index a5dfa2d0f16e..6b824aa59997 100644
--- a/mlir/examples/dsp/SimpleBlocks/parser/AST.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/parser/AST.cpp
@@ -51,6 +51,7 @@ class ASTDumper {
   void dump(PrintExprAST *node);
   void dump(PrototypeAST *node);
   void dump(FunctionAST *node);
+  void dump(StringExprAST *node);
 
   // Actually print spaces matching the current indentation level
   void indent() {
@@ -81,7 +82,7 @@ static std::string loc(T *node) {
 void ASTDumper::dump(ExprAST *expr) {
   llvm::TypeSwitch<ExprAST *>(expr)
       .Case<BinaryExprAST, CallExprAST, LiteralExprAST, NumberExprAST,
-            PrintExprAST, ReturnExprAST, VarDeclExprAST, VariableExprAST>(
+            PrintExprAST, ReturnExprAST, VarDeclExprAST, StringExprAST, VariableExprAST>(
           [&](auto *node) { this->dump(node); })
       .Default([&](ExprAST *) {
         // No match, fallback to a generic message
@@ -90,6 +91,13 @@ void ASTDumper::dump(ExprAST *expr) {
       });
 }
 
+/// Print a string expression: its quoted value followed by its location.
+void ASTDumper::dump(StringExprAST *stringExpr) {
+  INDENT();
+  llvm::errs() << "StringExpr \"" << stringExpr->getStringVal() << "\""
+               << " " << loc(stringExpr) << "\n";
+}
+
 /// A variable declaration is printing the variable name, the type, and then
 /// recurse in the initializer value.
 void ASTDumper::dump(VarDeclExprAST *varDecl) {
diff --git a/mlir/examples/dsp/SimpleBlocks/toyc.cpp b/mlir/examples/dsp/SimpleBlocks/toyc.cpp
index 2c8bd0baa8ca..4e1c6f19c3e8 100644
--- a/mlir/examples/dsp/SimpleBlocks/toyc.cpp
+++ b/mlir/examples/dsp/SimpleBlocks/toyc.cpp
@@ -88,6 +88,7 @@ enum Action {
   DumpMLIRLinalg,
   DumpMLIRLLVM,
   DumpLLVMIR,
+  DumpLLVMIRHexagonV68,
   RunJIT
 };
 } // namespace
@@ -102,6 +103,7 @@ static cl::opt<enum Action> emitAction(
     cl::values(clEnumValN(DumpMLIRLLVM, "mlir-llvm",
                           "output the MLIR dump after llvm lowering")),
     cl::values(clEnumValN(DumpLLVMIR, "llvm", "output the LLVM IR dump")),
+	cl::values(clEnumValN(DumpLLVMIRHexagonV68, "llvm-hexagonv68", "output the LLVM IR dump where target is hexagonv68")),
     cl::values(
         clEnumValN(RunJIT, "jit",
                    "JIT the code and run it by invoking the main function")));
@@ -361,6 +363,65 @@ int dumpLLVMIR(mlir::ModuleOp module) {
   return 0;
 }
 
+/// Translates `module` to LLVM IR, initializes the Hexagon backend, runs the
+/// optimization pipeline and prints the IR to stderr. Returns 0, or -1 on error.
+int dumpLLVMIRHexagonV68(mlir::ModuleOp module) {
+  // Register the translation to LLVM IR with the MLIR context.
+  mlir::registerBuiltinDialectTranslation(*module->getContext());
+  mlir::registerLLVMDialectTranslation(*module->getContext());
+
+  // Convert the module to LLVM IR in a new LLVM IR context.
+  llvm::LLVMContext llvmContext;
+  auto llvmModule = mlir::translateModuleToLLVMIR(module, llvmContext);
+  if (!llvmModule) {
+    llvm::errs() << "Failed to emit LLVM IR\n";
+    return -1;
+  }
+
+  // Initialize LLVM targets (Hexagon backend components only).
+  LLVMInitializeHexagonTarget();
+  LLVMInitializeHexagonTargetInfo();
+  LLVMInitializeHexagonTargetMC();
+  LLVMInitializeHexagonAsmPrinter();
+
+  // Configure the LLVM Module (disabled: the attempt below did not work).
+  /*
+  auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
+  if (!tmBuilderOrError) {
+    llvm::errs() << "Could not create JITTargetMachineBuilder\n";
+    return -1;
+  }
+
+  auto tmOrError = tmBuilderOrError->createTargetMachine();
+  if (!tmOrError) {
+    llvm::errs() << "Could not create TargetMachine\n";
+    return -1;
+  }
+
+  llvm::HexagonTargetMachine &TM = getHexagonTargetMachine();
+
+  mlir::ExecutionEngine::setupTargetTripleAndDataLayout(llvmModule.get(),
+														TM.get().get());
+                                                        //tmOrError.get().get());
+
+  //Failed to make Hexagon Target Machine
+  */
+  // NOTE(review): no TargetMachine is built, so nullptr is passed below.
+  // enableOpt = false;
+
+  /// Optionally run an optimization pipeline over the llvm module.
+  auto optPipeline = mlir::makeOptimizingTransformer(
+      /*optLevel=*/enableOpt ? 3 : 0, /*sizeLevel=*/0,
+      /*targetMachine=*/nullptr);
+  if (auto err = optPipeline(llvmModule.get())) {
+    llvm::errs() << "Failed to optimize LLVM IR " << err << "\n";
+    return -1;
+  }
+  llvm::errs() << *llvmModule << "\n";
+  return 0;
+}
+
+
 int runJit(mlir::ModuleOp module) {
   // Initialize LLVM targets.
   llvm::InitializeNativeTarget();
@@ -437,6 +498,9 @@ int main(int argc, char **argv) {
   if (emitAction == Action::DumpLLVMIR)
     return dumpLLVMIR(*module);
 
+  else if (emitAction == Action::DumpLLVMIRHexagonV68)
+    return dumpLLVMIRHexagonV68(*module);
+
   // Otherwise, we must be running the jit.
   if (emitAction == Action::RunJIT)
   {
diff --git a/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffine.h b/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffine.h
new file mode 100644
index 000000000000..8cb711a0530e
--- /dev/null
+++ b/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffine.h
@@ -0,0 +1,27 @@
+//===-- DSPToAffine.h - DSP optimization pass declarations --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the passes for the DSP to Affine Dialect conversion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINE_H
+#define MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINE_H
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace mlir {
+class ModuleOp;
+
+#define GEN_PASS_DECL_CONVERTDSPTOAFFINE
+#include "mlir/Conversion/Passes.h.inc"
+namespace dsp {
+void populateDSPToAffineConversionPatterns(RewritePatternSet &patterns);
+} // namespace dsp
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINE_H
diff --git a/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffinePass.h b/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffinePass.h
new file mode 100644
index 000000000000..d6c4bf629cea
--- /dev/null
+++ b/mlir/include/mlir/Conversion/DSPToAffine/DSPToAffinePass.h
@@ -0,0 +1,16 @@
+#ifndef MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINEPASS_H
+#define MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINEPASS_H
+
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+class ModuleOp;
+
+#define GEN_PASS_DECL_CONVERTDSPTOAFFINE
+#include "mlir/Conversion/Passes.h.inc"
+
+std::unique_ptr<OperationPass<ModuleOp>> createConvertDSPToAffinePass();
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_DSPTOAFFINE_DSPTOAFFINEPASS_H
diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index 2179ae18ac07..3c29772c2c39 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -75,6 +75,7 @@
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h"
+#include "mlir/Conversion/DSPToAffine/DSPToAffinePass.h"
 
 namespace mlir {
 
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index d094ee3b36ab..da510ba6c46b 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -1351,4 +1351,28 @@ def ConvertVectorToSPIRV : Pass<"convert-vector-to-spirv"> {
   let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
+//===----------------------------------------------------------------------===//
+// DSPToAffine
+//===----------------------------------------------------------------------===//
+
+
+def ConvertDSPToAffine : Pass<"dsp-to-affine", "ModuleOp"> {
+  let summary = "Converts DSP operations to Affine, SCF, MemRef, or Math operations.";
+  let description = [{
+    Pass that converts DSP operations to the equivalent operations in the
+    Affine, SCF, MemRef, and Math dialects.
+  }];
+  // Dialects whose operations the lowering is allowed to create.
+  let dependentDialects = [
+    "affine::AffineDialect",
+    "memref::MemRefDialect",
+    "scf::SCFDialect",
+    "func::FuncDialect",
+    "math::MathDialect",
+    "LLVM::LLVMDialect",
+  ];
+  let constructor = "mlir::createConvertDSPToAffinePass()";
+}
+
+
 #endif // MLIR_CONVERSION_PASSES
diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 4bd7f12fabf7..a770b005913c 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -42,3 +42,4 @@ add_subdirectory(Utils)
 add_subdirectory(Vector)
 add_subdirectory(X86Vector)
 add_subdirectory(XeGPU)
+add_subdirectory(DSP)
diff --git a/mlir/include/mlir/Dialect/DSP/CMakeLists.txt b/mlir/include/mlir/Dialect/DSP/CMakeLists.txt
new file mode 100644
index 000000000000..3b881719575f
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(IR)
+add_subdirectory(Transforms)
diff --git a/mlir/include/mlir/Dialect/DSP/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/DSP/IR/CMakeLists.txt
new file mode 100644
index 000000000000..a07a6302bf2c
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/IR/CMakeLists.txt
@@ -0,0 +1,28 @@
+# TableGen targets for the DSP dialect and its generated documentation.
+add_mlir_dialect(DSP dsp)
+add_mlir_doc(DSP DSP Dialects/ -gen-dialect-doc)
+
+# Enum declarations and definitions generated from DSP.td.
+set(LLVM_TARGET_DEFINITIONS DSP.td)
+mlir_tablegen(DSPEnums.h.inc -gen-enum-decls)
+mlir_tablegen(DSPEnums.cpp.inc -gen-enum-defs)
+add_public_tablegen_target(MLIRDSPEnumsGen)
+
+# Disabled: explicit op/dialect tablegen rules.
+#mlir_tablegen(DSP_Ops.h.inc -gen-op-decls)
+#mlir_tablegen(DSP_Ops.cpp.inc -gen-op-defs)
+#mlir_tablegen(DSP_Dialect.h.inc -gen-dialect-decls)
+#mlir_tablegen(DSP_Dialect.cpp.inc -gen-dialect-defs)
+#add_public_tablegen_target(MLIRDspOpsIncGen)
+# Disabled: attribute-definition generation (two alternative spellings).
+# set(LLVM_TARGET_DEFINITIONS DSP.td)
+# mlir_tablegen(DSPAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=dsp)
+# mlir_tablegen(DSPAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=dsp)
+# add_public_tablegen_target(MLIRDSPAttributesIncGen)
+
+# set(LLVM_TARGET_DEFINITIONS DSP.td)
+# mlir_tablegen(DSPAttrDefs.h.inc -gen-attrdef-decls
+#               -attrdefs-dialect=dsp)
+# mlir_tablegen(DSPAttrDefs.cpp.inc -gen-attrdef-defs
+#               -attrdefs-dialect=dsp)
+# add_public_tablegen_target(DSPAttrDefsIncGen)
diff --git a/mlir/include/mlir/Dialect/DSP/IR/DSP.td b/mlir/include/mlir/Dialect/DSP/IR/DSP.td
new file mode 100644
index 000000000000..0703ad178514
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/IR/DSP.td
@@ -0,0 +1,2968 @@
+//===- DSP.td - DSP dialect operation definitions ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the operations of the DSP dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DSP
+#define DSP
+
+// include "mlir/IR/FunctionInterfaces.td"
+include "mlir/Interfaces/FunctionInterfaces.td"
+include "mlir/IR/SymbolInterfaces.td"
+include "mlir/Interfaces/CallInterfaces.td"
+include "mlir/Interfaces/CastInterfaces.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ShapeInferenceOpInterfaces.td"
+include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpBase.td"
+
+// ODS definition of the 'DSP' dialect; the operations defined in this file
+// are attached to this record.
+def DSP_Dialect : Dialect {
+  let cppNamespace = "::mlir::dsp";
+  let name = "dsp";
+}
+
+
+// Common base for the operations of the DSP dialect. It forwards to the
+// generic `Op` class from OpBase.td and fixes or passes through:
+//   * the owning dialect (always DSP_Dialect),
+//   * the op mnemonic, i.e. its name without the dialect prefix,
+//   * an optional list of traits.
+class DSP_Op<string mnemonic, list<Trait> traits = []>
+    : Op<DSP_Dialect, mnemonic, traits>;
+
+//===----------------------------------------------------------------------===//
+// DSP Operations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ConstantOp
+//===----------------------------------------------------------------------===//
+
+// ConstantOp turns a literal into an SSA value. As with every operation in
+// this file, it derives from the 'DSP_Op' base class and supplies a mnemonic
+// plus a trait list. 'Pure' marks the op as free of side effects, so a
+// constant whose result is unused may be removed.
+def ConstantOp : DSP_Op<"constant", [Pure]> {
+  // The summary and description below feed the auto-generated documentation
+  // for the dialect.
+  let summary = "constant";
+  let description = [{
+    Constant operation turns a literal into an SSA value. The data is attached
+    to the operation as an attribute. For example:
+
+    ```mlir
+      %0 = dsp.constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]>
+                        : tensor<2x3xf64>
+    ```
+  }];
+
+  // Sole input: the literal data, carried as the `value` attribute.
+  let arguments = (ins F64ElementsAttr:$value);
+
+  // Sole result: a tensor of f64 elements.
+  let results = (outs F64Tensor);
+
+  // Extra verification beyond the ODS constraints is written in C++.
+  let hasVerifier = 1;
+
+  // Parsing and printing are written by hand instead of using a declarative
+  // assembly format.
+  let hasCustomAssemblyFormat = 1;
+
+  // Convenience builders. Each one populates the `state` that MLIR consumes
+  // when `builder.create<ConstantOp>(...)` is called.
+  let builders = [
+    // From a dense attribute; the result type is the attribute's own type.
+    OpBuilder<(ins "DenseElementsAttr":$value), [{
+      build($_builder, $_state, value.getType(), value);
+    }]>,
+
+    // From a single floating-point value.
+    OpBuilder<(ins "double":$value)>,
+
+    // Disabled: integer overload.
+    // OpBuilder<(ins "int":$value)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloOp
+//===----------------------------------------------------------------------===//
+
+def ModuloOp : DSP_Op<"modulo", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "element-wise modulo operation";
+  let description = [{
+    The "modulo" operation performs element-wise modulo op between two tensors.
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder taking only the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// AddOp
+//===----------------------------------------------------------------------===//
+
+def AddOp : DSP_Op<"add", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "element-wise addition operation";
+  let description = [{
+    The "add" operation performs element-wise addition between two tensors.
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Parsing and printing are written by hand in C++.
+  let hasCustomAssemblyFormat = 1;
+
+  // Left- and right-hand f64 tensors in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder taking only the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// CastOp
+//===----------------------------------------------------------------------===//
+
+def CastOp : DSP_Op<"cast", [
+     DeclareOpInterfaceMethods<CastOpInterface>,
+     DeclareOpInterfaceMethods<ShapeInferenceOpInterface>,
+     Pure, SameOperandsAndResultShape
+  ]> {
+  let summary = "shape cast operation";
+  let description = [{
+    The "cast" operation converts a tensor from one type to an equivalent type
+    without changing any data elements. The source and destination types must
+    both be tensor types with the same element type. If both are ranked, then
+    shape is required to match. The operation is invalid if converting to a
+    mismatching constant dimension.
+  }];
+
+  // Declarative assembly: operand, attributes, `:` input type `to` output type.
+  let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
+
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$output);
+}
+
+//===----------------------------------------------------------------------===//
+// FuncOp
+//===----------------------------------------------------------------------===//
+
+def FuncOp : DSP_Op<"func", [FunctionOpInterface, IsolatedFromAbove]> {
+  let summary = "user defined function operation";
+  let description = [{
+    The "dsp.func" operation represents a user defined function. These are
+    callable SSA-region operations that contain DSP computations.
+
+    Example:
+
+    ```mlir
+    dsp.func @main() {
+      %0 = dsp.constant dense<5.500000e+00> : tensor<f64>
+      %1 = dsp.reshape(%0 : tensor<f64>) to tensor<2x2xf64>
+      dsp.print %1 : tensor<2x2xf64>
+      dsp.return
+    }
+    ```
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+  let skipDefaultBuilders = 1;
+
+  // Symbol name and function type, plus optional argument/result attributes.
+  let arguments = (ins
+    SymbolNameAttr:$sym_name,
+    TypeAttrOf<FunctionType>:$function_type,
+    OptionalAttr<DictArrayAttr>:$arg_attrs,
+    OptionalAttr<DictArrayAttr>:$res_attrs
+  );
+  let regions = (region AnyRegion:$body);
+
+  let builders = [
+    OpBuilder<(ins "StringRef":$name, "FunctionType":$type,
+                   CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
+  ];
+  let extraClassDeclaration = [{
+    //===------------------------------------------------------------------===//
+    // FunctionOpInterface Methods
+    //===------------------------------------------------------------------===//
+
+    /// Returns the argument types of this function.
+    ArrayRef<Type> getArgumentTypes() { return getFunctionType().getInputs(); }
+
+    /// Returns the result types of this function.
+    ArrayRef<Type> getResultTypes() { return getFunctionType().getResults(); }
+
+    /// Returns the region on the function operation that is callable.
+    Region *getCallableRegion() { return &getBody(); }
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// GenericCallOp
+//===----------------------------------------------------------------------===//
+
+def GenericCallOp : DSP_Op<"generic_call",
+    [DeclareOpInterfaceMethods<CallOpInterface>]> {
+  let summary = "generic call operation";
+  let description = [{
+    Generic calls represent calls to a user defined function that needs to
+    be specialized for the shape of its arguments. The callee name is attached
+    as a symbol reference via an attribute. The arguments list must match the
+    arguments expected by the callee. For example:
+
+    ```mlir
+     %4 = dsp.generic_call @my_func(%1, %3)
+           : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64>
+    ```
+
+    This is only valid if a function named "my_func" exists and takes two
+    arguments.
+  }];
+
+  // Declarative assembly: callee symbol, parenthesized operands, attributes,
+  // then the functional type of the call.
+  let assemblyFormat = [{
+    $callee `(` $inputs `)` attr-dict `:` functional-type($inputs, results)
+  }];
+
+  // The callee as a flat symbol reference, then any number of f64 tensors.
+  let arguments = (ins FlatSymbolRefAttr:$callee, Variadic<F64Tensor>:$inputs);
+
+  // A single f64 tensor is produced.
+  let results = (outs F64Tensor);
+
+  // Builder from a callee name and the values passed to it.
+  let builders = [
+    OpBuilder<(ins "StringRef":$callee, "ArrayRef<Value>":$arguments)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// MulOp
+//===----------------------------------------------------------------------===//
+
+def MulOp : DSP_Op<"mul", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "element-wise multiplication operation";
+  let description = [{
+    The "mul" operation performs element-wise multiplication between two
+    tensors. The shapes of the tensor operands are expected to match.
+  }];
+
+  // Parsing and printing are written by hand in C++, and canonicalization
+  // patterns are registered for this operation.
+  let hasCustomAssemblyFormat = 1;
+  let hasCanonicalizer = 1;
+
+  // Left- and right-hand f64 tensors in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder taking only the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// DivOp
+//===----------------------------------------------------------------------===//
+
+def DivOp : DSP_Op<"div", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "element-wise division operation";
+  let description = [{
+    The "div" operation performs element-wise division between two
+    tensors. The shapes of the tensor operands are expected to match.
+  }];
+
+  // The parser/printer pair is hand-written, and this operation registers
+  // canonicalization patterns.
+  let hasCustomAssemblyFormat = 1;
+  let hasCanonicalizer = 1;
+
+  // Dividend (`lhs`) and divisor (`rhs`) as f64 tensors; one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder that needs nothing but the two operands.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// BitwiseAndOp
+//===----------------------------------------------------------------------===//
+
+def BitwiseAndOp : DSP_Op<"bitwiseand", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "bit-wise and operation";
+  let description = [{
+    The "bitwiseand" operation performs bit-wise and between two
+    tensors. The shapes of the tensor operands are expected to match.
+  }];
+
+  // The parser and printer for this operation are hand-written.
+  let hasCustomAssemblyFormat = 1;
+
+  // Two f64 tensor operands in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder from the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// PowOp
+//===----------------------------------------------------------------------===//
+
+def PowOp : DSP_Op<"pow",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "element-wise power operation for tensor";
+  let description = [{
+    The "pow" operation performs element-wise power for base tensor.
+    The accepted operand is restricted to a scalar constant.
+  }];
+
+  // Operands: `lhs` is the base tensor; `rhs` is presumably the exponent.
+  // Both are f64 tensors, as is the result.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Builder taking only the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+
+  // Additional verification is implemented in C++.
+  let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PrintOp
+//===----------------------------------------------------------------------===//
+
+def PrintOp : DSP_Op<"print"> {
+  let summary = "print operation";
+  let description = [{
+    The "print" builtin operation prints a given input tensor, and produces
+    no results.
+  }];
+
+  // Declarative assembly: the operand, attributes, then `:` and its type.
+  let assemblyFormat = "$input attr-dict `:` type($input)";
+
+  // f64 tensor to print; an f64 memref is also accepted for partial lowering.
+  let arguments = (ins AnyTypeOf<[F64Tensor, F64MemRef]>:$input);
+}
+
+//===----------------------------------------------------------------------===//
+// ReshapeOp
+//===----------------------------------------------------------------------===//
+
+def ReshapeOp : DSP_Op<"reshape", [Pure]> {
+  let summary = "tensor reshape operation";
+  let description = [{
+    Reshape operation is transforming its input tensor into a new tensor with
+    the same number of elements but different shapes. For example:
+
+    ```mlir
+       %0 = dsp.reshape (%arg1 : tensor<10xf64>) to tensor<5x2xf64>
+    ```
+  }];
+
+  // The input may be any f64 tensor.
+  let arguments = (ins F64Tensor:$input);
+  // The result must be a statically shaped f64 tensor.
+  let results = (outs StaticShapeTensorOf<[F64]>);
+
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+
+  // Canonicalization patterns are registered for this operation.
+  let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ReturnOp
+//===----------------------------------------------------------------------===//
+
+def ReturnOp : DSP_Op<"return", [Pure, HasParent<"FuncOp">, Terminator]> {
+  let summary = "return operation";
+  let description = [{
+    The "return" operation represents a return operation within a function.
+    The operation takes an optional tensor operand and produces no results.
+    The operand type must match the signature of the function that contains
+    the operation. For example:
+
+    ```mlir
+      dsp.func @foo() -> tensor<2xf64> {
+        ...
+        dsp.return %0 : tensor<2xf64>
+      }
+    ```
+  }];
+
+  // Beyond the ODS constraints, this operation is checked by a verifier
+  // implemented in C++.
+  let hasVerifier = 1;
+
+  // Optional operand to return, modelled as a variadic list of f64 tensors.
+  // Per the description it has to match the enclosing function's result type.
+  let arguments = (ins Variadic<F64Tensor>:$input);
+
+  // The operand and its type are only part of the syntax when present.
+  let assemblyFormat = "($input^ `:` type($input))? attr-dict ";
+
+  // Builder for a return without any operand.
+  let builders = [
+    OpBuilder<(ins), [{ build($_builder, $_state, std::nullopt); }]>
+  ];
+
+  // Utility added to the generated C++ operation class.
+  let extraClassDeclaration = [{
+    bool hasOperand() { return getNumOperands() != 0; }
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// TransposeOp
+//===----------------------------------------------------------------------===//
+
+def TransposeOp : DSP_Op<"transpose", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "transpose operation";
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Declarative assembly: `(` operand `:` its type `)` ... `to` result type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+
+  // Canonicalization patterns are registered for this operation, and extra
+  // verification is implemented in C++.
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+
+  // Builder taking only the input value.
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+//===----------------------------------------------------------------------===//
+// DelayOp
+//===----------------------------------------------------------------------===//
+
+// DelayOp shifts a tensor by a given amount. Like the other two-operand DSP
+// operations in this file it is marked 'Pure' and declares the methods of the
+// shape inference interface. Unlike several of them it declares neither a
+// custom nor a declarative assembly format.
+def DelayOp : DSP_Op<"delay", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "shifting tensor by given number";
+  let description = [{
+    The "delay" operation performs shift in array/tensor by given arg -- 
+    ex: shift by 1 -- 1 element will become 0.
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Operands: `lhs` is the tensor to shift and `rhs` presumably carries the
+  // shift amount. Both are f64 tensors; the result is an f64 tensor as well.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Disabled alternative that took the second operand as an unsigned integer:
+  // let arguments = (ins F64Tensor:$lhs, UI32:$rhs);
+
+  // Canonicalization patterns are registered for this operation.
+  let hasCanonicalizer = 1;
+
+  // Additional verification is implemented in C++.
+  let hasVerifier = 1;
+
+  // Builder taking only the two operand values.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+
+  // No assembly format is declared for this operation. The declarative format
+  // below was drafted but is disabled:
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+}
+
+
+//===----------------------------------------------------------------------===//
+// GainOp
+//===----------------------------------------------------------------------===//
+def GainOp : DSP_Op<"gain", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "scaling each element of a tensor by a given gain";
+  let description = [{
+    The "Gain" operation performs multiplication of each element with given gain --
+    ex: [1, 2, 4, 5] & gain= 2 : Output : [2, 4, 8, 10]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Operands: `lhs` is the input tensor and `rhs` presumably carries the gain.
+  // Both are f64 tensors, and the result is an f64 tensor as well.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Disabled alternatives that typed the second operand differently:
+  // let arguments = (ins F64Tensor:$lhs, UI32:$rhs);
+  // let arguments = (ins F64Tensor:$lhs, F64:$rhs);
+  // let arguments = (ins F64Tensor:$lhs, F64Attr:$rhs);
+
+  // Canonicalization patterns are registered for this operation.
+  let hasCanonicalizer = 1;
+
+  // No verifier is declared for this operation; the hook is disabled:
+  // let hasVerifier = 1;
+
+  // Builder taking only the two operand values.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+
+  // No assembly format is declared for this operation. The declarative format
+  // below was drafted but is disabled:
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+}
+
+//===----------------------------------------------------------------------===//
+// SubOp
+//===----------------------------------------------------------------------===//
+
+def SubOp : DSP_Op<"sub", [
+     Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+   ]> {
+   let summary = "Subtract b from a";
+   let description = [{
+     The subtract block subtracts each element of a vector with each other.
+   }];
+
+   // Operands `lhs` and `rhs` are f64 tensors; the result is an f64 tensor.
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+
+   // Builder taking only the two operand values.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // Neither a verifier nor an assembly format is declared. The snippets below
+   // are disabled drafts:
+   // let hasVerifier = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+}
+
+//===----------------------------------------------------------------------===//
+// FFTRealOp
+//===----------------------------------------------------------------------===//
+
+def FFTRealOp : DSP_Op<"fftReal",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "real part of the FFT of the input";
+  let description = [{
+      This function accepts a 1D input array of size 2^n and returns the real part of its Fourier transform,
+      producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor);
+
+  // Builder from the input value; canonicalization patterns are registered.
+  let builders = [OpBuilder<(ins "Value":$lhs)>];
+  let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTImagOp
+//===----------------------------------------------------------------------===//
+
+def FFTImagOp : DSP_Op<"fftImag",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "imaginary part of the FFT of the input";
+  let description = [{
+      This function accepts a 1D input array of size 2^n and returns the imaginary part of its Fourier transform,
+      producing an output array of the same size. The function is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor);
+
+  let builders = [OpBuilder<(ins "Value":$lhs)>];
+}
+
+//===----------------------------------------------------------------------===//
+// zeroCrossCountOp
+//===----------------------------------------------------------------------===//
+def zeroCrossCountOp : DSP_Op<"zeroCrossCount", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "count the crosses through zero";
+  let description = [{
+    The "zeroCrossCount" operation detects no of zero crosses in a given array -- 
+    ex: [-1 , -2 , 3, 0 , 0, -2] has 2 zero-crosses 
+  }];
+
+  // One f64 tensor in; the result is an f64 tensor too.
+  let arguments = (ins F64Tensor:$lhs);
+  let results =  (outs F64Tensor);
+
+  // Disabled alternative with an integer result:
+  // let results = (outs I64);
+
+  // Builder taking only the input value.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+
+  // No canonicalizer, verifier or assembly format is declared. The snippets
+  // below are disabled drafts:
+  // let hasCanonicalizer = 1;
+  // let hasVerifier = 1;
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+}
+
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResponseOp
+//===----------------------------------------------------------------------===//
+def FIRFilterResponseOp : DSP_Op<"FIRFilterResponse", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "FIR filter response: convolution of an input with a filter";
+  let description = [{
+    The "FIRFilterResponseOp" operation is basically the convolution of input 1-D and filter vector
+    ex: x[n] = [2,1,3,2,4], h[0]=1,h[1]=-1, and h[2]=2 then
+      y[n] = sum(h(k) . x(n-k)) k=0 to N-1
+  }];
+
+  // Operands: presumably `lhs` is the 1-D input and `rhs` the filter vector.
+  // Both are f64 tensors; the result is an f64 tensor.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results =  (outs F64Tensor);
+
+  // Canonicalization patterns are registered for this operation.
+  let hasCanonicalizer = 1;
+
+  // Additional verification is implemented in C++.
+  let hasVerifier = 1;
+
+  // Builder taking only the two operand values.
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs)>];
+
+  // No assembly format is declared for this operation. The declarative format
+  // below was drafted but is disabled:
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+}
+
+//===----------------------------------------------------------------------===//
+// MedianFilterOp
+//===----------------------------------------------------------------------===//
+
+def MedianFilterOp : DSP_Op<"medianFilter",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "median filter operation";
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+//===----------------------------------------------------------------------===//
+// SlidingWindowAvgOp
+//===----------------------------------------------------------------------===//
+
+def SlidingWindowAvgOp : DSP_Op<"slidingWindowAvg", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "sliding window average operation";
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Declarative assembly: the parenthesized operand with its type, any
+  // attributes, then `to` followed by the result type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+
+  // Additional verification is implemented in C++, and canonicalization
+  // patterns are registered for this operation.
+  let hasVerifier = 1;
+  let hasCanonicalizer = 1;
+
+  // Builder taking only the input value.
+  let builders = [OpBuilder<(ins "Value":$input)>];
+}
+
+
+//===----------------------------------------------------------------------===//
+// DownsamplingOp
+//===----------------------------------------------------------------------===//
+def DownsamplingOp : DSP_Op<"downsampling", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "downsampling a input tensor by given rate";
+  let description = [{
+    y[m] = x[M . n] ie, m = len(x) / M 
+    The "Downsampling" operation gives output with less no of samples:  -- 
+    ex: [1, 2, 4, 5] & DownsamplingRate= 2 : Output : [1, 4]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Operands: `lhs` is the input tensor and `rhs` presumably carries the
+  // downsampling rate. Both are f64 tensors, as is the result.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // Canonicalization patterns are registered for this operation.
+  let hasCanonicalizer = 1;
+
+  // Additional verification is implemented in C++.
+  let hasVerifier = 1;
+
+  // Builder taking only the two operand values.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+
+  // No assembly format is declared for this operation. The declarative format
+  // below was drafted but is disabled:
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+}
+
+
+//===----------------------------------------------------------------------===//
+// UpsamplingOp
+//===----------------------------------------------------------------------===//
+
+def UpsamplingOp : DSP_Op<"upsampling", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "upsampling a input tensor by given rate";
+  let description = [{
+    y[L . n] = x[n] and 0 elsewhere, ie, o/p len = len(x) * L
+    The "upsampling" operation gives output with more no of samples:  --
+    (L - 1 zeros are inserted after every input sample)
+    ex: [1, 2, 4, 5] & UpsamplingRate= 2 : Output : [1, 0, 2, 0, 4, 0, 5, 0]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Two f64 tensor operands and a single f64 tensor result.
+  // NOTE(review): going by the example above, `lhs` is presumably the signal
+  // and `rhs` the upsampling rate L -- confirm against the lowering.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// LowPassFilter1stOrderOp
+//===----------------------------------------------------------------------===//
+
+def LowPassFilter1stOrderOp : DSP_Op<"lowPassFilter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "lowPassFilter a input tensor by given rate";
+  let description = [{
+    y[n] = (1-alpha) * y[n-1] + alpha * x[n]
+    ie, a first-order recursive (exponential smoothing) filter
+    The "lowPassFilter" operation filters and gives low freq components based on alpha :  --
+    alpha = 1 passes x[n] through unchanged; smaller alpha smooths more strongly.
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Two f64 tensor operands and a single f64 tensor result.
+  // NOTE(review): which of `lhs`/`rhs` carries the signal and which carries
+  // alpha is not visible in this def -- confirm against the lowering.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// HighPassFilter
+//===----------------------------------------------------------------------===//
+
+
+def HighPassFilterOp : DSP_Op<"highPassFilter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "high pass filter operation";
+  let description = [{
+    y[n] = x[n] - x[n-1] ie, H(z) = 1 - pow(z,-1) 
+    The "HighPassFilter" operation gives output with low freq components removed:  -- 
+    ex: x=[0,1,0,−1,0] &  Output : [0, 1,-1,-1,1]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Single f64 tensor operand and a single f64 tensor result.
+  // NOTE(review): the description speaks of "operands" whose shapes match,
+  // yet this op has one operand -- the sentence may be a leftover.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone; no inline body
+  // is given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// FFT1D
+//===----------------------------------------------------------------------===//
+
+
+def FFT1DOp : DSP_Op<"fft1d", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "fft1d operation";
+  let description = [{
+    y[k] = y_real[k] + j * y_img[k]
+    y_real[k] = sumOver_n(x[n] * cos(2*pi * k * n/N))
+    y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi * k * n/N))
+    The "FFT1D" operation gives output in freq domain  --
+    ex: x=[10,0,0,0] &  Output : real= [10,10,10,10] , img= [0,0,0,0]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // One f64 tensor operand; the real and imaginary parts of the transform
+  // are returned as two separate f64 tensor results, `real` and `img`.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$real, F64Tensor:$img);
+
+  // Custom form: `(` input `:` input-type `)` attr-dict `to`
+  // `(` real-type `,` img-type `)`.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)`
+  }];
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder taking the single input value (declared here, defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// IFFT1D
+//===----------------------------------------------------------------------===//
+
+
+def IFFT1DOp : DSP_Op<"ifft1d", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "ifft1d operation";
+  let description = [{
+    Inverse of FFT1D: takes the real and imaginary parts of a spectrum and
+    gives the output in time domain  --
+    y[k] = y_real[k] + j *y_img[k]
+    y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ])
+    y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ])
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // NOTE(review): only one result is declared, so the y_real/y_img formulas
+  // above do not say which part is returned -- confirm against the lowering.
+  let arguments = (ins F64Tensor:$real, F64Tensor:$img);
+  let results = (outs F64Tensor);
+
+  // Canonicalizer and verifier hooks: declared by ODS, defined in C++.
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+
+  // Builder from the real and imaginary input values (defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$real, "Value":$img)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// HammingWindow
+//===----------------------------------------------------------------------===//
+
+
+def HammingWindowOp : DSP_Op<"hamming", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "hamming operation";
+  let description = [{
+    y[n] = 0.54 - 0.46 cos(2 *pi * n/(N-1)) , 0<=n<N : Taking length as N 0 to N-1
+
+    The "HammingWindow" operation calculates the Hamming window coefficients
+    ex: N = 5 &  Output : y= [0.080000 0.540000 1.000000 0.540000 0.080000]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Single f64 tensor operand and a single f64 tensor result.
+  // NOTE(review): per the example, `input` presumably carries N -- confirm.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone; no inline body
+  // is given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// DCT-II
+//===----------------------------------------------------------------------===//
+
+
+def DCTOp : DSP_Op<"dct", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "dct-II operation";
+  let description = [{
+    y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) , 0<=n<=N-1 :
+
+    for y[0] , the answer will be multiplied by 1/sqrt(2)
+    The "DCT" operation calculates the dct 2
+    ex: x = [1,1,1,1] (N = 4) &  Output : y= [2,0,0,0]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Single f64 tensor operand and a single f64 tensor result; presumably
+  // `input` is x and the result is y from the description -- confirm.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone; no inline body
+  // is given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// filterOp
+//===----------------------------------------------------------------------===//
+
+def filterOp : DSP_Op<"filter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "get filter response given b,a and x";
+  let description = [{
+    The "filterOp" operation is basically the filter response of input x and filter represented as b/a rational transfer function form
+    ex: x[n] = [2,1,3,2,4], b[0,1,2,3,4] and a[1,2,3,4,5] then
+      y[i] = sum(b[j] * x(i-j) - a[j] *x[i-j] ) j=1 to i and  i=1 to len(x)
+      also, y[0] = b[0] * x[0]
+  }];
+
+  // Three f64 tensor operands: `b` and `a` (the rational transfer function
+  // coefficients named in the description) and the input `x`.
+  // NOTE(review): the recurrence above multiplies a[j] by x[i-j]; a b/a
+  // transfer function normally feeds back y[i-j] -- confirm with the lowering.
+  let arguments = (ins F64Tensor:$b, F64Tensor:$a, F64Tensor:$x);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared by ODS and defined in C++;
+  // canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder from the three operands (declared here, defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$b, "Value":$a, "Value":$x)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// SumOp
+//===----------------------------------------------------------------------===//
+
+def SumOp : DSP_Op<"sum", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "sum of all the elements operation";
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // A verifier hook is declared for this op; its checks are defined in C++.
+  let hasVerifier = 1;
+
+  // Convenience builder taking only the input value.
+  let builders = [OpBuilder<(ins "Value":$input)>];
+
+  // Canonicalization patterns are not enabled for this op.
+
+  // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// SinOp
+//===----------------------------------------------------------------------===//
+
+ def SinOp : DSP_Op<"sin", [
+     Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+   ]> {
+   let summary = "sin operation";
+   let description = [{
+     The sin operation computes the sine of a given value. It takes one operand of floating point type (i.e., scalar, tensor or vector) and returns one result of the same type. It has no standard attributes.
+   }];
+
+   // NOTE(review): the description mentions scalar and vector operands, but
+   // the op is declared on f64 tensors only.
+   let arguments = (ins F64Tensor:$input);
+   let results = (outs F64Tensor);
+
+   // An op-specific verifier is declared; ODS emits only the declaration and
+   // the definition lives in C++.
+   // Canonicalization patterns are not enabled for this op.
+   let hasVerifier = 1;
+
+   // Builder that creates the op from the input value alone; no inline body
+   // is given, so its definition must be supplied in C++.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs)>
+   ];
+
+   // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+   let assemblyFormat = [{
+     `(` $input `:` type($input) `)` attr-dict `to` type(results)
+   }];
+ }
+
+//===----------------------------------------------------------------------===//
+// CosOp
+//===----------------------------------------------------------------------===//
+
+ def CosOp : DSP_Op<"cos", [
+     Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+   ]> {
+   let summary = "cos operation";
+   let description = [{
+     The cos operation computes the cosine of a given value. It takes one operand of floating point type (i.e., scalar, tensor or vector) and returns one result of the same type. It has no standard attributes.
+   }];
+
+   // NOTE(review): the description mentions scalar and vector operands, but
+   // the op is declared on f64 tensors only.
+   let arguments = (ins F64Tensor:$input);
+   let results = (outs F64Tensor);
+
+   // An op-specific verifier is declared; ODS emits only the declaration and
+   // the definition lives in C++.
+   // Canonicalization patterns are not enabled for this op.
+   let hasVerifier = 1;
+
+   // Builder that creates the op from the input value alone; no inline body
+   // is given, so its definition must be supplied in C++.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs)>
+   ];
+
+   // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+   let assemblyFormat = [{
+     `(` $input `:` type($input) `)` attr-dict `to` type(results)
+   }];
+ }
+
+
+ //===----------------------------------------------------------------------===//
+// SquareOp
+//===----------------------------------------------------------------------===//
+
+def SquareOp : DSP_Op<"square", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "square of the input vector";
+
+  // One f64 tensor in, one f64 tensor out.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Both a canonicalizer hook and a verifier hook are declared for this op;
+  // ODS only emits the declarations, the bodies are written in C++.
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone.
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+
+  // Custom form: `(` input `:` input-type `)` attr-dict `to` result-type.
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// FFT1DRealOp
+//===----------------------------------------------------------------------===//
+def FFT1DRealOp : DSP_Op<"fft1dreal", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "fft1dreal operation";
+  let description = [{
+    y[k] = y_real[k]
+    y_real[k] = sumOver_n(x[n] * cos(2*pi * k * n/N))
+    The "FFT1DReal" operation gives the real part of the output in freq domain  --
+    ex: x=[10,0,0,0] &  Output : real= [10,10,10,10]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Single f64 tensor operand `input`; the single f64 tensor result is named
+  // `real`, matching the cosine-sum (real part) formula in the description.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$real);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // Hooks for which ODS emits declarations only (definitions live in C++):
+  //   - canonicalization patterns for this op,
+  //   - an op-specific verifier.
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone; no inline body
+  // is given, so its definition must be supplied in C++.
+  // NOTE(review): the builder parameter is called `lhs` although the operand
+  // is `input`; only the name of the generated C++ parameter is affected.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// FFT1DImgOp
+//===----------------------------------------------------------------------===//
+def FFT1DImgOp : DSP_Op<"fft1dimg", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "fft1dimg operation";
+  let description = [{
+    y[k] = y_img[k]
+    y_img[k] = -1 * sumOver_n(x[n] * sin(2*pi * k * n/N))
+    The "FFT1DImg" operation gives the imaginary part of the output in freq domain  --
+    ex: x=[10,0,0,0] &  Output : img= [0,0,0,0]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Single f64 tensor operand `input`. The result keeps the name `real` (the
+  // generated accessor depends on it) although, per the description, it
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$real);  // ...holds the imaginary part.
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // Hooks for which ODS emits declarations only (definitions live in C++):
+  //   - canonicalization patterns for this op,
+  //   - an op-specific verifier.
+  let hasCanonicalizer = 1;
+  let hasVerifier = 1;
+
+  // Builder that creates the op from the input value alone; no inline body
+  // is given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// SincOp
+//===----------------------------------------------------------------------===//
+
+
+def SincOp : DSP_Op<"sinc", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "sinc operation";
+  let description = [{
+    y = sinc(wc * n) = [1, sin(wc)/pi , sin(2* wc)/(2*pi) , ... sin(n * wc)/(n*pi)]
+    The "SINc" operation gives sin(wc *n)/(n*pi)
+    (the first element, at n = 0, is defined as 1)
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Two f64 tensor operands, `wc` and `n`, and a single f64 tensor result.
+  // NOTE(review): whether the result has n or n + 1 elements is not visible
+  // in this def -- confirm against the shape inference implementation.
+  let arguments = (ins F64Tensor:$wc, F64Tensor:$n);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$wc, "Value":$n)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// GetElemAtIndxOp
+//===----------------------------------------------------------------------===//
+
+def GetElemAtIndxOp : DSP_Op<"getElemAtIndx", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "get elem at given indx of the vector";
+
+  // The vector and the index are both passed as f64 tensors.
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared by ODS and defined in C++;
+  // canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder from the vector and the index (declared here, defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$indx)>
+  ];
+}
+
+
+
+
+
+
+//===----------------------------------------------------------------------===//
+// GetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+def GetSingleElemAtIdxOp : DSP_Op<"getSingleElemAtIndx", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "Access a ranked tensor at idx and return a single tensor without dimension.";
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx);
+  let results = (outs F64Tensor);
+
+  // Builder from the tensor and the index (declared here, defined in C++).
+  let builders = [OpBuilder<(ins "Value":$input, "Value":$indx)>];
+
+  // Neither an op-specific verifier nor canonicalization patterns are enabled
+  // for this op.
+}
+
+
+
+
+//===----------------------------------------------------------------------===//
+// Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Diff2MeanOptimizedOp : DSP_Op<"diff2meanOpt", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "This one implements mean(diff(input)) as (input[-1] - input[0])/len(input). Note that mean uses length of diff, this operation considers input[-1] == input[length parameter of mean], not input[length parameter of mean-1]";
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+  let results = (outs F64Tensor);
+
+  // Builder from the input and the length (declared here, defined in C++).
+  let builders = [OpBuilder<(ins "Value":$input, "Value":$length)>];
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// FindPeaks2Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def FindPeaks2Diff2MeanOptimizedOp : DSP_Op<"findpeaks2diff2meanOpt",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "This one implements mean(diff(find_peaks(input))) as (peak[-1] - peak[0])/(len(peaks)-1).";
+
+  let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance);
+  let results = (outs F64Tensor);
+
+  // Builder from the three operands (declared here, defined in C++).
+  let builders = [OpBuilder<(ins "Value":$signal, "Value":$height,
+                                 "Value":$distance)>];
+}
+
+
+
+
+
+
+
+//===----------------------------------------------------------------------===//
+// LMS2FindPeaksOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def LMS2FindPeaksOptimizedOp : DSP_Op<"lms2findPeaks", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "Fusing loop for LMSFilterResponseOp and FindPeaksOp";
+  // Six f64 tensor operands, one f64 tensor result; the builder mirrors them.
+  let results = (outs F64Tensor);
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$height, F64Tensor:$distance);
+
+  let builders = [OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu,
+    "Value":$filterLen, "Value":$height, "Value":$distance)>];
+}
+
+
+
+
+
+
+//===----------------------------------------------------------------------===//
+// SetElemAtIndxOp
+//===----------------------------------------------------------------------===//
+
+def SetElemAtIndxOp : DSP_Op<"setElemAtIndx", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "set elem at given indx of the vector";
+
+  // The vector, the index and the value are all passed as f64 tensors.
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx, F64Tensor:$val);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared by ODS and defined in C++;
+  // canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder from vector, index and value (declared here, defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$indx, "Value":$val)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// LowPassFIRFilterOp
+//===----------------------------------------------------------------------===//
+
+
+def LowPassFIRFilterOp : DSP_Op<"lowPassFIRFilter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "lowPassFilter operation";
+  let description = [{
+    y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 :
+             = wc/pi , n = (N-1)/2
+
+    The "LowpassFilter" operation is the base filter
+    ex: N = odd for symmetry &  Output for wc=2*pi*1/8 , N = 7 :
+      y= [0.075026 0.159155 0.225079 0.250000 0.225079 0.159155 0.075026]
+    The shapes of the tensor operands are expected to match.
+  }];
+
+  // Two f64 tensor operands: the cut-off `wc` and `n`, which per the
+  // description is presumably the filter length N -- confirm.
+  let arguments = (ins F64Tensor:$wc, F64Tensor:$n);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$wc, "Value":$n)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// LMSFilterOp
+//===----------------------------------------------------------------------===//
+
+
+def LMSFilterOp : DSP_Op<"lmsFilter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "LMS filter";
+  let description = [{
+    When you select LMS for the Algorithm parameter, the block calculates the filter weights by using the least mean-square (LMS) algorithm. This algorithm is defined by these equations.
+    y(n)=wT(n−1)u(n)
+    e(n)=d(n)−y(n)
+    w(n)=αw(n−1)+f(u(n),e(n),μ)
+  }];
+
+  // Five f64 tensor operands and a single f64 tensor result.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$iters);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared by ODS and defined in C++;
+  // canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder from the five operands (declared here, defined in C++).
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen, "Value":$iters)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// HighPassFIRFilterOp
+//===----------------------------------------------------------------------===//
+
+
+def HighPassFIRFilterOp : DSP_Op<"highPassFIRFilter", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "highPassFIRFilter operation";
+  let description = [{
+    y_hpf[n] = dirac(n-(N-1)/2) - y_lpf[n] = -1 * wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 :
+             = 1 - wc/pi , n = (N-1)/2
+
+    The "highPassFIRFilter" operation gives the coefficients of high pass filter  for
+    cut-off freq, wc= 2*pi*fc and filter order, N .. choose N=odd for symmetry
+    ex: N = odd for symmetry &  Output for wc=2*pi*1/8 , N = 7 :
+      y= [-0.075026 -0.159155 -0.225079 0.750000 -0.225079 -0.159155 -0.075026]
+  }];
+
+  // Two f64 tensor operands, cut-off `wc` and filter order `N`; one result.
+  let arguments = (ins F64Tensor:$wc, F64Tensor:$N);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$wc, "Value":$N)>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// GetRangeOfVector
+//===----------------------------------------------------------------------===//
+
+
+def GetRangeOfVectorOp : DSP_Op<"getRangeOfVector", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "getRangeOfVector operation";
+  let description = [{
+   y[n] = [first to N elements using step]  -- similar to np.arange(first,last,step)
+   But here, the difference is we are mentioning NoOfElements and not the last element
+    
+    The "getRangeOfVector" operation gives the vector from first to last --
+    Ex: y = getRangeOfVector(0,5,2) => y = [0 2 4 6 8]
+    
+  }];
+
+  // Three f64 tensor operands -- start value, element count and step, as
+  // used in the example above -- and a single f64 tensor result.
+  let arguments = (ins F64Tensor:$first, F64Tensor:$N, F64Tensor:$step);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its three operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$first, "Value":$N, "Value":$step)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// FIRFilterHammingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+
+def FIRFilterHammingOptimizedOp : DSP_Op<"FIRFilterHammingOptimized", [
+    Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>
+  ]> {
+  let summary = "FIRFilterHammingOptimized operation";
+  let description = [{
+    y_FIRHamming[n] = [wc/pi * sinc(wc * (n- (N-1)/2))] * [0.54 - 0.46 cos(2 *pi * n/(N-1))], 0<= n < (N-1)/2 :
+             = wc/pi * 1 , n = (N-1)/2
+
+             and also, y_FIRHamming[N-1-n] = y_FIRHamming[n]
+
+    The "FIRFilterHammingOptimized" operation is the fusion of 2 operations and symmetry behaviour
+    ex: N = odd for symmetry &  Output for wc=2*pi*1/8 , N = 7 : y= [0.006002 0.049338 0.173311 0.250000 0.173311 0.049338 0.006002]
+  }];
+
+  // The formula above is the element-wise product of the LowPassFIRFilterOp
+  // and HammingWindowOp formulas, evaluated for one half and mirrored.
+  // Two f64 tensor operands: the cut-off `wc` and `n`, which per the
+  // description is presumably the filter length N -- confirm.
+  let arguments = (ins F64Tensor:$wc, F64Tensor:$n);
+  let results = (outs F64Tensor);
+
+  // No `assemblyFormat` or custom parser/printer is requested in this def, so
+  // unless the DSP_Op base class supplies one, the op is written in MLIR's
+  // generic operation syntax.
+
+  // An op-specific verifier is declared; ODS emits only the declaration and
+  // the definition lives in C++.
+  // Canonicalization patterns are not enabled for this op.
+  let hasVerifier = 1;
+
+  // Builder that creates the op from its two operands; no inline body is
+  // given, so its definition must be supplied in C++.
+  let builders = [
+    OpBuilder<(ins "Value":$wc, "Value":$n)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// HighPassFIRHammingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+
+def HighPassFIRHammingOptimizedOp : DSP_Op<"highPassFIRHammingOptimizedOp",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "highPassFIRHammingOptimizedOp operation";
+  let description = [{
+    y_highFIRHamming[n] = -1 * [wc/pi * sinc(wc * (n - (N-1)/2))] * [0.54 - 0.46 cos(2*pi*n/(N-1))], 0 <= n < (N-1)/2
+                        = 1 - wc/pi, n = (N-1)/2
+
+    and also, y_highFIRHamming[N-1-n] = y_highFIRHamming[n]
+
+    The "highPassFIRHammingOptimizedOp" operation is the fusion of 2 operations and symmetry behaviour
+    ex: N = odd for symmetry & Output for wc=2*pi*1/8 , N = 7 : y = [-0.006002 -0.049338 -0.173311 0.750000 -0.173311 -0.049338 -0.006002]
+
+  }];
+  // Two F64 tensor operands. NOTE(review): `$n` presumably carries the filter length N of the formula -- confirm.
+  let arguments = (ins F64Tensor:$wc, F64Tensor:$n);
+  // Single F64 tensor result holding y_highFIRHamming.
+  let results = (outs F64Tensor);
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  // }];
+
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)`
+  // }];
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the two operands (`wc`, `n`) as Values.
+  let builders = [
+    OpBuilder<(ins "Value":$wc,"Value":$n)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdOp
+//===----------------------------------------------------------------------===//
+
+def ThresholdOp : DSP_Op<"threshold",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "limit the array values";
+  let description = [{
+   y_threshold[n] = a[i]  if a[i] >= threshld or, a[i] <= -threshld
+                  = 0 , else
+    
+    The "threshold" operation returns an array with only those values retained 
+    ex: Input x= [-0.08,-5.4,-1,3.4] and threshld = 2 , then y[n] = [ 0, -5.4, 0, 3.4]  
+  }];
+  // Operands: the tensor to filter and the threshold, both F64 tensors.
+  let arguments = (ins F64Tensor:$input, F64Tensor:$threshld);
+  let results = (outs F64Tensor);
+
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  // }];
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the input tensor and the threshold as Values.
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$threshld)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// QuantizationOp
+//===----------------------------------------------------------------------===//
+
+
+def QuantizationOp : DSP_Op<"quantization",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "quantization operation";
+  let description = [{
+    y_quantized[i] = Round((a[i] - min) / step) * step + min
+        where, step = (max-min)/NoOfLevels
+
+    The "quantization" operation steps:
+    1) calculate NoOfLevels = 2^NoOfBits -- separate
+    2) Then calculate stepSize = (Max-Min)/NoOfLevels
+    3) iterate for all the elements and calculate quantizedCoeff
+
+    GetLevelForVal =  (a[i] - min)/step
+    RoundedVal = arith.FPToSI(GetLevelForVal)
+    QuantVal = RoundedVal * step + min_val
+
+    ex: Original coefficients: [ 3.2 -1.5  0.8 -2.9  4.5] & numLevels = 16, max=5, min=-5
+        Quantized coefficients: [ 3.125 -1.25   0.625 -3.125  4.375]
+  }];
+  // Operands: the values to quantize, the number of levels, and the max/min of the range (all F64 tensors).
+  let arguments = (ins F64Tensor:$input, F64Tensor:$Nlevels, F64Tensor:$max, F64Tensor:$min);
+  let results = (outs F64Tensor);
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the four operands as Values.
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$Nlevels, "Value":$max, "Value":$min)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponseOp
+//===----------------------------------------------------------------------===//
+
+
+def LMSFilterResponseOp : DSP_Op<"lmsFilterResponse",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "LMS filter Response";
+  let description = [{
+    When you select LMS for the Algorithm parameter, the block calculates the filter weights by using the least mean-square (LMS) algorithm and returns the predicted output i.e y(n). This algorithm is defined by these equations.
+    y(n)=wT(n−1)u(n)
+    e(n)=d(n)−y(n)
+    w(n)=αw(n−1)+f(u(n),e(n),μ)
+  }];
+  // Four F64 tensor operands. NOTE(review): `lhs`/`rhs` are presumably u(n) and d(n) of the equations above -- confirm.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen);
+   let results = (outs F64Tensor);
+  // No custom parser/printer is enabled for this op; the disabled drafts are kept below.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Custom builder taking the four operands as Values.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen)>
+   ];
+
+   // Custom parser/printer remains disabled:
+   // let hasCustomAssemblyFormat = 1;
+
+   // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// RunLenEncodingOp
+//===----------------------------------------------------------------------===//
+
+
+def RunLenEncodingOp : DSP_Op<"runLenEncoding",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "runLenEncoding operation";
+  let description = [{
+    y_rle[i] =  x[i] , if x[i] != x[i-1] , 1<=i<n
+                CountOfXi , at n<=i < 2n -1
+
+     ex: if input is [4,4,2,-1,-1,2,2,2] inputLen = 8
+    then output is [4,2,-1,2, 0,0,0,0 , 2,1,2,3,0,0,0,0]
+    }];
+  // Single F64 tensor operand: the sequence to encode.
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the single input operand as a Value.
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmOptimizedOp
+//===----------------------------------------------------------------------===//
+def FIRFilterResSymmOptimizedOp : DSP_Op<"FIRFilterResSymmOptimized" ,
+    [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "FIRFilterResSymmOptimizedOp";
+  let description = [{
+    The "FIRFilterResSymmOptimizedOp" operation is basically the convolution of input 1-D and filter vector when the filter is symmetrical ie,
+    h[0] = h[L-1] , h[1] = h[L-2] .. h[middle ie, (L-1)/2] is single element & filter length is odd always
+    ex: x[n] = [2,1,3,2,4], h[l] = [1, -1 ,1]  then
+      y[n] = sum(h[k] . x[n-k]), k = 0 to L-1 can be rewritten as
+      y[n] = sum(h[k] . {x[n-k] + x[n-(L-1-k)]}) + h[(L-1)/2] . x[n-(L-1)/2], 0 <= k < (L-1)/2
+
+      Basically, we are trying to reduce the number of load/store operations by half -- so that we can reduce the operations
+  }];
+  // Two F64 tensor operands. NOTE(review): presumably `lhs` is the input x and `rhs` the symmetric filter h -- confirm.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); //working -- F64
+  let results =  (outs F64Tensor);
+  // let results = (outs I64);
+
+  // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+  // Custom builder taking the two operands as Values.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+
+  // Custom parser/printer remains disabled:
+  // let hasCustomAssemblyFormat = 1;
+
+  // Canonicalization patterns are currently not registered for this operation.
+  //let hasCanonicalizer = 1;
+
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// LengthOp
+//===----------------------------------------------------------------------===//
+def LengthOp : DSP_Op<"len" ,
+    [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "LengthOp";
+  let description = [{
+    The "LengthOp" operation gives the length of vector
+    ex: x[n] = [2,1,3,2,4], len(x) will be 5
+    }];
+  // Takes one F64 tensor; the length is returned as an F64 tensor rather than an integer type.
+  let arguments = (ins F64Tensor:$input); //working -- F64
+  let results =  (outs F64Tensor);
+
+  // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+  // Custom builder taking the single input operand as a Value.
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+
+  // Custom parser/printer remains disabled:
+  // let hasCustomAssemblyFormat = 1;
+  
+  // Canonicalization patterns are currently not registered for this operation.
+  //let hasCanonicalizer = 1;
+
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ReverseInputOp
+//===----------------------------------------------------------------------===//
+def ReverseInputOp : DSP_Op<"reverseInput" ,
+    [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "ReverseInputOp";
+  let description = [{
+    The "ReverseInputOp" operation gives the reverse of vector
+    y[n] = x[N-1-n] ie, where N is length of input
+    ex: x[n] = [1,2,3,4], y[n] = [4,3,2,1]
+    }];
+  // One F64 tensor in, one F64 tensor out.
+  let arguments = (ins F64Tensor:$input); //working -- F64
+  let results =  (outs F64Tensor);
+
+  // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+  // Custom builder taking the single input operand as a Value.
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+
+  // Custom parser/printer remains disabled:
+  // let hasCustomAssemblyFormat = 1;
+  
+  // Canonicalization patterns are currently not registered for this operation.
+  //let hasCanonicalizer = 1;
+
+  let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PaddingOp
+//===----------------------------------------------------------------------===//
+
+
+def PaddingOp : DSP_Op<"padding",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "PaddingOp(input , ValueToBePadded , PadLen) ";
+  let description = [{ 
+    PaddingOp : y[n] = x[n] for 0<=n< N
+                y[n] = val  for N<=n< N+PadLen 
+    ex: x[n] = [1,2,3,4] & padding(x, value=0, len=3) then 
+        y[n] = [1,2,3,4,0,0,0]
+  }];
+  // Operands: the input, the value to pad with and the pad length, each passed as an F64 tensor.
+  let arguments = (ins F64Tensor:$input, F64Tensor:$PadValue, F64Tensor:$PadLen);
+   let results = (outs F64Tensor);
+  // No custom parser/printer is enabled for this op; the disabled drafts are kept below.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Custom builder taking the three operands as Values.
+   let builders = [
+     OpBuilder<(ins "Value":$input, "Value":$PadValue, "Value":$PadLen)>
+   ];
+
+   // Custom parser/printer remains disabled:
+   // let hasCustomAssemblyFormat = 1;
+
+   // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FIRFilterYSymmOptimizedOp
+//===----------------------------------------------------------------------===//
+def FIRFilterYSymmOptimizedOp : DSP_Op<"FIRFilterYSymmOptimized" ,
+    [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "FIRFilterYSymmOptimizedOp";
+  let description = [{
+    The "FIRFilterYSymmOptimizedOp" operation is basically the convolution of input 1-D and filter vector when filter is reverse of input then output is symmetrical ie,
+    say, x[n] convolution x[-n] ie, x[-n] = x[N-1-n]
+    ex: x[n] = [1,2], x[-n] = x[N-1-n] = [2,1]  then
+        y[n] is always of odd length & is symmetrical about middle
+        ie, y[n] = y[N-1-n] : we will reduce half the computations
+      y[n] = sum(h(k) . x(n-k)) k=0 to N-1 can be rewritten as 
+      
+      So, we are reducing the number of outer loops -- 
+  }];
+  // Two F64 tensor operands. NOTE(review): presumably `lhs` is x[n] and `rhs` its reversed copy -- confirm.
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); //working -- F64
+  let results =  (outs F64Tensor);
+  // let results = (outs I64);
+
+  // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+  // let hasCustomAssemblyFormat = 1;
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+  // }];
+  // Custom builder taking the two operands as Values.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+  ];
+
+  // Custom parser/printer remains disabled:
+  // let hasCustomAssemblyFormat = 1;
+  
+  // Canonicalization patterns are currently not registered for this operation.
+  //let hasCanonicalizer = 1;
+
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFT1DRealSymmOp
+//===----------------------------------------------------------------------===//
+def FFT1DRealSymmOp : DSP_Op<"fft1DRealSymm",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "fft1DRealSymm operation";
+  let description = [{
+    Actual Def: y[k] = y_real[k] & y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ] 
+    Here: y[k] = sumOver_n(x[n]*cos[2*pi * k *n/N ] , 0<=k < (N+1)/2
+            & y[N-k] = y[k]  (N+1)/2<= k< N
+            ie,  y[1] = y[N-1] , y[2] = y[N-2] , 
+    The "fft1DRealSymm" operation gives symmetric output when input is symmetric & real 
+    ie: when x = [1,2,3,2,1] then y_real will be symm: except the first elem
+    ie, y = [9, -2.11 , 0.118, 0.118 , -2.11] 
+
+    The shapes of the tensor operands are expected to match.
+  }];
+  // Single F64 tensor operand x[n].
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$real); // y_real from the description above
+  // NOTE(review): the builder parameter below is named `lhs` while the operand is `$input`.
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  // }];
+
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)`
+  // }];
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the single input operand as a Value.
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+
+  // let hasCanonicalizer = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FFT1DImgConjSymmOp
+//===----------------------------------------------------------------------===//
+def FFT1DImgConjSymmOp : DSP_Op<"fft1DimgConjSymm",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "fft1DimgConjSymm operation";
+  let description = [{
+    y[k] = y_img[k]
+    y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
+    & when the input is real & symmetric then output is symmetric ie,
+    y[k] = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1 0<=k<(N+1)/2
+        & y[N-k] = -y[k] , (N+1)/2 <=k < N
+    The "fft1DimgConjSymm" operation gives output in freq domain  -- 
+    ex: x=[1,2,3,2,1] &  Output : [0,-1.53,0.36,-0.36,1.53] , 
+    The shapes of the tensor operands are expected to match.
+  }];
+  // Single F64 tensor operand x[n].
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$real); // holds y_img from the description above
+  // NOTE(review): the result is named `$real` although the description defines it as y_img -- confirm before renaming.
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` type(results)
+  // }];
+
+  // let assemblyFormat = [{
+  //   `(` $input `:` type($input) `)` attr-dict `to` `(` type($real) `,` type($img) `)`
+  // }];
+
+  // Canonicalization patterns are currently not registered for this operation.
+  // let hasCanonicalizer = 1;
+
+  // Custom builder taking the single input operand as a Value (parameter is named `lhs`).
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  let hasVerifier = 1;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ShiftRightOp
+//===----------------------------------------------------------------------===//
+
+def ShiftRightOp : DSP_Op<"shiftRight", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Bit-wise shift right a by b";
+   let description = [{
+     The shift right block shifts each element of a vector by right-hand side integer. 
+   }]; 
+
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); //Problem: how can we do logical shift with floating point tensor?
+   let results = (outs F64Tensor);
+
+   // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Custom builder taking the two operands as Values.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // Custom parser/printer remains disabled:
+   // let hasCustomAssemblyFormat = 1;
+
+   // Verifier is currently disabled for this op: let hasVerifier = 1;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// MatmulOp
+//===----------------------------------------------------------------------===//
+
+def MatmulOp : DSP_Op<"matmul", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Matrix multiplication a * b";
+   let description = [{
+     Matrix multiplication between the left-hand side and right-hand side.
+   }]; 
+
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs); // left- and right-hand side of the product
+   let results = (outs F64Tensor);
+
+   // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Custom builder taking the two operands as Values.
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // Custom parser/printer remains disabled:
+   // let hasCustomAssemblyFormat = 1;
+
+   let hasVerifier = 1;
+ }
+
+
+
+
+//===----------------------------------------------------------------------===//
+// Conv2DOp
+//===----------------------------------------------------------------------===//
+
+def Conv2DOp : DSP_Op<"conv2d", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect convolution 2d operation";
+    let description = [{
+        Performs a 2D convolution on the input tensor using specified kernel.
+    }];
+    // Operands: input tensor, convolution kernel and bias (all F64 tensors).
+    let arguments = (ins F64Tensor:$input, F64Tensor:$kernel, F64Tensor:$bias);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the three operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$input, "Value":$kernel, "Value":$bias)>
+    ];
+    // NOTE(review): presumably attribute-name helpers, but no stride/padding attribute is declared in `arguments` -- confirm.
+    let extraClassDeclaration = [{
+        static StringRef getStrideName() { return "stride"; }
+        static StringRef getPaddingName() { return "padding"; }
+    }];
+
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+def ThresholdUpOp : DSP_Op<"thresholdUp", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Converts all the values above threshold to 1 else 0";
+    let description = [{
+        Values above the threshold become 1 when returnoriginal is false, or keep their original value when returnoriginal is true; all other values become 0.
+    }];
+    // Operands: the input, the threshold and the returnoriginal flag, each passed as an F64 tensor.
+    let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the three operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)>
+    ];
+    let hasVerifier = 1;
+    let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateDTMFOp
+//===----------------------------------------------------------------------===//
+
+def GenerateDTMFOp : DSP_Op<"generateDtmf", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Generates signal for the digit input.";
+    let description = [{
+        Converts the digit into a signal.
+    }];
+    // Operands: digit, duration and fs, each passed as an F64 tensor.
+    let arguments = (ins F64Tensor:$digit, F64Tensor:$duration, F64Tensor:$fs);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the three operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$digit, "Value":$duration, "Value":$fs)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTFreqOp
+//===----------------------------------------------------------------------===//
+
+def FFTFreqOp : DSP_Op<"fftfreq", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Based on --> np.fft.fftfreq(N, d=1/fs)";
+    let description = [{
+        Generates frequency bins for fft.
+    }];
+    // Operands: NOTE(review): `length`/`distance` presumably map to N and d of np.fft.fftfreq -- confirm.
+    let arguments = (ins F64Tensor:$length, F64Tensor:$distance);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$length, "Value":$distance)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FindDominantPeaksOp
+//===----------------------------------------------------------------------===//
+
+def FindDominantPeaksOp : DSP_Op<"findDominantPeaks", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "finds two dominant peaks in the frequency array.";
+    let description = [{
+        Designed for the DTMF Application.
+    }];
+    // Operands: the frequencies and their magnitudes, both F64 tensors.
+    let arguments = (ins F64Tensor:$frequencies, F64Tensor:$magnitudes);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$frequencies, "Value":$magnitudes)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// RecoverDTMFDigitOp
+//===----------------------------------------------------------------------===//
+
+def RecoverDTMFDigitOp : DSP_Op<"recoverDtmfDigit", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Recover digit from given frequency pair else return -1.";
+    let description = [{
+        Recovers the DTMF digit that corresponds to the given frequency pair; the result is -1 when no digit matches.
+    }];
+    // Operands: the detected frequencies and the frequency pairs to match against, both F64 tensors.
+    let arguments = (ins F64Tensor:$frequencies, F64Tensor:$freqPairs);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$frequencies, "Value":$freqPairs)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTCombineOp
+//===----------------------------------------------------------------------===//
+
+def FFTCombineOp : DSP_Op<"fftCombine", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Combines the real and imaginary parts to obtain an amplitude array.";
+    let description = [{
+        This function takes arrays of real and imaginary parts of frequency components 
+        and computes the amplitude of each frequency. The amplitudes are returned as an 
+        array representing the magnitudes of the corresponding complex values.
+    }];
+    // Operands: the real and imaginary parts, both F64 tensors.
+    let arguments = (ins F64Tensor:$real, F64Tensor:$imag);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$real, "Value":$imag)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateVoiceSignatureOp
+//===----------------------------------------------------------------------===//
+
+def GenerateVoiceSignatureOp : DSP_Op<"generateVoiceSignature", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Generate voice signature of the speaker.";
+    let description = [{
+        This function takes two frequencies as input along with the duration and 
+        the sampling frequency, and generate the voice signature of the speaker. 
+    }];
+    // Operands: the two frequencies, the duration and the sampling frequency, each passed as an F64 tensor.
+    let arguments = (ins F64Tensor:$f1, F64Tensor:$f2, F64Tensor:$duration, F64Tensor:$fs);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the four operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$f1, "Value":$f2, "Value":$duration, "Value":$fs)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def SqrtOp : DSP_Op<"sqrt", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "Takes the element wise square root of the tensor.";
+    let description = [{
+        Takes the element wise square root of the tensor.
+    }];
+    // One F64 tensor in, one F64 tensor out.
+    let arguments = (ins F64Tensor:$input);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the single input operand as a Value.
+    let builders = [
+        OpBuilder<(ins "Value":$input)>
+    ];
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateRealOp real
+//===----------------------------------------------------------------------===//
+
+def QamModulateRealOp : DSP_Op<"qam_modulate_real", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect qam modulation real operation";
+    let description = [{
+        Performs a digital modulation on input tensor.
+    }];
+    // Single F64 tensor operand: the signal to modulate.
+    let arguments = (ins F64Tensor:$signal);
+
+    let results = (outs F64Tensor:$real);
+
+    // Custom builder taking the signal operand as a Value.
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>
+    ];
+
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateImgOp imagine
+//===----------------------------------------------------------------------===//
+
+def QamModulateImgOp : DSP_Op<"qam_modulate_imagine", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect qam modulation imagine operation";
+    let description = [{
+        Performs a digital modulation on input tensor.
+    }];
+    // Single F64 tensor operand: the signal to modulate.
+    let arguments = (ins F64Tensor:$signal);
+
+    let results = (outs F64Tensor:$imagine);
+
+    // Custom builder taking the signal operand as a Value.
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>
+    ];
+
+    let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// QamDemodulateOp
+//===----------------------------------------------------------------------===//
+
+def QamDemodulateOp : DSP_Op<"qam_demodulate", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect qam demodulation operation";
+    let description = [{
+        Takes in 2 arrays, one is the real part of a signal the other is the imaginary part of a signal.
+        Returns the decoded binary output.
+    }];
+    // Operands: the real and imaginary parts of the signal, both F64 tensors.
+    let arguments = (ins F64Tensor:$real, F64Tensor:$imagine);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "Value":$real, "Value":$imagine)>
+    ];
+
+
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaksOp
+//===----------------------------------------------------------------------===//
+
+def FindPeaksOp : DSP_Op<"find_peaks", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find peaks from the signal. Since the number of peaks vary, the output is initialized as -1.";
+   let description = [{
+		Input: signal, height, distance
+		Output: indices of peaks. All of none-used values are initialized as -1, so the length can be measured by this.
+
+		Functionality: check the below original python-level code.
+		
+		def manual_find_peaks(signal, height, distance):
+			peaks = []
+			for i in range(1, len(signal) - 1):
+				# Check if the current point is higher than its neighbors
+				if signal[i] > signal[i-1] and signal[i] > signal[i+1]:
+					# Check if it meets the height criterion
+					if signal[i] >= height:
+						# Check if it's far enough from the previously detected peak
+						if not peaks or i - peaks[-1] >= distance:
+							peaks.append(i)
+			return np.array(peaks)
+
+   }]; 
+
+   let arguments = (ins F64Tensor:$signal, F64Tensor:$height, F64Tensor:$distance);
+   let results = (outs F64Tensor);
+
+   // No custom parser/printer is enabled for this op; the disabled draft is kept below.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Custom builder taking the three operands as Values.
+   
+   let builders = [
+     OpBuilder<(ins "Value":$signal, "Value":$height, "Value":$distance)>
+   ];
+
+   // Custom parser/printer remains disabled:
+   // let hasCustomAssemblyFormat = 1;
+
+   // Verifier is currently disabled for this op: let hasVerifier = 1;
+   let hasCanonicalizer = 1;
+
+ }
+
+
+//===----------------------------------------------------------------------===//
+// BeamFormOp
+//===----------------------------------------------------------------------===//
+
+def BeamFormOp : DSP_Op<"beam_form", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect Beam forming operation";
+    let description = [{
+        Performs a beam forming signal encoding on the input tensor using specified weights.
+    }];
+    // `antennas` and `freq` are 64-bit integer attributes; `time` and `weights` are F64 tensor operands.
+    let arguments = (ins I64Attr:$antennas, I64Attr:$freq, F64Tensor:$time, F64Tensor:$weights);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the two attributes as int64_t and the two operands as Values.
+    let builders = [
+        OpBuilder<(ins "int64_t":$antennas, "int64_t":$freq, "Value":$time, "Value":$weights)>
+    ];
+
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceModulateOp
+//===----------------------------------------------------------------------===//
+
+def SpaceModulateOp : DSP_Op<"space_modulate", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect space modulation operation";
+    let description = [{
+        Takes in string input and convert it to binary.
+    }];
+    // NOTE(review): the description speaks of a string input, but the operand is declared as an F64 tensor -- confirm the encoding.
+    let arguments = (ins F64Tensor:$signal);
+
+    let results = (outs F64Tensor:$output);
+    // Custom builder taking the signal operand as a Value.
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>
+    ];
+
+    let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceDemodulateOp
+//===----------------------------------------------------------------------===//
+
+def SpaceDemodulateOp : DSP_Op<"space_demodulate", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect space demodulation operation";
+    let description = [{
+        Takes in binary input and convert it to string.
+    }];
+
+    let arguments = (ins F64Tensor:$binary);
+
+    let results = (outs F64Tensor:$output);
+
+    let builders = [
+        OpBuilder<(ins "Value":$binary)>
+    ];
+
+    let hasVerifier = 1;
+    let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SpaceErrCorrectionOp
+//===----------------------------------------------------------------------===//
+
+def SpaceErrCorrectionOp : DSP_Op<"space_err_correction", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+    let summary = "DSP dialect space error correction operation";
+    let description = [{
+        Remove noise operation for signal transmission in space.
+    }];
+
+    let arguments = (ins F64Tensor:$signal);
+
+    let results = (outs F64Tensor:$output);
+
+    let builders = [
+        OpBuilder<(ins "Value":$signal)>
+    ];
+
+    let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+def MaxOp : DSP_Op<"max", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find maximum value in tensor";
+   let description = [{
+		This operation finds and returns the maximum value of the tensor.
+   }]; 
+
+   let arguments = (ins F64Tensor:$input);
+   let results = (outs F64Tensor);
+
+   // Indicate that the operation has a custom parser and printer method.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Allow building a MaxOp from its input operand.
+   
+   let builders = [
+     OpBuilder<(ins "Value":$input)>
+   ];
+
+   // Indicate that the operation has a custom parser and printer method.
+   // let hasCustomAssemblyFormat = 1;
+
+   // let hasVerifier = 1;
+   let hasCanonicalizer = 1;
+ }
+ 
+
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+def MeanOp : DSP_Op<"mean", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find mean value of tensor. Requires (input, length).";
+   let description = [{
+		This operation finds and returns the mean value of the tensor.
+        Note that it requires length.
+		It would be better if we can implement both versions 
+		- no length argument -> automatically use the length of tensor
+		- with length argument -> use the provided length
+   }]; 
+
+   let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+   let results = (outs F64Tensor);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$input, "Value":$length)>
+   ];
+
+   // Indicate that the operation has a custom parser and printer method.
+   // let hasCustomAssemblyFormat = 1;
+
+   // let hasVerifier = 1;
+   let hasCanonicalizer = 1;
+ }
+ 
+
+
+//===----------------------------------------------------------------------===//
+// DiffOp
+//===----------------------------------------------------------------------===//
+
+def DiffOp : DSP_Op<"diff", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.diff (out[i] = a[i+1] - a[i]). It receives second argument as length";
+   let description = [{
+		This operation returns a tensor that contains diff (out[i] = a[i+1] - a[i]).
+		The length of the output tensor is len(input)-1, regardless of length parameter.
+		Note that it requires length.
+		It would be better if we can implement both versions 
+		- no length argument -> automatically use the length of tensor
+		- with length argument -> use the provided length
+   }]; 
+
+   let arguments = (ins F64Tensor:$input, F64Tensor:$length);
+   let results = (outs F64Tensor);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$input, "Value":$length)>
+   ];
+
+   // Indicate that the operation has a custom parser and printer method.
+   // let hasCustomAssemblyFormat = 1;
+
+   // let hasVerifier = 1;
+ }
+ 
+//===----------------------------------------------------------------------===//
+// AbsOp
+//===----------------------------------------------------------------------===//
+
+def AbsOp : DSP_Op<"abs", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.abs -> calculate the absolute value element-wise";
+   let description = [{
+       This operation calculates the absolute value element-wise.
+   }]; 
+
+   let arguments = (ins F64Tensor:$input);
+   let results = (outs F64Tensor:$output);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$input)>
+   ];
+ }
+
+
+//===----------------------------------------------------------------------===//
+// ArgMaxOp
+//===----------------------------------------------------------------------===//
+
+def ArgMaxOp : DSP_Op<"argmax", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "np.argmax -> find the indices of the maximum values along a specified axis in an array.";
+   let description = [{
+       This operation finds the indices of the maximum values along a specified axis in an array.
+   }]; 
+
+   let arguments = (ins F64Tensor:$input, I64Attr:$axis);
+   let results = (outs F64Tensor:$output);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$input, "int64_t":$axis)>
+   ];
+ }
+//===----------------------------------------------------------------------===//
+// NormalizeOp
+//===----------------------------------------------------------------------===//
+
+def NormalizeOp : DSP_Op<"normalize", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "normalize operation.";
+   let description = [{
+       normalization dsp operation.
+   }]; 
+
+   let arguments = (ins F64Tensor:$signal);
+   let results = (outs F64Tensor);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$signal)>
+   ];
+
+    let hasCanonicalizer = 1;
+ }
+
+//===----------------------------------------------------------------------===//
+// NormLMSFilterResponseOptimizeOp
+//===----------------------------------------------------------------------===//
+
+
+def NormLMSFilterResponseOptimizeOp : DSP_Op<"norm_LMSFilterResponse_opt",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "LMS filter Response + norm optimize";
+  let description = [{
+      norm + lmsfilter
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen);
+
+   let results = (outs F64Tensor);
+
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen)>
+   ];
+
+   let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Median2SlidingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Median2SlidingOptimizedOp : DSP_Op<"median2slidingOp", 
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ];
+}
+
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmThresholdUpOptimizedOp
+//===----------------------------------------------------------------------===//
+def FIRFilterResSymmThresholdUpOptimizedOp : DSP_Op<"FIRFilterResSymmThresholdUpOptimizedOp" ,
+    [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "FIRFilterResSymmThresholdUpOptimizedOp";
+  let description = [{
+    The "FIRFilterResSymmThresholdUpOptimizedOp" operation is basically the convolution of input 1-D and filter vector when the filter is symmetrical ie,
+    h[0] = h[L-1] , h[1] = h[L-2] .. h[middle ie, (L-1)/2] is single element & filter length is odd always
+    ex: x[n] = [2,1,3,2,4], h[l] = [1, -1 ,1]  then
+      y[n] = sum(h(k) . x(n-k)) k=0 to N-1 can be rewritten as 
+      y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0 to L-1/2
+
+      Basically, we are trying to reduce the number of load/store operations by half -- so that we can reduce the operations
+  }];
+
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$threshold, F64Tensor:$returnoriginal); 
+  let results =  (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$threshold, "Value":$returnoriginal)>
+  ];
+
+}
+
+//===----------------------------------------------------------------------===//
+// FFTOp
+//===----------------------------------------------------------------------===//
+
+def FFTOp : DSP_Op<"fft", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Performs FFT Operation on the input";
+  let description = [{
+      This operation accepts a 1D input array of size 2^n and returns its Fourier transform as two results, `real` and `imag`,
+      producing output arrays of the same size. The operation is designed to work exclusively with input sizes that are powers of 2.
+      Providing an array of any other size will result in a segmentation fault.
+  }];
+
+  let arguments = (ins F64Tensor:$lhs);
+  let results = (outs F64Tensor:$real, F64Tensor:$imag);
+
+  let builders = [
+    OpBuilder<(ins "Value":$lhs)>
+  ]; 
+
+  // let hasCanonicalizer = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FFTAbsOp
+//===----------------------------------------------------------------------===//
+
+def FFTAbsOp : DSP_Op<"FFTAbsOp", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "FFTAbsOp";
+  let description = [{
+      ------------------
+  }];
+
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$amplitude);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ]; 
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsOp
+//===----------------------------------------------------------------------===//
+
+def DFTAbsOp : DSP_Op<"DFTAbsOp", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "DFTAbsOp";
+  let description = [{
+      ------------------
+  }];
+
+  let arguments = (ins F64Tensor:$input);
+  let results = (outs F64Tensor:$amplitude);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input)>
+  ]; 
+}
+
+//===----------------------------------------------------------------------===//
+// DFTAbsThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+def DFTAbsThresholdUpOp : DSP_Op<"DFTAbsThresholdUpOp", [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "DFTAbsThresholdUpOp";
+  let description = [{
+      ------------------
+  }];
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$threshold, F64Tensor:$returnoriginal);
+  let results = (outs F64Tensor:$amplitude);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$threshold, "Value":$returnoriginal)>
+  ]; 
+  let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CorrelateOp
+//===----------------------------------------------------------------------===//
+
+def CorrelateOp : DSP_Op<"correlate", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "This implements scp.correlate. Right now we assume size of lhs == size of rhs";
+   let description = [{
+       This operation computes the correlation of the `lhs` and `rhs` tensors; both operands are currently assumed to have the same size.
+   }]; 
+
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+
+   // Indicate that the operation has a custom parser and printer method.
+   // let hasCustomAssemblyFormat = 1;
+   // let assemblyFormat = [{
+   //   `(` $input `:` type($input1 , $input2) `)` attr-dict `to` type(results)
+   // }];
+   // Allow building a CorrelateOp from the two input operands.
+   
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+
+   // let hasVerifier = 1;
+ }
+ 
+ 
+//===----------------------------------------------------------------------===//
+// SetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+def SetSingleElemAtIdxOp : DSP_Op<"setSingleElemAtIndx",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "This one access ranked tensor at idx and sets signle tensor with or without dimension.";
+
+  let arguments = (ins F64Tensor:$input, F64Tensor:$indx , F64Tensor:$val);
+  let results = (outs F64Tensor);
+
+  let builders = [
+    OpBuilder<(ins "Value":$input, "Value":$indx, "Value":$val)>
+  ];
+
+  // Indicate that additional verification for this operation is necessary.
+  //let hasVerifier = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Correl2MaxOptimizedOp
+//===----------------------------------------------------------------------===//
+
+def Correl2MaxOptimizedOp : DSP_Op<"correl2max", [Pure , DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+   let summary = "Find the index of maximum value in tensor. outputs with floating point-converted index";
+   let description = [{
+		This operation fuses loops for correlate and max.
+   }]; 
+
+   let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
+   let results = (outs F64Tensor);
+   
+   let builders = [
+     OpBuilder<(ins "Value":$lhs, "Value":$rhs)>
+   ];
+ }
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponse2GainOp               
+//===----------------------------------------------------------------------===//
+                                     
+def LMSFilterResponse2GainOp : DSP_Op<"lmsFilterResponse2gain",
+    [Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
+  let summary = "Fusing loop for LMSFilterResponseOp and GainOp";
+                                        
+  let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs, F64Tensor:$mu, F64Tensor:$filterLen, F64Tensor:$gain);
+  let results = (outs F64Tensor);
+                                                                            
+  let builders = [           
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs, "Value":$mu, "Value":$filterLen, "Value":$gain)>
+  ];               
+}                                  
+
+#endif // DSP
+
+
+
diff --git a/mlir/include/mlir/Dialect/DSP/IR/DSPDialect.h b/mlir/include/mlir/Dialect/DSP/IR/DSPDialect.h
new file mode 100644
index 000000000000..f8e86c07f020
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/IR/DSPDialect.h
@@ -0,0 +1,59 @@
+//===- DSPDialect.h - Dialect definition for the DSP IR -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IR Dialect for the DSP language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_DSP_IR_DSPDIALECT_H_
+#define MLIR_DIALECT_DSP_IR_DSPDIALECT_H_
+
+#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Interfaces/CastInterfaces.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/ShapeInferenceOpInterfaces.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+
+
+
+
+#include "mlir/Dialect/DSP/IR/DSPDialect.h.inc"
+
+// #include "mlir/Dialect/DSP/IR/DSPEnums.h.inc"
+
+/// Include the auto-generated header file containing the declarations of the
+/// DSP operations.
+//#define GET_OP_CLASSES
+//#include "mlir/Dialect/DSP/IR/DSP.h.inc"
+
+/// Include the auto-generated header file containing the declaration of the DSP
+/// dialect.
+//#include "mlir/Dialect/DSP/IR/DSP_Dialect.h.inc"
+
+/// Include the auto-generated header file containing the declarations of the
+/// DSP operations.
+#define GET_OP_CLASSES
+#include "mlir/Dialect/DSP/IR/DSP.h.inc"
+//#include "mlir/Dialect/DSP/IR/DSP_Ops.h.inc"
+
+
+namespace mlir{
+	
+void registerDSPOptions();
+bool getEnableCanonicalOpt();
+
+} //namespace
+
+
+
+#endif // MLIR_DIALECT_DSP_IR_DSPDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/DSP/Pipelines/Passes.h b/mlir/include/mlir/Dialect/DSP/Pipelines/Passes.h
new file mode 100644
index 000000000000..0e5db671dc86
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/Pipelines/Passes.h
@@ -0,0 +1,62 @@
+//===- Passes.h - DSP pipeline entry points -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes of all DSP pipelines.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_DSP_PIPELINES_PASSES_H
+#define MLIR_DIALECT_DSP_PIPELINES_PASSES_H
+
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+#include "mlir/Pass/PassOptions.h"
+
+namespace mlir {
+class Pass;
+
+namespace dsp {
+
+/// Options for the dsp pipeline.
+struct DSPPipelineOptions
+    : public PassPipelineOptions<DSPPipelineOptions> {
+  /* This is from Bufferization dialect.
+  PassOptions::Option<bool> privateFunctionDynamicOwnership{
+      *this, "private-function-dynamic-ownership",
+      llvm::cl::desc(
+          "Allows to add additional results to private functions to return "
+          "ownership of returned memrefs to callers. This can avoid spurious "
+          "buffer clones in the callee."),
+      llvm::cl::init(false)};
+
+  /// Implicit conversion to `DeallocationOptions`.
+  operator DeallocationOptions() const {
+    DeallocationOptions options;
+    options.privateFuncDynamicOwnership = privateFunctionDynamicOwnership;
+    return options;
+  }
+  */
+};
+
+//===----------------------------------------------------------------------===//
+// Building and Registering.
+//===----------------------------------------------------------------------===//
+
+/// Adds the dsp pipeline to the `OpPassManager`.
+void buildDSPPipeline(
+    OpPassManager &pm, const DSPPipelineOptions &options);
+
+/// Registers all pipelines for the `dsp` dialect. Currently,
+/// this includes only the "dsp-shapeinference".
+void registerDSPPipelines();
+
+std::unique_ptr<Pass> createShapeInferencePass();
+
+} // namespace dsp
+} // namespace mlir
+
+#endif // MLIR_DIALECT_DSP_PIPELINES_PASSES_H
diff --git a/mlir/include/mlir/Dialect/DSP/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/DSP/Transforms/CMakeLists.txt
new file mode 100644
index 000000000000..1279ef94c617
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/Transforms/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name DSP)
+add_public_tablegen_target(MLIRDSPTransformsIncGen)
+add_dependencies(mlir-headers MLIRDSPTransformsIncGen)
+add_mlir_doc(Passes DSPPasses ./ -gen-pass-doc)
diff --git a/mlir/include/mlir/Dialect/DSP/Transforms/Passes.h b/mlir/include/mlir/Dialect/DSP/Transforms/Passes.h
new file mode 100644
index 000000000000..abffb5e800a7
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/Transforms/Passes.h
@@ -0,0 +1,27 @@
+//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_DIALECT_DSP_TRANSFORMS_PASSES_H_
+#define MLIR_DIALECT_DSP_TRANSFORMS_PASSES_H_
+
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+class ConversionTarget;
+namespace dsp {
+
+#define GEN_PASS_DECL_DSPINFERSHAPES
+#define GEN_PASS_REGISTRATION
+#include "mlir/Dialect/DSP/Transforms/Passes.h.inc"
+
+
+
+// void createDSPInferShapesPass();
+} // namespace dsp
+} // namespace mlir
+
+#endif // MLIR_DIALECT_DSP_TRANSFORMS_PASSES_H_
diff --git a/mlir/include/mlir/Dialect/DSP/Transforms/Passes.td b/mlir/include/mlir/Dialect/DSP/Transforms/Passes.td
new file mode 100644
index 000000000000..bf57beb16932
--- /dev/null
+++ b/mlir/include/mlir/Dialect/DSP/Transforms/Passes.td
@@ -0,0 +1,30 @@
+//===-- Passes.td - DSP pass definition file ------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_DSP_TRANSFORMS_PASSES_TD
+#define MLIR_DIALECT_DSP_TRANSFORMS_PASSES_TD
+
+include "mlir/Pass/PassBase.td"
+
+// def DSPInferShapes : Pass<"dsp-infer-shapes", "func::FuncOp"> {
+//   let summary = "Propagate shapes across DSP operations";
+//   let description = [{
+//     Pass that uses operand types and propagates shapes to DSP operations.
+//     This includes legalizing rankless and dynamic shapes towards static.
+//   }];
+
+//   // let constructor = "createDSPInferShapesPass()";
+//   let dependentDialects = [
+//     "func::FuncDialect",
+//     "tensor::TensorDialect",
+//     "dsp::DSPDialect",
+//   ];
+// }
+
+#endif // MLIR_DIALECT_DSP_TRANSFORMS_PASSES_TD
+
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index d9db21073e15..fc4dad336b66 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -97,7 +97,7 @@
 #include "mlir/Target/LLVM/NVVM/Target.h"
 #include "mlir/Target/LLVM/ROCDL/Target.h"
 #include "mlir/Target/SPIRV/Target.h"
-
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
 namespace mlir {
 
 /// Add all the MLIR dialects to the provided registry.
@@ -116,6 +116,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   cf::ControlFlowDialect,
                   complex::ComplexDialect,
                   DLTIDialect,
+                  dsp::DSPDialect,
                   emitc::EmitCDialect,
                   func::FuncDialect,
                   gpu::GPUDialect,
@@ -146,7 +147,8 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   ub::UBDialect,
                   vector::VectorDialect,
                   x86vector::X86VectorDialect,
-                  xegpu::XeGPUDialect>();
+                  xegpu::XeGPUDialect
+                  >();
   // clang-format on
 
   // Register all external models.
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 90406f555b0f..890e2f546f72 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -46,6 +46,8 @@
 #include "mlir/Dialect/Transform/Transforms/Passes.h"
 #include "mlir/Dialect/Vector/Transforms/Passes.h"
 #include "mlir/Dialect/XeGPU/Transforms/Passes.h"
+#include "mlir/Dialect/DSP/Transforms/Passes.h"
+#include "mlir/Dialect/DSP/Pipelines/Passes.h"
 #include "mlir/Transforms/Passes.h"
 
 #include <cstdlib>
@@ -94,6 +96,7 @@ inline void registerAllPasses() {
   arm_sve::registerArmSVEPasses();
   emitc::registerEmitCPasses();
   xegpu::registerXeGPUPasses();
+  dsp::registerDSPPasses();
 
   // Dialect pipelines
   bufferization::registerBufferizationPipelines();
@@ -102,6 +105,7 @@ inline void registerAllPasses() {
 #if MLIR_ENABLE_CUDA_CONVERSIONS
   gpu::registerGPUToNVVMPipeline();
 #endif
+  dsp::registerDSPPipelines();
 }
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index d81298bb4daf..65dc0ffb30cf 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -17,6 +17,17 @@ add_mlir_interface(TilingInterface)
 add_mlir_interface(ValueBoundsOpInterface)
 add_mlir_interface(VectorInterfaces)
 add_mlir_interface(ViewLikeInterface)
+#add_mlir_interface(ShapeInferenceOpInterfaces)
+
+set(LLVM_TARGET_DEFINITIONS ShapeInferenceOpInterfaces.td)
+mlir_tablegen(ShapeInferenceOpInterfaces.h.inc -gen-op-interface-decls)
+mlir_tablegen(ShapeInferenceOpInterfaces.cpp.inc -gen-op-interface-defs)
+add_public_tablegen_target(MLIRShapeInferenceOpInterfacesIncGen)
+
+#set(LLVM_TARGET_DEFINITIONS ShapeInferenceOpInterfaces.td)
+#mlir_tablegen(ShapeInferenceOpInterfaces.h.inc -gen-op-interface-decls)
+#mlir_tablegen(ShapeInferenceOpInterfaces.cpp.inc -gen-op-interface-defs)
+#add_public_tablegen_target(MLIRShapeInferenceOpInterfacesIncGen)
 
 set(LLVM_TARGET_DEFINITIONS MemorySlotInterfaces.td)
 mlir_tablegen(MemorySlotOpInterfaces.h.inc -gen-op-interface-decls)
diff --git a/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.h b/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.h
new file mode 100644
index 000000000000..1676d65ee595
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.h
@@ -0,0 +1,30 @@
+//===- ShapeInferenceOpInterfaces.h - Shape inference interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes the generated shape inference op interface declarations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES_H_
+#define MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES_H_
+
+#include "mlir/IR/OpDefinition.h"
+
+//namespace mlir {
+//namespace dsp {
+
+/// Verify invariants of ops that implement the ShapedDimOpInterface.
+// LogicalResult verifyShapedDimOpInterface(Operation *op);
+#include "mlir/Interfaces/ShapeInferenceOpInterfaces.h.inc"
+//} // namespace detail
+//} // namespace mlir
+
+/// Include the generated interface declarations.
+// #include "mlir/Interfaces/ShapeInferenceOpInterfaces.h.inc"
+
+#endif // MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES_H_
diff --git a/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.td b/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.td
new file mode 100644
index 000000000000..a65f3e992992
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/ShapeInferenceOpInterfaces.td
@@ -0,0 +1,70 @@
+//===-- ShapeInferenceOpInterfaces.td - Shape inference ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the op interface used to infer and set result shapes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES
+#define MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES
+
+include "mlir/IR/OpBase.td"
+
+//===----------------------------------------------------------------------===//
+// ShapedDimOpInterface
+//===----------------------------------------------------------------------===//
+
+// Ops that return the dimension of a shaped value.
+// def ShapeInferenceOpInterface : OpInterface<"ShapeInterface"> {
+//   let description = [{
+//     An interface for ops that return the dimension of a shaped value (such as a
+//     tensor or a memref).  It provides access to the source shaped value and to
+//     the dimension.
+//   }];
+//   let cppNamespace = "::mlir";
+
+//   let methods = [
+//     InterfaceMethod<
+//       /*desc=*/[{
+//         Return the shaped value operand. This is the value that the dimension
+//         is taken from.
+//       }],
+//       /*retTy=*/"::mlir::Value",
+//       /*methodName=*/"getShapeValue",
+//       /*args=*/(ins)
+//     >,
+//     InterfaceMethod<
+//       /*desc=*/[{
+//         Return the dimension operand. This can be a constant or an SSA value.
+//       }],
+//       /*retTy=*/"::mlir::OpFoldResult",
+//       /*methodName=*/"getDimension",
+//       /*args=*/(ins)
+//     >
+//   ];
+
+//   let verify = [{
+//     return verifyShapedDimOpInterface($_op);
+//   }];
+// }
+
+
+def ShapeInferenceOpInterface : OpInterface<"ShapeInference"> {
+  let description = [{
+    Interface to access a registered method to infer the return types for an
+    operation that can be used during type inference.
+  }];
+  let cppNamespace = "::mlir::dsp";
+
+  let methods = [
+    InterfaceMethod<"Infer and set the output shape for the current operation.",
+                    "void", "inferShapes">
+  ];
+}
+
+#endif // MLIR_INTERFACES_SHAPEINFERENCEOPINTERFACES
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 4f7f83cdb473..2dd91bd51da8 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -246,6 +246,8 @@ class MlirOptMainConfig {
   /// Verify that the input IR round-trips perfectly.
   bool verifyRoundtripFlag = false;
 
+  
+
   /// The reproducer output filename (no crash required).
   std::string generateReproducerFileFlag = "";
 };
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 41ab7046b91c..427decdd4d75 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -65,3 +65,4 @@ add_subdirectory(VectorToGPU)
 add_subdirectory(VectorToLLVM)
 add_subdirectory(VectorToSCF)
 add_subdirectory(VectorToSPIRV)
+add_subdirectory(DSPToAffine)
diff --git a/mlir/lib/Conversion/DSPToAffine/CMakeLists.txt b/mlir/lib/Conversion/DSPToAffine/CMakeLists.txt
new file mode 100644
index 000000000000..db8becf5fb55
--- /dev/null
+++ b/mlir/lib/Conversion/DSPToAffine/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_mlir_conversion_library(MLIRDSPToAffine
+  DSPToAffine.cpp
+  DSPToAffinePass.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/DSPToAffine
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/DSP
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR
+
+  DEPENDS
+  MLIRConversionPassIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRArithDialect
+  MLIRAffineDialect
+  MLIRIR
+  MLIRPass
+  MLIRDSPDialect
+  #MLIRDSPTransforms
+  MLIRSupport
+  MLIRLinalgDialect
+  MLIRLinalgTransforms
+  MLIRTransformUtils
+  MLIRTensorDialect
+  )
diff --git a/mlir/lib/Conversion/DSPToAffine/DSPToAffine.cpp b/mlir/lib/Conversion/DSPToAffine/DSPToAffine.cpp
new file mode 100644
index 000000000000..4098505cfd0c
--- /dev/null
+++ b/mlir/lib/Conversion/DSPToAffine/DSPToAffine.cpp
@@ -0,0 +1,12252 @@
+//====- DSPToAffine.cpp - Partial lowering from DSP to Affine+Std --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a partial lowering of DSP operations to a combination of
+// affine loops, memref operations and standard operations. This lowering
+// expects that all calls have been inlined, and all shapes have been resolved.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wall"
+
+#include "mlir/Conversion/DSPToAffine/DSPToAffine.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/DialectRegistry.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/ValueRange.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Support/TypeID.h"
+#include "mlir/Transforms/DialectConversion.h"
+//#include "toy/DebugConfig.h"
+//#include "toy/Dialect.h"
+//#include "toy/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <utility>
+
+// For IntegerSet
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/IntegerSet.h"
+
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+#include <iostream>
+//#include "DSPToAffine.h"
+
+using namespace mlir;
+using namespace std;
+using namespace affine;
+using namespace dsp;
+using namespace mlir::dsp;
+//===----------------------------------------------------------------------===//
+// DSPToAffine RewritePatterns
+//===----------------------------------------------------------------------===//
+
+// #pragma warning(push, 0)
+/// Return the MemRefType with the same shape and element type as `type`.
+static MemRefType convertTensorToMemRef(RankedTensorType type) {
+  return MemRefType::get(type.getShape(), type.getElementType());
+}
+
+/// Create a memref.alloc of `type` plus its memref.dealloc; return the alloc.
+static Value insertAllocAndDealloc(MemRefType type, Location loc,
+                                   PatternRewriter &rewriter) {
+  auto alloc = rewriter.create<memref::AllocOp>(loc, type);
+
+  // Make sure to allocate at the beginning of the block.
+  auto *parentBlock = alloc->getBlock();
+  alloc->moveBefore(
+      &parentBlock->front()); // Hoist the alloc in front of the block's
+                              // current first operation.
+
+  // Make sure to deallocate this alloc at the end of the block. This is fine
+  // as dsp functions have no control flow.
+  auto dealloc = rewriter.create<memref::DeallocOp>(loc, alloc);
+  dealloc->moveBefore(
+      &parentBlock->back()); // dealloc now precedes the block's last op
+  return alloc;
+}
+
+/// This defines the function type used to process an iteration of a lowered
+/// loop. It takes as input an OpBuilder, a range of memRefOperands
+/// corresponding to the operands of the input operation, and the range of loop
+/// induction variables for the iteration. It returns a value to store at the
+/// current index of the iteration.
+using LoopIterationFn = function_ref<Value(
+    OpBuilder &rewriter, ValueRange memRefOperands, ValueRange loopIvs)>;
+
+static void lowerOpToLoops(Operation *op, ValueRange operands,
+                           PatternRewriter &rewriter,
+                           LoopIterationFn processIteration) {
+  auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+  // Overview: the first result type of `op` has just been cast to a
+  // RankedTensorType. The rest of this function allocates a memref with the
+  // same shape and element type, builds one affine loop per tensor dimension
+  // (from 0 up to the dimension's extent, step 1), stores the value returned
+  // by `processIteration` at every index, and finally replaces `op` with the
+  // allocation.
+  //
+  // `processIteration` receives the builder for the innermost loop body, the
+  // `operands` passed to this function, and the loop induction variables.
+  //
+  // Disabled debug output, kept for reference:
+  // for (auto i : tensorType.getShape())
+  //   llvm::errs() << "tensortype =" << i << "\n";
+  // llvm::errs() << "tensortype.getElementType ="
+  //              << tensorType.getElementType() << "\n";
+  // llvm::errs() << "op->getLoc = " << op->getLoc() << "\n";
+  // llvm::errs() << "op->getDialect = " << op->getDialect() << "\n";
+  // llvm::errs() << "op->getName = " << op->getName() << "\n";
+  // llvm::errs() << "op->getParentRegion = " << op->getParentRegion() << "\n";
+  // llvm::errs() << "op->getParentOp = " << op->getParentOp()->getName()
+  //              << "\n";
+  // llvm::errs() << "op->getNumOperands = " << op->getNumOperands() << "\n";
+  // for (auto i : op->getOperands())
+  //   llvm::errs() << "op->Operand = " << i << "\n";
+
+  auto loc = op->getLoc();
+
+  // Insert an allocation and deallocation for the result of this operation.
+  auto memRefType = convertTensorToMemRef(tensorType);
+  auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+  // Create a nest of affine loops, with one loop per dimension of the shape.
+  // The buildAffineLoopNest function takes a callback that is used to construct
+  // the body of the innermost loop given a builder, a location and a range of
+  // loop induction variables.
+  SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value=*/0);
+  SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+  // Bounds of the loop nest: every loop starts at 0 (`lowerBounds`), advances
+  // by 1 (`steps`), and its exclusive upper bound is the extent of the
+  // matching dimension, taken directly from tensorType.getShape().
+  //
+  // Disabled debug output, kept for reference:
+  // llvm::errs() << "tensorType.getRank() " << tensorType.getRank() << "\n";
+  // for (auto i : tensorType.getShape())
+  //   llvm::errs() << "tensorType.getShape() = " << i << "\n";
+  //
+  // NOTE(review): getShape() is used as constant upper bounds, so the result
+  // shape is presumably fully static at this point -- confirm with callers.
+  affine::buildAffineLoopNest(
+      rewriter, loc, lowerBounds, tensorType.getShape(), steps,
+      [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) {
+        // Call the processing function with the rewriter, the memref operands,
+        // and the loop induction variables. This function will return the value
+        // to store at the current index.
+        Value valueToStore = processIteration(nestedBuilder, operands, ivs);
+        nestedBuilder.create<affine::AffineStoreOp>(loc, valueToStore, alloc,
+                                                    ivs);
+      });
+
+  // Replace this operation with the generated alloc.
+  rewriter.replaceOp(op, alloc);
+}
+
+#define TryJustAffineLoop 0       // single affine.for experiment -- working
+#define TryAffineForAndAffineIf 0 // affine.for + affine.if/else -- working
+#define TryAffineIf2 0 // second affine.if experiment; status not recorded
+#define TryAffineMap 0    // basic case working; TODO: try with symbols
+#define TrySumOfVector 0  // iter_args running sum -- working
+#define TryMultiDimLoop 0 // nested affine.for -- working
+#define TryFIRFilter 1 // the only flag in this list that is set to 1
+#define TryMultiDimForAndIf 0         // status not recorded
+#define TryMultiDimLoopAndAffineMap 0 // Working
+#define TryMultiDimLoopAndAffineSet 0 // Working
+static void lowerOpToLoopsFIR(Operation *op, ValueRange operands,
+                              PatternRewriter &rewriter,
+                              LoopIterationFn processIteration) {
+  auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+  auto loc = op->getLoc();
+
+  // Insert an allocation and deallocation for the result of this operation.
+  auto memRefType = convertTensorToMemRef(tensorType);
+  auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+  // Create a nest of affine loops, with one loop per dimension of the shape.
+  // The buildAffineLoopNest function takes a callback that is used to construct
+  // the body of the innermost loop given a builder, a location and a range of
+  // loop induction variables.
+  SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value=*/0);
+  SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+  // llvm::errs() << "tensorType.getRank() " << tensorType.getRank() << "\n";
+  // cout << "tensorType.getRank() .. " << tensorType.getRank() << "\n";
+  // for (auto i : tensorType.getRank())
+  // {
+  //   llvm::errs() << "tensorType.getRank() = " << i << "\n";
+  // }
+  // for (auto i : tensorType.getShape())
+  // {
+  //   llvm::errs() << "tensorType.getShape() = " << i << "\n";
+  // }
+  // llvm::errs() << "tensorType.getShape() " << tensorType.getShape() << "\n";
+
+  // affine::AffineForOp forOp = rewriter.create<affine::AffineForOp>(
+  //   loc, lowerBounds, tensorType.getShape() , steps, ValueRange());
+  // mlir::IntegerSet set1 = mlir::IntegerSet::get(1, 0, map, {true});
+
+  // create an affineFor
+  //  affineFor It has one region containing its body & the region must contain
+  //  a block terminating with affine.yield
+  // block has argument of index type
+  //
+
+#if TryJustAffineLoop
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+      loc, constant15, alloc, forOp1.getInductionVar());
+
+#endif
+
+#if TryAffineForAndAffineIf
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineExpr dimExpr2 = rewriter
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set affine_set<(d0) : (d0 - 5 <= 0)>
+  // affine.for %arg0 = 0 to 10 {
+  //   %3 = affine.if #set (%arg0) {
+  //         %1 = arith.const 25
+  //         affine.yield %1
+  //     }
+  // else{
+  //       %2 = arith.const 15
+  //       affine.yield %2
+  //   }
+  //     affine.store %3, alloc[%arg0]
+  // }
+
+  // auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv}
+  // , false /*no else*/ ); auto ifOp = rewriter.create<affine::AffineIfOp>(
+  // loc, set1 , ValueRange{iv} , true /*no else*/ );
+
+  // use typeRange too:
+  Type floatType = rewriter.getF64Type();
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, TypeRange{floatType}, set1, ValueRange{iv}, true /*no else*/);
+
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+  FIRFilterResponseAdaptor firFilterOperands(operands);
+
+  // load from the input
+  Value loadInput =
+      rewriter.create<AffineLoadOp>(loc, firFilterOperands.getLhs(), iv);
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value constsq25 = rewriter.create<arith::MulFOp>(loc, loadInput, constant25);
+
+  rewriter.create<AffineStoreOp>(loc, constsq25, alloc, iv);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{constsq25});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value loadInput2 =
+      rewriter.create<AffineLoadOp>(loc, firFilterOperands.getRhs(), iv);
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  Value elseResult =
+      rewriter.create<arith::MulFOp>(loc, loadInput2, constant15);
+  rewriter.create<AffineStoreOp>(loc, elseResult, alloc, iv);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{elseResult});
+  // rewriter.setInsertionPointToEnd(ifOp.getElseBlock());
+  rewriter.setInsertionPointAfter(ifOp);
+  ifOp->dump();
+  // forOp1->dump();
+  rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0), alloc, iv);
+  // getParentBlock then use
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
+  //  rewriter.setInsertionPointToEnd(ifOp->getBlock());
+  //  rewriter.setInsertionPoint(ifOp->getParentOp());
+  //  rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
+
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(),
+  //                                                      rewriter.getF64FloatAttr(15));
+
+  //  rewriter.create<affine::AffineYieldOp>(loc, ValueRange{constant15});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+  // else block
+  // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+
+  // Set insertion point to the end of the "then" block
+  // rewriter.setInsertionPointAfter(ifOp.getThenBlock()->getTerminator());
+
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  //  rewriter.setInsertionPointAfter(ifOp);
+
+  // also use affine::AffineStore to store at the loop induction variable
+  //  auto storeOp = rewriter.create<affine::AffineStoreOp>(loc,
+  //  ifOp.getResult(0), alloc, forOp1.getInductionVar()); auto storeOp =
+  //  rewriter.create<affine::AffineStoreOp>(loc, constant25, alloc,
+  //  forOp1.getInductionVar()); Back to parentOp -- forOp1
+  //  rewriter.setInsertionPointAfter(storeOp);
+
+  llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+  // create affine yield for the loop
+  //  rewriter.create<affine::AffineYieldOp>(loc);
+
+#endif
+
+#if TryAffineIf2
+
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  // AffineExpr dimExpr2 = rewriter
+  // AffineMap map = AffineMap::get(1, 0, dimExpr);
+  // AffineMap map = AffineMap::get(1, 0 , rewriter.getAffineDimExpr(0) - 5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set affine_set<(d0) : (d0 - 5 <= 0)>
+  // affine.for %arg0 = 0 to 10 {
+  //   %3 = affine.if #set (%arg0) {
+  //         %1 = arith.const 25
+  //         affine.yield %1
+  //     }
+  //     affine.store %3, alloc[%arg0]
+  // }
+
+  // auto ifOp = rewriter.create<affine::AffineIfOp>( loc, set1 , ValueRange{iv}
+  // , false /*no else*/ );
+  auto ifOp = rewriter.create<affine::AffineIfOp>(loc, set1, ValueRange{iv},
+                                                  true /*no else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value constsq25 = rewriter.create<arith::MulFOp>(loc, constant25, constant25);
+
+  // ifOp.setR
+  // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv);
+  // rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value constant15 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  rewriter.create<AffineStoreOp>(loc, constsq25, alloc, iv);
+
+  // getParentBlock then use
+  //  rewriter.setInsertionPointToEnd(ifOp.getThenBlock()->getParentOp());
+  //  rewriter.setInsertionPointToEnd(ifOp->getBlock());
+  rewriter.setInsertionPoint(ifOp->getParentOp());
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointToEnd(ifOp.getThenBlock());
+
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+  // rewriter.cre
+
+#endif
+
+#if TryAffineMap
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0] - 2;
+  int64_t step = 1;
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+  // create affine for
+  // use affine-map expression for dimension then symbol then combination
+  // affine-map expression for dimension: affine_map<d0, d1)[s0] -> (d0 , d1 +
+  // s0, d1 - s0) use affine map Define an affine map: #map2 = affine_map<(d0)
+  // -> (d0 + 2)>
+  auto symbol1 = tensorType.getShape()[0];
+  AffineExpr indx = rewriter.getAffineDimExpr(0);
+  AffineExpr constantExpr = rewriter.getAffineConstantExpr(2);
+  AffineMap addMap = AffineMap::get(1, 0, symbol1 - indx);
+  auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap, iv);
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  // try replace constant15 ie, with input & filter
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value inputForFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+  // Value inputForFilterMapped = rewriter.create<affine::AffineLoadOp>(loc,
+  // firOpAdaptor.getLhs() , addMap, iv);
+
+  Value impulseFilter =
+      rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv);
+
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(
+      loc, inputForFilter, alloc, ValueRange{outputIndex});
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+
+#endif
+
+#if TrySumOfVector
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{constant0});
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  //  Initial sum set to 0.
+  //  %sum_0 = arith.constant 0.0 : f32
+  //  // iter_args binds initial values to the loop's region arguments.
+  //  %sum = affine.for %i = 0 to 10 step 1
+  //      iter_args(%sum_iter = %sum_0) -> (f32) {
+  //    %t = affine.load %buffer[%i] : memref<10xf32>
+  //    %sum_next = arith.addf %sum_iter, %t : f32
+  //    // Yield current iteration sum to next iteration %sum_iter or to %sum
+  //    // if final iteration.
+  //    affine.yield %sum_next : f32
+  //  }
+  //  return %sum : f32
+  //  }
+
+  // Inside the loop body:
+
+  // try replace constant15 ie, with input & filter
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value inputForFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // Get iter_arg
+  auto getIterArg =
+      forOp1.getBody()->getArgument(1); // forOp1.getIterOperands();
+  Value sumNext =
+      rewriter.create<arith::AddFOp>(loc, inputForFilter, getIterArg);
+  // Value sumNext = rewriter.create<arith::AddFOp>(loc, inputForFilter,
+  // constant0);
+
+  // here, at indx 0 , o/p = in[0]
+  //  at indx 1 , o/p = in[0] + in[1] & so on
+  // at indx last o/p[9] = sum of all input elements
+  auto storeOp = rewriter.create<affine::AffineStoreOp>(loc, sumNext, alloc,
+                                                        ValueRange{iv});
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  // rewriter.create<AffineYieldOp>(loc);
+  // auto result = forOp1.getResult(0);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+
+#endif
+
+#if TryMultiDimLoop
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+  Value loadFilter =
+      rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv2);
+
+  // get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, loadInput, loadFilter);
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
+
+  // Inside the loop body:
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+
+#endif
+
+#if TryMultiDimForAndIf
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  // create AffineMap and set
+  //  %1 = affine.load
+  //   if ( %arg0 >= 5)   ie, integerSet <(d0) : (d0 - 5 >= 0) >
+
+  // affine.if %arg1 >= 0 and %5 <= %1 - 1
+  //  n-k >= 0 && n-k <= len -1 //n = %arg0 , k = %arg1
+  //  %arg0 >= 0 and %arg0 - %arg1 - %sym1 + 1 <= 0
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  AffineExpr dimExpr =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(5);
+  IntegerSet set1 = IntegerSet::get(1, 0, {dimExpr}, {false});
+
+  // create 2nd loop
+  // use loop inductn variable for 2nd loop
+  // use if condition on 2nd loop inductn variable
+  // get the result of inner for loop and store at output
+
+  affine::AffineForOp forOp2 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+  AffineExpr dimExpr2 =
+      rewriter.getAffineDimExpr(1) - rewriter.getAffineConstantExpr(6);
+  IntegerSet set2 = IntegerSet::get(1, 0, {dimExpr, dimExpr2}, {false});
+
+  auto ifOp = rewriter.create<affine::AffineIfOp>(loc, set2, ValueRange{iv},
+                                                  false /*no else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  Value constant25 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(25));
+  Value resultFromInnerLoop =
+      rewriter.create<arith::MulFOp>(loc, constant25, constant25);
+
+  // rewriter.setInsertionPointAfter(forOp2);
+  // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+  // rewriter.create<AffineStoreOp>(loc, constant25 , alloc, iv2);
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{resultFromInnerLoop});
+  // rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, constant25, alloc, iv);
+  // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
+  // affine.for %arg0 = 0 to 10 {
+  //     %N = len(output)
+  //   %4 =  affine.for %arg1 = 0 to 10 {
+  //         affine.if #set2(%arg0 , %arg1 )[%N] {
+  //             %1 = const 5
+  //             %2 = const 3
+  //             %3 = arith.mulf %1 , %2
+  //             affine.yield %3
+  //         }
+  //     }
+  //   affine.store %4, alloc[%arg0]
+  // }
+
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
+
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
+
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  //  rewriter.setInsertionPointAfter(ifOp);
+
+  llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+
+#endif
+
+#if TryMultiDimLoopAndAffineMap
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  // Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
+  AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
+  AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
+  AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
+  // auto outputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap ,
+  // ValueRange{iv,iv2});
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , addMap, ValueRange{iv2,iv});
+  Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(),
+                                                   addMap, ValueRange{iv, iv2});
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , outputIndex); get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg, loadFilter);
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
+
+  // Inside the loop body:
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+
+#endif
+
+#if TryMultiDimLoopAndAffineSet
+  // here, we have to use iter
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // create loadOp
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+
+  Value loadInput =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getLhs(), iv);
+
+  // create another loop --
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ub, step, ValueRange{loadInput});
+
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  // Use AffineMap for affine.load alloc_9[%arg0 - %arg1]
+  AffineExpr OuterIndx = rewriter.getAffineDimExpr(0);
+  AffineExpr InnerIndx = rewriter.getAffineDimExpr(1);
+  AffineMap addMap = AffineMap::get(2, 0, OuterIndx - InnerIndx);
+  auto outputIndex =
+      rewriter.create<affine::AffineApplyOp>(loc, addMap, ValueRange{iv, iv2});
+
+  // Value constant15 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(), rewriter.getF64FloatAttr(15));
+  AffineExpr dimExpr = OuterIndx - InnerIndx;
+  IntegerSet set1 = IntegerSet::get(2, 0, {dimExpr}, {false});
+
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+  // Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs()
+  // , addMap, ValueRange{iv2,iv});
+  Value loadFilter = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getRhs(),
+                                                   addMap, ValueRange{iv, iv2});
+  // get iterArg
+  auto getIterArg = forOp2.getBody()->getArgument(1);
+  auto sumNext = rewriter.create<arith::AddFOp>(loc, loadFilter, loadFilter);
+  // rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+
+  // store the result to output
+  //  rewriter.create<AffineStoreOp>(loc, sumNext, alloc, iv );
+  rewriter.setInsertionPointAfter(ifOp);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  //
+  // yield the
+  // inside the forOp body --> create the operations & then close the body
+  // OpBuilder::InsertionGuard guard(rewriter);
+  // Initial sum set to 0.
+  // affine.for %arg0 = 0 to 10 {
+  //   %1 = affine.load input[%arg0]
+  //   %4 = affine.for %arg1 = 0 to 10 step 1
+  //     iter_args(%sum_iter = %1) {
+  //       %2 = affine.load filter[%arg1]
+  //       %3 = arith.add sum_iter , %2
+  //         affine.yield %3 : f64
+  //   }
+  //   affine.store %4, output[%arg0]
+  // }
+
+  // Inside the loop body:
+
+  llvm::errs() << "LINE = " << __LINE__ << "\n";
+
+#endif
+
+#if TryFIRFilter
+
+  int64_t lb = 0;
+  int64_t ub = tensorType.getShape()[0];
+  int64_t step = 1;
+
+  affine::AffineForOp forOp1 =
+      rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+  rewriter.setInsertionPointToStart(forOp1.getBody());
+  auto iv = forOp1.getInductionVar();
+
+  // Value sum0 = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+  //                                             rewriter.getF64FloatAttr(0));
+  // get filter len
+  // auto tensorTypeFilter = llvm::cast<RankedTensorType>((*op->getOperand(1)));
+  // //operand_type_end auto tensorTypeFilter =
+  // llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+  auto operandIt = op->operand_type_begin();
+  auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
+  int64_t ubForInput = tensorTypeInput.getShape()[0];
+  // get second operand
+  operandIt = operandIt + 1;
+
+  // auto tensorTypeFilter =
+  // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+  auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
+  int64_t ubForFilter = tensorTypeFilter.getShape()[0];
+
+  // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
+  // create a constant for sum
+  Value constant0 = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+  affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+      loc, lb, ubForFilter, step, ValueRange{constant0});
+  rewriter.setInsertionPointToStart(forOp2.getBody());
+  auto iv2 = forOp2.getInductionVar();
+
+  auto getIterArg =
+      forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
+
+  // AffineExpr dimExpr = rewriter.getAffineDimExpr(0);
+  AffineExpr dimExpr2 =
+      rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1);
+  // n-k <= inputLen -1 or, k-n >= 1 - inputLen ie, k - n + inputLen - 1 >= 0
+  AffineExpr ExprForUpperBoundCheck =
+      rewriter.getAffineConstantExpr(ubForInput) +
+      rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0) -
+      rewriter.getAffineConstantExpr(1);
+  IntegerSet set2 =
+      IntegerSet::get(2, 0, {dimExpr2, ExprForUpperBoundCheck}, {false, false});
+
+  // use typeRange too:
+  Type floatType = rewriter.getF64Type();
+  //  if n-k >= 0
+  auto ifOp = rewriter.create<affine::AffineIfOp>(
+      loc, TypeRange{floatType}, set2, ValueRange{iv, iv2}, true /*else*/);
+  rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+  AffineMap addMap = AffineMap::get(2, 0, dimExpr2);
+  // auto inputIndex = rewriter.create<affine::AffineApplyOp>(loc, addMap ,
+  // ValueRange{iv,iv2});
+
+  FIRFilterResponseOpAdaptor firOpAdaptor(operands);
+  Value loadInput = rewriter.create<AffineLoadOp>(loc, firOpAdaptor.getLhs(),
+                                                  addMap, ValueRange{iv, iv2});
+
+  rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
+  // else block
+  rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+  Value const0ForElse = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+  rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
+  rewriter.setInsertionPointAfter(ifOp);
+
+  // load filter and then mult and then sum
+  Value loadFilter =
+      rewriter.create<affine::AffineLoadOp>(loc, firOpAdaptor.getRhs(), iv2);
+  // Value constant25 = rewriter.create<arith::ConstantOp>(loc,
+  // rewriter.getF64Type(),
+  //                                                      rewriter.getF64FloatAttr(25));
+  Value filterMulInput =
+      rewriter.create<arith::MulFOp>(loc, ifOp.getResult(0), loadFilter);
+  Value sumNext =
+      rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+  rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+  // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+  rewriter.setInsertionPointAfter(forOp2);
+  rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0), alloc, iv);
+  rewriter.setInsertionPointAfter(forOp1);
+
+  // ifOp->dump();
+
+  // FIRFilterResponse code -- x[n] , h[n]
+
+  // iterate for output
+  // start with sum=0
+  // iterate for filter len
+  // check for input_indx must be within bounds
+  // load filter and input[indx]
+  // multiply them
+  // add this to sum
+  // update output with sum
+
+  // inside the forOp body --> create the operations & then close the body
+  //  OpBuilder::InsertionGuard guard(rewriter);
+
+  // start adding operations like a arith::constant = 100.0 to the body of
+  // forOp1
+  //  Inside the loop body:
+
+  // #set2 = affine_set<(d0, d1)[]: (d0 - 5 >= 0, d1- 5 >= 0 ) >
+  // affine.for %arg0 = 0 to 10 {
+  //     %N = len(output)
+  //   %4 =  affine.for %arg1 = 0 to 10 {
+  //         affine.if #set2(%arg0 , %arg1 )[%N] {
+  //             %1 = const 5
+  //             %2 = const 3
+  //             %3 = arith.mulf %1 , %2
+  //             affine.yield %3
+  //         }
+  //     }
+  //   affine.store %4, alloc[%arg0]
+  // }
+
+  // rewriter.create<AffineYieldOp>(loc, ValueRange{constant25});
+  // rewriter.setInsertionPointAfter(ifOp);
+  // rewriter.create<AffineStoreOp>(loc, ifOp.getResult(0) , alloc, iv);
+
+  // try to add the affine.If condition
+  // create affine.If ,
+  //  use integer set to represent the condition
+  // check the AffineArgs
+  //  affine.if operation contains two regions for the “then” and “else” clauses
+  // each region of affine.if must contain a single block with no args and
+  // terminated by affine.yield op
+  //  if affine.if defines no values --> no need for affine.yield
+
+  // affineIf.setConditional(set1, forOp1.getInductionVar());
+  // start then "block"
+  // "then" block
+
+  // rewriter.create<affine::AffineYieldOp>(loc, constant25);
+  // llvm::errs() << "LINE = " << __LINE__ << "\n";
+  // Back to parentOp -- ifOp stops here
+  // rewriter.setInsertionPointAfter(ifOp);
+
+  // llvm::errs() << "LINE = " << __LINE__ << "  xx\n";
+
+#endif
+  // Terminate the loop body with affine.yield.
+  // rewriter.create<affine::AffineYieldOp>(loc);
+
+  // Replace this operation with the generated alloc.
+  rewriter.replaceOp(op, alloc);
+}
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFT1DImgConjSymmOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.FFT1DImgConjSymmOp to affine loops that fill a newly allocated
+/// buffer with the imaginary part of the DFT of the input, evaluating only
+/// half of the bins and mirroring them with a sign flip.
+///
+/// With N the length of the result, the emitted IR computes:
+///   y[0]   = 0
+///   y[N/2] = 0                               (only when N is even)
+///   for k in [1, (N+1)/2):
+///     s = 0
+///     for n in [0, N):  s = s - x[n] * sin(2*pi*k*n / N)
+///     y[k]   = s
+///     y[N-k] = -s
+struct FFT1DImgConjSymmOpLowering : public ConversionPattern {
+  FFT1DImgConjSymmOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFT1DImgConjSymmOp::getOperationName(), 1, ctx) {
+  }
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type drives both the buffer allocation and the loop bounds.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Zero is both the initial accumulator value and the value of the bins
+    // that are known in closed form.
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t ubBy2 = (ub + 1) / 2;
+    int64_t step = 1;
+
+    // k = 0: sin(0) == 0 for every term, so the bin is exactly zero.
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img,
+                                   ValueRange{Indx0});
+
+    // k = N/2 (even N only): the loop below stops at (N+1)/2 exclusive, so
+    // this bin would otherwise never be written and the buffer would expose
+    // uninitialised memory. sin(pi*n) == 0 for every n, hence the bin is zero.
+    if (ub >= 2 && ub % 2 == 0) {
+      Value IndxNyquist = rewriter.create<arith::ConstantIndexOp>(loc, ub / 2);
+      rewriter.create<AffineStoreOp>(loc, constant0, alloc_img,
+                                     ValueRange{IndxNyquist});
+    }
+
+    // Outer loop over the output bins k in [1, (N+1)/2).
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb + 1, ubBy2, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Inner loop over the input samples n in [0, N); the running sum is
+    // carried as a loop iter_arg initialised to zero.
+    // NOTE(review): the input is indexed up to the *result* length, i.e. the
+    // input is assumed to have the same length as the result -- confirm.
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
+    auto ivX = forOpX.getInductionVar();
+    auto getIterArg = forOpX.getBody()->getArgument(1);
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Load x[n].
+    FFT1DImgConjSymmOpAdaptor fft1DImgConjSymmAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DImgConjSymmAdaptor.getInput(), ValueRange{ivX});
+
+    // Convert both induction variables index -> i32 -> f64.
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // angle = 2*pi * k * n / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // Keep N in double precision; a float round-trip would lose exactness for
+    // lengths above 2^24.
+    const double LengthOfInput = static_cast<double>(ub);
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Imaginary part = -Sum(x[n] * sin(angle)): subtract each term from the
+    // running sum instead of negating at the end.
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, getIterArg, xMulSin);
+
+    rewriter.create<AffineYieldOp>(loc, ValueRange{imgSum});
+    rewriter.setInsertionPointAfter(forOpX);
+
+    // Store the sum at y[k].
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_img,
+                                   ValueRange{ivY});
+
+    // Store -1 * sum at y[N-k].
+    AffineExpr ExprNminusK =
+        rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0);
+    AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK);
+    Value constMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value NegImgSum =
+        rewriter.create<arith::MulFOp>(loc, constMinus1, forOpX.getResult(0));
+
+    rewriter.create<AffineStoreOp>(loc, NegImgSum, alloc_img, mapNminusK,
+                                   ValueRange{ivY});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // The filled buffer replaces the original operation.
+    rewriter.replaceOp(op, alloc_img);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFT1DRealSymmOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.FFT1DRealSymmOp to affine loops that fill a newly allocated
+/// buffer with the real part of the DFT of the input, evaluating only the
+/// first half of the bins and mirroring them (y[N-k] = y[k]).
+///
+/// With N the length of the result, the emitted IR computes:
+///   for k in [0, N/2]:
+///     s = 0
+///     for n in [0, N):  s = s + x[n] * cos(2*pi*k*n / N)
+///     y[k]           = s
+///     y[(N-k) mod N] = s
+/// The modulo makes the mirror store land on y[0] for k == 0 (instead of the
+/// out-of-range y[N]) and on y[N/2] itself for the middle bin of an even N, so
+/// every bin of the result is written.
+struct FFT1DRealSymmOpLowering : public ConversionPattern {
+  FFT1DRealSymmOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFT1DRealSymmOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type drives both the buffer allocation and the loop bounds.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // The mirror index below uses "mod N", which needs a known, positive N.
+    // Bail out before any IR is created.
+    if (tensorType.getRank() != 1 || !tensorType.hasStaticShape() ||
+        tensorType.getShape()[0] < 1)
+      return rewriter.notifyMatchFailure(
+          op, "expected a non-empty, statically shaped 1-D result");
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Initial value of the running sum.
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    const int64_t lb = 0;
+    const int64_t ub = tensorType.getShape()[0];
+    // Exclusive upper bound so that k covers [0, N/2]; this includes the DC
+    // bin (k == 0, cos == 1, i.e. the plain sum of x[n]) and, for even N, the
+    // middle bin N/2. Previously y[0] was hard-wired to 0 and y[N/2] was never
+    // stored.
+    const int64_t ubHalf = ub / 2 + 1;
+    const int64_t step = 1;
+
+    // Outer loop over the output bins k.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ubHalf, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Inner loop over the input samples n in [0, N); the running sum is
+    // carried as a loop iter_arg initialised to zero.
+    // NOTE(review): the input is indexed up to the *result* length, i.e. the
+    // input is assumed to have the same length as the result -- confirm.
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{constant0});
+    auto ivX = forOpX.getInductionVar();
+    auto getIterArg = forOpX.getBody()->getArgument(1);
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Load x[n].
+    FFT1DRealSymmOpAdaptor fft1DRealSymmAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DRealSymmAdaptor.getInput(), ValueRange{ivX});
+
+    // Convert both induction variables index -> i32 -> f64.
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // angle = 2*pi * k * n / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // Keep N in double precision; a float round-trip would lose exactness for
+    // lengths above 2^24.
+    const double LengthOfInput = static_cast<double>(ub);
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+
+    // sum = sum + x[n] * cos(angle)
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, getIterArg, xMulCos);
+
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+    rewriter.setInsertionPointAfter(forOpX);
+
+    // Store the sum at y[k].
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_real,
+                                   ValueRange{ivY});
+
+    // Store the same sum at the mirrored bin y[(N-k) mod N].
+    AffineExpr ExprNminusK =
+        (rewriter.getAffineConstantExpr(ub) - rewriter.getAffineDimExpr(0)) %
+        rewriter.getAffineConstantExpr(ub);
+    AffineMap mapNminusK = AffineMap::get(1, 0, ExprNminusK);
+
+    rewriter.create<AffineStoreOp>(loc, forOpX.getResult(0), alloc_real,
+                                   mapNminusK, ValueRange{ivY});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // The filled buffer replaces the original operation.
+    rewriter.replaceOp(op, alloc_real);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterYSymmOptimizedOp operations
+//===----------------------------------------------------------------------===//
+/// Conversion pattern that lowers dsp.FIRFilterYSymmOptimizedOp into an affine
+/// loop nest writing into a newly allocated buffer.
+///
+/// With N the result length and M the length of the first operand x, the
+/// emitted loops compute, for n in [0, (N+1)/2):
+///   acc      = sum over k in [0, M) of x[k] * (inRange ? x[M-1+k-n] : 0)
+///   y[n]     = acc
+///   y[N-1-n] = acc
+/// where inRange means M-1+k-n >= 0 and n-k >= 0. Only the first operand is
+/// read; the second half of the output is filled by mirroring.
+struct FIRFilterYSymmOptimizedOpLowering : public ConversionPattern {
+  FIRFilterYSymmOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterYSymmOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto where = op->getLoc();
+
+    // Result buffer, sized from the op's (first) result type.
+    auto resultTy = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto outBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), where, rewriter);
+
+    // Static extents: N (output), the half-range actually iterated, M (input).
+    const int64_t first = 0;
+    const int64_t stride = 1;
+    const int64_t outLen = resultTy.getShape()[0];
+    const int halfLen = static_cast<int>((outLen + 1) / 2);
+    const int64_t inLen =
+        llvm::cast<RankedTensorType>(*op->operand_type_begin()).getShape()[0];
+
+    FIRFilterYSymmOptimizedOpAdaptor adaptor(operands);
+
+    // Every f64 zero in the emitted IR is built the same way.
+    auto emitZero = [&]() -> Value {
+      return rewriter.create<arith::ConstantOp>(where, rewriter.getF64Type(),
+                                                rewriter.getF64FloatAttr(0));
+    };
+
+    // ---- outer loop: n in [0, (N+1)/2) -----------------------------------
+    affine::AffineForOp outerLoop =
+        rewriter.create<affine::AffineForOp>(where, first, halfLen, stride);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto n = outerLoop.getInductionVar();
+
+    // ---- inner loop: k in [0, M), accumulator carried as iter_arg --------
+    Value accInit = emitZero();
+    affine::AffineForOp innerLoop = rewriter.create<affine::AffineForOp>(
+        where, first, inLen, stride, ValueRange{accInit});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto k = innerLoop.getInductionVar();
+    auto acc = innerLoop.getBody()->getArgument(1);
+
+    // Index of the reversed sample, M-1+k-n, expressed over (d0=n, d1=k).
+    // It doubles as the ">= 0" lower-bound constraint; the upper bound
+    // M-1+k-n <= M-1 reduces to n-k >= 0.
+    AffineExpr reversedIdx = rewriter.getAffineConstantExpr(inLen - 1) +
+                             rewriter.getAffineDimExpr(1) -
+                             rewriter.getAffineDimExpr(0);
+    AffineExpr nMinusK =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineDimExpr(1);
+    IntegerSet inRange =
+        IntegerSet::get(2, 0, {reversedIdx, nMinusK}, {false, false});
+
+    // affine.if yielding either x[M-1+k-n] or 0.0.
+    Type f64Ty = rewriter.getF64Type();
+    auto guard = rewriter.create<affine::AffineIfOp>(
+        where, TypeRange{f64Ty}, inRange, ValueRange{n, k}, true /*else*/);
+
+    // then: load the reversed sample.
+    rewriter.setInsertionPointToStart(guard.getThenBlock());
+    AffineMap reversedMap = AffineMap::get(2, 0, reversedIdx);
+    Value reversedSample = rewriter.create<AffineLoadOp>(
+        where, adaptor.getLhs(), reversedMap, ValueRange{n, k});
+    rewriter.create<AffineYieldOp>(where, ValueRange{reversedSample});
+
+    // else: contribute nothing.
+    rewriter.setInsertionPointToStart(guard.getElseBlock());
+    Value outOfRange = emitZero();
+    rewriter.create<AffineYieldOp>(where, ValueRange{outOfRange});
+
+    // Back in the inner loop body: acc' = x[k] * guarded + acc.
+    rewriter.setInsertionPointAfter(guard);
+    Value sample =
+        rewriter.create<AffineLoadOp>(where, adaptor.getLhs(), ValueRange{k});
+    Value product =
+        rewriter.create<arith::MulFOp>(where, sample, guard.getResult(0));
+    Value accNext = rewriter.create<arith::AddFOp>(where, product, acc);
+    rewriter.create<AffineYieldOp>(where, ValueRange{accNext});
+
+    // ---- after the inner loop: write y[n] and its mirror y[N-1-n] --------
+    rewriter.setInsertionPointAfter(innerLoop);
+    Value total = innerLoop.getResult(0);
+    rewriter.create<AffineStoreOp>(where, total, outBuf, ValueRange{n});
+
+    AffineMap mirrorMap = AffineMap::get(
+        1, 0,
+        rewriter.getAffineConstantExpr(outLen - 1) -
+            rewriter.getAffineDimExpr(0));
+    rewriter.create<AffineStoreOp>(where, total, outBuf, mirrorMap,
+                                   ValueRange{n});
+
+    // Leave the loop nest and hand the buffer over as the op's replacement.
+    rewriter.setInsertionPointAfter(outerLoop);
+    rewriter.replaceOp(op, outBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: PaddingOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp.PaddingOp to two affine loops over a newly allocated buffer:
+///   for i in [0, N):            y[i] = x[i]
+///   for i in [N, N + padLen):   y[i] = padValue
+/// where N is the length of the first operand, padLen is the first element of
+/// the dsp.ConstantOp that defines the third operand (truncated to an
+/// integer), and padValue is loaded from the rank-0 pad-value operand.
+///
+/// The pattern fails to match when the third operand is not produced by a
+/// dsp.ConstantOp, when that constant is empty, or when the first operand is
+/// not a 1-D ranked tensor. All of these checks run before any IR is created.
+struct PaddingOpLowering : public ConversionPattern {
+  PaddingOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::PaddingOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // --- Validation (no IR is mutated until all of it has passed) ---------
+
+    // The pad length must be a compile-time constant; it is read from the
+    // original (unconverted) operand so that its defining op is still visible.
+    Value GetPadLenOperand = op->getOperand(2);
+    dsp::ConstantOp constantOp3rdArg =
+        GetPadLenOperand.getDefiningOp<dsp::ConstantOp>();
+
+    if (!constantOp3rdArg) {
+      llvm::errs() << "Fail:padding op 3rd operand is not constant\n";
+      return failure();
+    }
+    DenseElementsAttr constant3rdValue = constantOp3rdArg.getValue();
+    if (constant3rdValue.getNumElements() == 0)
+      return rewriter.notifyMatchFailure(op,
+                                         "padding length constant is empty");
+    auto elements1 = constant3rdValue.getValues<FloatAttr>();
+    // Keep the value in double precision; narrowing to float would corrupt
+    // lengths above 2^24 before the integer conversion below.
+    const double Padlen = elements1[0].getValueAsDouble();
+
+    // The copy loop is bounded by the length of the input, which therefore
+    // has to be a ranked 1-D tensor.
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType || inputType.getRank() != 1)
+      return rewriter.notifyMatchFailure(
+          op, "expected a 1-D ranked tensor as first operand");
+
+    // --- Lowering ----------------------------------------------------------
+
+    // Allocation & deallocation for the result of this operation.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    PaddingOpAdaptor paddingOpAdaptor(operands);
+
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+
+    // Loop 1: copy the input, 0 <= i < N.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    Value InputX =
+        rewriter.create<AffineLoadOp>(loc, paddingOpAdaptor.getInput(), ivY);
+    rewriter.create<AffineStoreOp>(loc, InputX, alloc, ivY);
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Loop 2: fill the tail, N <= i < N + padLen.
+    // NOTE(review): the result buffer is assumed to hold at least N + padLen
+    // elements; nothing here checks that against the result type -- confirm.
+    int64_t lb2 = ub;
+    int64_t ub2 = ub + static_cast<int64_t>(Padlen);
+
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb2, ub2, step);
+    auto iv2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    // The pad value is loaded with no indices, i.e. from a rank-0 memref.
+    Value PaddingValue = rewriter.create<AffineLoadOp>(
+        loc, paddingOpAdaptor.getPadValue(), ValueRange{});
+    rewriter.create<AffineStoreOp>(loc, PaddingValue, alloc, iv2);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // The filled buffer replaces the original operation.
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: ReverseInputOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::ReverseInputOp to a single affine loop that writes the input
+/// back to front into a newly allocated result buffer.
+///
+/// With N the leading dimension of the result type, the emitted code is:
+///
+///   for i = 0 to N:
+///     output[i] = input[N - 1 - i]
+///
+/// The mirrored index is expressed as the affine map (d0) -> (N - 1 - d0), so
+/// the load stays an affine access.
+///
+/// NOTE(review): N is read from the result type, not from the input; this
+/// presumably relies on both having the same length - confirm.
+struct ReverseInputOpLowering : public ConversionPattern {
+  ReverseInputOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ReverseInputOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Result type of the op; its leading dimension is the loop trip count.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Converted operands; the input buffer is read from here.
+    ReverseInputOpAdaptor reverseInputOpAdaptor(operands);
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // for i = 0 to N
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // Mirrored index: (d0) -> (N - 1 - d0).
+    AffineExpr reverseIndxExpr =
+        rewriter.getAffineConstantExpr(ub - 1) - rewriter.getAffineDimExpr(0);
+    AffineMap reverseMap =
+        AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, reverseIndxExpr);
+
+    // Load input[N - 1 - i] ...
+    Value mirrored = rewriter.create<AffineLoadOp>(
+        loc, reverseInputOpAdaptor.getInput(), reverseMap, ValueRange{iv});
+
+    // ... and store it at output[i].
+    rewriter.create<AffineStoreOp>(loc, mirrored, alloc, iv);
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Sketch of the emitted loop for N = 5 (pseudo-IR):
+    //   affine.for %i = 0 to 5 {
+    //     %0 = affine.load %input[4 - %i]
+    //     affine.store %0, %alloc[%i]
+    //   }
+
+    // Redirect every use of the tensor result to the filled buffer.
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: LengthOp operations
+//===----------------------------------------------------------------------===//
+/// Lowers dsp::LengthOp by materializing the static length of operand 0 as an
+/// f64 constant and storing it at index 0 of a newly allocated result buffer.
+/// Only the type of operand 0 is inspected; its value is never loaded.
+///
+///   output[0] = len(input)
+///
+/// The match fails when operand 0 is not a ranked tensor of rank >= 1 with a
+/// static leading dimension, since the length cannot be read from its type.
+struct LengthOpLowering : public ConversionPattern {
+  LengthOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LengthOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Check the input before emitting IR. dyn_cast yields a null type for an
+    // unranked tensor, and a dynamic dimension is reported as a sentinel
+    // rather than a real length, so neither may reach getShape()[0].
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType || inputType.getRank() < 1 || inputType.isDynamicDim(0))
+      return rewriter.notifyMatchFailure(
+          op, "length input must be a ranked tensor with a static length");
+
+    // Allocation & deallocation for the result of this operation.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // The length is known statically, so it becomes an f64 constant.
+    int64_t ub = inputType.getShape()[0];
+    Value constantUb = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(ub));
+
+    // output[0] = N
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    rewriter.create<AffineStoreOp>(loc, constantUb, alloc,
+                                   ValueRange{constantIndx0});
+
+    // NOTE(review): the store uses exactly one index, so this assumes the
+    // result converts to a rank-1 memref (e.g. memref<1xf64>) - confirm.
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTRealOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::FFTRealOp to an iterative radix-2 Cooley-Tukey FFT built from
+/// scf.for loops and replaces the op with the buffer that holds the real part
+/// of the transform. The input is taken as the real part of the signal; the
+/// imaginary part is initialized to zero.
+///
+/// With N the leading dimension of the result type:
+///   1. for i in [0, N): (re, im)[bitReverse(i)] = (input[i], 0)
+///   2. for stage in [0, log2 N): half = 1 << stage, full = 2 * half
+///        for start in [0, N) step full, k in [0, half):
+///          t = exp(-2*pi*sqrt(-1)*k/full) * (re, im)[start + k + half]
+///          (re, im)[start + k + half] = (re, im)[start + k] - t
+///          (re, im)[start + k]        = (re, im)[start + k] + t
+///
+/// The match fails unless N is a positive power of two; for other lengths the
+/// butterfly indices can run past the end of the work buffers.
+struct FFTRealOpLowering : public ConversionPattern {
+  FFTRealOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTRealOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Reject unsupported lengths before emitting any IR. A dynamic dimension
+    // is reported as a negative sentinel, so `N <= 0` covers it as well.
+    if (tensorType.getRank() < 1)
+      return rewriter.notifyMatchFailure(op, "FFT result must have rank >= 1");
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(
+          op, "FFT length must be a positive power of two");
+
+    // log2(N), computed with integers so that the bit-reversal and butterfly
+    // loops use exactly the same number of stages.
+    int64_t stages = 0;
+    while ((int64_t{1} << stages) < N)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+
+    // Loop bounds and constants shared by the loop nests below.
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+    auto zeroI64 = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+    auto oneI64 = rewriter.create<arith::ConstantIntOp>(loc, 1, 64);
+    auto zeroF64 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    // Work buffers; the transform is computed in place in these two memrefs.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Step 1: bit-reversal permutation over the low `stages` bits of i.
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // Loop over the bits of i, carrying the reversed index as an i64.
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step,
+                                                 ValueRange{zeroI64});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // Is bit j of i set?
+    auto bitMask = rewriter.create<arith::ShLIOp>(loc, oneI64, jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask, zeroI64);
+
+    // Mirrored bit position: stages - 1 - j.
+    auto stagesMinusJ = rewriter.create<arith::SubIOp>(loc, stagesValue, j);
+    auto shiftAmount = rewriter.create<arith::SubIOp>(loc, stagesMinusJ, step);
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(loc, oneI64, shiftAmountI64);
+
+    // rev |= (bit j set) ? bitToSet : 0
+    auto selectedBit =
+        rewriter.create<arith::SelectOp>(loc, isNonZero, bitToSet, zeroI64);
+    auto updatedRevIndex =
+        rewriter.create<arith::OrIOp>(loc, carriedRevIndex, selectedBit);
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // re[rev] = input[i]; im[rev] = 0
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, zeroF64, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Step 2: butterfly stages.
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    // half_size = 1 << stage, full_size = half_size << 1
+    auto half_size = rewriter.create<arith::ShLIOp>(loc, step, stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(loc, half_size, step);
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Calculate indices for even and odd elements
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor: angle = -2 * pi * k / full_size
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Multiply by twiddle factor (complex multiply)
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly operation
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // Replace the op with the real part of the transform.
+    rewriter.replaceOp(op, alloc_reversed_real);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTImagOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::FFTImagOp to an iterative radix-2 Cooley-Tukey FFT built from
+/// scf.for loops and replaces the op with the buffer that holds the imaginary
+/// part of the transform. The input is taken as the real part of the signal;
+/// the imaginary part is initialized to zero.
+///
+/// With N the leading dimension of the result type:
+///   1. for i in [0, N): (re, im)[bitReverse(i)] = (input[i], 0)
+///   2. for stage in [0, log2 N): half = 1 << stage, full = 2 * half
+///        for start in [0, N) step full, k in [0, half):
+///          t = exp(-2*pi*sqrt(-1)*k/full) * (re, im)[start + k + half]
+///          (re, im)[start + k + half] = (re, im)[start + k] - t
+///          (re, im)[start + k]        = (re, im)[start + k] + t
+///
+/// The match fails unless N is a positive power of two; for other lengths the
+/// butterfly indices can run past the end of the work buffers.
+struct FFTImagOpLowering : public ConversionPattern {
+  FFTImagOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTImagOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Reject unsupported lengths before emitting any IR. A dynamic dimension
+    // is reported as a negative sentinel, so `N <= 0` covers it as well.
+    if (tensorType.getRank() < 1)
+      return rewriter.notifyMatchFailure(op, "FFT result must have rank >= 1");
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(
+          op, "FFT length must be a positive power of two");
+
+    // log2(N), computed with integers so that the bit-reversal and butterfly
+    // loops use exactly the same number of stages.
+    int64_t stages = 0;
+    while ((int64_t{1} << stages) < N)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+    // NOTE(review): reuses FFTRealOpAdaptor - confirm operand layout matches.
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+
+    // Loop bounds and constants shared by the loop nests below.
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+    auto zeroI64 = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+    auto oneI64 = rewriter.create<arith::ConstantIntOp>(loc, 1, 64);
+    auto zeroF64 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    // Work buffers; the transform is computed in place in these two memrefs.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    // Step 1: bit-reversal permutation over the low `stages` bits of i.
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // Loop over the bits of i, carrying the reversed index as an i64.
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step,
+                                                 ValueRange{zeroI64});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // Is bit j of i set?
+    auto bitMask = rewriter.create<arith::ShLIOp>(loc, oneI64, jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask, zeroI64);
+
+    // Mirrored bit position: stages - 1 - j.
+    auto stagesMinusJ = rewriter.create<arith::SubIOp>(loc, stagesValue, j);
+    auto shiftAmount = rewriter.create<arith::SubIOp>(loc, stagesMinusJ, step);
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(loc, oneI64, shiftAmountI64);
+
+    // rev |= (bit j set) ? bitToSet : 0
+    auto selectedBit =
+        rewriter.create<arith::SelectOp>(loc, isNonZero, bitToSet, zeroI64);
+    auto updatedRevIndex =
+        rewriter.create<arith::OrIOp>(loc, carriedRevIndex, selectedBit);
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // re[rev] = input[i]; im[rev] = 0
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, zeroF64, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    // Step 2: butterfly stages.
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    // half_size = 1 << stage, full_size = half_size << 1
+    auto half_size = rewriter.create<arith::ShLIOp>(loc, step, stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(loc, half_size, step);
+
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // Calculate indices for even and odd elements
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor: angle = -2 * pi * k / full_size
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load odd value
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // Multiply by twiddle factor (complex multiply)
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load even value
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+    // Butterfly operation
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Store results
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // Replace the op with the imaginary part of the transform.
+    rewriter.replaceOp(op, alloc_reversed_imag);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterResSymmOptimizedOp operations
+//===----------------------------------------------------------------------===//
+struct FIRFilterResSymmOptimizedOpLowering : public ConversionPattern {
+  FIRFilterResSymmOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterResSymmOptimizedOp::getOperationName(),
+                          1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterResSymmOptimizedOp has 2 operands -- both of type tensor f64
+
+    // Get the location of FIRFilterResSymmOptimizedOp
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    // Pseudo-Code
+    // y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0
+    // to L-1/2
+    //  N = lenY , M = lenX ,  L = lenH
+    // for n=0 to N
+    //  sum = 0, temp =0
+    //  for k = 0 to L-1/2
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    // middle-one
+    //  if 0 <= n - (L-1)/2 < M
+    //  sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)]
+    // y[n] = sum2
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+    // DEBUG_PRINT_NO_ARGS();
+    affine::AffineForOp forOp1 =
+        rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    auto iv = forOp1.getInductionVar();
+
+    // for n=0 to N
+    //  sum = 0, temp =0
+    // get filter len
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+    auto operandIt = op->operand_type_begin();
+    auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForInput = tensorTypeInput.getShape()[0];
+    // get second operand
+    operandIt = operandIt + 1;
+
+    // auto tensorTypeFilter =
+    // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+    auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForFilter = tensorTypeFilter.getShape()[0];
+    // DEBUG_PRINT_NO_ARGS();
+    // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
+    // create a constant for sum
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+        loc, lb, ubForFilter / 2, step, ValueRange{constant0});
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    auto iv2 = forOp2.getInductionVar();
+
+    auto getIterArg =
+        forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
+    // DEBUG_PRINT_NO_ARGS();
+    FIRFilterResSymmOptimizedOpAdaptor firFilterResSymmOpAdaptor(operands);
+
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // For n-k
+    // if 0 <= n-k < M or, 0 <= n-k <= M -1
+    AffineExpr d0, d1, s0, s1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprNMinusK = d0 - d1;
+    AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK);
+    // n-k <= M -1 or, n-k-(M-1) <= 0
+    bindSymbols(rewriter.getContext(), s0, s1);
+    Value constantMMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1;
+    IntegerSet setForIf = IntegerSet::get(
+        2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false});
+    // DEBUG_PRINT_NO_ARGS();
+
+    // if 0 <= n-k <= M -1
+    // use typeRange too:
+    Type floatType = rewriter.getF64Type();
+    //  if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf,
+        ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    // val1 = x[n-k] else, val1 = 0
+    // load x[n-k]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInput =
+        rewriter.create<AffineLoadOp>(loc, firFilterResSymmOpAdaptor.getLhs(),
+                                      mapNMinusK, ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+    Value const0ForElse = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
+    rewriter.setInsertionPointAfter(ifOp);
+
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // val2 lower bound
+    //  AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    //  AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    // Val2 LowerBound: n+k - (L-1) >= 0
+    AffineExpr ExprLowerBoundVal2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) -
+        rewriter.getAffineConstantExpr(ubForFilter - 1);
+    // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie,
+    // (M+L-2) - k -n >= 0
+    //  AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 +
+    //  M -1
+    AffineExpr ExprUpperBoundVal2 =
+        rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) -
+        rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0);
+    // s0 = L -1
+    //  Value s0LMin1Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForFilter - 1); s1 = M + L -2 for val2 upperBound Value
+    //  s1MPlusLPlus2Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForInput + ubForFilter - 2); Value s1MMin1Indx =
+    //  rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    IntegerSet setForIf2 = IntegerSet::get(
+        2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false});
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2},
+        true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2);
+    // load x[n+k-(L-1)]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal2 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getLhs(), addMap2, ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal2});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp2.getElseBlock());
+    Value const0ForElse2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse2});
+    rewriter.setInsertionPointAfter(ifOp2);
+
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    Value Val1Plus2 = rewriter.create<arith::AddFOp>(loc, ifOp.getResult(0),
+                                                     ifOp2.getResult(0));
+
+    // load filter and then mult and then sum
+    Value loadFilter = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getRhs(), iv2);
+
+    Value filterMulInput =
+        rewriter.create<arith::MulFOp>(loc, Val1Plus2, loadFilter);
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+    // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+    rewriter.setInsertionPointAfter(forOp2);
+    // DEBUG_PRINT_NO_ARGS();
+    // Middle - point
+    // if 0 <= n - (L-1)/2 < M
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    // y[n] = sum2
+
+    // if 0 <= n - (L-1)/2 < M
+    // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2
+    // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2
+    int64_t midFilterLen = (ubForFilter - 1) / 2;
+    AffineExpr ExprLowerBoundVal3 =
+        rewriter.getAffineDimExpr(0) -
+        rewriter.getAffineConstantExpr(midFilterLen);
+    // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n
+    AffineExpr ExprUpperBoundVal3 =
+        rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) -
+        rewriter.getAffineDimExpr(0);
+
+    AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3);
+
+    IntegerSet setForIf3 = IntegerSet::get(
+        1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false});
+
+    auto ifOp3 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp3.getThenBlock());
+
+    // val3 = x[n-(L-1)/2)] else, val3 = 0
+    // load x[n-(L-1)/2)]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal3 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getLhs(), addMap3, ValueRange{iv});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal3});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp3.getElseBlock());
+    Value const0ForElse3 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse3});
+    rewriter.setInsertionPointAfter(ifOp3);
+
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    //  y[n] = sum2
+    // load filter and then mult and then sum
+    Value midFilterLenIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, midFilterLen);
+
+    Value loadFilterMid = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmOpAdaptor.getRhs(), midFilterLenIndx);
+    Value filterMulInput2 =
+        rewriter.create<arith::MulFOp>(loc, ifOp3.getResult(0), loadFilterMid);
+    Value sum2 = rewriter.create<arith::AddFOp>(loc, filterMulInput2,
+                                                forOp2.getResult(0));
+    // rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
+    rewriter.create<AffineStoreOp>(loc, sum2, alloc, iv);
+    rewriter.setInsertionPointAfter(forOp1);
+    // DEBUG_PRINT_NO_ARGS();
+    // ifOp->dump();
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: RunLenEncodingOp operations
+// (run-length encoding: run values first, run counts in the second half)
+//===----------------------------------------------------------------------===//
+
+#define TryWhileLoop 0
+#define TryLoadStoreForWhile 0
+#define TryPassIterIndex 0 // Not working; like every Try* set to 0, compiled out
+#define TryScf 0
+#define TryRLE 1
+struct RunLenEncodingOpLowering : public ConversionPattern {
+  RunLenEncodingOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::RunLenEncodingOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Run-length encoding. Reads x[0 .. N/2-1], writes y of length N:
+    //   y[k]       = first value of run k            (0 <= k < N/2)
+    //   y[k + N/2] = length of run k; unused slots keep the initial 0
+
+    // steps:
+    //  count = 1 , y[0] = x[0] , k = 0
+    //  for i = 1 to N/2 - 1
+    //    load prev = x[i-1] , current = x[i]
+    //    if prev == current
+    //      count = count + 1
+    //    else
+    //      y[k + N/2] = count
+    //      k = k + 1
+    //      y[k] = current
+    //      count = 1
+    //  after the loop the count of the last run is stored unconditionally:
+    //    y[k + N/2] = count
+
+    // The code below only emits IR; the order of rewriter calls is the IR order.
+
+    // result tensor type (length N) and a 1-element index tensor type for k
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto tensorType1 = RankedTensorType::get({1}, rewriter.getIndexType());
+
+    // alloc: result buffer y; allocK: single index cell that holds k
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto memRefType2 = convertTensorToMemRef(tensorType1);
+
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto allocK = insertAllocAndDealloc(memRefType2, loc, rewriter);
+
+    // operands are accessed by name through the adaptor (getInput() is x)
+    // count = 1 , y[0] = x[0] , k = 0 are materialised further below
+    RunLenEncodingOpAdaptor runLenEncodingAdaptor(operands);
+    // DEBUG_PRINT_NO_ARGS();
+
+    //  encoding loop bounds: i in [lb, ub) = [1, N/2); k starts at 0
+    int64_t lb = 1;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = N / 2; // output len is twice the input len
+    int64_t step = 1;
+    int64_t k = 0;
+    int64_t lb1 = 0;
+
+    Value const0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // init all output memory with zero
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb1, N, step);
+    // DEBUG_PRINT_NO_ARGS();
+    auto iv1 = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, const0, alloc, iv1);
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // DEBUG_PRINT_NO_ARGS();
+    // y[0] = x[0]: the first input element always opens run 0
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value inputX0 = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, inputX0, alloc,
+                                   ValueRange{constantIndx0});
+
+#if TryRLE
+
+    // count is carried as an SSA iter_arg (init 1); k lives in allocK (init 0)
+    // for i=1 to len/2
+    // load prev = a[i-1] , current = a[i]
+    // if prev == current
+    // count = count + 1
+    // else
+    // store count at index k + N/2
+    // y[k + N/2] = count
+    // k = k +1
+    // y[k] = current
+    // count = 1
+    // for last element
+    //  store the count value at k + N/2
+    // y[k + N/2] = count
+    Value countVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    Value IndxNBy2 = rewriter.create<arith::ConstantIndexOp>(loc, ub);
+    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, k);
+    rewriter.create<AffineStoreOp>(loc, kVal, allocK, ValueRange{Indx0});
+
+    Type floatType = rewriter.getF64Type();
+    // Type indexType = rewriter.getIndexType();
+    // for i = 1 to N/2 - 1, with the running count as the loop's iter_arg
+    // load prev = a[i-1] , current = a[i]
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{countVal});
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    // countArg: length of the current run as of the previous iteration
+
+    auto countArg = forOpY.getRegionIterArgs()[0];
+
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+    // prev = x[i - 1], addressed through the affine map (d0) -> (d0 - 1)
+    AffineExpr d0;
+    bindDims(rewriter.getContext(), d0);
+    AffineExpr ExprIMinus1 = d0 - rewriter.getAffineConstantExpr(1);
+    AffineMap mapExprIMinus1 = AffineMap::get(1, 0, ExprIMinus1);
+    Value prev = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), mapExprIMinus1, ValueRange{ivY});
+    // DEBUG_PRINT_NO_ARGS();
+    // for i=1 to len/2
+    // load prev = a[i-1] , current = a[i]
+    // if prev == current
+    // count = count + 1
+    // else
+    // store count at index k + N/2
+    // y[k + N/2] = count
+    // k = k +1
+    // y[k] = current
+    // count = 1
+    // for last element
+    //  store the count value at k + N/2
+    // y[k + N/2] = count
+    // TypeRange typeRange = TypeRange{rewriter.getF64Type() ,
+    // rewriter.getIndexType()}; TypeRange typeRange =
+    // TypeRange({rewriter.getF64Type(), rewriter.getIndexType()});
+
+    // auto ifOp = rewriter.create<scf::IfOp>(loc,
+    // TypeRange{rewriter.getF64Type(), rewriter.getIndexType()},
+    // rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, prev,
+    // current), true, true);
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, prev, current);
+
+    // create if block with else condition
+    //  if prev == current
+    //  count = count + 1
+    // auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType ,
+    // indexType}, CmpPrevCurrent , true /* else=1 */);
+    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType},
+                                           CmpPrevCurrent, true /* else=1 */);
+
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    // then-branch (prev == current): the run continues, yield count + 1
+
+    auto CountPlusOne = rewriter.create<arith::AddFOp>(loc, countArg, countVal);
+    // DEBUG_PRINT_NO_ARGS();
+    rewriter.create<scf::YieldOp>(loc, ValueRange{CountPlusOne});
+    // else
+    // store count at index k + N/2
+    // y[k + N/2] = count
+    // k = k +1
+    // y[k] = current
+    // count = 1
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    // out[k + N/2] = count, with k read back from the allocK cell
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0});
+
+    Value kPlusNBy2 = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, IndxNBy2);
+    rewriter.create<memref::StoreOp>(loc, countArg, alloc, kPlusNBy2);
+    // k = k+1 (written back to allocK so later iterations see it)
+    Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, Indx1);
+    rewriter.create<AffineStoreOp>(loc, kPlusOne, allocK, ValueRange{Indx0});
+
+    // y[k + 1] = current (first value of the new run)
+    rewriter.create<memref::StoreOp>(loc, current, alloc, kPlusOne);
+
+    // a new run starts here, so the count yielded to the loop is reset to 1
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countVal});
+    rewriter.setInsertionPointAfter(ifOp);
+    // ifOp.dump();
+    Value countRes = ifOp.getResult(0);
+
+    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes});
+    rewriter.setInsertionPointAfter(forOpY);
+    // forOpY->dump();
+
+    // close the last run: its count is stored unconditionally at y[k + N/2]
+    Value finalCountArg = forOpY.getResult(0);
+    Value finalkArg =
+        rewriter.create<AffineLoadOp>(loc, allocK, ValueRange{Indx0});
+
+    // //if count>1 ,then store count at index k + N/2
+    // auto ifOp1 = rewriter.create<scf::IfOp>(loc, CmpCountGt1 , false /*
+    // else=0 */);
+    // rewriter.setInsertionPointToStart(ifOp1.thenBlock());
+    // DEBUG_PRINT_NO_ARGS();
+    Value finalkPlusNBy2 = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), finalkArg, IndxNBy2);
+
+    rewriter.create<memref::StoreOp>(loc, finalCountArg, alloc, finalkPlusNBy2);
+    // DEBUG_PRINT_NO_ARGS();
+    // rewriter.setInsertionPointAfter(ifOp1);
+#endif
+
+#if TryPassIterIndex
+    // disabled variant (macro is 0); uses alloc2, not declared in this function
+    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub - 1);
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    auto kValStore =
+        rewriter.create<AffineStoreOp>(loc, kVal, alloc2, ValueRange{Indx0});
+
+    Type floatType = rewriter.getF64Type();
+    Type indexType = rewriter.getIndexType();
+    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(
+        loc, lb, ub, step, ValueRange{inputX0, kVal});
+    // affine::AffineForOp forOpY = rewriter.create<AffineForOp>(loc, lb, ub,
+    // step, ValueRange{countVal, kVal});
+
+    auto ivY = forOpY.getInductionVar();
+    auto prev = forOpY.getRegionIterArgs()[0];
+    auto kArg = forOpY.getRegionIterArgs()[1];
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0});
+    Value const1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value currentPlus1 = rewriter.create<arith::AddFOp>(loc, prev, const1);
+
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, current, const1);
+
+    // create if block with else condition
+    //  if prev == current, count++
+    auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType},
+                                           CmpPrevCurrent, true /* else=1 */);
+    // auto ifOp = rewriter.create<scf::IfOp>(loc,  CmpPrevCurrent , true /*
+    // else=1 */);
+
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    // DEBUG_PRINT_NO_ARGS();
+
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
+    Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), kArg, Indx1);
+
+    rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kArg});
+    // rewriter.create<AffineStoreOp>(loc, current, alloc,
+    // ValueRange{kPlusOne});
+    rewriter.create<memref::StoreOp>(loc, current, alloc, ValueRange{kPlusOne});
+    rewriter.create<AffineStoreOp>(loc, kPlusOne, alloc2, ValueRange{Indx0});
+    rewriter.create<scf::YieldOp>(loc, ValueRange{currentPlus1});
+
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    rewriter.create<AffineStoreOp>(loc, currentPlus1, alloc, ValueRange{ivY});
+    // yield the values
+    //  rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
+    rewriter.create<scf::YieldOp>(loc, ValueRange{currentPlus1});
+
+    rewriter.setInsertionPointAfter(ifOp);
+    Value countRes = ifOp.getResult(0);
+    // Value kRes = ifOp.getResult(1);
+    // rewriter.create<AffineYieldOp>(loc, ValueRange{countRes,kRes });
+    rewriter.create<AffineYieldOp>(loc, ValueRange{countRes, Indx00});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+#endif
+
+#if TryWhileLoop
+
+    auto kVal = rewriter.create<arith::ConstantIndexOp>(loc, k);
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{kVal});
+    auto ivY = forOpY.getInductionVar();
+    // auto countArg = forOpY.getRegionIterArgs()[0];
+    auto kArg = forOpY.getRegionIterArgs()[0];
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
+    Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), kArg, Indx1);
+    // Value constInt1 =
+    // rewriter.create<arith::ConstantIntOp>(loc,rewriter.getI64IntegerAttr(1),
+    // rewriter.getI64Type() );
+
+    // Value kPlusOneIndex = rewriter.create<arith::IndexCastOp>(loc,
+    // rewriter.getIndexType(), kPlusOne);
+
+    // kPlusOne.dump();
+    // Value kArg1 = rewriter.create<arith::IndexCastUIOp>(loc,
+    // rewriter.getIndexType(), kArg);
+
+    // rewriter.create<AffineStoreOp>(loc, countPlus1, alloc, mapExprNPlusI,
+    // ValueRange{kPlusOne}); rewriter.create<AffineStoreOp>(loc, countPlus1,
+    // alloc, ValueRange{kArg}); Store the result
+    // rewriter.create<AffineStoreOp>(loc, current, alloc, ivY); //working
+    rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{kArg});
+    // yield the values
+    rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne});
+    // rewriter.create<AffineYieldOp>(loc, ValueRange{countPlus1 , kPlusOne});
+    rewriter.setInsertionPointAfter(forOpY);
+
+#endif
+
+#if TryLoadStoreForWhile
+    // disabled variant (macro is 0); uses alloc2, not declared in this function
+    Value kVal = rewriter.create<arith::ConstantIndexOp>(loc, ub - 1);
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    auto kValStore =
+        rewriter.create<AffineStoreOp>(loc, kVal, alloc2, ValueRange{Indx0});
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{inputX0});
+    auto ivY = forOpY.getInductionVar();
+    auto prev = forOpY.getRegionIterArgs()[0];
+    // auto kArg = forOpY.getRegionIterArgs()[0];
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    Value Indx00 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value current = rewriter.create<AffineLoadOp>(
+        loc, runLenEncodingAdaptor.getInput(), ivY);
+    Value loadKVal =
+        rewriter.create<AffineLoadOp>(loc, alloc2, ValueRange{Indx0});
+    Value const1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value currentPlus1 = rewriter.create<arith::AddFOp>(loc, prev, const1);
+
+    auto CmpPrevCurrent = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, current, const1);
+
+    // create if block with else condition
+    //  if prev == current, count++
+    //  auto ifOp = rewriter.create<scf::IfOp>(loc, TypeRange{floatType ,
+    //  indexType}, CmpPrevCurrent , true /* else=1 */);
+    auto ifOp =
+        rewriter.create<scf::IfOp>(loc, CmpPrevCurrent, true /* else=1 */);
+
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    // DEBUG_PRINT_NO_ARGS();
+
+    // store count at N+i
+    //  Value countPlus1 = rewriter.create<arith::AddFOp>(loc, countArg,
+    //  countVal);
+    Value Indx1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value kPlusOne = rewriter.create<arith::AddIOp>(
+        loc, rewriter.getIndexType(), loadKVal, Indx1);
+
+    rewriter.create<AffineStoreOp>(loc, current, alloc, ValueRange{ivY});
+    // rewriter.create<AffineStoreOp>(loc, current, alloc,
+    // ValueRange{kPlusOne});
+    rewriter.create<memref::StoreOp>(loc, current, alloc, ValueRange{kPlusOne});
+    rewriter.create<AffineStoreOp>(loc, kPlusOne, alloc2, ValueRange{Indx0});
+
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    rewriter.create<AffineStoreOp>(loc, currentPlus1, alloc, ValueRange{ivY});
+    // yield the values
+    //  rewriter.create<AffineYieldOp>(loc, ValueRange{kPlusOne });
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{current});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+#endif
+
+    // debug
+    //  forOpY->dump();
+    //  affine.store %cst, %alloc_10[] : memref<f64>
+    //  %0 = affine.load %alloc_11[4] : memref<10xf64>
+    //  affine.store %0, %alloc[0] : memref<1xf64>
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: lmsFilterResponse operations
+//===----------------------------------------------------------------------===//
+
+struct LMSFilterResponseOpLowering : public ConversionPattern {
+  LMSFilterResponseOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMSFilterResponseOp::getOperationName(), 1,
+                          ctx) {}
+
+  // Lowers dsp::LMSFilterResponseOp to nested affine loops that run an LMS
+  // adaptive filter, then replaces the op with the output buffer y.
+  //
+  // Emitted computation (x = lhs operand, d = rhs operand, MU = mu operand,
+  // FILTER_LENGTH = constant read from operand 3):
+  //   for (int n = 0; n < NUM_SAMPLES; n++) {
+  //     w[n] = 0;  y[n] = 0;
+  //     for (int i = 0; i < FILTER_LENGTH; i++)
+  //       if (n - i >= 0)  y[n] = y[n] + (w[i] * x[n - i]);
+  //     e = d[n] - y[n];
+  //     for (int i = 0; i < FILTER_LENGTH; i++)
+  //       if (n - i >= 0)  w[i] += MU * e * x[n - i];
+  //   }
+  //
+  // The weights w live in a scratch buffer of NUM_SAMPLES elements. w[n] is
+  // zeroed at the top of iteration n; the inner loops only touch w[i] with
+  // i <= n, so every weight is initialised before its first use.
+  //
+  // Parameters:
+  //   op       - the op being lowered; its location, first result type and
+  //              operand 3 are read directly.
+  //   operands - operand values supplied by the conversion driver; lhs, rhs
+  //              and mu are read from them with affine loads.
+  //   rewriter - receives every created op and the final replaceOp.
+  //
+  // Returns failure, before creating any IR, when operand 3 is not produced
+  // by a non-empty dsp::ConstantOp; otherwise returns success().
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The filter length becomes the constant upper bound of both inner loops,
+    // so operand 3 must be produced by a dsp::ConstantOp. It is read from the
+    // op's own operand list and validated before any IR is created.
+    auto filterLenConst = op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenConst)
+      return rewriter.notifyMatchFailure(
+          op, "filter length (operand 3) must be defined by dsp::ConstantOp");
+    DenseElementsAttr filterLenAttr = filterLenConst.getValue();
+    if (filterLenAttr.empty())
+      return rewriter.notifyMatchFailure(op,
+                                         "filter length constant is empty");
+
+    // Convert double -> int64_t directly: a detour through `float` would
+    // lose precision for lengths above 2^24.
+    auto filterLenValues = filterLenAttr.getValues<FloatAttr>();
+    const auto filterLength =
+        static_cast<int64_t>(filterLenValues[0].getValueAsDouble());
+
+    // Output buffer y, typed from the op's first result type.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // NOTE(review): this uses LMSFilterOpAdaptor rather than an adaptor for
+    // LMSFilterResponseOp; presumably both ops share the operand layout
+    // (lhs, rhs, mu, filter length) -- confirm against the op definitions.
+    LMSFilterOpAdaptor lmsFilterAdaptor(operands);
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    // mu is read once, ahead of the loop nest, with an index-less load.
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+
+    // Bounds: [0, numSamples) outside, [0, filterLength) inside, unit step.
+    int64_t lb = 0;
+    int64_t numSamples = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Scratch buffer for the weights w. It goes through the same
+    // insertAllocAndDealloc helper as the output buffer; a bare memref.alloc
+    // here would have no matching dealloc anywhere in this lowering.
+    auto wMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto wAlloc = insertAllocAndDealloc(wMemRefType, loc, rewriter);
+
+    // Outer loop over the samples n.
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // (d0, d1) -> d0 - d1 is used twice: as the index map for x[n - i] and
+    // as the integer-set inequality n - i >= 0 that guards each access.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // w[n] = 0;  y[n] = 0;
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+
+    // First inner loop: y[n] += w[i] * x[n - i] for every i with n - i >= 0.
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv2});
+
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+    Value ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // e[n] = d[n] - y[n]
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    // Second inner loop: w[i] += MU * e[n] * x[n - i] under the same guard.
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+    // Weight update term: mu * e[n] * x[n - i].
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // New w[i] = previous w[i] + update term.
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    // Leave the loop nest before replacing the op with the output buffer.
+    rewriter.setInsertionPointAfter(forOp1);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Quantization operations
+//===----------------------------------------------------------------------===//
+
+struct QuantizationOpLowering : public ConversionPattern {
+  QuantizationOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QuantizationOp::getOperationName(), 1, ctx) {}
+
+  // Lowers dsp::QuantizationOp to one affine loop that fills a freshly
+  // allocated result buffer.
+  //
+  // Emitted computation, for every index i of the result:
+  //   stepSize = (max - min) / nlevels          (emitted once, before loop)
+  //   level    = fptosi((x[i] - min) / stepSize)            (f64 -> i64)
+  //   y[i]     = sitofp(level) * stepSize + min             (i64 -> f64)
+  //
+  // Parameters:
+  //   op       - the op being lowered; only its location and first result
+  //              type are read here.
+  //   operands - operand values supplied by the conversion driver, accessed
+  //              through QuantizationOpAdaptor (input, max, min, nlevels).
+  //   rewriter - receives every created op and the final replaceOp.
+  //
+  // Always returns success(); there is no match-failure path.
+  //
+  // insertAllocAndDealloc is defined elsewhere in this file; where it places
+  // the allocation relative to the ops created here is not visible locally.
+  //
+  // NOTE(review): only getShape()[0] of the result type is used, so the
+  // result is presumably rank-1 -- confirm against the op definition.
+  // NOTE(review): stepSize is computed at run time; nothing emitted here
+  // guards against max == min or nlevels == 0.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    const Location opLoc = op->getLoc();
+
+    //===--- Result buffer ------------------------------------------------===//
+
+    // The op's first result type determines the buffer type.
+    auto resultTensorTy =
+        llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultMemRefTy = convertTensorToMemRef(resultTensorTy);
+    auto resultBuf = insertAllocAndDealloc(resultMemRefTy, opLoc, rewriter);
+
+    //===--- Scalar parameters --------------------------------------------===//
+
+    // max, min and nlevels are each read with an index-less affine load;
+    // the loads are emitted in exactly that order.
+    QuantizationOpAdaptor adaptor(operands);
+
+    Value maxMemRef = adaptor.getMax();
+    Value maxVal =
+        rewriter.create<AffineLoadOp>(opLoc, maxMemRef, ValueRange{});
+
+    Value minMemRef = adaptor.getMin();
+    Value minVal =
+        rewriter.create<AffineLoadOp>(opLoc, minMemRef, ValueRange{});
+
+    Value levelsMemRef = adaptor.getNlevels();
+    Value numLevels =
+        rewriter.create<AffineLoadOp>(opLoc, levelsMemRef, ValueRange{});
+
+    //===--- Step size ----------------------------------------------------===//
+
+    // stepSize = (max - min) / nlevels
+    Value valueSpan = rewriter.create<arith::SubFOp>(opLoc, maxVal, minVal);
+    Value stepSize =
+        rewriter.create<arith::DivFOp>(opLoc, valueSpan, numLevels);
+
+    //===--- Element loop -------------------------------------------------===//
+
+    // One affine.for over [0, length) with unit stride. No iter_args are
+    // passed: every iteration only reads x[i] and writes y[i].
+    const int64_t firstIdx = 0;
+    const int64_t length = resultTensorTy.getShape()[0];
+    const int64_t stride = 1;
+
+    affine::AffineForOp elemLoop =
+        rewriter.create<AffineForOp>(opLoc, firstIdx, length, stride);
+
+    // idx is the loop induction variable i; the body is built by moving the
+    // insertion point to the start of the loop body.
+    auto idx = elemLoop.getInductionVar();
+    rewriter.setInsertionPointToStart(elemLoop.getBody());
+
+    // level = (x[i] - min) / stepSize
+    Value sample =
+        rewriter.create<AffineLoadOp>(opLoc, adaptor.getInput(), idx);
+    Value offsetFromMin =
+        rewriter.create<arith::SubFOp>(opLoc, sample, minVal);
+    Value level =
+        rewriter.create<arith::DivFOp>(opLoc, offsetFromMin, stepSize);
+
+    // Convert the level to i64 and back to f64 so that it becomes a whole
+    // number of steps. The rounding is whatever arith.fptosi performs (see
+    // the arith dialect documentation).
+    Value levelAsInt =
+        rewriter.create<arith::FPToSIOp>(opLoc, rewriter.getI64Type(), level);
+    Value levelAsFloat = rewriter.create<arith::SIToFPOp>(
+        opLoc, rewriter.getF64Type(), levelAsInt);
+
+    // y[i] = level * stepSize + min
+    Value scaled =
+        rewriter.create<arith::MulFOp>(opLoc, levelAsFloat, stepSize);
+    Value quantized = rewriter.create<arith::AddFOp>(opLoc, scaled, minVal);
+    rewriter.create<AffineStoreOp>(opLoc, quantized, resultBuf,
+                                   ValueRange{idx});
+
+    //===--- Finish -------------------------------------------------------===//
+
+    // Resume emission after the loop, then substitute the buffer for the op.
+    rewriter.setInsertionPointAfter(elemLoop);
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: lmsFilter operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::LMSFilterOp` to affine loops implementing an LMS adaptive filter.
+///
+/// As used below: `getLhs()` is the input x, `getRhs()` is the desired signal
+/// d, `getMu()` is a memref loaded without indices (the step size), operand 3
+/// is the filter length and operand 4 the number of passes over the samples.
+/// Operands 3 and 4 must be defined by `dsp::ConstantOp`; otherwise the match
+/// fails.
+///
+/// The op is replaced by a buffer of the result type whose first FILTER_LENGTH
+/// entries are the adapted weights w; the remaining entries are left at zero.
+struct LMSFilterOpLowering : public ConversionPattern {
+  LMSFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMSFilterOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Emitted loop nest (w is zeroed first):
+    //  for (int it = 0; it < ITERATIONS; it++) {
+    //    for (int n = 0; n < NUM_SAMPLES; n++) {
+    //      // Filter output y[n]
+    //      y[n] = 0;
+    //      for (int i = 0; i < FILTER_LENGTH; i++)
+    //        if (n - i >= 0) // affine.if
+    //          y[n] = y[n] + (w[i] * x[n - i]);
+    //
+    //      // Error e[n]
+    //      e[n] = d[n] - y[n];
+    //
+    //      // Weight update
+    //      for (int i = 0; i < FILTER_LENGTH; i++)
+    //        if (n - i >= 0)
+    //          w[i] += MU * e[n] * x[n - i];
+    //    }
+    //  }
+
+    // Reads element 0 of the dsp::ConstantOp defining `v`; fails when `v` is
+    // not produced by such an op or the constant has no elements.
+    auto readScalarConstant = [](Value v) -> FailureOr<double> {
+      auto constantOp = v.getDefiningOp<dsp::ConstantOp>();
+      if (!constantOp)
+        return failure();
+      DenseElementsAttr attr = constantOp.getValue();
+      if (attr.getNumElements() == 0)
+        return failure();
+      return attr.getValues<FloatAttr>()[0].getValueAsDouble();
+    };
+
+    // Validate the compile-time parameters before any IR is created.
+    FailureOr<double> filterLenVal = readScalarConstant(op->getOperand(3));
+    FailureOr<double> iterationsVal = readScalarConstant(op->getOperand(4));
+    if (failed(filterLenVal) || failed(iterationsVal))
+      return rewriter.notifyMatchFailure(
+          op, "filter length and iteration count must be dsp::ConstantOp");
+    // Written as !(v >= 0) so that NaN is rejected too; converting a negative
+    // or NaN floating-point value to an integer is undefined behaviour.
+    if (!(*filterLenVal >= 0.0) || !(*iterationsVal >= 0.0))
+      return rewriter.notifyMatchFailure(
+          op, "filter length and iteration count must be non-negative");
+    // Converted straight from double: a `float` temporary would round counts
+    // above 2^24.
+    auto filterLength = static_cast<int64_t>(*filterLenVal);
+    auto totalIterations = static_cast<int64_t>(*iterationsVal);
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    int64_t numSamples = tensorType.getShape()[0];
+    // w[i] is stored in the result buffer, so every tap index must fit in it.
+    if (filterLength > numSamples)
+      return rewriter.notifyMatchFailure(
+          op, "filter length exceeds the length of the result buffer");
+
+    // Result buffer (weights) and a scratch buffer for y. The scratch buffer
+    // goes through the same helper so it is managed like the result buffer
+    // instead of being a bare memref.alloc created here.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto yAlloc = insertAllocAndDealloc(yMemRefType, loc, rewriter);
+
+    LMSFilterOpAdaptor lmsFilterAdaptor(operands);
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+
+    int64_t lb = 0;
+    int64_t step = 1;
+
+    // Nothing below stores to w before the first load of w[i], so zero the
+    // result buffer up front instead of reading whatever the allocation holds.
+    affine::AffineForOp initLoop =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    rewriter.setInsertionPointToStart(initLoop.getBody());
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc,
+                                   ValueRange{initLoop.getInductionVar()});
+    rewriter.setInsertionPointAfter(initLoop);
+
+    // Outer pass loop and per-sample loop.
+    affine::AffineForOp forOpiter =
+        rewriter.create<AffineForOp>(loc, lb, totalIterations, step);
+    rewriter.setInsertionPointToStart(forOpiter.getBody());
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // (n, i) -> n - i: indexes x and doubles as the `n - i >= 0` guard.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // y[n] = 0
+    rewriter.create<AffineStoreOp>(loc, zeroval, yAlloc, ValueRange{iv});
+
+    // y[n] += w[i] * x[n - i]
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value Prevw = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv2});
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, Prevw);
+    Value ybefore = rewriter.create<AffineLoadOp>(loc, yAlloc, ValueRange{iv});
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, yAlloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // e[n] = d[n] - y[n]
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, yAlloc, ValueRange{iv});
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    // w[i] += mu * e[n] * x[n - i]
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv3});
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+    rewriter.create<AffineStoreOp>(loc, answer, alloc, ValueRange{iv3});
+
+    // Leave the whole nest before replacing the op with the weight buffer.
+    rewriter.setInsertionPointAfter(forOpiter);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Threshold operations
+//===----------------------------------------------------------------------===//
+
+/// Conversion pattern that lowers `dsp::ThresholdOp` to a single affine loop.
+///
+/// For every index i of the result the emitted code computes
+///   y[i] = 0     when a[i] <= t and a[i] >= -t (ordered comparisons)
+///   y[i] = a[i]  otherwise
+/// where a is the `input` operand and t is loaded, without indices, from the
+/// `threshld` operand.
+struct ThresholdOpLowering : public ConversionPattern {
+  ThresholdOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ThresholdOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the loop described on the struct and replaces `op` with the buffer
+  /// that the loop fills.
+  ///
+  /// \param op        the `dsp::ThresholdOp` operation being lowered.
+  /// \param operands  the converted operands of `op`.
+  /// \param rewriter  used to create the new IR and to replace `op`.
+  /// \returns success in all cases.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    ThresholdOpAdaptor adaptor(operands);
+
+    // Shape of the emitted loop for a 10-element result (illustrative names):
+    //   affine.for %i = 0 to 10 {
+    //     %a  = affine.load %input[%i] : memref<10xf64>
+    //     %t  = affine.load %threshold[] : memref<f64>
+    //     %le = arith.cmpf ole, %a, %t : f64
+    //     %nt = arith.negf %t : f64
+    //     %ge = arith.cmpf oge, %a, %nt : f64
+    //     %in = arith.andi %le, %ge : i1
+    //     %y  = arith.select %in, %zero, %a : f64
+    //     affine.store %y, %out[%i] : memref<10xf64>
+    //   }
+
+    // The result type fixes both the buffer type and the loop trip count.
+    auto resultTy = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto outBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    // Value stored for elements that lie inside the [-t, t] band.
+    Value zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // for i in [0, len(result)) with unit step
+    const int64_t first = 0;
+    const int64_t length = resultTy.getShape()[0];
+    const int64_t stride = 1;
+    auto loop = rewriter.create<AffineForOp>(loc, first, length, stride);
+    Value idx = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // a[i]
+    Value sample = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), idx);
+
+    // t; the load is emitted inside the loop body
+    Value limit = rewriter.create<AffineLoadOp>(loc, adaptor.getThreshld(),
+                                                ValueRange{});
+
+    // a[i] <= t
+    Value notAbove = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLE, sample, limit);
+
+    // a[i] >= -t
+    Value negLimit = rewriter.create<arith::NegFOp>(loc, limit);
+    Value notBelow = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, sample, negLimit);
+
+    // Inside the band exactly when both comparisons hold.
+    Value insideBand = rewriter.create<arith::AndIOp>(loc, notAbove, notBelow);
+
+    // y[i] = insideBand ? 0 : a[i]
+    Value picked =
+        rewriter.create<arith::SelectOp>(loc, insideBand, zero, sample);
+    rewriter.create<AffineStoreOp>(loc, picked, outBuf, idx);
+
+    // Continue after the loop and substitute the buffer for the op's result.
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, outBuf);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: HighPassFIRHammingOptimizedOp operations
+//===----------------------------------------------------------------------===//
+
+/// Conversion pattern lowering `dsp::HighPassFIRHammingOptimizedOp` to affine IR.
+///
+/// With N the length of the result, M = (N - 1) / 2 (integer division) and wc
+/// the value loaded from the `wc` operand, the emitted code computes
+///   y[M]         = 1 - wc / pi
+///   y[n]         = -[sin(wc * (n - M)) / (pi * (n - M))]
+///                   * [0.54 - 0.46 * cos(2 * pi * n / (N - 1))],  0 <= n < M
+///   y[N - 1 - n] = y[n],                                          0 <= n < M
+/// so only the first half of the taps is evaluated; each value is stored twice.
+///
+/// Notes:
+///  * pi and 2*pi are the truncated literals 3.14159265359 and 6.28318530718.
+///  * The loop index reaches f64 through an unsigned cast to i32.
+///  * M and N - 1 are converted to f64 through `float`.
+///
+/// NOTE(review): for even N the element at index N / 2 is not written by any
+/// store emitted here -- confirm that only odd lengths reach this pattern.
+struct HighPassFIRHammingOptimizedOpLowering : public ConversionPattern {
+  HighPassFIRHammingOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::HighPassFIRHammingOptimizedOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the centre-tap store and the mirrored half-length loop, then
+  /// replaces `op` with the buffer they fill.
+  ///
+  /// \param op        the operation being lowered.
+  /// \param operands  the converted operands of `op`; only `wc` is read.
+  /// \param rewriter  used to create the new IR and to replace `op`.
+  /// \returns success in all cases.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    HighPassFIRHammingOptimizedOpAdaptor adaptor(operands);
+    auto f64Ty = rewriter.getF64Type();
+
+    // Equivalent scalar code:
+    //   y[M] = 1 - wc / pi;
+    //   for (n = 0; n < M; n++) {
+    //     k    = n - M;
+    //     sinc = sin(wc * k) / (pi * k);
+    //     win  = 0.54 - 0.46 * cos(2 * pi * n / (N - 1));
+    //     y[n] = -1 * (sinc * win);
+    //     y[N - 1 - n] = y[n];
+    //   }
+
+    // The result type determines the buffer type and the tap count N.
+    auto resultTy = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto outBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    const int64_t numTaps = resultTy.getShape()[0];
+    const int64_t half = (numTaps - 1) / 2; // M; also the loop's upper bound
+    const int64_t first = 0;
+    const int64_t stride = 1;
+
+    // ---- Centre tap: y[M] = 1 - wc / pi ----
+    Value midIdx = rewriter.create<arith::ConstantIndexOp>(loc, half);
+    Value cutoff =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getWc(), ValueRange{});
+    Value one = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(1));
+    Value minusOne = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(-1));
+    Value pi = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(3.14159265359));
+    Value cutoffOverPi = rewriter.create<arith::DivFOp>(loc, cutoff, pi);
+    Value centreTap = rewriter.create<arith::SubFOp>(loc, one, cutoffOverPi);
+    rewriter.create<AffineStoreOp>(loc, centreTap, outBuf, ValueRange{midIdx});
+
+    // ---- Constants shared by every loop iteration ----
+    // M as f64; the integer is converted through float.
+    Value halfF = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(static_cast<float>(half)));
+    Value c054 = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(0.54));
+    Value c046 = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(0.46));
+    Value twoPi = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(6.28318530718));
+    // N - 1 as f64; the subtraction is carried out in float.
+    Value lastF = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(static_cast<float>(numTaps) - 1));
+
+    // ---- First half of the taps: n in [0, M) ----
+    auto loop = rewriter.create<AffineForOp>(loc, first, half, stride);
+    Value n = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // n as f64: index -> i32 -> f64, both as unsigned conversions.
+    Value nI32 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), n);
+    Value nF = rewriter.create<arith::UIToFPOp>(loc, f64Ty, nI32);
+
+    // sinc part: sin(wc * k) / (pi * k) with k = n - M
+    // k = n - M
+    Value offset = rewriter.create<arith::SubFOp>(loc, nF, halfF);
+
+    // wc * k
+    Value phase = rewriter.create<arith::MulFOp>(loc, cutoff, offset);
+
+    // sin(wc * k)
+    Value sine = rewriter.create<math::SinOp>(loc, phase);
+
+    // pi * k
+    Value denom = rewriter.create<arith::MulFOp>(loc, pi, offset);
+
+    // sin(wc * k) / (pi * k)
+    Value sincTerm = rewriter.create<arith::DivFOp>(loc, sine, denom);
+
+    // window part: 0.54 - 0.46 * cos(2 * pi * n / (N - 1))
+    // 2 * pi * n
+    Value twoPiN = rewriter.create<arith::MulFOp>(loc, twoPi, nF);
+
+    // 2 * pi * n / (N - 1)
+    Value angle = rewriter.create<arith::DivFOp>(loc, twoPiN, lastF);
+
+    // cos(2 * pi * n / (N - 1))
+    Value cosine = rewriter.create<math::CosOp>(loc, angle);
+
+    // 0.46 * cos(...)
+    Value scaledCos = rewriter.create<arith::MulFOp>(loc, c046, cosine);
+
+    // 0.54 - 0.46 * cos(...)
+    Value window = rewriter.create<arith::SubFOp>(loc, c054, scaledCos);
+
+    // tap = -1 * (sinc * window), stored at index n ...
+    Value lowPassTap = rewriter.create<arith::MulFOp>(loc, sincTerm, window);
+    Value tap = rewriter.create<arith::MulFOp>(loc, minusOne, lowPassTap);
+    rewriter.create<AffineStoreOp>(loc, tap, outBuf, ValueRange{n});
+
+    // ... and again at index N - 1 - n, addressed through the affine map
+    // (d0)[s0] -> (s0 - d0) with d0 = n and s0 = the constant N - 1.
+    AffineExpr d0, s0;
+    bindDims(rewriter.getContext(), d0);
+    bindSymbols(rewriter.getContext(), s0);
+    AffineMap mirrorMap = AffineMap::get(1, 1, s0 - d0);
+    Value lastIdx = rewriter.create<arith::ConstantIndexOp>(loc, numTaps - 1);
+    rewriter.create<AffineStoreOp>(loc, tap, outBuf, mirrorMap,
+                                   ValueRange{n, lastIdx});
+    rewriter.setInsertionPointAfter(loop);
+
+    // Loop from an earlier debug dump for a 7-element result (names as dumped):
+    // affine.for %arg0 = 0 to 3 {
+    //   %12 = arith.index_castui %arg0 : index to i32
+    //   %13 = arith.uitofp %12 : i32 to f64
+    //   %14 = arith.subf %13, %cst_3 : f64
+    //   %15 = arith.mulf %9, %14 : f64
+    //   %16 = math.sin %15 : f64
+    //   %17 = arith.mulf %14, %cst_9 : f64
+    //   %18 = arith.divf %16, %17 : f64
+    //   %19 = arith.mulf %13, %cst_0 : f64
+    //   %20 = arith.divf %19, %cst : f64
+    //   %21 = math.cos %20 : f64
+    //   %22 = arith.mulf %21, %cst_1 : f64
+    //   %23 = arith.subf %cst_2, %22 : f64
+    //   %24 = arith.mulf %18, %23 : f64
+    //   %25 = arith.mulf %24, %cst_4 : f64
+    //   affine.store %25, %alloc[%arg0] : memref<7xf64>
+    //   affine.store %25, %alloc[-%arg0 + 6] : memref<7xf64>
+    // }
+
+    rewriter.replaceOp(op, outBuf);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterHammingOptimizedOp operations
+//===----------------------------------------------------------------------===//
+
+/// Conversion pattern lowering `dsp::FIRFilterHammingOptimizedOp` to affine IR.
+///
+/// With N the length of the result, M = (N - 1) / 2 (integer division) and wc
+/// the value loaded from the `wc` operand, the emitted code computes
+///   y[M]         = wc / pi
+///   y[n]         = [sin(wc * (n - M)) / (pi * (n - M))]
+///                   * [0.54 - 0.46 * cos(2 * pi * n / (N - 1))],  0 <= n < M
+///   y[N - 1 - n] = y[n],                                          0 <= n < M
+/// so only the first half of the taps is evaluated; each value is stored twice.
+///
+/// Notes:
+///  * pi and 2*pi are the truncated literals 3.14159265359 and 6.28318530718.
+///  * The loop index reaches f64 through an unsigned cast to i32.
+///  * M and N - 1 are converted to f64 through `float`.
+///  * The centre tap is stored before the loop is emitted.
+///
+/// NOTE(review): for even N the element at index N / 2 is not written by any
+/// store emitted here -- confirm that only odd lengths reach this pattern.
+struct FIRFilterHammingOptimizedOpLowering : public ConversionPattern {
+  FIRFilterHammingOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterHammingOptimizedOp::getOperationName(),
+                          1, ctx) {}
+
+  /// Emits the centre-tap store and the mirrored half-length loop, then
+  /// replaces `op` with the buffer they fill.
+  ///
+  /// \param op        the operation being lowered.
+  /// \param operands  the converted operands of `op`; only `wc` is read.
+  /// \param rewriter  used to create the new IR and to replace `op`.
+  /// \returns success in all cases.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    FIRFilterHammingOptimizedOpAdaptor adaptor(operands);
+    auto f64Ty = rewriter.getF64Type();
+
+    // Equivalent scalar code:
+    //   y[M] = wc / pi;
+    //   for (n = 0; n < M; n++) {
+    //     k    = n - M;
+    //     sinc = sin(wc * k) / (pi * k);
+    //     win  = 0.54 - 0.46 * cos(2 * pi * n / (N - 1));
+    //     y[n] = sinc * win;
+    //     y[N - 1 - n] = y[n];
+    //   }
+
+    // The result type determines the buffer type and the tap count N.
+    auto resultTy = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto outBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    const int64_t numTaps = resultTy.getShape()[0];
+    const int64_t half = (numTaps - 1) / 2; // M; also the loop's upper bound
+    const int64_t first = 0;
+    const int64_t stride = 1;
+
+    // ---- Centre tap: y[M] = wc / pi ----
+    Value midIdx = rewriter.create<arith::ConstantIndexOp>(loc, half);
+    Value cutoff =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getWc(), ValueRange{});
+    Value pi = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(3.14159265359));
+    Value cutoffOverPi = rewriter.create<arith::DivFOp>(loc, cutoff, pi);
+    rewriter.create<AffineStoreOp>(loc, cutoffOverPi, outBuf,
+                                   ValueRange{midIdx});
+
+    // ---- Constants shared by every loop iteration ----
+    // M as f64; the integer is converted through float.
+    Value halfF = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(static_cast<float>(half)));
+    Value c054 = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(0.54));
+    Value c046 = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(0.46));
+    Value twoPi = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(6.28318530718));
+    // N - 1 as f64; the subtraction is carried out in float.
+    Value lastF = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(static_cast<float>(numTaps) - 1));
+
+    // ---- First half of the taps: n in [0, M) ----
+    auto loop = rewriter.create<AffineForOp>(loc, first, half, stride);
+    Value n = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // n as f64: index -> i32 -> f64, both as unsigned conversions.
+    Value nI32 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), n);
+    Value nF = rewriter.create<arith::UIToFPOp>(loc, f64Ty, nI32);
+
+    // sinc part: sin(wc * k) / (pi * k) with k = n - M
+    // k = n - M
+    Value offset = rewriter.create<arith::SubFOp>(loc, nF, halfF);
+
+    // wc * k
+    Value phase = rewriter.create<arith::MulFOp>(loc, cutoff, offset);
+
+    // sin(wc * k)
+    Value sine = rewriter.create<math::SinOp>(loc, phase);
+
+    // pi * k
+    Value denom = rewriter.create<arith::MulFOp>(loc, pi, offset);
+
+    // sin(wc * k) / (pi * k)
+    Value sincTerm = rewriter.create<arith::DivFOp>(loc, sine, denom);
+
+    // window part: 0.54 - 0.46 * cos(2 * pi * n / (N - 1))
+    // 2 * pi * n
+    Value twoPiN = rewriter.create<arith::MulFOp>(loc, twoPi, nF);
+
+    // 2 * pi * n / (N - 1)
+    Value angle = rewriter.create<arith::DivFOp>(loc, twoPiN, lastF);
+
+    // cos(2 * pi * n / (N - 1))
+    Value cosine = rewriter.create<math::CosOp>(loc, angle);
+
+    // 0.46 * cos(...)
+    Value scaledCos = rewriter.create<arith::MulFOp>(loc, c046, cosine);
+
+    // 0.54 - 0.46 * cos(...)
+    Value window = rewriter.create<arith::SubFOp>(loc, c054, scaledCos);
+
+    // tap = sinc * window, stored at index n ...
+    Value tap = rewriter.create<arith::MulFOp>(loc, sincTerm, window);
+    rewriter.create<AffineStoreOp>(loc, tap, outBuf, ValueRange{n});
+
+    // ... and again at index N - 1 - n, addressed through the affine map
+    // (d0)[s0] -> (s0 - d0) with d0 = n and s0 = the constant N - 1.
+    AffineExpr d0, s0;
+    bindDims(rewriter.getContext(), d0);
+    bindSymbols(rewriter.getContext(), s0);
+    AffineMap mirrorMap = AffineMap::get(1, 1, s0 - d0);
+    Value lastIdx = rewriter.create<arith::ConstantIndexOp>(loc, numTaps - 1);
+    rewriter.create<AffineStoreOp>(loc, tap, outBuf, mirrorMap,
+                                   ValueRange{n, lastIdx});
+    rewriter.setInsertionPointAfter(loop);
+
+    // Rough shape of the loop for a 7-element result (illustrative names):
+    // affine.for %n = 0 to 3 {
+    //   %i32  = arith.index_castui %n : index to i32
+    //   %nf   = arith.uitofp %i32 : i32 to f64
+    //   %k    = arith.subf %nf, %M : f64
+    //   %ph   = arith.mulf %wc, %k : f64
+    //   %sin  = math.sin %ph : f64
+    //   %den  = arith.mulf %pi, %k : f64
+    //   %sinc = arith.divf %sin, %den : f64
+    //   %a    = arith.mulf %twoPi, %nf : f64
+    //   %ang  = arith.divf %a, %Nm1 : f64
+    //   %cos  = math.cos %ang : f64
+    //   %c    = arith.mulf %c046, %cos : f64
+    //   %win  = arith.subf %c054, %c : f64
+    //   %tap  = arith.mulf %sinc, %win : f64
+    //   affine.store %tap, %out[%n] : memref<7xf64>
+    //   affine.store %tap, %out[-%n + 6] : memref<7xf64>
+    // }
+
+    rewriter.replaceOp(op, outBuf);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GetRangeOfVectorOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::GetRangeOfVectorOp` to a buffer holding an arithmetic sequence.
+///
+/// Operand 0 supplies the first value and operand 2 the step; both must be
+/// defined by `dsp::ConstantOp` (element 0 is used), otherwise the match fails.
+/// Operand 1 is not read here. The number of elements N comes from the static
+/// result type and must be at least 1.
+///
+/// The sequence is built by repeated addition: y[0] = first and
+/// y[i] = y[i - 1] + step for 1 <= i < N.
+struct GetRangeOfVectorOpLowering : public ConversionPattern {
+  GetRangeOfVectorOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GetRangeOfVectorOp::getOperationName(), 1, ctx) {
+  }
+
+  /// Replaces `op` with a freshly allocated buffer filled as described above.
+  ///
+  /// \param op        the `dsp::GetRangeOfVectorOp` operation being lowered.
+  /// \param operands  the converted operands; not read, because the constants
+  ///                  are looked up through the operands of `op` itself.
+  /// \param rewriter  used for all IR creation and for the final replacement.
+  /// \returns success, or a match failure when a constant cannot be read or
+  ///          the result has no elements.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Emitted code (N is the length of the result):
+    //   y[0] = first, prev_val = first
+    //   for i = 1 to N
+    //     prev_val = prev_val + step
+    //     y[i] = prev_val
+
+    // Shape of the emitted IR for N = 4 (SSA names are illustrative; the
+    // constants and the allocation are omitted):
+    //   affine.store %first, %alloc[0] : memref<4xf64>
+    //   affine.for %i = 1 to 4 iter_args(%prev = %first) -> (f64) {
+    //     %next = arith.addf %prev, %step : f64
+    //     affine.store %next, %alloc[%i] : memref<4xf64>
+    //     affine.yield %next : f64
+    //   }
+
+    // Reads element 0 of the dsp::ConstantOp defining `v`; fails when `v` is
+    // not produced by such an op or the constant has no elements.
+    auto readScalarConstant = [](Value v) -> FailureOr<double> {
+      auto constantOp = v.getDefiningOp<dsp::ConstantOp>();
+      if (!constantOp)
+        return failure();
+      DenseElementsAttr attr = constantOp.getValue();
+      if (attr.getNumElements() == 0)
+        return failure();
+      return attr.getValues<FloatAttr>()[0].getValueAsDouble();
+    };
+
+    // Validate the compile-time parameters before any IR is created, so a
+    // missing constant is reported as a match failure instead of crashing.
+    FailureOr<double> firstValue = readScalarConstant(op->getOperand(0));
+    FailureOr<double> stepValue = readScalarConstant(op->getOperand(2));
+    if (failed(firstValue) || failed(stepValue))
+      return rewriter.notifyMatchFailure(
+          op, "first value and step must be defined by dsp::ConstantOp");
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // y[0] is stored unconditionally below, so the result cannot be empty.
+    int64_t ub = tensorType.getShape()[0];
+    if (ub < 1)
+      return rewriter.notifyMatchFailure(
+          op, "result must contain at least one element");
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Both constants are materialised from the double values directly; going
+    // through a `float` temporary would round values such as 0.1 before they
+    // become f64 constants.
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value constantFirst = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*firstValue));
+    Value constantStep = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*stepValue));
+
+    // y[0] = first
+    rewriter.create<AffineStoreOp>(loc, constantFirst, alloc,
+                                   ValueRange{constantIndx0});
+
+    // Loop over 1 <= i < N, carrying the previous element as an iter_arg.
+    int64_t lb = 1;
+    int64_t step = 1;
+    affine::AffineForOp forOpY = rewriter.create<AffineForOp>(
+        loc, lb, ub, step, ValueRange{constantFirst});
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Block argument 0 is the induction variable, argument 1 the iter_arg.
+    auto getIterArg = forOpY.getBody()->getArgument(1);
+
+    // y[i] = prev_val + step; the same value is yielded as the next prev_val.
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, getIterArg, constantStep);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{ivY});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // The filled buffer now stands in for the op's result.
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: HighPassFIRFilterOp operations
+//===----------------------------------------------------------------------===//
+
+struct HighPassFIRFilterOpLowering : public ConversionPattern {
+  HighPassFIRFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::HighPassFIRFilterOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y_lpf[n] = wc/pi * sinc(wc * (n- (N-1)/2)) , n!= (N-1)/2 :
+    //            = wc/pi , n = (N-1)/2
+    //  y_hpf[n] = dirac(n- (N-1)/2) - y_lpf[n] = -1 * wc/pi * sinc(wc * (n-
+    //  (N-1)/2)) , n!= (N-1)/2 :
+    //           = 1 - wc/pi , n = (N-1)/2
+
+    // 2 loops : first from 0 <= n <= (N-1)/2 - 1
+    //      2nd from (N-1)/2 +1 <= n < N
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // first from 0 <= i <= (N-1)/2 - 1
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
+    int64_t step = 1;
+
+    // DEBUG_PRINT_NO_ARGS();
+    HighPassFIRFilterOpAdaptor highPassfirFilterOpAdaptor(operands);
+    // Handle middle y[mid] = wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+    // ValueRange{constantIndx0});
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constantMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, highPassfirFilterOpAdaptor.getWc(), ValueRange{});
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
+    Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
+    Value OneMinusWcByPi =
+        rewriter.create<arith::SubFOp>(loc, constant1, wcByPi);
+    rewriter.create<AffineStoreOp>(loc, OneMinusWcByPi, alloc,
+                                   ValueRange{constantIndxMid});
+
+    // first from 0 <= i <= (N-1)/2 - 1
+
+    // calculate i-(N-1)/2
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    // get sin(wc * (i - (N-1)/ 2))
+    Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
+
+    Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
+
+    // get sin(wc*i) / pi * i
+
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
+    Value MulByMinus1 =
+        rewriter.create<arith::MulFOp>(loc, constantMinus1, GetDiv);
+    rewriter.create<AffineStoreOp>(loc, MulByMinus1, alloc, ValueRange{ivY});
+    // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // 2nd loop from (N-1)/2 + 1 <= i < N
+    lb = ub + 1;
+    ub = N;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv1 = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    // convert index to f64
+    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), iv1);
+    Value i1 =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
+
+    // get sin(wc * (i1 - (N-1)/ 2))
+    Value iMinusMid1 = rewriter.create<arith::SubFOp>(loc, i1, Nminus1By2);
+    Value mulwc_iMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, wc, iMinusMid1);
+    Value GetSin1 = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid1);
+
+    // get sin(i1 - (N-1)/ 2) / (i1 - (N-1)/ 2) * pi
+    //  get sin(wc*i1) / pi * i1
+
+    Value piMuliMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid1);
+    Value GetDiv1 =
+        rewriter.create<arith::DivFOp>(loc, GetSin1, piMuliMinusMid1);
+
+    Value GetDiv1MulNeg1 =
+        rewriter.create<arith::MulFOp>(loc, constantMinus1, GetDiv1);
+
+    rewriter.create<AffineStoreOp>(loc, GetDiv1MulNeg1, alloc, ValueRange{iv1});
+    // llvm::errs() << "LINE " << __LINE__ << " file= " << __FILE__ << "\n" ;
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+
+    // %cst = arith.constant 6.2831853071800001 : f64
+    // %cst_0 = arith.constant 4.600000e-01 : f64
+    // %cst_1 = arith.constant 5.400000e-01 : f64
+    // %cst_2 = arith.constant 4.000000e+00 : f64
+    // %alloc = memref.alloc() : memref<4xf64>
+    // %alloc_3 = memref.alloc() : memref<f64>
+    // affine.store %cst_2, %alloc_3[] : memref<f64>
+    // affine.for %arg0 = 0 to 4 {
+    //   %0 = arith.index_castui %arg0 : index to i32
+    //   %1 = arith.uitofp %0 : i32 to f64
+    //   %2 = arith.mulf %1, %cst : f64
+    //   %3 = arith.divf %2, %cst_2 : f64
+    //   %4 = math.cos %3 : f64
+    //   %5 = arith.mulf %4, %cst_0 : f64
+    //   %6 = arith.subf %cst_1, %5 : f64
+    //   affine.store %6, %alloc[%arg0] : memref<4xf64>
+    // }
+
+    // }
+    // }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: LowPassFIRFilterOp operations
+//===----------------------------------------------------------------------===//
+
+struct LowPassFIRFilterOpLowering : public ConversionPattern {
+  LowPassFIRFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LowPassFIRFilterOp::getOperationName(), 1, ctx) {
+  }
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Fills the result buffer with N low-pass taps, mid = (N-1)/2 (int div):
+    //   y[n]   = sin(wc * (n - mid)) / (pi * (n - mid))   for n != mid
+    //   y[mid] = wc / pi
+
+    // Emitted as one store for n == mid plus two affine loops:
+    //   0 <= n < mid   and   mid + 1 <= n < N
+
+    // The op's first result type; the cast requires a ranked tensor.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // NOTE(review): lowerBounds and steps are never read in this function.
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // Bounds of the first loop, 0 <= n < mid; N is the size of dimension 0.
+    int64_t lb = 0;
+    int64_t N = tensorType.getShape()[0];
+    int64_t ub = (N - 1) / 2;
+    int64_t step = 1;
+
+    // Named accessor (getWc) over the already-converted operands.
+    LowPassFIRFilterOpAdaptor lowPassfirFilterOpAdaptor(operands);
+    // Handle the centre tap separately: y[mid] = wc / pi
+    int64_t midIndx = ub;
+    Value constantIndxMid =
+        rewriter.create<arith::ConstantIndexOp>(loc, midIndx);
+    // wc is loaded with an empty index list, i.e. the operand is addressed
+    // as a rank-0 buffer. The loaded value is reused by both loops below.
+    Value wc = rewriter.create<AffineLoadOp>(
+        loc, lowPassfirFilterOpAdaptor.getWc(), ValueRange{});
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
+    Value wcByPi = rewriter.create<arith::DivFOp>(loc, wc, constpi);
+
+    rewriter.create<AffineStoreOp>(loc, wcByPi, alloc,
+                                   ValueRange{constantIndxMid});
+
+    // First loop: taps left of the centre, 0 <= i < mid.
+
+    // mid as an f64 constant (via float), used by both loops for (i - mid)
+    Value Nminus1By2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr((float)ub));
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    // convert the induction variable: index -> i32 (unsigned) -> f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    // get sin(wc * (i - mid))
+    Value iMinusMid = rewriter.create<arith::SubFOp>(loc, i, Nminus1By2);
+    Value mulwc_iMinusMid = rewriter.create<arith::MulFOp>(loc, wc, iMinusMid);
+
+    Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid);
+
+    // divide by pi * (i - mid) and store the tap at y[i]
+
+    Value piMuliMinusMid =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuliMinusMid);
+    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY});
+    // leave the loop body; later ops are emitted after the first loop
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Second loop: taps right of the centre, mid + 1 <= i1 < N
+    lb = ub + 1;
+    ub = N;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv1 = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    // convert the induction variable: index -> i32 (unsigned) -> f64
+    Value Indx1 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), iv1);
+    Value i1 =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), Indx1);
+
+    // get sin(wc * (i1 - mid))
+    Value iMinusMid1 = rewriter.create<arith::SubFOp>(loc, i1, Nminus1By2);
+    Value mulwc_iMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, wc, iMinusMid1);
+    Value GetSin1 = rewriter.create<math::SinOp>(loc, mulwc_iMinusMid1);
+
+    // divide by pi * (i1 - mid) and store the tap at y[i1]; this is the
+    // same computation as in the first loop
+
+    Value piMuliMinusMid1 =
+        rewriter.create<arith::MulFOp>(loc, constpi, iMinusMid1);
+    Value GetDiv1 =
+        rewriter.create<arith::DivFOp>(loc, GetSin1, piMuliMinusMid1);
+    rewriter.create<AffineStoreOp>(loc, GetDiv1, alloc, ValueRange{iv1});
+    // leave the loop body; later ops are emitted after the second loop
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Both loops are emitted and the insertion point is back after them.
+    // (N-1)/2 is an integer division, so for even N the "centre" is the
+    // lower of the two middle positions.
+
+    // Rough shape of the IR emitted above (types elided, mid = (N-1)/2):
+    //   %wc = affine.load %wcBuf[]
+    //   %c  = arith.divf %wc, %pi
+    //   affine.store %c, %alloc[mid]
+    //   affine.for %n = 0 to mid {
+    //     %0 = arith.index_castui %n : index to i32
+    //     %1 = arith.uitofp %0 : i32 to f64
+    //     %2 = arith.subf %1, %midF
+    //     %3 = arith.mulf %wc, %2
+    //     %4 = math.sin %3
+    //     %5 = arith.mulf %pi, %2
+    //     %6 = arith.divf %4, %5
+    //     affine.store %6, %alloc[%n]
+    //   }
+    //   affine.for %n = mid + 1 to N {
+    //     ... same body as the first loop ...
+    //   }
+
+    // The dsp op is replaced by the buffer that now holds the taps.
+    // (alloc was created by insertAllocAndDealloc above.)
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: SetElemAtIndx operations
+//===----------------------------------------------------------------------===//
+
+struct SetElemAtIndxOpLowering : public ConversionPattern {
+  SetElemAtIndxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SetElemAtIndxOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::SetElemAtIndxOp to one affine.load / affine.store pair.
+  ///
+  /// Pseudo-code, with `idx` a compile-time constant:
+  ///   input[idx] = val[0]
+  ///
+  /// Emitted IR (types elided):
+  ///   %idx = arith.constant <idx> : index
+  ///   %c0  = arith.constant 0 : index
+  ///   %v   = affine.load %val[%c0]
+  ///   affine.store %v, %input[%idx]
+  ///
+  /// Operand 1 (the index) must come from a dsp::ConstantOp with at least one
+  /// element; element 0, truncated towards zero, becomes `idx`. A
+  /// non-constant, empty or negative index is reported as a match failure
+  /// instead of dereferencing a null op or emitting an invalid access.
+  ///
+  /// NOTE(review): the store writes into the converted *input* buffer, while
+  /// the buffer that replaces the op's result is allocated here and never
+  /// written by this pattern. This is the pre-existing behaviour -- confirm
+  /// whether the result is supposed to alias or copy the input.
+  ///
+  /// \param op        the dsp::SetElemAtIndxOp being lowered.
+  /// \param operands  the already-converted operands of `op`.
+  /// \param rewriter  rewriter used to create the replacement IR.
+  /// \returns success() once the op has been replaced, failure otherwise.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    SetElemAtIndxOpAdaptor setElemAtIndxAdaptor(operands);
+
+    // Resolve the constant index before any IR is created, so that a failed
+    // match leaves the function untouched. The original operand of `op` is
+    // inspected here, as before, rather than the adaptor's converted value.
+    Value indexOperand = op->getOperand(1);
+    auto indexConstant = indexOperand.getDefiningOp<dsp::ConstantOp>();
+    if (!indexConstant)
+      return rewriter.notifyMatchFailure(
+          op, "index operand must be defined by a dsp::ConstantOp");
+
+    DenseElementsAttr indexAttr = indexConstant.getValue();
+    if (indexAttr.getNumElements() == 0)
+      return rewriter.notifyMatchFailure(op, "index constant is empty");
+
+    // Read the element as double and keep the full int64_t range: going
+    // through float and int would round indices above 2^24 and truncate
+    // anything wider than int.
+    auto indexElements = indexAttr.getValues<FloatAttr>();
+    const double indexAsDouble = indexElements[0].getValueAsDouble();
+    const auto elementIndex = static_cast<int64_t>(indexAsDouble);
+    if (elementIndex < 0)
+      return rewriter.notifyMatchFailure(op, "index must be non-negative");
+
+    // NOTE(review): there is no upper-bound check against the input
+    // buffer's shape; an index past the end is not rejected here and still
+    // produces an out-of-range store.
+
+    // Allocation & deallocation for the result of this operation.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Index constants: idx for the store, 0 for the load from val.
+    Value constantIndx2Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, elementIndex);
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // The value to store is element 0 of the `val` operand, which is
+    // therefore addressed as a rank-1 buffer.
+    Value ValToStore = rewriter.create<AffineLoadOp>(
+        loc, setElemAtIndxAdaptor.getVal(), ValueRange{constantIndx0});
+
+    // input[idx] = val[0], written in place into the input buffer.
+    rewriter.create<AffineStoreOp>(loc, ValToStore,
+                                   setElemAtIndxAdaptor.getInput(),
+                                   ValueRange{constantIndx2Indx});
+
+    // The op's result is replaced by `alloc` (see NOTE(review) above).
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GetElemAtIndx operations
+//===----------------------------------------------------------------------===//
+
+struct GetElemAtIndxOpLowering : public ConversionPattern {
+  GetElemAtIndxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GetElemAtIndxOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::GetElemAtIndxOp to one affine.load / affine.store pair.
+  ///
+  /// Pseudo-code, with `idx` a compile-time constant:
+  ///   output[0] = input[idx]
+  ///
+  /// Emitted IR (types elided):
+  ///   %idx = arith.constant <idx> : index
+  ///   %c0  = arith.constant 0 : index
+  ///   %v   = affine.load %input[%idx]
+  ///   affine.store %v, %alloc[%c0]
+  ///
+  /// Operand 1 (the index) must come from a dsp::ConstantOp with at least one
+  /// element; element 0, truncated towards zero, becomes `idx`. A non-constant,
+  /// empty or negative index is reported as a match failure.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    GetElemAtIndxOpAdaptor getElemAtIndxAdaptor(operands);
+
+    // Resolve the constant index before any IR is created, so that a failed
+    // match leaves the function untouched. The original operand of `op` is
+    // inspected here, as before, rather than the adaptor's converted value.
+    Value indexOperand = op->getOperand(1);
+    auto indexConstant = indexOperand.getDefiningOp<dsp::ConstantOp>();
+    if (!indexConstant)
+      return rewriter.notifyMatchFailure(
+          op, "index operand must be defined by a dsp::ConstantOp");
+
+    DenseElementsAttr indexAttr = indexConstant.getValue();
+    if (indexAttr.getNumElements() == 0)
+      return rewriter.notifyMatchFailure(op, "index constant is empty");
+
+    // Read the element as double and keep the full int64_t range: going
+    // through float and int would round indices above 2^24 and truncate
+    // anything wider than int.
+    auto indexElements = indexAttr.getValues<FloatAttr>();
+    const double indexAsDouble = indexElements[0].getValueAsDouble();
+    const auto elementIndex = static_cast<int64_t>(indexAsDouble);
+    if (elementIndex < 0)
+      return rewriter.notifyMatchFailure(op, "index must be non-negative");
+
+    // Allocation & deallocation for the result of this operation.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value constantIndx2Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, elementIndex);
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // output[0] = input[idx]
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, getElemAtIndxAdaptor.getInput(), ValueRange{constantIndx2Indx});
+    rewriter.create<AffineStoreOp>(loc, inputX, alloc,
+                                   ValueRange{constantIndx0});
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: SincOp operations
+//===----------------------------------------------------------------------===//
+
+struct SincOpLowering : public ConversionPattern {
+  SincOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SincOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code (N = size of result dimension 0):
+    //   y[0] = 1
+    //   y[n] = sin(wc * n) / (pi * n)   for 1 <= n < N
+
+    // The op's first result type; the cast requires a ranked tensor.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // NOTE(review): lowerBounds and steps are never read in this function.
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // Loop bounds: 1 <= n < N, step 1 (n == 0 is handled separately below)
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // y[0] is written directly with a single store; the loop starts at
+    // n = 1, so the division by pi * n never sees n == 0.
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    // NOTE(review): y[0] is the constant 1, whereas sin(wc*n)/(pi*n) tends
+    // to wc/pi as n -> 0. Kept as written; confirm the intended scaling.
+
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3.14159265359));
+    rewriter.create<AffineStoreOp>(loc, constant1, alloc,
+                                   ValueRange{constantIndx0});
+
+    // Named accessor (getWc) over the already-converted operands.
+    SincOpAdaptor sincOpAdaptor(operands);
+    // loop over the output index n (ivY), 1 <= n < N
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    // convert the induction variable: index -> i32 (unsigned) -> f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    // wc is loaded inside the loop body with an empty index list
+    Value wc =
+        rewriter.create<AffineLoadOp>(loc, sincOpAdaptor.getWc(), ValueRange{});
+
+    Value mulwc_i = rewriter.create<arith::MulFOp>(loc, wc, i);
+
+    // y[i] = sin(wc * i) / (pi * i)
+    Value GetSin = rewriter.create<math::SinOp>(loc, mulwc_i);
+    Value piMuli = rewriter.create<arith::MulFOp>(loc, constpi, i);
+    Value GetDiv = rewriter.create<arith::DivFOp>(loc, GetSin, piMuli);
+    rewriter.create<AffineStoreOp>(loc, GetDiv, alloc, ValueRange{ivY});
+    // leave the loop body; later ops are emitted after the loop
+    rewriter.setInsertionPointAfter(forOpY);
+    // Summary: y[0] = 1 is a single store before the loop and every other
+    // element is produced by the loop body. ub is the size of result
+    // dimension 0, so the loop covers 1 <= n < N.
+
+    // Rough shape of the IR emitted above (types elided).
+    // N is the size of result dimension 0.
+    //   %c0  = arith.constant 0 : index
+    //   %one = arith.constant 1.0
+    //   %pi  = arith.constant 3.14159265359
+    //   affine.store %one, %alloc[%c0]
+    //   affine.for %n = 1 to N {
+    //     %0  = arith.index_castui %n : index to i32
+    //     %1  = arith.uitofp %0 : i32 to f64
+    //     %wc = affine.load %wcBuf[]
+    //     %2  = arith.mulf %wc, %1
+    //     %3  = math.sin %2
+    //     %4  = arith.mulf %pi, %1
+    //     %5  = arith.divf %3, %4
+    //     affine.store %5, %alloc[%n]
+    //   }
+    // (%wcBuf is the converted wc operand, %alloc the result buffer.)
+
+    // The dsp op is replaced by the buffer that now holds y.
+    // (alloc was created by insertAllocAndDealloc above.)
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFT1DImg operations
+//===----------------------------------------------------------------------===//
+
+struct FFT1DImgOpLowering : public ConversionPattern {
+  FFT1DImgOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFT1DImgOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code (imaginary part of a direct N-point DFT sum over x):
+    //   y_img[k] = -1 * sumOver_n( x[n] * sin(2*pi * k * n / N) )
+    //
+    // Steps:
+    //   1. zero-initialise the output buffer y_img
+    //   2. iterate over the outputs, k = 0 .. N-1
+    //   3. for each k iterate over all inputs, n = 0 .. N-1, and accumulate
+    //      y_img[k] -= x[n] * sin(2*pi * k * n / N)
+    //
+    // finally replace the dsp op with the output buffer
+
+    // N below is the size of dimension 0 of the result type.
+
+    // The op's first result type; the cast requires a ranked tensor.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // Only result 0 is inspected; this pattern produces a single buffer
+    // (the imaginary parts). The real parts are not computed by this
+    // pattern.
+
+    // NOTE(review): the loop bounds and the constant N used in the phase are
+    // both taken from this result type, not from the input operand's type,
+    // so the input is assumed to have at least as many elements as the
+    // result -- confirm against the op's verifier.
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    // single output buffer, holding y_img
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // NOTE(review): lowerBounds and steps are never read in this function.
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // Zero-initialise the accumulator buffer:
+    //   affine.for %y = 0 to N {
+    //     affine.store %zero, %alloc_img[%y]
+    //   }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // All three loops below iterate 0 <= index < N with step 1.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // outer loop over the output index k (ivY)
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // inner loop over the input index n (ivX)
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // load x[n] and the running sum y_img[k]
+    FFT1DImgOpAdaptor fft1DImgAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DImgAdaptor.getInput(), ValueRange{ivX});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // convert both induction variables: index -> i32 (unsigned) -> f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // build the numerator 2*pi * k * i of the phase
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // N as an f64 constant. It is derived from ub (result dimension 0), not
+    // from the input operand's type.
+    // NOTE(review): the value goes through float first -- exact up to 2^24.
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    // phase = (2*pi * k * i) / N
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Img part: y_img[k] = y_img[k] - x[i] * sin(phase)
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
+
+    // The subtraction above already applies the -1 factor of the imaginary
+    // part, so no separate multiplication by -1 is emitted.
+    // The running sum is stored back on every inner iteration; it is
+    // re-loaded at the top of the next iteration (loadYImg above) because the
+    // accumulator lives in memory, not in a loop-carried value.
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
+    // leave the inner loop
+    rewriter.setInsertionPointAfter(forOpX);
+    // leave the outer loop (no 1/N scaling is applied to y_img)
+
+    rewriter.setInsertionPointAfter(forOpY);
+    // Rough shape of the IR emitted above (types elided, N = result length):
+    //   affine.for %y = 0 to N {
+    //     affine.store %zero, %alloc_img[%y]
+    //   }
+    //
+    // followed by the accumulation nest (the -1 factor of the imaginary part
+    // is folded into the arith.subf):
+
+    //   affine.for %y = 0 to N {
+    //     affine.for %x = 0 to N {
+    //       %in  = affine.load %input[%x]
+    //       %acc = affine.load %alloc_img[%y]
+    //       %0 = arith.index_castui %y : index to i32
+    //       %k = arith.uitofp %0 : i32 to f64
+    //       %1 = arith.index_castui %x : index to i32
+    //       %i = arith.uitofp %1 : i32 to f64
+    //       %ki = arith.mulf %k, %i
+    //       %2 = arith.mulf %cst_2pi, %ki
+    //       %ang = arith.divf %2, %N
+    //       %s = math.sin %ang
+    //       %p = arith.mulf %in, %s
+    //       %new = arith.subf %acc, %p
+    //       affine.store %new, %alloc_img[%y]
+    //     }
+    //   }
+
+    // Notes:
+    //  - %cst_2pi and %N are arith.constant ops created inside the inner loop
+    //    body, since they are built after the insertion point moved there.
+    //  - %N is the f64 value of ub, i.e. the size of result dimension 0; the
+    //    same ub bounds the loop over the input, so input and result are
+    //    treated as having equal length.
+    //  - Indices pass through i32 (unsigned) before becoming f64.
+    //  - Only the imaginary part is produced by this pattern.
+
+    // Per output index k this computes
+    //   y_img[k] = -sum_{n=0}^{N-1} x[n] * sin(2*pi*k*n / N)
+    // by direct summation over all n for every k (two nested loops of N
+    // iterations each).
+
+    // NOTE(review): 2*pi is the literal 6.28318530718, a truncated decimal
+    // constant rather than a full double-precision value.
+    // NOTE(review): LengthOfInput goes through float before becoming an f64
+    // attribute, which is exact only while ub is representable in a float
+    // (integers up to 2^24).
+
+    // The dsp op is replaced by the buffer holding the imaginary parts.
+    // (alloc_img was created by insertAllocAndDealloc above.)
+    // No real-part buffer is created here.
+    rewriter.replaceOp(op, alloc_img);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFT1DReal operations
+//===----------------------------------------------------------------------===//
+
+struct FFT1DRealOpLowering : public ConversionPattern {
+  FFT1DRealOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFT1DRealOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j *y_img[k]
+    //  y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ]
+    //  y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
+    // init  output mem for y_real & y_img as 0
+    // iterate for output from k=0 to last
+    // iterate for all x from n=0 to last
+    // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and
+    // store them at y[k]
+    //
+    // replace this upsampling op with the output_mem_allocation op
+
+    // DEBUG_PRINT_NO_ARGS() ;
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    // auto memRefType2 = convertTensorToMemRef(tensorType1);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // affine.for %y = 0 to 4 {
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // For loop -- iterate from 1 to last
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // load from X, & y1 & y2
+    FFT1DRealOpAdaptor fft1DrealAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, fft1DrealAdaptor.getInput(), ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // getOperand().getType()
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    // Value N = inputTensorType.getShape()[0];
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Real part = Sum(x[i] * cos(div) )
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // DEBUG_PRINT_NO_ARGS() ;
+
+    rewriter.setInsertionPointAfter(forOpX);
+    // forOpX->dump();
+    // rewriter.create<AffineYieldOp>(loc, ValueRange{alloc_real, alloc_img});
+    rewriter.setInsertionPointAfter(forOpY);
+    // debug
+    //  forOpX->dump();
+    //  forOpY->dump();
+    //  affine.for %y = 0 to 4 {
+    //      affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //      affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    //  }
+
+    // affine.for %y = 0 to 4 {
+    // //   %0 = affine.load %alloc_3[%arg0] : memref<4xf64>
+    // //   affine.store %0, %alloc_real[%arg0] : memref<4xf64>
+    // affine.for %x = 0 to 4 {
+    //     // CAcluations
+    //           %1 = affine.load %alloc_3[%x] : memref<4xf64>
+    //           %2 = affine.load %alloc_real[%y] : memref<4xf64>
+    //           %3 = affine.load %alloc_img[%y] : memref<4xf64>
+    //           // index cast for multiply
+    //           %4 = arith.index_castui %y : index to i32
+    //           %k = arith.uitofp %4 : i32 to f64
+    //           %6 = arith.index_castui %x : index to i32
+    //           %i = arith.uitofp %6 : i32 to f64
+    //         //   %8 = arith.index_castui %arg3 : index to i32
+    //         //   %9 = arith.uitofp %8 : i32 to f64
+    //         //   %10 = arith.index_castui %arg4 : index to i32
+    //         //   %11 = arith.uitofp %10 : i32 to f64
+
+    //           %mul_1 = arith.mulf %i, %k : f64
+    //           %mul = arith.mulf %mul_1, %cst_2pi : f64
+    //         //  ixk / N
+    //           %div = arith.divf %mul, %N : f64
+    //         //   cos of the above
+    //           %res_cos = math.cos %div : f64
+    //         //   %16 = arith.addf %14, %15 : f64
+    //         //   %res_sin = arith.mulf %16, %cst_0 : f64
+
+    //           %res_sin = math.sin %div : f64
+    //           %real_prod = arith.mulf %1, %res_cos : f64
+    //           %img_prod_1 = arith.mulf %1, %res_sin : f64
+    //           %img_prod = arith.mulf %cst_5, %img_prod_1 : f64
+
+    //           %real = arith.addf %2, %real_prod : f64
+    //           %img = arith.addf %3, %img_prod : f64
+    //           affine.store %real, %alloc_real[%y] : memref<4xf64>
+    //         //    dsp.print %alloc_real : memref<4xf64>
+    //           affine.store %img, %alloc_img[%y] : memref<4xf64>
+
+    // }
+    // }
+    // rewriter.replaceOp(op, alloc_real);
+    rewriter.replaceOp(op, alloc_real);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: SquareOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::SquareOp to an affine loop nest: every element of the input is
+/// multiplied by itself and stored into the result buffer.
+///
+/// The loop nest iterates over the shape of the result tensor, one loop per
+/// dimension, so the input is expected to have that same shape.
+struct SquareOpLowering : public ConversionPattern {
+  SquareOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SquareOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the loop nest and replaces `op` with the result buffer.
+  ///
+  /// \param op        the dsp::SquareOp operation being lowered.
+  /// \param operands  converted operands of `op`; the input is read through
+  ///                  SquareOpAdaptor::getInput().
+  /// \param rewriter  rewriter used to create the new IR and replace `op`.
+  /// \returns success() after `op` has been replaced.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    // for every index tuple ivs of the result shape
+    //   elem = a[ivs]
+    //   output[ivs] = elem * elem
+
+    // output for result type; its shape is the iteration space
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // construct affine loops for the input: one loop per result dimension,
+    // running from 0 to the dimension size with unit step
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value=*/0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    SquareOpAdaptor squareOpAdaptor(operands);
+    Value input = squareOpAdaptor.getInput();
+
+    // The callback fills the innermost loop body; `ivs` are the loop indices.
+    affine::buildAffineLoopNest(
+        rewriter, loc, lowerBounds, tensorType.getShape(), steps,
+        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange ivs) {
+          Value elemIn =
+              nestedBuilder.create<AffineLoadOp>(nestedLoc, input, ivs);
+          Value square =
+              nestedBuilder.create<arith::MulFOp>(nestedLoc, elemIn, elemIn);
+
+          // store the result
+          nestedBuilder.create<AffineStoreOp>(nestedLoc, square, alloc, ivs);
+        });
+
+    // Resulting IR for a 1-D input of 5 elements:
+    //    affine.for %arg0 = 0 to 5 {
+    //    %0 = affine.load %alloc_6[%arg0] : memref<5xf64>
+    //    %1 = arith.mulf %0, %0 : f64
+    //    affine.store %1, %alloc_5[%arg0] : memref<5xf64>
+    //  }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: SumOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::SumOp to an affine loop that adds up the elements along the
+/// first dimension of the input. The running sum lives in element 0 of the
+/// result buffer: it is cleared first, and every iteration loads it, adds one
+/// input element and stores it back.
+struct SumOpLowering : public ConversionPattern {
+  SumOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SumOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the accumulation loop and replaces `op` with the result buffer.
+  ///
+  /// \param op        the dsp::SumOp operation being lowered.
+  /// \param operands  converted operands of `op`; the input is read through
+  ///                  SumOpAdaptor::getInput().
+  /// \param rewriter  rewriter used to create the new IR and replace `op`.
+  /// \returns success() after `op` has been replaced, or a match failure when
+  ///          operand 0 is not a ranked tensor with a static first dimension.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    // output = 0
+    // iterate for len = 0 to inputLen
+    //   output = load output
+    //   elem = a[i]
+    //   output = output + elem
+    //   store output
+
+    // The trip count is the first dimension of the operand's tensor type.
+    // Check it up front, before any IR is created, instead of dereferencing
+    // a failed dyn_cast or looping over a missing or dynamic extent.
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType || inputType.getRank() < 1 || inputType.isDynamicDim(0))
+      return rewriter.notifyMatchFailure(
+          op, "expected a ranked input with a static first dimension");
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    SumOpAdaptor sumOpAdaptor(operands);
+
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+
+    // init 0 for output
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc,
+                                   ValueRange{constantIndx0});
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // load the next element and the running sum
+    Value elemIn =
+        rewriter.create<AffineLoadOp>(loc, sumOpAdaptor.getInput(), iv);
+    Value loadSum =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{constantIndx0});
+
+    Value sum = rewriter.create<arith::AddFOp>(loc, elemIn, loadSum);
+
+    // store the result
+    rewriter.create<AffineStoreOp>(loc, sum, alloc, ValueRange{constantIndx0});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Sketch of the resulting IR for an input of 8 elements:
+    //   %c0 = arith.constant 0 : index
+    //   %cst = arith.constant 0.000000e+00 : f64
+    //   affine.store %cst, %alloc[%c0]
+    //   affine.for %arg0 = 0 to 8 {
+    //     %elem = affine.load %input[%arg0]
+    //     %acc = affine.load %alloc[%c0]
+    //     %sum = arith.addf %elem, %acc : f64
+    //     affine.store %sum, %alloc[%c0]
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: filterOp operations
+//===----------------------------------------------------------------------===//
+/// Lowers dsp::filterOp to affine loops implementing the difference equation
+///
+///   y[0] = b[0] * x[0]
+///   y[i] = b[0] * x[i] + sum_{j=1..i} (b[j] * x[i-j] - a[j] * y[i-j])
+///
+/// for 1 <= i < N, where N is the first dimension of the result. Unrolled,
+/// the next two outputs are
+///
+///   y[1] = b[0]*x[1] + b[1]*x[0] - a[1]*y[0]
+///   y[2] = b[0]*x[2] + b[1]*x[1] - a[1]*y[1] + b[2]*x[0] - a[2]*y[0]
+///
+/// y[i] is accumulated in the result buffer itself: every iteration of the
+/// inner loop loads it, adds one term and stores it back.
+///
+/// NOTE(review): b and a are indexed with j up to N - 1, so both are assumed
+/// to hold at least N coefficients, and a[0] is never read (the output is
+/// not normalised by it) -- confirm against the op definition.
+struct filterOpLowering : public ConversionPattern {
+  filterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::filterOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the filter loops and replaces `op` with the result buffer.
+  ///
+  /// \param op        the dsp::filterOp operation being lowered.
+  /// \param operands  converted operands of `op`, read through the
+  ///                  filterOpAdaptor accessors getB(), getA() and getX().
+  /// \param rewriter  rewriter used to create the new IR and replace `op`.
+  /// \returns success() after `op` has been replaced, or a match failure when
+  ///          the result is not a non-empty, statically shaped 1-D tensor.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.filterOp has 3 operands -- all of type tensor f64
+
+    // Pseudo-code:
+    //  y[i] = sum(b[j] * x[i-j] - a[j] * y[i-j] ) j=1 to i and  i=1 to len(x)
+    //  also, y[0] = b[0] * x[0]
+
+    // 1) calculate y[0]
+    // 2) iterate for indx=1 to input_len:
+    //     load y[indx] = b[0] * x[indx]
+    //     3) iterate for j=1 to indx :
+    //             load b[j] , x[i-j] , a[j] , y[i-j]
+    //             y[indx] = y[indx] + b[j] * x[i-j] - a[j]*y[i-j]
+
+    auto loc = op->getLoc();
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // y[0] is stored unconditionally and every access uses a single index,
+    // so only a non-empty, statically sized 1-D result can be lowered.
+    // Anything else is rejected here, before any IR has been created.
+    if (tensorType.getRank() != 1 || tensorType.isDynamicDim(0) ||
+        tensorType.getDimSize(0) < 1)
+      return rewriter.notifyMatchFailure(
+          op, "expected a non-empty 1-D result with a static shape");
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    filterOpAdaptor filterOpAdaptor1(operands);
+
+    // IR:
+    // ConstantIndx0
+    // b0 = affine.load(b, ConstantIndx0)
+    // x0 = affine.load(x, ConstantIndx0)
+    // tempY0 = arith.mulf(b0,x0)
+
+    // lb = 1, ub = y.size() , ivY = forLoopY.inductionVariable()
+    // forLoopY
+    // xIvY = affine.load(x,ivY )
+    // tempYIndx = arith.mulf(b0, xIvY)
+    // affine.store(tempYIndx, y, ivY)
+
+    //     forloopJ from 1 to ivY , ivJ = forloopJ.inductionVariable()
+    //         load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
+    //         (y, map(ivY - ivJ) ), (y , ivY)
+
+    //         tempBxX = arith.mulf(b , x)
+    //         tempAxY = arith.mulf(a , Y_i-j)
+    //         tempB_A = arith.subf( tempBxX - tempAxY)
+    //         sumY_A = arith.addf( Y , tempB_A )
+    //         affine.store(sumY_A , y , ivY)
+
+    // ConstantIndx0
+    // b0 = affine.load(b, ConstantIndx0)
+    // x0 = affine.load(x, ConstantIndx0)
+    // tempY0 = arith.mulf(b0,x0)
+
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value b0 = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getB(), ValueRange{constantIndx0});
+    Value x0 = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), ValueRange{constantIndx0});
+    Value tempY0 = rewriter.create<arith::MulFOp>(loc, b0, x0);
+
+    // store at Y0
+    rewriter.create<affine::AffineStoreOp>(loc, tempY0, alloc,
+                                           ValueRange{constantIndx0});
+
+    // For loop -- iterate from 1 to last
+    //  lb = 1, ub = y.size() , ivY = forLoopY.inductionVariable()
+    //      forLoopY
+    //      xIvY = affine.load(x,ivY )
+    //      tempYIndx = arith.mulf(b0, xIvY)
+    //      affine.store(tempYIndx, y, ivY)
+
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    Value xIvY = rewriter.create<affine::AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), ivY);
+    Value b0mulxIvY = rewriter.create<arith::MulFOp>(loc, b0, xIvY);
+    rewriter.create<affine::AffineStoreOp>(loc, b0mulxIvY, alloc, ivY);
+
+    // loop for J -- 1 to upperIndx ie, ivY (inclusive)
+    //  forloopJ , ivJ = forloopJ.inductionVariable()
+    //  load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
+    //  (y, map(ivY - ivJ) ), (y , ivY)
+
+    // tempBxX = arith.mulf(b , x)
+    // tempAxY = arith.mulf(a , Y_i-j)
+    // tempB_A = arith.subf( tempBxX - tempAxY)
+    // sumY_A = arith.addf( Y , tempB_A )
+    // affine.store(sumY_A , y , ivY)
+
+    // The upper bound of the J loop must follow ivY. With a fixed bound of
+    // y.size() the index ivY - ivJ turns negative as soon as ivJ > ivY, and
+    // the loads from x and y below read before the start of their buffers.
+    //   lower bound: ()   -> (1)
+    //   upper bound: (d0) -> (d0 + 1), applied to ivY (bound is exclusive)
+    // NOTE(review): ivJ also indexes b and a, which are not bounded here;
+    // see the assumption on their lengths in the struct comment.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineMap lbMapJ = AffineMap::getConstantMap(lb, rewriter.getContext());
+    AffineMap ubMapJ = AffineMap::get(1, 0, d0 + 1);
+    affine::AffineForOp forOpJ = rewriter.create<AffineForOp>(
+        loc, /*lbOperands=*/ValueRange{}, lbMapJ,
+        /*ubOperands=*/ValueRange{ivY}, ubMapJ, step);
+
+    auto ivJ = forOpJ.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+
+    // For affine expression: #map1 = affine_map<(%ivY , ivJ)[] : (%ivY - ivJ)
+    AffineExpr ExprForIndxYminusX = d0 - d1;
+
+    AffineMap addMapForYminusX = AffineMap::get(2, 0, ExprForIndxYminusX);
+
+    // load (b,ivJ) ; (x, map(ivY - ivJ)) , (a, ivJ) ,
+    // (y, map(ivY - ivJ) ), (y , ivY)
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, filterOpAdaptor1.getX(), addMapForYminusX, ValueRange{ivY, ivJ});
+    Value inputB = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getB(),
+                                                 ValueRange{ivJ});
+    Value inputA = rewriter.create<AffineLoadOp>(loc, filterOpAdaptor1.getA(),
+                                                 ValueRange{ivJ});
+    // ivY - ivJ < ivY, so this element of y was completed by an earlier
+    // iteration of the Y loop.
+    Value inputPrevY = rewriter.create<AffineLoadOp>(
+        loc, alloc, addMapForYminusX, ValueRange{ivY, ivJ});
+    // running value of y[ivY]
+    Value outY = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
+
+    // tempBxX = arith.mulf(b , x)
+    // tempAxY = arith.mulf(a , Y_i-j)
+    // tempB_A = arith.subf( tempBxX - tempAxY)
+    // sumY_A = arith.addf( Y , tempB_A )
+    // affine.store(sumY_A , y , ivY)
+
+    Value tempBxX = rewriter.create<arith::MulFOp>(loc, inputB, inputX);
+    Value tempAxY = rewriter.create<arith::MulFOp>(loc, inputA, inputPrevY);
+    Value tempBminusA = rewriter.create<arith::SubFOp>(loc, tempBxX, tempAxY);
+    Value sumY_A = rewriter.create<arith::AddFOp>(loc, outY, tempBminusA);
+    rewriter.create<affine::AffineStoreOp>(loc, sumY_A, alloc, ivY);
+
+    rewriter.setInsertionPointAfter(forOpJ);
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Sketch of the resulting IR when every operand holds 4 elements
+    // (value names are illustrative):
+    //   #ub = affine_map<(d0) -> (d0 + 1)>
+    //   (the (d0, d1) -> (d0 - d1) map is shown inline as %i - %j)
+    //   %c0 = arith.constant 0 : index
+    //   %b0 = affine.load %b[%c0] : memref<4xf64>
+    //   %x0 = affine.load %x[%c0] : memref<4xf64>
+    //   %y0 = arith.mulf %b0, %x0 : f64
+    //   affine.store %y0, %alloc[%c0] : memref<4xf64>
+    //   affine.for %i = 1 to 4 {
+    //     %xi = affine.load %x[%i] : memref<4xf64>
+    //     %init = arith.mulf %b0, %xi : f64
+    //     affine.store %init, %alloc[%i] : memref<4xf64>
+    //     affine.for %j = 1 to #ub(%i) {
+    //       %xij = affine.load %x[%i - %j] : memref<4xf64>
+    //       %bj = affine.load %b[%j] : memref<4xf64>
+    //       %aj = affine.load %a[%j] : memref<4xf64>
+    //       %yij = affine.load %alloc[%i - %j] : memref<4xf64>
+    //       %yi = affine.load %alloc[%i] : memref<4xf64>
+    //       %bx = arith.mulf %bj, %xij : f64
+    //       %ay = arith.mulf %aj, %yij : f64
+    //       %d = arith.subf %bx, %ay : f64
+    //       %acc = arith.addf %yi, %d : f64
+    //       affine.store %acc, %alloc[%i] : memref<4xf64>
+    //     }
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: DCT operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::DCTOp to affine loops computing, for an input x of length N,
+///
+///   y[k] = sqrt(2/N) * sum_{n=0..N-1} x[n] * cos(pi * k * (n + 0.5) / N)
+///
+/// for 0 <= k < N, with y[0] additionally divided by sqrt(2) (the DCT-II
+/// with orthonormal scaling).
+///
+/// N is the first dimension of the result type and is the trip count of
+/// both loops, i.e. input and output are assumed to have the same length.
+///
+/// NOTE(review): loop indices are converted to f64 through an unsigned i32
+/// cast, so lengths that do not fit in 32 bits would wrap.
+struct DCTOpLowering : public ConversionPattern {
+  DCTOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DCTOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the DCT loops and replaces `op` with the result buffer.
+  ///
+  /// \param op        the dsp::DCTOp operation being lowered.
+  /// \param operands  converted operands of `op`; the input is read through
+  ///                  DCTOpAdaptor::getInput().
+  /// \param rewriter  rewriter used to create the new IR and replace `op`.
+  /// \returns success() after `op` has been replaced, or a match failure when
+  ///          the result is not a non-empty, statically shaped 1-D tensor.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y[k] = sqrt(2/N) * SumOverAllN( x[n] cos(pi * k * (n +0.5)/N)) ,
+    //   0<=n<=N-1 :
+    //  for y[0] , the answer will be multiplied by 1/sqrt(2)
+
+    // init  output mem for y as 0
+    // iterate for output from k=0 to last
+    // iterate for all x from n=0 to last
+    // perform the calculations : ie x[n] cos(pi * k * (n +0.5)/N) and sum and
+    // store them at y[k]
+    //
+    // replace this DCT op with the output_mem_allocation op
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Both loops use one statically known extent and y[0] is rescaled
+    // unconditionally at the end, so only a non-empty, statically sized 1-D
+    // result can be lowered. Reject anything else before creating IR.
+    if (tensorType.getRank() != 1 || tensorType.isDynamicDim(0) ||
+        tensorType.getDimSize(0) < 1)
+      return rewriter.notifyMatchFailure(
+          op, "expected a non-empty 1-D result with a static shape");
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    DCTOpAdaptor dctAdaptor(operands);
+
+    // constant values: kept in double precision because they are emitted as
+    // f64 attributes; float constants would carry only about 7 significant
+    // digits into the cosine argument and the scale factor.
+    const double sqrt2 = 1.41421356237309504880;
+    const double pi = 3.14159265358979323846;
+
+    // affine.for %y = 0 to 4 {
+    //     affine.store %cst_3, %alloc[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // load from X, & Y
+    Value inputX = rewriter.create<AffineLoadOp>(loc, dctAdaptor.getInput(),
+                                                 ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get pi * k * (i + 0.5) / N
+    Value constant0_5 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.5));
+
+    Value add_i_half = rewriter.create<arith::AddFOp>(loc, i, constant0_5);
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, add_i_half);
+
+    Value constpi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(pi));
+    Value mulpiKI_half = rewriter.create<arith::MulFOp>(loc, constpi, muli_k);
+
+    // Get N
+    const double LengthOfInput = static_cast<double>(ub);
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mulpiKI_half, N);
+
+    // Get cos ( pi * k * (n +0.5)/N))
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc, ValueRange{ivY});
+
+    rewriter.setInsertionPointAfter(forOpX);
+
+    // multiply Y(k) with sqrt(2) / sqrt(N)
+    Value loadYReal1 =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivY});
+    Value constSqrt2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2));
+    Value N2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    // math.rsqrt yields 1/sqrt(N), so sqrt(2) * rsqrtN equals sqrt(2/N)
+    Value rsqrtN = rewriter.create<math::RsqrtOp>(loc, N2);
+
+    Value mulSqrt2ByN = rewriter.create<arith::MulFOp>(loc, constSqrt2, rsqrtN);
+    Value mulSqrt2ByNByY =
+        rewriter.create<arith::MulFOp>(loc, mulSqrt2ByN, loadYReal1);
+    rewriter.create<AffineStoreOp>(loc, mulSqrt2ByNByY, alloc, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // get Y0 divided by sqrt(2)
+    // (turns the sqrt(2/N) factor applied above into sqrt(1/N) for k = 0)
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value GetY0 = rewriter.create<AffineLoadOp>(
+        loc, alloc, /* iv */ ValueRange{constantIndx0});
+    Value valSqrt2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(sqrt2));
+    Value Y0MulSqrt2 = rewriter.create<arith::DivFOp>(loc, GetY0, valSqrt2);
+    rewriter.create<AffineStoreOp>(loc, Y0MulSqrt2, alloc,
+                                   ValueRange{constantIndx0});
+
+    // Sketch of the resulting IR for N = 4. Value names are illustrative and
+    // the constants are listed up front although each is created at its use.
+    //   %cst0     = arith.constant 0.000000e+00 : f64
+    //   %cst0_5   = arith.constant 5.000000e-01 : f64
+    //   %cstPi    = arith.constant 3.1415926535897931 : f64
+    //   %cstN     = arith.constant 4.000000e+00 : f64
+    //   %cstSqrt2 = arith.constant 1.4142135623730951 : f64
+    //   %c0       = arith.constant 0 : index
+    //   affine.for %k = 0 to 4 {
+    //     affine.store %cst0, %alloc[%k] : memref<4xf64>
+    //   }
+    //   affine.for %k = 0 to 4 {
+    //     affine.for %n = 0 to 4 {
+    //       %x = affine.load %input[%n] : memref<4xf64>
+    //       %acc = affine.load %alloc[%k] : memref<4xf64>
+    //       %ki = arith.index_castui %k : index to i32
+    //       %kf = arith.uitofp %ki : i32 to f64
+    //       %ni = arith.index_castui %n : index to i32
+    //       %nf = arith.uitofp %ni : i32 to f64
+    //       %nh = arith.addf %nf, %cst0_5 : f64
+    //       %kn = arith.mulf %kf, %nh : f64
+    //       %pkn = arith.mulf %cstPi, %kn : f64
+    //       %arg = arith.divf %pkn, %cstN : f64
+    //       %cos = math.cos %arg : f64
+    //       %term = arith.mulf %x, %cos : f64
+    //       %sum = arith.addf %acc, %term : f64
+    //       affine.store %sum, %alloc[%k] : memref<4xf64>
+    //     }
+    //     %y = affine.load %alloc[%k] : memref<4xf64>
+    //     %rs = math.rsqrt %cstN : f64
+    //     %scale = arith.mulf %cstSqrt2, %rs : f64
+    //     %ys = arith.mulf %scale, %y : f64
+    //     affine.store %ys, %alloc[%k] : memref<4xf64>
+    //   }
+    //   %y0 = affine.load %alloc[%c0] : memref<4xf64>
+    //   %y0s = arith.divf %y0, %cstSqrt2 : f64
+    //   affine.store %y0s, %alloc[%c0] : memref<4xf64>
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: HammingWindowOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::HammingWindowOp to an affine loop that fills the result buffer
+/// with the window
+///
+///   y[k] = 0.54 - 0.46 * cos(2 * pi * k / (N - 1)),  0 <= k < N
+///
+/// where N is the first dimension of the result. For N == 1 the formula would
+/// divide by zero, so the single sample is set to 1.0 directly.
+struct HammingWindowOpLowering : public ConversionPattern {
+  HammingWindowOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::HammingWindowOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the window computation and replaces `op` with the result buffer.
+  ///
+  /// \param op        the dsp::HammingWindowOp operation being lowered.
+  /// \param operands  converted operands of `op`; not read by this lowering.
+  /// \param rewriter  rewriter used to create the new IR and replace `op`.
+  /// \returns success() after `op` has been replaced.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y[k] = 0.54 - 0.46 cos(2 *pi * k/(N-1)) , 0<=k<N
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // For loop -- iterate from 0 to last
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // A one-sample window has no (N - 1) to divide by: 2*pi*0/0 is NaN and
+    // would propagate through cos into the output. Store 1.0 instead.
+    if (ub == 1) {
+      Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      Value constant1 = rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1.0));
+      rewriter.create<AffineStoreOp>(loc, constant1, alloc,
+                                     ValueRange{constantIndx0});
+      rewriter.replaceOp(op, alloc);
+      return success();
+    }
+
+    // get constants -- 0.54 & 0.46
+    Value constant0_54 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.54));
+    Value constant0_46 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0.46));
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    // get 2*pi * k / (N -1)
+    Value mul2pi_k = rewriter.create<arith::MulFOp>(loc, const2pi, k);
+
+    // N - 1 is computed in double precision, matching the f64 attribute
+    const double LengthOfInput = static_cast<double>(ub);
+    Value NMinus1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(LengthOfInput - 1));
+    Value divIndxByNMinus1 =
+        rewriter.create<arith::DivFOp>(loc, mul2pi_k, NMinus1);
+    // get cos(2*pi * k/(N-1))
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByNMinus1);
+    Value MulCos0_46 =
+        rewriter.create<arith::MulFOp>(loc, constant0_46, GetCos);
+    Value Sub0_54_Cos =
+        rewriter.create<arith::SubFOp>(loc, constant0_54, MulCos0_46);
+    rewriter.create<AffineStoreOp>(loc, Sub0_54_Cos, alloc, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Sketch of the resulting IR for N = 4 (value names are illustrative):
+    //   affine.for %arg0 = 0 to 4 {
+    //     %0 = arith.index_castui %arg0 : index to i32
+    //     %1 = arith.uitofp %0 : i32 to f64
+    //     %2 = arith.mulf %cst_2pi, %1 : f64
+    //     %cst_3 = arith.constant 3.000000e+00 : f64
+    //     %3 = arith.divf %2, %cst_3 : f64
+    //     %4 = math.cos %3 : f64
+    //     %5 = arith.mulf %cst_0_46, %4 : f64
+    //     %6 = arith.subf %cst_0_54, %5 : f64
+    //     affine.store %6, %alloc[%arg0] : memref<4xf64>
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: IFFT1DOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp.ifft1d` to affine loops that compute the real part of the
+/// inverse DFT of a complex input supplied as separate real and imaginary
+/// buffers:
+///
+///   y[k] = (1/N) * sum_n( x_real[n] * cos(2*pi*k*n/N)
+///                       - x_img[n]  * sin(2*pi*k*n/N) )
+///
+/// The sum is evaluated directly with two nested loops over [0, N), where N is
+/// the leading dimension of the result type. The op is replaced by the buffer
+/// that holds y.
+struct IFFT1DOpLowering : public ConversionPattern {
+  IFFT1DOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::IFFT1DOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the zero-initialisation loop and the k/n loop nest for \p op.
+  ///
+  /// \param op        the `dsp.ifft1d` operation being lowered.
+  /// \param operands  converted operands, read through IFFT1DOpAdaptor as the
+  ///                  real (`getReal`) and imaginary (`getImg`) input buffers.
+  /// \param rewriter  rewriter used to create the new ops and replace \p op.
+  /// \returns success() unconditionally.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type fixes both the output buffer type and the loop bounds.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Every loop below iterates over [0, N) with unit step.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Zero the accumulator: y[k] = 0 for every k.
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Outer loop over the output index k.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Inner loop over the input index n.
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Load x_real[n] and the running sum y[k].
+    IFFT1DOpAdaptor ifft1DAdaptor(operands);
+    Value inputReal = rewriter.create<AffineLoadOp>(
+        loc, ifft1DAdaptor.getReal(), ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+
+    // Convert both loop indices to f64 (index -> i32 -> f64).
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // angle = 2*pi * k * n / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // Keep N in double precision: routing it through `float` would round
+    // lengths above 2^24 before they reach the f64 constant.
+    const double LengthOfInput = static_cast<double>(ub);
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Cosine term = x_real[n] * cos(angle)
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputReal, GetCos);
+
+    // Sine term = x_img[n] * sin(angle)
+    Value inputImg = rewriter.create<AffineLoadOp>(loc, ifft1DAdaptor.getImg(),
+                                                   ValueRange{ivX});
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputImg, GetSin);
+
+    // y[k] += x_real[n] * cos(angle) - x_img[n] * sin(angle)
+    Value realAns = rewriter.create<arith::SubFOp>(loc, xMulCos, xMulSin);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, realAns);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // Back in the k loop, after the n loop: y[k] = y[k] / N.
+    rewriter.setInsertionPointAfter(forOpX);
+    Value loadY =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    // A second constant is created here because `N` above was emitted inside
+    // the body of the n loop.
+    Value N1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    Value SumDivByN = rewriter.create<arith::DivFOp>(loc, loadY, N1);
+    rewriter.create<AffineStoreOp>(loc, SumDivByN, alloc_real, ValueRange{ivY});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Shape of the emitted IR (illustrated for N = 4):
+    //
+    //   affine.for %k = 0 to 4 {
+    //     affine.store %cst_0, %y[%k] : memref<4xf64>
+    //   }
+    //   affine.for %k = 0 to 4 {
+    //     affine.for %n = 0 to 4 {
+    //       %re    = affine.load %x_real[%n] : memref<4xf64>
+    //       %acc   = affine.load %y[%k] : memref<4xf64>
+    //       %k_i32 = arith.index_castui %k : index to i32
+    //       %k_f   = arith.uitofp %k_i32 : i32 to f64
+    //       %n_i32 = arith.index_castui %n : index to i32
+    //       %n_f   = arith.uitofp %n_i32 : i32 to f64
+    //       %kn    = arith.mulf %k_f, %n_f : f64
+    //       %w     = arith.mulf %cst_2pi, %kn : f64
+    //       %angle = arith.divf %w, %cst_N : f64
+    //       %cos   = math.cos %angle : f64
+    //       %recos = arith.mulf %re, %cos : f64
+    //       %im    = affine.load %x_img[%n] : memref<4xf64>
+    //       %sin   = math.sin %angle : f64
+    //       %imsin = arith.mulf %im, %sin : f64
+    //       %term  = arith.subf %recos, %imsin : f64
+    //       %sum   = arith.addf %acc, %term : f64
+    //       affine.store %sum, %y[%k] : memref<4xf64>
+    //     }
+    //     %total  = affine.load %y[%k] : memref<4xf64>
+    //     %scaled = arith.divf %total, %cst_N : f64
+    //     affine.store %scaled, %y[%k] : memref<4xf64>
+    //   }
+    rewriter.replaceOp(op, alloc_real);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFT1D operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp.fft1d` to affine loops that evaluate the DFT of a single input
+/// buffer x directly, producing separate real and imaginary result buffers:
+///
+///   y_real[k] =  sum_n( x[n] * cos(2*pi*k*n/N) )
+///   y_img[k]  = -sum_n( x[n] * sin(2*pi*k*n/N) )
+///
+/// N is the leading dimension of the first result type, and both result
+/// buffers are allocated with that type. The op is replaced by the pair
+/// {y_real, y_img}.
+struct FFT1DOpLowering : public ConversionPattern {
+  FFT1DOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFT1DOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the zero-initialisation loop and the k/n loop nest for \p op.
+  ///
+  /// \param op        the `dsp.fft1d` operation being lowered.
+  /// \param operands  converted operands, read through FFT1DOpAdaptor
+  ///                  (`getInput`) as the input buffer.
+  /// \param rewriter  rewriter used to create the new ops and replace \p op.
+  /// \returns success() unconditionally.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The first result type fixes the buffer type and the loop bounds.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the two results of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Every loop below iterates over [0, N) with unit step.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Zero both accumulators: y_real[k] = y_img[k] = 0 for every k.
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Outer loop over the output index k.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Inner loop over the input index n.
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Load x[n] and the running sums y_real[k], y_img[k].
+    FFT1DOpAdaptor fft1DAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DAdaptor.getInput(),
+                                                 ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // Convert both loop indices to f64 (index -> i32 -> f64).
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // angle = 2*pi * k * n / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // Keep N in double precision: routing it through `float` would round
+    // lengths above 2^24 before they reach the f64 constant.
+    const double LengthOfInput = static_cast<double>(ub);
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Real part: y_real[k] += x[n] * cos(angle)
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // Imaginary part: y_img[k] -= x[n] * sin(angle). Subtracting each term
+    // gives the negated sum without a separate multiply by -1.
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
+
+    // Leave both loops so later ops are not emitted inside the nest.
+    rewriter.setInsertionPointAfter(forOpX);
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Shape of the emitted IR (illustrated for N = 4):
+    //
+    //   affine.for %k = 0 to 4 {
+    //     affine.store %cst_0, %y_real[%k] : memref<4xf64>
+    //     affine.store %cst_0, %y_img[%k] : memref<4xf64>
+    //   }
+    //   affine.for %k = 0 to 4 {
+    //     affine.for %n = 0 to 4 {
+    //       %x     = affine.load %in[%n] : memref<4xf64>
+    //       %re    = affine.load %y_real[%k] : memref<4xf64>
+    //       %im    = affine.load %y_img[%k] : memref<4xf64>
+    //       %k_i32 = arith.index_castui %k : index to i32
+    //       %k_f   = arith.uitofp %k_i32 : i32 to f64
+    //       %n_i32 = arith.index_castui %n : index to i32
+    //       %n_f   = arith.uitofp %n_i32 : i32 to f64
+    //       %kn    = arith.mulf %k_f, %n_f : f64
+    //       %w     = arith.mulf %cst_2pi, %kn : f64
+    //       %angle = arith.divf %w, %cst_N : f64
+    //       %cos   = math.cos %angle : f64
+    //       %xcos  = arith.mulf %x, %cos : f64
+    //       %re2   = arith.addf %re, %xcos : f64
+    //       affine.store %re2, %y_real[%k] : memref<4xf64>
+    //       %sin   = math.sin %angle : f64
+    //       %xsin  = arith.mulf %x, %sin : f64
+    //       %im2   = arith.subf %im, %xsin : f64
+    //       affine.store %im2, %y_img[%k] : memref<4xf64>
+    //     }
+    //   }
+    rewriter.replaceOp(op, ValueRange{alloc_real, alloc_img});
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: HighPassFilter operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp.highPassFilter` to a first-difference over the input buffer:
+///
+///   y[0] = x[0]
+///   y[i] = x[i] - x[i - 1]     for i in [1, N)
+///
+/// N is the leading dimension of the result type. The op is replaced by the
+/// buffer that holds y.
+struct HighPassFilterOpLowering : public ConversionPattern {
+  HighPassFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::HighPassFilterOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the y[0] copy and the difference loop for \p op.
+  ///
+  /// \param op        the `dsp.highPassFilter` operation being lowered.
+  /// \param operands  converted operands, read through HighPassFilterOpAdaptor
+  ///                  (`getInput`) as the input buffer.
+  /// \param rewriter  rewriter used to create the new ops and replace \p op.
+  /// \returns success() unconditionally.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type fixes the output buffer type and the loop bound.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Seed the output: y[0] = x[0]. Element 0 has no predecessor, so it is
+    // handled outside the loop.
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    HighPassFilterOpAdaptor highPassFilterAdaptor(operands);
+    Value GetInputX0 =
+        rewriter.create<AffineLoadOp>(loc, highPassFilterAdaptor.getInput(),
+                                      ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc,
+                                   ValueRange{constantIndx0});
+
+    // Loop over the remaining elements, i in [1, N).
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // Access map for the previous sample: affine_map<(d0) -> (d0 - 1)>.
+    AffineExpr d0;
+    bindDims(rewriter.getContext(), d0);
+    AffineExpr ExprForPrevX = d0 - 1;
+    AffineMap addMapForHighPassFilter = AffineMap::get(1, 0, ExprForPrevX);
+
+    // x[i - 1] and x[i]
+    Value PrevX = rewriter.create<AffineLoadOp>(
+        loc, highPassFilterAdaptor.getInput(), addMapForHighPassFilter,
+        ValueRange{iv});
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, highPassFilterAdaptor.getInput(), ValueRange{iv});
+
+    // y[i] = x[i] - x[i - 1]
+    Value xMinusPrevX = rewriter.create<arith::SubFOp>(loc, inputX, PrevX);
+    rewriter.create<AffineStoreOp>(loc, xMinusPrevX, alloc, ValueRange{iv});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Shape of the emitted IR:
+    //
+    //   %c0 = arith.constant 0 : index
+    //   %x0 = affine.load %in[%c0]
+    //   affine.store %x0, %out[%c0]
+    //   affine.for %i = 1 to N {
+    //     %prev = affine.load %in[%i - 1]
+    //     %cur  = affine.load %in[%i]
+    //     %diff = arith.subf %cur, %prev : f64
+    //     affine.store %diff, %out[%i]
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: LowPassFilter operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp.lowPassFilter1stOrder` to a first-order recursive filter:
+///
+///   y[0] = x[0]
+///   y[i] = (1 - alpha) * y[i - 1] + alpha * x[i]     for i in [1, N)
+///
+/// x is the op's lhs operand and alpha is loaded from its rhs operand. N is
+/// the leading dimension of the result type. The op is replaced by the buffer
+/// that holds y.
+struct LowPassFilter1stOrderOpLowering : public ConversionPattern {
+  LowPassFilter1stOrderOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LowPassFilter1stOrderOp::getOperationName(), 1,
+                          ctx) {}
+
+  /// Emits the y[0] copy and the recurrence loop for \p op.
+  ///
+  /// \param op        the `dsp.lowPassFilter1stOrder` operation being lowered.
+  /// \param operands  converted operands, read through
+  ///                  LowPassFilter1stOrderOpAdaptor: `getLhs` is the input
+  ///                  buffer, `getRhs` the buffer alpha is loaded from.
+  /// \param rewriter  rewriter used to create the new ops and replace \p op.
+  /// \returns success() unconditionally.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result type fixes the output buffer type and the loop bound.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Seed the recurrence: y[0] = x[0].
+    Value constantIndx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    LowPassFilter1stOrderOpAdaptor lowPassFilterAdaptor(operands);
+    Value GetInputX0 = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getLhs(), ValueRange{constantIndx0});
+    rewriter.create<AffineStoreOp>(loc, GetInputX0, alloc,
+                                   ValueRange{constantIndx0});
+
+    // Loop over the remaining elements, i in [1, N).
+    int64_t lb = 1;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // Access map for the previous output: affine_map<(d0) -> (d0 - 1)>.
+    AffineExpr d0;
+    bindDims(rewriter.getContext(), d0);
+    AffineExpr ExprForPrevY = d0 - 1;
+    AffineMap addMapForLowPassFilter = AffineMap::get(1, 0, ExprForPrevY);
+
+    // y[i - 1] is read back from the output buffer, not from the input.
+    Value PrevY = rewriter.create<AffineLoadOp>(
+        loc, alloc, addMapForLowPassFilter, ValueRange{iv});
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // alpha is loaded from the rhs buffer with an empty index list.
+    Value alpha = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getRhs(), ValueRange{});
+
+    // (1 - alpha) * y[i - 1]
+    Value oneMinusAlpha = rewriter.create<arith::SubFOp>(loc, constant1, alpha);
+    Value mulPrevYAlphaMinus1 =
+        rewriter.create<arith::MulFOp>(loc, oneMinusAlpha, PrevY);
+
+    // alpha * x[i]
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, lowPassFilterAdaptor.getLhs(), ValueRange{iv});
+    Value mulAlphaX = rewriter.create<arith::MulFOp>(loc, alpha, inputX);
+
+    // y[i] = (1 - alpha) * y[i - 1] + alpha * x[i]
+    Value AddmulAlphaXAndPreYAlphaMinus1 =
+        rewriter.create<arith::AddFOp>(loc, mulPrevYAlphaMinus1, mulAlphaX);
+    rewriter.create<AffineStoreOp>(loc, AddmulAlphaXAndPreYAlphaMinus1, alloc,
+                                   ValueRange{iv});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Shape of the emitted IR:
+    //
+    //   %c0 = arith.constant 0 : index
+    //   %x0 = affine.load %in[%c0]
+    //   affine.store %x0, %out[%c0]
+    //   affine.for %i = 1 to N {
+    //     %prev  = affine.load %out[%i - 1]
+    //     %one   = arith.constant 1.0 : f64
+    //     %alpha = affine.load %alpha_buf[]
+    //     %beta  = arith.subf %one, %alpha : f64
+    //     %fb    = arith.mulf %beta, %prev : f64
+    //     %cur   = affine.load %in[%i]
+    //     %ff    = arith.mulf %alpha, %cur : f64
+    //     %y     = arith.addf %fb, %ff : f64
+    //     affine.store %y, %out[%i]
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Upsampling operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp.upsampling` to two affine loops: the output buffer is first
+/// filled with zeros, then every input sample is written to the output slot
+/// whose index is the input index times the upsampling factor M:
+///
+///   y[j]     = 0        for j in [0, outputLen)
+///   y[M * i] = x[i]     for i in [0, inputLen)
+///
+/// M is read at lowering time from the `dsp.constant` that defines operand 1
+/// (its first element, truncated to an integer). The op is replaced by the
+/// buffer that holds y.
+struct UpSamplingOpLowering : public ConversionPattern {
+  UpSamplingOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::UpsamplingOp::getOperationName(), 1, ctx) {}
+
+  /// Emits the zero-fill loop and the scatter loop for \p op.
+  ///
+  /// \param op        the `dsp.upsampling` operation being lowered.
+  /// \param operands  converted operands, read through UpsamplingOpAdaptor
+  ///                  (`getLhs`) as the input buffer.
+  /// \param rewriter  rewriter used to create the new ops and replace \p op.
+  /// \returns failure (via notifyMatchFailure) when the input is not a ranked
+  ///          tensor, when operand 1 is not defined by a `dsp.constant`, or
+  ///          when the factor is smaller than 1; success() otherwise.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Validate everything up front so no IR is created on a failing match.
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!inputType)
+      return rewriter.notifyMatchFailure(op, "expected a ranked tensor input");
+
+    // The factor comes from the op's own operand 1, which has to be produced
+    // by a dsp.constant so its value is known while lowering.
+    auto rateConstant = op->getOperand(1).getDefiningOp<dsp::ConstantOp>();
+    if (!rateConstant)
+      return rewriter.notifyMatchFailure(
+          op, "upsampling factor must be defined by a dsp.constant");
+
+    DenseElementsAttr rateAttr = rateConstant.getValue();
+    auto rateElements = rateAttr.getValues<FloatAttr>();
+    // Truncate toward zero; converting straight from double avoids the extra
+    // rounding an intermediate `float` would introduce.
+    const auto samplingRate =
+        static_cast<int64_t>(rateElements[0].getValueAsDouble());
+    if (samplingRate < 1)
+      return rewriter.notifyMatchFailure(
+          op, "upsampling factor must be at least 1");
+
+    // The result type fixes the output buffer type and the zero-fill bound.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Zero-fill loop: y[j] = 0 for every output index j.
+    affine::AffineForOp forOpSetOut0Loop =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivforOpSetOut0Loop = forOpSetOut0Loop.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOpSetOut0Loop.getBody());
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc, ivforOpSetOut0Loop);
+    rewriter.setInsertionPointAfter(forOpSetOut0Loop);
+
+    // Scatter loop: iterate over the input length, not the output length.
+    UpsamplingOpAdaptor upsamplingAdaptor(operands);
+    int64_t ub2 = inputType.getShape()[0];
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub2, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // x[i]
+    Value elemIn =
+        rewriter.create<AffineLoadOp>(loc, upsamplingAdaptor.getLhs(), iv);
+
+    // Store map: affine_map<(d0)[s0] -> (d0 * s0)>, with d0 bound to the loop
+    // index and s0 bound to the constant factor.
+    AffineExpr d0, s0;
+    bindDims(rewriter.getContext(), d0);
+    bindSymbols(rewriter.getContext(), s0);
+    AffineExpr ExprForUpSampling = d0 * s0;
+    AffineMap addMapForUpSampling = AffineMap::get(1, 1, ExprForUpSampling);
+
+    Value constantSamplingRateIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, samplingRate);
+
+    // y[M * i] = x[i]
+    rewriter.create<AffineStoreOp>(loc, elemIn, alloc, addMapForUpSampling,
+                                   ValueRange{iv, constantSamplingRateIndx});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Shape of the emitted IR:
+    //
+    //   affine.for %j = 0 to outputLen {
+    //     %zero = arith.constant 0.0 : f64
+    //     affine.store %zero, %out[%j]
+    //   }
+    //   affine.for %i = 0 to inputLen {
+    //     %x = affine.load %in[%i]
+    //     %M = arith.constant <factor> : index
+    //     affine.store %x, %out[%i * symbol(%M)]
+    //   }
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Downsampling operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::DownsamplingOp to an affine loop that copies every M-th input
+/// sample into the result buffer:
+///
+///   affine.for %i = 0 to <result length> {
+///     %v = affine.load %input[%i * symbol(%M)]
+///     affine.store %v, %out[%i]
+///   }
+///
+/// The rate M is read at compile time from the dsp::ConstantOp that defines
+/// the second operand (its first element, truncated to an integer). The
+/// pattern fails to match when that operand is not produced by a
+/// dsp::ConstantOp or the constant has no elements.
+struct DownSamplingOpLowering : public ConversionPattern {
+  DownSamplingOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DownsamplingOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The rate is folded into the load index, so it has to be known at
+    // compile time. Validate it before emitting any IR so that a failed match
+    // leaves nothing behind.
+    auto rateDef = op->getOperand(1).getDefiningOp<dsp::ConstantOp>();
+    if (!rateDef)
+      return rewriter.notifyMatchFailure(
+          op, "downsampling rate must be defined by a dsp constant");
+    DenseElementsAttr rateAttr = rateDef.getValue();
+    if (rateAttr.getNumElements() == 0)
+      return rewriter.notifyMatchFailure(
+          op, "downsampling rate constant has no elements");
+    auto rateElements = rateAttr.getValues<FloatAttr>();
+    const auto rate =
+        static_cast<int64_t>(rateElements[0].getValueAsDouble());
+
+    // Buffer that holds the result of this operation.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // One loop iteration per output sample.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    DownsamplingOpAdaptor downsamplingAdaptor(operands);
+
+    // #map = affine_map<(d0)[s0] -> (d0 * s0)>, with s0 bound to the rate.
+    AffineExpr d0, s0;
+    bindDims(rewriter.getContext(), d0);
+    bindSymbols(rewriter.getContext(), s0);
+    AffineMap inputIndexMap = AffineMap::get(1, 1, d0 * s0);
+    Value rateIndex = rewriter.create<arith::ConstantIndexOp>(loc, rate);
+
+    // out[i] = input[i * rate]
+    Value sample = rewriter.create<AffineLoadOp>(
+        loc, downsamplingAdaptor.getLhs(), inputIndexMap,
+        ValueRange{iv, rateIndex});
+    rewriter.create<AffineStoreOp>(loc, sample, alloc, iv);
+
+    rewriter.setInsertionPointAfter(forOp1);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: MedianFilterOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::MedianFilterOp to an affine loop computing a 3-point median.
+///
+/// For every output index i the window input[i], input[i + 1], input[i + 2]
+/// is loaded and its median is obtained without sorting as
+///   median = (a + b + c) - (min(a, b, c) + max(a, b, c)).
+/// The loop runs over the first dimension of the result type.
+struct MedianFilterOpLowering : public ConversionPattern {
+  MedianFilterOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MedianFilterOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    MedianFilterOpAdaptor adaptor(operands);
+
+    // Result buffer.
+    auto resultType = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultType), loc, rewriter);
+
+    // affine.for %i = 0 to <result length>
+    const int64_t first = 0;
+    const int64_t count = resultType.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, first, count, stride);
+    auto idx = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // Index maps (d0) -> (d0 + 1) and (d0) -> (d0 + 2) for the two look-ahead
+    // samples of the window.
+    AffineExpr dim = rewriter.getAffineDimExpr(0);
+    AffineMap nextMap =
+        AffineMap::get(1, 0, dim + rewriter.getAffineConstantExpr(1));
+    AffineMap afterNextMap =
+        AffineMap::get(1, 0, dim + rewriter.getAffineConstantExpr(2));
+
+    Value a = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), idx);
+    Value b = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), nextMap,
+                                            ValueRange{idx});
+    Value c = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(),
+                                            afterNextMap, ValueRange{idx});
+
+    // a + b + c
+    Value sumAB = rewriter.create<arith::AddFOp>(loc, a, b);
+    Value total = rewriter.create<arith::AddFOp>(loc, sumAB, c);
+
+    // min(a, b, c)
+    Value minAB = rewriter.create<arith::MinimumFOp>(loc, a, b);
+    Value smallest = rewriter.create<arith::MinimumFOp>(loc, minAB, c);
+
+    // max(a, b, c)
+    Value maxAB = rewriter.create<arith::MaximumFOp>(loc, a, b);
+    Value largest = rewriter.create<arith::MaximumFOp>(loc, maxAB, c);
+
+    // Whatever remains after removing the extremes is the median.
+    Value extremes = rewriter.create<arith::AddFOp>(loc, smallest, largest);
+    Value middle = rewriter.create<arith::SubFOp>(loc, total, extremes);
+
+    rewriter.create<AffineStoreOp>(loc, middle, resultBuf, idx);
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: SlidingWindowAvg operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::SlidingWindowAvgOp to an affine loop computing a 3-point
+/// forward moving average. Emitted IR, schematically:
+///
+///   %three = arith.constant 3.0 : f64
+///   affine.for %i = 0 to <result length> {
+///     %a = affine.load %input[%i]
+///     %b = affine.load %input[%i + 1]
+///     %c = affine.load %input[%i + 2]
+///     %s0 = arith.addf %a, %b
+///     %s1 = arith.addf %s0, %c
+///     %avg = arith.divf %s1, %three
+///     affine.store %avg, %out[%i]
+///   }
+struct SlidingWindowAvgOpLowering : public ConversionPattern {
+  SlidingWindowAvgOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SlidingWindowAvgOp::getOperationName(), 1, ctx) {
+  }
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    SlidingWindowAvgOpAdaptor adaptor(operands);
+
+    // Result buffer.
+    auto resultType = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultType), loc, rewriter);
+
+    // Window size as an f64 divisor, materialized once outside the loop.
+    Value windowSize = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3));
+
+    // affine.for %i = 0 to <result length>
+    const int64_t first = 0;
+    const int64_t count = resultType.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, first, count, stride);
+    auto idx = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // Index maps (d0) -> (d0 + 1) and (d0) -> (d0 + 2).
+    AffineExpr dim = rewriter.getAffineDimExpr(0);
+    AffineMap nextMap =
+        AffineMap::get(1, 0, dim + rewriter.getAffineConstantExpr(1));
+    AffineMap afterNextMap =
+        AffineMap::get(1, 0, dim + rewriter.getAffineConstantExpr(2));
+
+    // Load the three samples of the window.
+    Value a = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), idx);
+    Value b = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(), nextMap,
+                                            ValueRange{idx});
+    Value c = rewriter.create<AffineLoadOp>(loc, adaptor.getInput(),
+                                            afterNextMap, ValueRange{idx});
+
+    // avg = (a + b + c) / 3
+    Value partial = rewriter.create<arith::AddFOp>(loc, a, b);
+    Value total = rewriter.create<arith::AddFOp>(loc, partial, c);
+    Value mean = rewriter.create<arith::DivFOp>(loc, total, windowSize);
+
+    rewriter.create<AffineStoreOp>(loc, mean, resultBuf, idx);
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterResponse operations
+//===----------------------------------------------------------------------===//
+/// Lowers dsp::FIRFilterResponseOp by delegating to lowerOpToLoopsFIR.
+///
+/// Intended computation (from the original design notes):
+///   y[n] = sum_k h[k] * x[n - k]
+/// This pattern only supplies the per-iteration body: load one element from
+/// each of the two remapped operands at the given induction variables and
+/// multiply them. Loop construction, index arithmetic, accumulation, and the
+/// replacement of `op` are presumably done inside lowerOpToLoopsFIR, which is
+/// defined elsewhere in this file -- confirm there.
+struct FIRFilterResponseOpLowering : public ConversionPattern {
+  FIRFilterResponseOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FIRFilterResponseOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    lowerOpToLoopsFIR(
+        op, operands, rewriter,
+        [loc](OpBuilder &builder, ValueRange memRefOperands,
+              ValueRange loopIvs) {
+          // The adaptor gives named access to the remapped (memref)
+          // operands.
+          dsp::FIRFilterResponseOpAdaptor adaptor(memRefOperands);
+
+          // One element of each operand at the current loop indices.
+          auto lhsElem = builder.create<affine::AffineLoadOp>(
+              loc, adaptor.getLhs(), loopIvs);
+          auto rhsElem = builder.create<affine::AffineLoadOp>(
+              loc, adaptor.getRhs(), loopIvs);
+
+          // Product term of the convolution sum.
+          return builder.create<arith::MulFOp>(loc, lhsElem, rhsElem);
+        });
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Delay operations
+//===----------------------------------------------------------------------===//
+/// Lowers dsp::DelayOp to two scf.for loops over the result buffer.
+///
+/// The delay amount is a runtime value: it is loaded from the rank-0 second
+/// operand, converted f64 -> i64 -> index, and clamped to [0, N] where N is
+/// the first dimension of the result type. Clamping keeps both loops inside
+/// the buffers even when the delay is negative or larger than N.
+///
+/// Emitted IR, schematically:
+///
+///   %raw = index_cast(fptosi(affine.load %rhs[]))
+///   %d   = minsi(maxsi(%raw, 0), N)
+///   scf.for %i = 0 to %d step 1 {          // leading zeros
+///     memref.store %zero, %out[%i]
+///   }
+///   scf.for %i = 0 to (N - %d) step 1 {    // shifted copy of the input
+///     %v = memref.load %lhs[%i]
+///     memref.store %v, %out[%i + %d]
+///   }
+///
+/// scf/memref ops are used instead of affine ones because the loop bounds
+/// depend on a value loaded at runtime.
+struct DelayOpLowering : public ConversionPattern {
+  DelayOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DelayOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Result buffer.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Number of samples; taken from the result type.
+    int64_t ub = tensorType.getShape()[0];
+
+    DelayOpAdaptor delayOpAdaptor(operands);
+
+    // Constants shared by both loops.
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value indexZero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value indexOne = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value lenOfInput = rewriter.create<arith::ConstantIndexOp>(loc, ub);
+
+    // Read the delay (stored as f64 in a rank-0 memref) and turn it into an
+    // index value.
+    Value inputUnit = rewriter.create<AffineLoadOp>(
+        loc, delayOpAdaptor.getRhs(), ValueRange{});
+    Value delayAsI64 =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), inputUnit);
+    Value rawDelay = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), delayAsI64);
+
+    // Clamp to [0, N]: a negative delay would make the copy loop read past
+    // the end of the input, and a delay above N would make the zero-fill loop
+    // write past the end of the result.
+    Value nonNegativeDelay =
+        rewriter.create<arith::MaxSIOp>(loc, rawDelay, indexZero);
+    Value delay2ndArg =
+        rewriter.create<arith::MinSIOp>(loc, nonNegativeDelay, lenOfInput);
+
+    // Loop 1: out[i] = 0 for 0 <= i < delay.
+    scf::ForOp forOp1 =
+        rewriter.create<scf::ForOp>(loc, indexZero, delay2ndArg, indexOne);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<memref::StoreOp>(loc, constant0, alloc, iv);
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Loop 2: out[i + delay] = in[i] for 0 <= i < N - delay.
+    Value ub2 = rewriter.create<arith::SubIOp>(loc, lenOfInput, delay2ndArg);
+    scf::ForOp forOp2 =
+        rewriter.create<scf::ForOp>(loc, indexZero, ub2, indexOne);
+    Value iv2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    Value loadedVal =
+        rewriter.create<memref::LoadOp>(loc, delayOpAdaptor.getLhs(), iv2);
+    Value newIndex = rewriter.create<arith::AddIOp>(loc, iv2, delay2ndArg);
+    rewriter.create<memref::StoreOp>(loc, loadedVal, alloc, newIndex);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Gain operations
+//===----------------------------------------------------------------------===//
+/// Lowers dsp::GainOp to an affine loop that scales every sample by a scalar
+/// gain:
+///
+///   affine.for %i = 0 to <result length> {
+///     %x = affine.load %lhs[%i]
+///     %g = affine.load %rhs[]
+///     %y = arith.mulf %x, %g
+///     affine.store %y, %out[%i]
+///   }
+///
+/// The gain operand is read with an empty index list, i.e. it is treated as a
+/// rank-0 buffer, and is re-loaded on every iteration.
+struct GainOpLowering : public ConversionPattern {
+  GainOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GainOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    GainOpAdaptor adaptor(operands);
+
+    // Result buffer.
+    auto resultType = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultType), loc, rewriter);
+
+    // affine.for %i = 0 to <result length>
+    const int64_t first = 0;
+    const int64_t count = resultType.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, first, count, stride);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    auto idx = loop.getInductionVar();
+
+    // out[i] = lhs[i] * gain
+    Value sample =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getLhs(), ValueRange{idx});
+    Value gain =
+        rewriter.create<AffineLoadOp>(loc, adaptor.getRhs(), ValueRange{});
+    Value scaled = rewriter.create<arith::MulFOp>(loc, sample, gain);
+    rewriter.create<AffineStoreOp>(loc, scaled, resultBuf, ValueRange{idx});
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: BitwiseAndOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::BitwiseAndOp to an affine loop performing an element-wise
+/// bitwise AND on the integer values of two f64 buffers:
+///
+///   affine.for %i = 0 to <result length> {
+///     %a = arith.fptosi (affine.load %lhs[%i]) : f64 to i64
+///     %b = arith.fptosi (affine.load %rhs[%i]) : f64 to i64
+///     %r = arith.sitofp (arith.andi %a, %b)    : i64 to f64
+///     affine.store %r, %out[%i]
+///   }
+///
+/// Both operands are indexed with the same induction variable, so they are
+/// expected to have at least as many elements as the result.
+struct BitwiseAndOpLowering : public ConversionPattern {
+  BitwiseAndOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::BitwiseAndOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Result buffer.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    BitwiseAndOpAdaptor bitwiseandOpAdaptor(operands);
+
+    // Loop over 0 <= i < N.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Load both operands and convert them to integers; the AND is only
+    // defined on integer types.
+    Value getLhs =
+        rewriter.create<AffineLoadOp>(loc, bitwiseandOpAdaptor.getLhs(), ivY);
+    Value getRhs =
+        rewriter.create<AffineLoadOp>(loc, bitwiseandOpAdaptor.getRhs(), ivY);
+    Value lhsInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), getLhs);
+    Value rhsInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), getRhs);
+
+    // AND the integers and convert the result back to f64 for storage.
+    Value andiResult = rewriter.create<arith::AndIOp>(loc, lhsInt, rhsInt);
+    Value resultFp = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), andiResult);
+
+    rewriter.create<AffineStoreOp>(loc, resultFp, alloc, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: zeroCrossCountOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::zeroCrossCountOp to an scf.for loop that counts sign changes
+/// between consecutive samples and stores the count (as f64) into a
+/// one-element result buffer.
+///
+/// Algorithm (after https://toto-share.com/2011/05/cc-zero-crossing-code/):
+///
+///   count = 0
+///   for 1 <= i < N:
+///     if (x[i - 1] < 0) != (x[i] < 0):
+///       count += 1
+///   out[0] = (f64) count
+///
+/// The sign test is done directly on the floating-point samples. Truncating
+/// to an integer first would map every value in (-1, 0) to 0 and therefore
+/// miss crossings such as 0.5 -> -0.5.
+///
+/// N is the static length of the input buffer when it is known; otherwise the
+/// first dimension of the op's result type is used.
+struct zeroCrossCountOpLowering : public ConversionPattern {
+  zeroCrossCountOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::zeroCrossCountOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    Type integerType = rewriter.getI64Type();
+
+    // The result is a single value regardless of the shape recorded on the
+    // op, so force a one-element buffer.
+    auto alloc = insertAllocAndDealloc(
+        MemRefType::get(ArrayRef<int64_t>(1), tensorType.getElementType()), loc,
+        rewriter);
+    zeroCrossCountOpAdaptor adaptor(operands);
+    Value input = adaptor.getLhs();
+
+    // Iterate over the input samples. Prefer the input buffer's own length;
+    // fall back to the result type's first dimension when it is not
+    // statically known.
+    int64_t numSamples = tensorType.getShape()[0];
+    if (auto inputType = llvm::dyn_cast<ShapedType>(input.getType()))
+      if (inputType.hasStaticShape() && inputType.getRank() >= 1)
+        numSamples = inputType.getDimSize(0);
+
+    // Constants.
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI64Type(),
+        rewriter.getIntegerAttr(rewriter.getI64Type(), 0));
+    Value constant1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI64Type(),
+        rewriter.getIntegerAttr(rewriter.getI64Type(), 1));
+    Value zeroF64 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value Indx0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Loop bounds: 1 <= i < N, so that x[i - 1] is always in range.
+    Value lb = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value ub = rewriter.create<arith::ConstantIndexOp>(loc, numSamples);
+    Value step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // The running count is carried through the loop as an iter_arg.
+    auto forOpY =
+        rewriter.create<scf::ForOp>(loc, lb, ub, step, ValueRange{constant0});
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+    auto countArg = forOpY.getRegionIterArgs()[0];
+
+    // Current and previous samples.
+    Value ivYPrev = rewriter.create<arith::SubIOp>(loc, ivY, step);
+    Value prevSample = rewriter.create<memref::LoadOp>(loc, input, ivYPrev);
+    Value currSample = rewriter.create<memref::LoadOp>(loc, input, ivY);
+
+    // Sign of each sample (true when strictly negative), computed on the
+    // floating-point values.
+    Value prevNegative = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, prevSample, zeroF64);
+    Value currNegative = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, currSample, zeroF64);
+    Value equal = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
+                                                 prevNegative, currNegative);
+
+    // Same sign: keep the count. Different sign: count + 1.
+    auto ifOp =
+        rewriter.create<scf::IfOp>(loc, TypeRange{integerType}, equal, true);
+
+    // Then block: no crossing.
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countArg});
+
+    // Else block: crossing detected.
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    auto countPlusOne =
+        rewriter.create<arith::AddIOp>(loc, countArg, constant1);
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countPlusOne});
+
+    // Feed the updated count into the next iteration.
+    rewriter.setInsertionPointAfter(ifOp);
+    auto countRes = ifOp.getResults()[0];
+    rewriter.create<scf::YieldOp>(loc, ValueRange{countRes});
+
+    rewriter.setInsertionPointAfter(forOpY);
+
+    // Convert the final count to f64 and store it as the only element.
+    auto finalCountArg = forOpY.getResults()[0];
+    Value finalCountArgFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), finalCountArg);
+
+    rewriter.create<AffineStoreOp>(loc, finalCountArgFloat, alloc, Indx0);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Binary operations
+//===----------------------------------------------------------------------===//
+
+/// Generic lowering for element-wise binary operations.
+///
+/// \tparam BinaryOp        the source op; must expose getLhs()/getRhs() on
+///                         its ODS-generated Adaptor.
+/// \tparam LoweredBinaryOp the scalar op built from the two loaded elements.
+///
+/// The loop construction itself is delegated to lowerOpToLoops; this pattern
+/// only provides the per-element computation.
+template <typename BinaryOp, typename LoweredBinaryOp>
+struct BinaryOpLowering : public ConversionPattern {
+  BinaryOpLowering(MLIRContext *ctx)
+      : ConversionPattern(BinaryOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    lowerOpToLoops(
+        op, operands, rewriter,
+        [loc](OpBuilder &builder, ValueRange memRefOperands,
+              ValueRange loopIvs) {
+          // Named accessors for the remapped (memref) operands.
+          typename BinaryOp::Adaptor adaptor(memRefOperands);
+
+          // Element of each operand at the innermost loop indices.
+          auto lhsElem = builder.create<affine::AffineLoadOp>(
+              loc, adaptor.getLhs(), loopIvs);
+          auto rhsElem = builder.create<affine::AffineLoadOp>(
+              loc, adaptor.getRhs(), loopIvs);
+
+          // Apply the scalar operation to the two elements.
+          return builder.create<LoweredBinaryOp>(loc, lhsElem, rhsElem);
+        });
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine AdditionalPatterns: Shift operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::ShiftRightOp to an affine loop performing an element-wise
+/// arithmetic (sign-preserving) right shift on the integer values of two f64
+/// buffers:
+///
+///   affine.for %i = 0 to <result length> {
+///     %a = arith.fptosi (affine.load %lhs[%i]) : f64 to i64
+///     %b = arith.fptosi (affine.load %rhs[%i]) : f64 to i64
+///     %r = arith.sitofp (arith.shrsi %a, %b)   : i64 to f64
+///     affine.store %r, %out[%i]
+///   }
+///
+/// The shift amount is read per element from the second operand.
+struct ShiftRightOpLowering : public ConversionPattern {
+  ShiftRightOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ShiftRightOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    dsp::ShiftRightOp::Adaptor adaptor(operands);
+
+    // Result buffer.
+    auto resultType = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultType), loc, rewriter);
+
+    // affine.for %i = 0 to <result length>
+    const int64_t first = 0;
+    const int64_t count = resultType.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, first, count, stride);
+    auto idx = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    // Value to shift, as i64.
+    auto valueFp =
+        rewriter.create<affine::AffineLoadOp>(loc, adaptor.getLhs(), idx);
+    Value valueInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), valueFp);
+
+    // Shift amount, as i64.
+    auto amountFp =
+        rewriter.create<affine::AffineLoadOp>(loc, adaptor.getRhs(), idx);
+    Value amountInt =
+        rewriter.create<arith::FPToSIOp>(loc, rewriter.getI64Type(), amountFp);
+
+    // Shift in the integer domain, then convert back to f64 for storage.
+    auto shifted = rewriter.create<arith::ShRSIOp>(loc, valueInt, amountInt);
+    Value shiftedFp =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), shifted);
+
+    rewriter.create<AffineStoreOp>(loc, shiftedFp, resultBuf, ValueRange{idx});
+
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine AdditionalPatterns: Matmul operations
+//===----------------------------------------------------------------------===//
+
+// template <typename BinaryOp>
+
+struct MatmulOpLowering : public ConversionPattern {
+  MatmulOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MatmulOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers `dsp::MatmulOp` to a three-deep affine loop nest that computes
+  ///   out[y, x] = sum over k of lhs[y, k] * rhs[k, x]
+  /// into a newly allocated buffer, which then replaces the op.
+  /// Always reports success.
+  ///
+  /// Emitted structure, for lhs of shape [M, K] and a result of shape [M, N]:
+  ///   affine.for y = 0 to M
+  ///     affine.for x = 0 to N
+  ///       out[y, x] = 0.0
+  ///       affine.for k = 0 to K
+  ///         out[y, x] = out[y, x] + lhs[y, k] * rhs[k, x]
+  ///
+  /// The x bound is the column count of the result, not of lhs: bounding x
+  /// by K indexes rhs and the output out of range when K > N and leaves
+  /// columns uncomputed when K < N.
+  ///
+  /// NOTE(review): assumes the result type is already a static rank-2 shape
+  /// and that rhs is [K, N]; neither is checked here.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Location attached to every op created below.
+    auto loc = op->getLoc();
+
+    // Result type of the op; it fixes the shape of the output buffer.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Allocation & deallocation for the result of this operation.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::MatmulOp::Adaptor binaryAdaptor(operands);
+
+    // Shape of the lhs operand, read from the op's own operand type.
+    auto lhsType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // Loop bounds: lhs rows (M), result columns (N) and the shared inner
+    // dimension, i.e. lhs columns (K).
+    int64_t lb = 0;
+    int64_t ubRows = lhsType.getShape()[0];
+    int64_t ubCols = tensorType.getShape()[1];
+    int64_t ubInner = lhsType.getShape()[1];
+    int64_t step = 1;
+
+    // Initial value of every accumulator cell.
+    Value constantZero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // NOTE: matrix [y, x] --> y means row, x means column.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ubRows, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ubCols, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Clear the accumulator cell before summing into it.
+    rewriter.create<AffineStoreOp>(loc, constantZero, alloc_output,
+                                   ValueRange{ivY, ivX});
+
+    // Reduction over the shared dimension.
+    affine::AffineForOp forOpIndex =
+        rewriter.create<AffineForOp>(loc, lb, ubInner, step);
+    auto ivIndex = forOpIndex.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpIndex.getBody());
+
+    // Operands of the current product term.
+    auto loadedLhs = rewriter.create<affine::AffineLoadOp>(
+        loc, binaryAdaptor.getLhs(), ValueRange{ivY, ivIndex});
+
+    auto loadedRhs = rewriter.create<affine::AffineLoadOp>(
+        loc, binaryAdaptor.getRhs(), ValueRange{ivIndex, ivX});
+
+    Value mulLhsRhs = rewriter.create<arith::MulFOp>(loc, loadedLhs, loadedRhs);
+
+    // out[y, x] += lhs[y, k] * rhs[k, x], accumulated through memory.
+    auto loadedResult = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_output, ValueRange{ivY, ivX});
+
+    Value addResultAndMul =
+        rewriter.create<arith::AddFOp>(loc, loadedResult, mulLhsRhs);
+
+    rewriter.create<AffineStoreOp>(loc, addResultAndMul, alloc_output,
+                                   ValueRange{ivY, ivX});
+
+    // Continue emitting after the whole loop nest.
+    rewriter.setInsertionPointAfter(forOpY);
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine AdditionalPatterns: Find peaks operations
+//===----------------------------------------------------------------------===//
+
+// template <typename BinaryOp>
+
+struct FindPeaksOpLowering : public ConversionPattern {
+  FindPeaksOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FindPeaksOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Scans the interior samples of `signal` and records, as f64 values, the
+    // indices of local maxima that pass the height and distance checks.
+    auto loc = op->getLoc();
+
+    // Result type of the op; its first dimension is the output capacity.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    typename dsp::FindPeaksOp::Adaptor findPeaksOpAdaptor(operands);
+
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value constant_index_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(0));
+    Value constant_index_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(1));
+
+    rewriter.create<AffineStoreOp>(loc, constant_index_zero, alloc_peaks_count,
+                                   ValueRange{});
+
+    auto heightArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int heightArgShape = heightArgType.getShape().size();
+
+    // Indices used to read `height`: none for a rank-0 operand, otherwise
+    // element 0. Owned storage is required here: a ValueRange is a non-owning
+    // view, and one built from a braced list dangles once that statement ends.
+    SmallVector<Value, 1> heightValueRange;
+    if (heightArgShape != 0)
+      heightValueRange.push_back(constant_index_zero);
+
+    auto distanceArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(2).getType());
+
+    int distanceArgShape = distanceArgType.getShape().size();
+
+    // Indices used to read `distance`, chosen the same way. They are kept in
+    // owned storage as well, because the load that consumes them is created
+    // several statements further down.
+    SmallVector<Value, 1> distanceValueRange;
+    if (distanceArgShape != 0)
+      distanceValueRange.push_back(constant_index_zero);
+
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t lb = 1;
+    int64_t ub = signalType.getShape()[0] - 1;
+    int64_t step = 1;
+
+    // %distance_fp = affine.load %distance[...] : f64, converted to index below
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getDistance(), distanceValueRange);
+    // f64 to index
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getHeight(), heightValueRange);
+
+    affine::AffineForOp forOpInit =
+        rewriter.create<AffineForOp>(loc, 0, tensorType.getShape()[0], step);
+    auto init_iter = forOpInit.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpInit.getBody());
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{init_iter});
+
+    rewriter.setInsertionPointAfter(forOpInit);
+
+    affine::AffineForOp forOpSignal =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto current_index = forOpSignal.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpSignal.getBody());
+
+    // Neighbouring samples are read through affine maps so the accesses stay
+    // affine in the induction variable:
+    //   %signal_prev    = affine.load %signal[%current_index - 1]
+    //   %signal_current = affine.load %signal[%current_index]
+    //   %signal_next    = affine.load %signal[%current_index + 1]
+    // The loop bounds [1, N-1) keep both neighbours in range.
+
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForNext =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext);
+
+    auto signal_prev =
+        rewriter.create<AffineLoadOp>(loc, findPeaksOpAdaptor.getSignal(),
+                                      addMapForPrev, ValueRange{current_index});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, findPeaksOpAdaptor.getSignal(), ValueRange{current_index});
+    auto signal_next =
+        rewriter.create<AffineLoadOp>(loc, findPeaksOpAdaptor.getSignal(),
+                                      addMapForNext, ValueRange{current_index});
+
+    //%cmp_current_prev = arith.cmpf ogt, %signal_current, %signal_prev : f64
+    //%cmp_current_next = arith.cmpf ogt, %signal_current, %signal_next : f64
+    //%cmp_current_height = arith.cmpf oge, %signal_current, %height : f64
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    //%and_two_cmps = arith.andi %cmp_current_prev, %cmp_current_next : i1
+    //%and_three_cmps = arith.andi %and_two_cmps, %cmp_current_height : i1
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    // scf.if %and_three_cmps {
+    auto firstIfOp =
+        rewriter.create<scf::IfOp>(loc, and_three_cmps, /*withElseRegion=*/false);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    //%peaks_count = affine.load %alloc_peaks_count[] : memref<index>
+    //%cmp_new_peak = arith.cmpi eq, %peaks_count, %cst_zero_index : index
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero);
+
+    // First peak: there is no earlier peak to compare against, so record it.
+    // scf.if %cmp_new_peak {
+    //     memref.store %current_index, %alloc_output[%peaks_count]
+    //     %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index
+    //     affine.store %peaks_count_inc, %alloc_peaks_count[] : memref<index>
+    // }
+    auto secondIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_new_peak, /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    /*
+    else {
+        %last_peaks_count = arith.subi %peaks_count, %cst_one_index : index
+        %last_peak_index = memref.load %alloc_output[%last_peaks_count]
+        %subtract_current_index_last_peak =
+            arith.subi %current_index, %last_peak_index : index
+        %cmp_sub_distance = arith.cmpi sge, (that difference), %distance
+        */
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+    // The previous peak is read with memref.load rather than affine.load:
+    // %peaks_count is loaded from memory inside the loop, and affine.load
+    // rejected it ("index must be a valid dimension or symbol identifier").
+    // The stores indexed by %peaks_count use memref.store as well.
+    Value last_peaks_count =
+        rewriter.create<arith::SubIOp>(loc, peaks_count, constant_index_one);
+    auto last_peak_index_fp = rewriter.create<memref::LoadOp>(
+        loc, alloc_output, ValueRange{last_peaks_count});
+    // f64 to index
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    /*
+        scf.if %cmp_sub_distance {
+            memref.store %current_index, %alloc_output[%peaks_count]
+            %peaks_count_inc = arith.addi %peaks_count, %cst_one_index : index
+            affine.store %peaks_count_inc, %alloc_peaks_count[] : memref<index>
+        }
+    }
+    */
+    auto thirdIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_sub_distance, /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    // index to f64
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOpSignal);
+
+    /* Setting last element of the output as the count of peaks.
+    Note that last-last ([-2]) should be always -1. */
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index to f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+
+    Value result_size = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(),
+        rewriter.getIndexAttr(tensorType.getShape()[0]));
+    Value result_size_minusOne =
+        rewriter.create<arith::SubIOp>(loc, result_size, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_final_to_f64, alloc_output,
+                                   ValueRange{result_size_minusOne});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+struct MaxOpLowering : public ConversionPattern {
+  MaxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MaxOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers `dsp::MaxOp` to an affine loop over dimension 0 of the input that
+  /// keeps a running maximum in the result buffer. Always reports success.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Result type of the op; it determines the output buffer.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::MaxOp::Adaptor maxOpAdaptor(operands);
+
+    // Seed the running maximum with -infinity, the identity of max. Seeding
+    // with 0 would report 0 for an input whose elements are all negative.
+    Value negativeInfinity = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getFloatAttr(
+            rewriter.getF64Type(),
+            llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(),
+                                  /*Negative=*/true)));
+    rewriter.create<AffineStoreOp>(loc, negativeInfinity, alloc_output,
+                                   ValueRange{});
+
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // loop for 0 <= i < N
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    // if (input[i] > running maximum) running maximum = input[i]
+    auto loadedInput = rewriter.create<affine::AffineLoadOp>(
+        loc, maxOpAdaptor.getInput(), ValueRange{idx});
+    auto loadedOutput =
+        rewriter.create<affine::AffineLoadOp>(loc, alloc_output, ValueRange{});
+    auto compare_input_output = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, loadedInput, loadedOutput);
+
+    auto ifOp = rewriter.create<scf::IfOp>(loc, compare_input_output, false);
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    rewriter.create<AffineStoreOp>(loc, loadedInput, alloc_output,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.replaceOp(op, alloc_output);
+    return success();
+  }
+};
+
+struct MeanOpLowering : public ConversionPattern {
+  MeanOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::MeanOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Sums input[0 .. length) into a rank-0 buffer, then divides that sum by
+    // the (floating-point) `length` operand.
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::MeanOp::Adaptor meanOpAdaptor(operands);
+
+    Value constantZero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    rewriter.create<AffineStoreOp>(loc, constantZero, alloc_output,
+                                   ValueRange{});
+
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int lengthArgShape = lengthArgType.getShape().size();
+
+    // Indices used to read `length`: none for a rank-0 operand, otherwise
+    // element 0. Owned storage is required here: a ValueRange is a non-owning
+    // view, and one built from a braced list dangles once that statement ends.
+    SmallVector<Value, 1> lengthValueRange;
+    if (lengthArgShape != 0)
+      lengthValueRange.push_back(cst_idx_zero);
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, meanOpAdaptor.getLength(), lengthValueRange);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+
+    // loop for 0 <= i < length. The bound is only known at run time; this
+    // lowering uses scf.for with memref.load/store for it.
+    // TODO: check whether affine.for with a symbolic upper bound would do.
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, length_index, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    auto loadedInput = rewriter.create<memref::LoadOp>(
+        loc, meanOpAdaptor.getInput(), ValueRange{idx});
+    auto loadedOutput =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+    auto added_output =
+        rewriter.create<arith::AddFOp>(loc, loadedInput, loadedOutput);
+    rewriter.create<memref::StoreOp>(loc, added_output, alloc_output,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    auto loadedOutput2 =
+        rewriter.create<affine::AffineLoadOp>(loc, alloc_output, ValueRange{});
+    auto divided_output =
+        rewriter.create<arith::DivFOp>(loc, loadedOutput2, loadedLength);
+    rewriter.create<AffineStoreOp>(loc, divided_output, alloc_output,
+                                   ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+struct DiffOpLowering : public ConversionPattern {
+  DiffOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DiffOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Writes output[i] = input[i + 1] - input[i] for 0 <= i < length - 1,
+    // where `length` is read from the second operand at run time.
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::DiffOp::Adaptor diffOpAdaptor(operands);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value cst_idx_one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int lengthArgShape = lengthArgType.getShape().size();
+
+    // Indices used to read `length`: none for a rank-0 operand, otherwise
+    // element 0. Owned storage is required here: a ValueRange is a non-owning
+    // view, and one built from a braced list dangles once that statement ends.
+    SmallVector<Value, 1> lengthValueRange;
+    if (lengthArgShape != 0)
+      lengthValueRange.push_back(cst_idx_zero);
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, diffOpAdaptor.getLength(), lengthValueRange);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+    Value length_index_minus =
+        rewriter.create<arith::SubIOp>(loc, length_index, cst_idx_one);
+
+    // loop for 0 <= i < N-1. The bound is only known at run time; this
+    // lowering uses scf.for with memref.load/store for it.
+    // TODO: check whether affine.for with a symbolic upper bound would do.
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, length_index_minus, step);
+    auto idx = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    // Index of the right-hand neighbour; reuses the constant 1 created above
+    // instead of materialising another one inside the loop body.
+    Value idx_next =
+        rewriter.create<arith::AddIOp>(loc, idx, cst_idx_one);
+
+    auto input_current = rewriter.create<memref::LoadOp>(
+        loc, diffOpAdaptor.getInput(), ValueRange{idx});
+    auto input_next = rewriter.create<memref::LoadOp>(
+        loc, diffOpAdaptor.getInput(), ValueRange{idx_next});
+
+    auto diff_input =
+        rewriter.create<arith::SubFOp>(loc, input_next, input_current);
+    rewriter.create<memref::StoreOp>(loc, diff_input, alloc_output,
+                                     ValueRange{idx});
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+struct GetSingleElemAtIdxOpLowering : public ConversionPattern {
+  GetSingleElemAtIdxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GetSingleElemAtIdxOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result buffer is always a rank-0 f64 memref; it is not derived from
+    // the op's result type (an earlier version cast that type to
+    // UnrankedTensorType).
+    auto memRefType = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::GetSingleElemAtIdxOp::Adaptor getSingleElemAtIdxAdaptor(
+        operands);
+
+    auto indxArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int indxArgShape = indxArgType.getShape().size();
+
+    // Indices used to read `indx`: none for a rank-0 operand, otherwise
+    // element 0. The indices are kept in owned storage because a ValueRange is
+    // only a view and would dangle once the values it was built from go away.
+    SmallVector<Value, 1> indexValueRange;
+    if (indxArgShape != 0) {
+      Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      indexValueRange.push_back(cst_idx_zero);
+    }
+
+    Value loadedIndx = rewriter.create<AffineLoadOp>(
+        loc, getSingleElemAtIdxAdaptor.getIndx(), indexValueRange);
+
+    // f64 to index
+    Value indx_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedIndx);
+    Value indx_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), indx_ui);
+
+    Value loadedElement = rewriter.create<AffineLoadOp>(
+        loc, getSingleElemAtIdxAdaptor.getInput(), ValueRange{indx_index});
+
+    rewriter.create<AffineStoreOp>(loc, loadedElement, alloc, ValueRange{});
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+struct Diff2MeanOptimizedOpLowering : public ConversionPattern {
+  Diff2MeanOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Diff2MeanOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Emits (input[length] - input[0]) / length into a rank-0 buffer, with
+    // `length` read from the second operand at run time; no loop is needed.
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::Diff2MeanOptimizedOp::Adaptor diff2MeanOptimizedOpAdaptor(
+        operands);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    auto lengthArgType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+
+    int lengthArgShape = lengthArgType.getShape().size();
+
+    // Indices used to read `length`: none for a rank-0 operand, otherwise
+    // element 0. Owned storage is required here: a ValueRange is a non-owning
+    // view, and one built from a braced list dangles once that statement ends.
+    SmallVector<Value, 1> lengthValueRange;
+    if (lengthArgShape != 0)
+      lengthValueRange.push_back(cst_idx_zero);
+
+    auto loadedLength = rewriter.create<affine::AffineLoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getLength(), lengthValueRange);
+
+    // f64 to index
+    Value length_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedLength);
+    Value length_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), length_ui);
+
+    auto input_first = rewriter.create<memref::LoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{cst_idx_zero});
+    auto input_last = rewriter.create<memref::LoadOp>(
+        loc, diff2MeanOptimizedOpAdaptor.getInput(), ValueRange{length_index});
+
+    auto diff_input =
+        rewriter.create<arith::SubFOp>(loc, input_last, input_first);
+
+    auto div_input =
+        rewriter.create<arith::DivFOp>(loc, diff_input, loadedLength);
+
+    rewriter.create<memref::StoreOp>(loc, div_input, alloc_output,
+                                     ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+struct FindPeaks2Diff2MeanOptimizedOpLowering : public ConversionPattern {
+  FindPeaks2Diff2MeanOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::FindPeaks2Diff2MeanOptimizedOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers `dsp::FindPeaks2Diff2MeanOptimizedOp` into affine/scf/arith/memref
+  /// operations.
+  ///
+  /// The emitted IR walks the interior samples of the signal operand
+  /// (indices 1 .. N-2). A sample is a peak candidate when it is strictly
+  /// greater than both neighbours and greater than or equal to the loaded
+  /// height. A candidate is accepted when it is the first one, or when it lies
+  /// at least `distance` samples after the most recently accepted peak. The
+  /// index of the first and of the most recent accepted peak and the number of
+  /// accepted peaks are tracked in scratch buffers; after the loop the value
+  /// (last - first) / (count - 1) is stored into the result buffer, which
+  /// replaces the op.
+  ///
+  /// NOTE(review): with exactly one accepted peak the divisor (count - 1) is
+  /// zero; the emitted IR does not guard against that.
+  ///
+  /// \param op        the operation being lowered.
+  /// \param operands  the remapped operands, read through the op adaptor.
+  /// \param rewriter  rewriter used to create the replacement IR.
+  /// \returns success() after replacing `op` with the result buffer.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The result buffer type is derived from the op's first result type.
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+
+    // `alloc_output` first holds the index of the first accepted peak and at
+    // the very end receives the final result; `alloc_output_last` holds the
+    // index of the most recently accepted peak.
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_output_last = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Rank-0 index buffer counting the accepted peaks.
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    dsp::FindPeaks2Diff2MeanOptimizedOp::Adaptor adaptor(operands);
+
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    Value constant_index_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(0));
+    Value constant_index_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(1));
+
+    // peaks_count = 0
+    rewriter.create<AffineStoreOp>(loc, constant_index_zero, alloc_peaks_count,
+                                   ValueRange{});
+
+    // The operand types must be ranked tensors; llvm::cast asserts this
+    // instead of silently producing a null type that is dereferenced below.
+    auto heightArgType =
+        llvm::cast<RankedTensorType>(op->getOperand(1).getType());
+    auto distanceArgType =
+        llvm::cast<RankedTensorType>(op->getOperand(2).getType());
+    auto signalType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // Index lists used to load height and distance: empty for a rank-0
+    // operand, otherwise the single index 0. They own their storage on
+    // purpose: a ValueRange assigned from a braced list only views a temporary
+    // array that no longer exists when the loads are created further down.
+    SmallVector<Value, 1> heightIndices;
+    if (heightArgType.getRank() != 0)
+      heightIndices.push_back(constant_index_zero);
+
+    SmallVector<Value, 1> distanceIndices;
+    if (distanceArgType.getRank() != 0)
+      distanceIndices.push_back(constant_index_zero);
+
+    // Only interior samples are visited because every iteration reads the
+    // previous and the next sample as well.
+    int64_t lb = 1;
+    int64_t ub = signalType.getShape()[0] - 1;
+    int64_t step = 1;
+
+    // distance: loaded as f64, converted to i32 and then to index.
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, adaptor.getDistance(), ValueRange(distanceIndices));
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, adaptor.getHeight(), ValueRange(heightIndices));
+
+    // Both peak slots start out as -1.
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{});
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output_last,
+                                   ValueRange{});
+
+    affine::AffineForOp forOpSignal =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto current_index = forOpSignal.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpSignal.getBody());
+
+    // Affine maps addressing the neighbours of the current sample:
+    //   (d0) -> (d0 - 1)  and  (d0) -> (d0 + 1)
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForNext =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForNext = AffineMap::get(1, 0, ExprForNext);
+
+    auto signal_prev = rewriter.create<AffineLoadOp>(
+        loc, adaptor.getSignal(), addMapForPrev, ValueRange{current_index});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, adaptor.getSignal(), ValueRange{current_index});
+    auto signal_next = rewriter.create<AffineLoadOp>(
+        loc, adaptor.getSignal(), addMapForNext, ValueRange{current_index});
+
+    // is_candidate = current > prev && current > next && current >= height
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    // scf.if %is_candidate { ... }   (no else region)
+    auto firstIfOp = rewriter.create<scf::IfOp>(loc, and_three_cmps,
+                                                /*withElseRegion=*/false);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    // is_first_peak = (peaks_count == 0)
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, constant_index_zero);
+
+    // scf.if %is_first_peak { record as first and last peak } else { ... }
+    auto secondIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_new_peak, /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    // index -> i32 -> f64, because the peak slots hold f64 values.
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{});
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64,
+                                     alloc_output_last, ValueRange{});
+
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    // else: accept the candidate only if it is far enough from the last peak.
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+    // memref.load/store are used for the peak slots in this region; an
+    // affine.load indexed by a loaded value was rejected with "index must be a
+    // valid dimension or symbol identifier" (per the original author's note).
+    // NOTE: `last_peaks_count` is not consumed by any later op in this
+    // lowering; it is kept so the emitted IR stays unchanged.
+    Value last_peaks_count =
+        rewriter.create<arith::SubIOp>(loc, peaks_count, constant_index_one);
+    auto last_peak_index_fp =
+        rewriter.create<memref::LoadOp>(loc, alloc_output_last, ValueRange{});
+    // f64 -> i32 -> index
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    // scf.if %far_enough { last_peak = current_index; peaks_count += 1 }
+    auto thirdIfOp = rewriter.create<scf::IfOp>(loc, cmp_sub_distance,
+                                                /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    // index -> i32 -> f64
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2,
+                                     alloc_output_last, ValueRange{});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, constant_index_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    // After the scan: result = (last_peak - first_peak) / (peaks_count - 1).
+    rewriter.setInsertionPointAfter(forOpSignal);
+
+    auto final_loaded_peak_first =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+
+    auto final_loaded_peak_last =
+        rewriter.create<memref::LoadOp>(loc, alloc_output_last, ValueRange{});
+    Value difference = rewriter.create<arith::SubFOp>(
+        loc, final_loaded_peak_last, final_loaded_peak_first);
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index -> i32 -> f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+    // peaks_count - 1, expressed as an addition of the -1.0 constant.
+    Value peaks_count_minus = rewriter.create<arith::AddFOp>(
+        loc, peaks_count_final_to_f64, constant_minus_one);
+
+    Value final_output =
+        rewriter.create<arith::DivFOp>(loc, difference, peaks_count_minus);
+
+    rewriter.create<AffineStoreOp>(loc, final_output, alloc_output,
+                                   ValueRange{});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+/// Lowers `dsp::LMS2FindPeaksOptimizedOp`: an LMS adaptive filter whose
+/// per-sample loop is fused with a peak search over the filter output.
+///
+/// LMS part, per sample n (pseudo-code):
+///   y[n] = 0;
+///   for (i = 0; i < FILTER_LENGTH; i++)
+///     if (n - i >= 0) y[n] += w[i] * x[n - i];
+///   e = d[n] - y[n];
+///   for (i = 0; i < FILTER_LENGTH; i++)
+///     if (n - i >= 0) w[i] += MU * e * x[n - i];
+///
+/// Peak part: once y[n] is known, y[n-1] is checked against y[n-2], y[n] and
+/// the height threshold. Accepted peak indices are appended to the result
+/// buffer, and the last element of the result buffer receives the number of
+/// accepted peaks. The first two samples are emitted outside the main loop
+/// because the peak check needs two earlier outputs.
+///
+/// NOTE(review): the emitted IR unconditionally touches sample indices 0 and
+/// 1 and indexes the weight buffer (sized by the number of samples) with the
+/// filter tap; this presumably assumes at least two samples and
+/// FILTER_LENGTH <= number of samples -- confirm against the op verifier.
+struct LMS2FindPeaksOptimizedOpLowering : public ConversionPattern {
+  LMS2FindPeaksOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMS2FindPeaksOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  /// Emits the lowered IR and replaces `op` with the result buffer.
+  ///
+  /// \returns failure (before creating any IR) when operand 3, the filter
+  /// length, is not produced by a `dsp::ConstantOp` or is negative/NaN;
+  /// success() otherwise.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The filter length must be known at lowering time because it becomes a
+    // constant loop bound. Validate it before any IR is created so a failed
+    // match leaves nothing behind and never dereferences a null op.
+    auto filterLenConst = op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenConst)
+      return rewriter.notifyMatchFailure(
+          op, "filter length operand must be defined by a dsp constant op");
+    DenseElementsAttr filterLenAttr = filterLenConst.getValue();
+    const double filterLenValue =
+        filterLenAttr.getValues<FloatAttr>()[0].getValueAsDouble();
+    // Written as a negated >= so that NaN is rejected as well.
+    if (!(filterLenValue >= 0.0))
+      return rewriter.notifyMatchFailure(
+          op, "filter length must be a non-negative number");
+    const int64_t filterLength = static_cast<int64_t>(filterLenValue);
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto lhsType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+
+    ArrayRef<int64_t> lhsShape = lhsType.getShape();
+
+    // `alloc` holds the filter output y and has the shape of operand 0.
+    auto memRefType = MemRefType::get(lhsShape, rewriter.getF64Type());
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // `alloc_output` is the buffer that replaces the op: accepted peak indices
+    // followed, in its last element, by the number of peaks.
+    auto memRefTypeOutput = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefTypeOutput, loc, rewriter);
+
+    // Rank-0 index buffer counting the accepted peaks.
+    auto countMemRefType = MemRefType::get({}, rewriter.getIndexType());
+    auto alloc_peaks_count =
+        insertAllocAndDealloc(countMemRefType, loc, rewriter);
+
+    dsp::LMS2FindPeaksOptimizedOp::Adaptor lfr2fpAdaptor(operands);
+
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    // The step size mu is loaded without indices.
+    Value mu = rewriter.create<AffineLoadOp>(loc, lfr2fpAdaptor.getMu());
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value cst_idx_one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value constant_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+
+    // peaks_count = 0
+    rewriter.create<AffineStoreOp>(loc, cst_idx_zero, alloc_peaks_count,
+                                   ValueRange{});
+
+    auto heightArgType =
+        llvm::cast<RankedTensorType>(op->getOperand(4).getType());
+    auto distanceArgType =
+        llvm::cast<RankedTensorType>(op->getOperand(5).getType());
+
+    // Index lists used to load height and distance: empty for a rank-0
+    // operand, otherwise the single index 0. They own their storage on
+    // purpose: a ValueRange assigned from a braced list only views a temporary
+    // array that no longer exists when the loads are created below.
+    SmallVector<Value, 1> heightIndices;
+    if (heightArgType.getRank() != 0)
+      heightIndices.push_back(cst_idx_zero);
+
+    SmallVector<Value, 1> distanceIndices;
+    if (distanceArgType.getRank() != 0)
+      distanceIndices.push_back(cst_idx_zero);
+
+    // distance: loaded as f64, converted to i32 and then to index.
+    auto distance_fp = rewriter.create<affine::AffineLoadOp>(
+        loc, lfr2fpAdaptor.getDistance(), ValueRange(distanceIndices));
+    Value distance_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), distance_fp);
+    Value distance = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), distance_ui);
+
+    auto height = rewriter.create<affine::AffineLoadOp>(
+        loc, lfr2fpAdaptor.getHeight(), ValueRange(heightIndices));
+
+    // Fill the whole result buffer with -1.
+    affine::AffineForOp forOpInit =
+        rewriter.create<AffineForOp>(loc, 0, tensorType.getShape()[0], 1);
+    auto init_iter = forOpInit.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpInit.getBody());
+
+    rewriter.create<AffineStoreOp>(loc, constant_minus_one, alloc_output,
+                                   ValueRange{init_iter});
+
+    rewriter.setInsertionPointAfter(forOpInit);
+
+    int64_t lb = 0;
+    int64_t step = 1;
+
+    int64_t numSamples = lhsType.getShape()[0];
+
+    // `wAlloc` holds the filter weights w.
+    auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto wAlloc = insertAllocAndDealloc(yMemRefType, loc, rewriter);
+
+    // (d0, d1) -> (d0 - d1) addresses x[n - i]; the integer set
+    // { d0 - d1 >= 0 } guards against reading before the first sample.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // Emits one LMS step (output, error, weight update) for sample index `n`
+    // at the current insertion point and leaves the insertion point right
+    // after the weight-update loop.
+    auto emitLmsStep = [&](Value n) {
+      // w[n] = 0 and y[n] = 0. Tap i is only read when n >= i, so this zeroes
+      // weight n right before the first step that can use it.
+      rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc, ValueRange{n});
+      rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{n});
+
+      // y[n] += w[i] * x[n - i] for every tap i with n - i >= 0.
+      affine::AffineForOp outputLoop =
+          rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+      Value outputTap = outputLoop.getInductionVar();
+      rewriter.setInsertionPointToStart(outputLoop.getBody());
+
+      auto outputGuard = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{n, outputTap}, /*withElseRegion=*/false);
+      rewriter.setInsertionPointToStart(outputGuard.getThenBlock());
+
+      Value inputX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{n, outputTap});
+      Value w =
+          rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{outputTap});
+
+      Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+      Value ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{n});
+      Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+      rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{n});
+      rewriter.setInsertionPointAfter(outputLoop);
+
+      // e[n] = d[n] - y[n]
+      Value desiredX = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getRhs(), ValueRange{n});
+      Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{n});
+      Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+      // w[i] += mu * e[n] * x[n - i] for every tap i with n - i >= 0.
+      affine::AffineForOp updateLoop =
+          rewriter.create<AffineForOp>(loc, lb, filterLength, step);
+      Value updateTap = updateLoop.getInductionVar();
+      rewriter.setInsertionPointToStart(updateLoop.getBody());
+
+      auto updateGuard = rewriter.create<affine::AffineIfOp>(
+          loc, set1, ValueRange{n, updateTap}, /*withElseRegion=*/false);
+      rewriter.setInsertionPointToStart(updateGuard.getThenBlock());
+
+      Value inputX2 = rewriter.create<AffineLoadOp>(
+          loc, lfr2fpAdaptor.getLhs(), addMapForLMSFilter,
+          ValueRange{n, updateTap});
+      Value Prevw2 =
+          rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{updateTap});
+
+      Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+      Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+      Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+
+      rewriter.create<AffineStoreOp>(loc, answer, wAlloc,
+                                     ValueRange{updateTap});
+      rewriter.setInsertionPointAfter(updateLoop);
+    };
+
+    // Samples 0 and 1 are emitted up front: the fused peak check inside the
+    // main loop reads y[n-2], y[n-1] and y[n].
+    emitLmsStep(cst_idx_zero);
+    emitLmsStep(cst_idx_one);
+
+    // Main loop over the remaining samples.
+    int64_t lb_outer = 2;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb_outer, numSamples, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    emitLmsStep(iv);
+
+    // Fused peak search. With n = iv the candidate is y[n-1]; its neighbours
+    // are y[n-2] and y[n].
+    AffineExpr ExprForPrev =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(2);
+    AffineMap addMapForPrev = AffineMap::get(1, 0, ExprForPrev);
+
+    AffineExpr ExprForCurrent =
+        rewriter.getAffineDimExpr(0) - rewriter.getAffineConstantExpr(1);
+    AffineMap addMapForCurrent = AffineMap::get(1, 0, ExprForCurrent);
+
+    auto signal_prev = rewriter.create<AffineLoadOp>(loc, alloc, addMapForPrev,
+                                                     ValueRange{iv});
+    auto signal_current = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc, addMapForCurrent, ValueRange{iv});
+    auto signal_next =
+        rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+
+    // is_candidate = current > prev && current > next && current >= height
+    auto cmp_current_prev = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_prev);
+    auto cmp_current_next = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, signal_current, signal_next);
+    auto cmp_current_height = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, signal_current, height);
+
+    auto and_two_cmps =
+        rewriter.create<arith::AndIOp>(loc, cmp_current_prev, cmp_current_next);
+    auto and_three_cmps =
+        rewriter.create<arith::AndIOp>(loc, and_two_cmps, cmp_current_height);
+
+    // scf.if %is_candidate { ... }   (no else region)
+    auto firstIfOp = rewriter.create<scf::IfOp>(loc, and_three_cmps,
+                                                /*withElseRegion=*/false);
+    rewriter.setInsertionPointToStart(firstIfOp.thenBlock());
+
+    // is_first_peak = (peaks_count == 0)
+    auto peaks_count = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    auto cmp_new_peak = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::eq, peaks_count, cst_idx_zero);
+
+    // Index of the candidate sample: iv - 1.
+    auto current_index = rewriter.create<arith::SubIOp>(loc, iv, cst_idx_one);
+
+    // scf.if %is_first_peak { output[peaks_count] = index; peaks_count += 1 }
+    auto secondIfOp =
+        rewriter.create<scf::IfOp>(loc, cmp_new_peak, /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(secondIfOp.thenBlock());
+    // index -> i32 -> f64, because the result buffer holds f64 values.
+    Value current_index_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, cst_idx_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc, alloc_peaks_count,
+                                   ValueRange{});
+
+    // else: accept the candidate only if it lies at least `distance` samples
+    // after the previously stored peak, output[peaks_count - 1].
+    rewriter.setInsertionPointToStart(secondIfOp.elseBlock());
+
+    Value last_peaks_count =
+        rewriter.create<arith::SubIOp>(loc, peaks_count, cst_idx_one);
+    auto last_peak_index_fp = rewriter.create<memref::LoadOp>(
+        loc, alloc_output, ValueRange{last_peaks_count});
+    // f64 -> i32 -> index
+    Value last_peak_index_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), last_peak_index_fp);
+    Value last_peak_index = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), last_peak_index_ui);
+    Value subtract_current_index_last_peak =
+        rewriter.create<arith::SubIOp>(loc, current_index, last_peak_index);
+    auto cmp_sub_distance = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::sge, subtract_current_index_last_peak,
+        distance);
+
+    // scf.if %far_enough { output[peaks_count] = index; peaks_count += 1 }
+    auto thirdIfOp = rewriter.create<scf::IfOp>(loc, cmp_sub_distance,
+                                                /*withElseRegion=*/true);
+    rewriter.setInsertionPointToStart(thirdIfOp.thenBlock());
+    Value current_index_to_ui_2 = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), current_index);
+    Value current_index_to_f64_2 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), current_index_to_ui_2);
+    rewriter.create<memref::StoreOp>(loc, current_index_to_f64_2, alloc_output,
+                                     ValueRange{peaks_count});
+    auto peaks_count_inc_2 =
+        rewriter.create<arith::AddIOp>(loc, peaks_count, cst_idx_one);
+    rewriter.create<AffineStoreOp>(loc, peaks_count_inc_2, alloc_peaks_count,
+                                   ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Store the number of accepted peaks in the last element of the result.
+    auto peaks_count_final = rewriter.create<affine::AffineLoadOp>(
+        loc, alloc_peaks_count, ValueRange{});
+    // index -> i32 -> f64
+    Value peaks_count_final_to_ui = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), peaks_count_final);
+    Value peaks_count_final_to_f64 = rewriter.create<arith::UIToFPOp>(
+        loc, rewriter.getF64Type(), peaks_count_final_to_ui);
+
+    Value result_size = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(),
+        rewriter.getIndexAttr(tensorType.getShape()[0]));
+
+    // (d0) -> (d0 - 1) applied to the result size addresses the last element.
+    rewriter.create<AffineStoreOp>(loc, peaks_count_final_to_f64, alloc_output,
+                                   addMapForCurrent, ValueRange{result_size});
+
+    rewriter.replaceOp(op, alloc_output);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Unary operations
+//===----------------------------------------------------------------------===//
+
+/// Generic lowering of an element-wise unary DSP operation `UnaryOp` to a loop
+/// nest (built by `lowerOpToLoops`) whose body applies `LoweredUnaryOp` to the
+/// element loaded from the input buffer.
+template <typename UnaryOp, typename LoweredUnaryOp>
+struct UnaryOpLowering : public ConversionPattern {
+  UnaryOpLowering(MLIRContext *ctx)
+      : ConversionPattern(UnaryOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto opLoc = op->getLoc();
+
+    // Body emitted for one element position of the loop nest.
+    auto emitElement = [opLoc](OpBuilder &nestedBuilder,
+                               ValueRange remappedOperands,
+                               ValueRange inductionVars) {
+      // The ODS-generated adaptor gives named access to the remapped
+      // operands of the UnaryOp.
+      typename UnaryOp::Adaptor adaptor(remappedOperands);
+
+      // Read the input element addressed by the current induction variables.
+      auto element = nestedBuilder.create<affine::AffineLoadOp>(
+          opLoc, adaptor.getInput(), inductionVars);
+
+      // Apply the lowered unary operation to that element.
+      return nestedBuilder.create<LoweredUnaryOp>(opLoc, element);
+    };
+
+    lowerOpToLoops(op, operands, rewriter, emitElement);
+    return success();
+  }
+};
+
+// Element-wise binary DSP ops, each mapped onto one arith op.
+using AddOpLowering = BinaryOpLowering<dsp::AddOp, arith::AddFOp>;
+using SubOpLowering = BinaryOpLowering<dsp::SubOp, arith::SubFOp>;
+using MulOpLowering = BinaryOpLowering<dsp::MulOp, arith::MulFOp>;
+using DivOpLowering = BinaryOpLowering<dsp::DivOp, arith::DivFOp>;
+using ModuloOpLowering = BinaryOpLowering<dsp::ModuloOp, arith::RemFOp>;
+
+// Element-wise unary DSP ops, each mapped onto one math op.
+using AbsOpLowering = UnaryOpLowering<dsp::AbsOp, math::AbsFOp>;
+using SqrtOpLowering = UnaryOpLowering<dsp::SqrtOp, math::SqrtOp>;
+using SinOpLowering = UnaryOpLowering<dsp::SinOp, math::SinOp>;
+using CosOpLowering = UnaryOpLowering<dsp::CosOp, math::CosOp>;
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Constant operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::ConstantOp` by materialising its value into a freshly
+/// allocated memref: one `arith.constant` plus one `affine.store` is emitted
+/// per element, and the op is replaced by the allocation.
+struct ConstantOpLowering : public OpRewritePattern<dsp::ConstantOp> {
+  using OpRewritePattern<dsp::ConstantOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(dsp::ConstantOp op,
+                                PatternRewriter &rewriter) const final {
+    DenseElementsAttr denseValue = op.getValue();
+    Location loc = op.getLoc();
+
+    // Back the constant with a buffer of the equivalent memref type.
+    auto resultTensorTy = llvm::cast<RankedTensorType>(op.getType());
+    auto bufferTy = convertTensorToMemRef(resultTensorTy);
+    auto buffer = insertAllocAndDealloc(bufferTy, loc, rewriter);
+
+    // Index constants are shared between all stores, so build one per value
+    // in [0, largest dimension) up front instead of one per store.
+    auto shape = bufferTy.getShape();
+    SmallVector<Value, 8> indexConstants;
+    if (shape.empty()) {
+      // Rank-0 tensor: a single zero index is created.
+      indexConstants.push_back(
+          rewriter.create<arith::ConstantIndexOp>(loc, 0));
+    } else {
+      const int64_t largestDim = *std::max_element(shape.begin(), shape.end());
+      for (int64_t idx = 0; idx < largestDim; ++idx)
+        indexConstants.push_back(
+            rewriter.create<arith::ConstantIndexOp>(loc, idx));
+    }
+
+    // Walk the shape dimension by dimension. `currentIndices` holds the
+    // indices chosen for the outer dimensions; once every dimension has an
+    // index, the next element of the attribute is stored at that position.
+    SmallVector<Value, 2> currentIndices;
+    auto nextElement = denseValue.value_begin<FloatAttr>();
+    std::function<void(uint64_t)> emitStores = [&](uint64_t dim) {
+      if (dim != shape.size()) {
+        // Not at the innermost level yet: fix each index of this dimension
+        // in turn and recurse into the next one.
+        const uint64_t extent = shape[dim];
+        for (uint64_t pos = 0; pos != extent; ++pos) {
+          currentIndices.push_back(indexConstants[pos]);
+          emitStores(dim + 1);
+          currentIndices.pop_back();
+        }
+        return;
+      }
+
+      // Base case: all indices are fixed, store the element.
+      FloatAttr elementAttr = *nextElement;
+      ++nextElement;
+      Value elementCst = rewriter.create<arith::ConstantOp>(loc, elementAttr);
+      rewriter.create<affine::AffineStoreOp>(loc, elementCst, buffer,
+                                             llvm::ArrayRef(currentIndices));
+    };
+    emitStores(/*dimension=*/0);
+
+    // The allocation now stands in for the constant.
+    rewriter.replaceOp(op, buffer);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Func operations
+//===----------------------------------------------------------------------===//
+
+/// Converts the `dsp::FuncOp` named "main" into a `func::FuncOp` that takes
+/// over the original body. Any other function is rejected, as is a `main`
+/// with arguments or results.
+struct FuncOpLowering : public OpConversionPattern<dsp::FuncOp> {
+  using OpConversionPattern<dsp::FuncOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(dsp::FuncOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Every function other than `main` is expected to have been inlined
+    // already, so only `main` is handled here.
+    const bool isMain = op.getName() == "main";
+    if (!isMain)
+      return failure();
+
+    // `main` must be a nullary function without results.
+    FunctionType signature = op.getFunctionType();
+    const bool hasInputs = op.getNumArguments() != 0;
+    const bool hasResults = signature.getNumResults() != 0;
+    if (hasInputs || hasResults)
+      return rewriter.notifyMatchFailure(
+          op, "expected 'main' to have 0 inputs and 0 results");
+
+    // Build the replacement function and move the body over to it.
+    auto loweredFunc = rewriter.create<mlir::func::FuncOp>(
+        op.getLoc(), op.getName(), signature);
+    rewriter.inlineRegionBefore(op.getRegion(), loweredFunc.getBody(),
+                                loweredFunc.end());
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Print operations
+//===----------------------------------------------------------------------===//
+
+/* struct PrintOpLowering : public OpConversionPattern<dsp::PrintOp> {
+  using OpConversionPattern<dsp::PrintOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // We don't lower "dsp.print" in this pass, but we need to update its
+    // operands.
+    rewriter.modifyOpInPlace(op,
+                             [&] { op->setOperands(adaptor.getOperands()); });
+    return success();
+  }
+};
+*/
+
+/// Lowers `dsp.print` to a loop nest calling `printf` on each of the individual
+/// elements of the array.
+///
+/// The converted operand is taken from the adaptor and must be a statically
+/// shaped memref; anything else is reported as a match failure before any IR
+/// is created. A newline is printed after each iteration of every loop except
+/// the innermost one.
+struct PrintOpLowering : public OpConversionPattern<dsp::PrintOp> {
+  using OpConversionPattern<dsp::PrintOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(dsp::PrintOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Read the remapped operand straight from the adaptor. The op is erased
+    // at the end of this pattern, so rewriting its operands in place first
+    // would be wasted work.
+    Value input = adaptor.getInput();
+    auto memRefType = llvm::dyn_cast<MemRefType>(input.getType());
+    if (!memRefType || !memRefType.hasStaticShape())
+      return rewriter.notifyMatchFailure(
+          op, "expected a statically shaped memref operand");
+
+    ModuleOp parentModule = op->getParentOfType<ModuleOp>();
+    if (!parentModule)
+      return rewriter.notifyMatchFailure(op, "expected an enclosing module");
+
+    auto *context = rewriter.getContext();
+    auto memRefShape = memRefType.getShape();
+    auto loc = op->getLoc();
+    auto printfType = getPrintfType(context);
+
+    // Get a symbol reference to the printf function, inserting it if necessary.
+    auto printfRef = getOrInsertPrintf(rewriter, parentModule);
+    // The explicit lengths keep the trailing NUL inside the global strings.
+    Value formatSpecifierCst = getOrCreateGlobalString(
+        loc, rewriter, "frmt_spec", StringRef("%f \0", 4), parentModule);
+    Value newLineCst = getOrCreateGlobalString(
+        loc, rewriter, "nl", StringRef("\n\0", 2), parentModule);
+
+    // Create a loop for each of the dimensions within the shape.
+    SmallVector<Value, 4> loopIvs;
+    for (unsigned i = 0, e = memRefShape.size(); i != e; ++i) {
+      auto lowerBound = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+      auto upperBound =
+          rewriter.create<arith::ConstantIndexOp>(loc, memRefShape[i]);
+      auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+      auto loop =
+          rewriter.create<scf::ForOp>(loc, lowerBound, upperBound, step);
+      // Drop whatever the builder placed in the body; the terminator is
+      // re-created below, after the optional newline call.
+      for (Operation &nested : *loop.getBody())
+        rewriter.eraseOp(&nested);
+      loopIvs.push_back(loop.getInductionVar());
+
+      // Terminate the loop body.
+      rewriter.setInsertionPointToEnd(loop.getBody());
+
+      // Insert a newline after each of the inner dimensions of the shape.
+      if (i != e - 1)
+        rewriter.create<LLVM::CallOp>(loc, printfType, printfRef, newLineCst);
+      rewriter.create<scf::YieldOp>(loc);
+      // Everything created next (inner loops, the element print) goes in
+      // front of the newline/yield of this loop.
+      rewriter.setInsertionPointToStart(loop.getBody());
+    }
+
+    // Generate a call to printf for the current element of the loop.
+    auto elementLoad = rewriter.create<memref::LoadOp>(loc, input, loopIvs);
+    rewriter.create<LLVM::CallOp>(
+        loc, printfType, printfRef,
+        ArrayRef<Value>({formatSpecifierCst, elementLoad}));
+
+    // Notify the rewriter that this operation has been removed.
+    rewriter.eraseOp(op);
+    return success();
+  }
+
+  /// Create a function declaration for printf, the signature is:
+  ///   * `i32 (i8*, ...)`
+  static LLVM::LLVMFunctionType getPrintfType(MLIRContext *context) {
+    auto llvmI32Ty = IntegerType::get(context, 32);
+    auto llvmPtrTy = LLVM::LLVMPointerType::get(context);
+    return LLVM::LLVMFunctionType::get(llvmI32Ty, llvmPtrTy,
+                                       /*isVarArg=*/true);
+  }
+
+  /// Return a symbol reference to the printf function, inserting it into the
+  /// module if necessary.
+  static FlatSymbolRefAttr getOrInsertPrintf(PatternRewriter &rewriter,
+                                             ModuleOp module) {
+    auto *context = module.getContext();
+    if (!module.lookupSymbol<LLVM::LLVMFuncOp>("printf")) {
+      // Insert the printf declaration at the start of the module body; the
+      // guard restores the caller's insertion point afterwards.
+      PatternRewriter::InsertionGuard insertGuard(rewriter);
+      rewriter.setInsertionPointToStart(module.getBody());
+      rewriter.create<LLVM::LLVMFuncOp>(module.getLoc(), "printf",
+                                        getPrintfType(context));
+    }
+    return SymbolRefAttr::get(context, "printf");
+  }
+
+  /// Return a value representing an access into a global string with the given
+  /// name, creating the string if necessary.
+  static Value getOrCreateGlobalString(Location loc, OpBuilder &builder,
+                                       StringRef name, StringRef value,
+                                       ModuleOp module) {
+    // Create the global at the entry of the module if it does not exist yet.
+    LLVM::GlobalOp global = module.lookupSymbol<LLVM::GlobalOp>(name);
+    if (!global) {
+      OpBuilder::InsertionGuard insertGuard(builder);
+      builder.setInsertionPointToStart(module.getBody());
+      auto type = LLVM::LLVMArrayType::get(
+          IntegerType::get(builder.getContext(), 8), value.size());
+      global = builder.create<LLVM::GlobalOp>(loc, type, /*isConstant=*/true,
+                                              LLVM::Linkage::Internal, name,
+                                              builder.getStringAttr(value),
+                                              /*alignment=*/0);
+    }
+
+    // Get the pointer to the first character in the global string.
+    Value globalPtr = builder.create<LLVM::AddressOfOp>(loc, global);
+    Value cst0 = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
+                                                  builder.getIndexAttr(0));
+    return builder.create<LLVM::GEPOp>(
+        loc, LLVM::LLVMPointerType::get(builder.getContext()), global.getType(),
+        globalPtr, ArrayRef<Value>({cst0, cst0}));
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Return operations
+//===----------------------------------------------------------------------===//
+
+/// Rewrites an operand-less `dsp.return` into `func.return`.
+struct ReturnOpLowering : public OpRewritePattern<dsp::ReturnOp> {
+  using OpRewritePattern<dsp::ReturnOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(dsp::ReturnOp op,
+                                PatternRewriter &rewriter) const final {
+    // All calls are expected to have been inlined by now, which leaves only
+    // returns without an operand; those map one-to-one onto "func.return".
+    if (!op.hasOperand()) {
+      rewriter.replaceOpWithNewOp<func::ReturnOp>(op);
+      return success();
+    }
+
+    // A return that still carries a value is not handled by this lowering.
+    return failure();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Transpose operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::TransposeOp` through `lowerOpToLoops`: each output element is
+/// loaded from the input at the reversed loop indices.
+struct TransposeOpLowering : public ConversionPattern {
+  TransposeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::TransposeOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    const Location opLoc = op->getLoc();
+
+    // Loop body: read the source element whose indices are the current
+    // induction variables in reverse order.
+    auto emitElement = [opLoc](OpBuilder &nestedBuilder, ValueRange remapped,
+                               ValueRange ivs) -> Value {
+      // The ODS-generated adaptor gives named access to the remapped
+      // operands.
+      dsp::TransposeOpAdaptor adaptor(remapped);
+      Value source = adaptor.getInput();
+
+      SmallVector<Value, 2> swappedIvs(llvm::reverse(ivs));
+      return nestedBuilder.create<affine::AffineLoadOp>(opLoc, source,
+                                                        swappedIvs);
+    };
+
+    lowerOpToLoops(op, operands, rewriter, emitElement);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Conv2D operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::Conv2DOp` to a four-deep affine loop nest.
+///
+/// With input `in` (IH x IW), kernel `k` (KH x KW) and result `out`
+/// (OH x OW), the generated code is:
+///
+///   for i in [0, OH), j in [0, OW):
+///     out[i][j] = 0
+///     for p in [0, KH), q in [0, KW):
+///       if (i + p < IH && j + q < IW)
+///         out[i][j] += in[i + p][j + q] * k[p][q]
+///
+/// The pattern fails to match unless input, kernel and result are statically
+/// shaped rank-2 tensors.
+struct Conv2DOpLowering : public ConversionPattern {
+  Conv2DOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Conv2DOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    auto loc = op->getLoc();
+
+    // Validate all types before creating any IR: the shapes below are indexed
+    // at positions 0 and 1 and used as constant loop bounds.
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    auto kernelType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(1).getType());
+    if (!output || !inputType || !kernelType)
+      return rewriter.notifyMatchFailure(
+          op, "expected ranked tensor input, kernel and result");
+    if (output.getRank() != 2 || inputType.getRank() != 2 ||
+        kernelType.getRank() != 2)
+      return rewriter.notifyMatchFailure(
+          op, "expected rank-2 input, kernel and result");
+    if (!output.hasStaticShape() || !inputType.hasStaticShape() ||
+        !kernelType.hasStaticShape())
+      return rewriter.notifyMatchFailure(
+          op, "expected statically shaped input, kernel and result");
+
+    // output mem alloc and dealloc
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    Conv2DOpAdaptor conv2dAdaptor(operands);
+    Value input = conv2dAdaptor.getInput();
+    Value kernel = conv2dAdaptor.getKernel();
+
+    ArrayRef<int64_t> inputShape = inputType.getShape();
+    ArrayRef<int64_t> kernelShape = kernelType.getShape();
+    ArrayRef<int64_t> outputShape = output.getShape();
+
+    // input layout
+    int64_t IH = inputShape[0];
+    int64_t IW = inputShape[1];
+
+    // kernel layout
+    int64_t KH = kernelShape[0];
+    int64_t KW = kernelShape[1];
+
+    // output layout
+    int64_t OH = outputShape[0];
+    int64_t OW = outputShape[1];
+
+    // Affine dimensions d0..d3 stand for the loop indices i, j, p, q.
+    AffineExpr d0, d1, d2, d3;
+    bindDims(rewriter.getContext(), d0, d1, d2, d3);
+
+    // Input access: (i, j, p, q) -> (i + p, j + q).
+    AffineMap inputMap = AffineMap::get(
+        4, 0, ArrayRef<AffineExpr>{d0 + d2, d1 + d3}, rewriter.getContext());
+    // Kernel access: (i, j, p, q) -> (p, q).
+    AffineMap kernelMap = AffineMap::get(4, 0, ArrayRef<AffineExpr>{d2, d3},
+                                         rewriter.getContext());
+
+    // loops
+    int64_t lb = 0, step = 1;
+    /* looping i*/
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, OH, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+
+    /* looping j*/
+    AffineForOp forOpJ = rewriter.create<AffineForOp>(loc, lb, OW, step);
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+    auto ivJ = forOpJ.getInductionVar();
+
+    // Initialize the accumulator out[i][j] before the reduction loops.
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineStoreOp>(loc, zeroVal, alloc, ValueRange{ivI, ivJ});
+
+    /* looping p*/
+    AffineForOp forOpP = rewriter.create<AffineForOp>(loc, lb, KH, step);
+    rewriter.setInsertionPointToStart(forOpP.getBody());
+    auto ivP = forOpP.getInductionVar();
+
+    /* looping q*/
+    AffineForOp forOpQ = rewriter.create<AffineForOp>(loc, lb, KW, step);
+    rewriter.setInsertionPointToStart(forOpQ.getBody());
+    auto ivQ = forOpQ.getInductionVar();
+
+    // Input bound check: only accumulate when (i + p, j + q) lies inside the
+    // input.
+    Value inputRow = rewriter.create<AffineApplyOp>(
+        loc, inputMap.getSubMap(0), ValueRange{ivI, ivJ, ivP, ivQ});
+    Value inputCol = rewriter.create<AffineApplyOp>(
+        loc, inputMap.getSubMap(1), ValueRange{ivI, ivJ, ivP, ivQ});
+    Value rowUB = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::slt, inputRow,
+        rewriter.create<arith::ConstantIndexOp>(loc, IH));
+    Value colUB = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::slt, inputCol,
+        rewriter.create<arith::ConstantIndexOp>(loc, IW));
+    Value bound = rewriter.create<arith::AndIOp>(loc, rowUB, colUB);
+
+    // Guarded multiply-accumulate into out[i][j].
+    rewriter.create<scf::IfOp>(
+        loc, bound, [&](OpBuilder &builder, Location loc) {
+          // load input and kernel elements
+          Value inputVal = builder.create<AffineLoadOp>(
+              loc, input, inputMap, ValueRange{ivI, ivJ, ivP, ivQ});
+          Value kernelVal = builder.create<AffineLoadOp>(
+              loc, kernel, kernelMap, ValueRange{ivI, ivJ, ivP, ivQ});
+          // mul
+          Value prod = builder.create<arith::MulFOp>(loc, inputVal, kernelVal);
+          Value outputVal =
+              builder.create<AffineLoadOp>(loc, alloc, ValueRange{ivI, ivJ});
+          Value sum = builder.create<arith::AddFOp>(loc, prod, outputVal);
+
+          // store the computed output
+          builder.create<AffineStoreOp>(loc, sum, alloc, ValueRange{ivI, ivJ});
+
+          builder.create<scf::YieldOp>(loc);
+        });
+
+    // Leave the insertion point after the whole loop nest, as the other
+    // loop-building patterns in this file do.
+    rewriter.setInsertionPointAfter(forOpI);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+}; // conv2d
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: ThresholdUpOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::ThresholdUpOp` to a single affine loop over the result.
+///
+/// For every index `i` the generated code stores
+///   - `0`    when `x[i] >= threshold` does not hold (ordered comparison),
+///   - `x[i]` when it holds and `returnoriginal == 1`,
+///   - `1`    when it holds and `returnoriginal != 1`.
+/// `threshold` and `returnoriginal` are loaded without indices, and the loop
+/// runs over the first dimension of the result type.
+struct ThresholdUpOpLowering : public ConversionPattern {
+  ThresholdUpOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::ThresholdUpOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    const Location loc = op->getLoc();
+
+    // Allocate (and schedule deallocation of) the buffer backing the result.
+    auto resultTensorTy =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBufferTy = convertTensorToMemRef(resultTensorTy);
+    auto resultBuffer = insertAllocAndDealloc(resultBufferTy, loc, rewriter);
+
+    // The two literal values the result can take besides the input itself.
+    Value zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // Named access to the remapped operands.
+    ThresholdUpOpAdaptor adaptor(operands);
+    auto source = adaptor.getInput();
+    auto thresholdBuffer = adaptor.getThreshold();
+    auto returnOriginalBuffer = adaptor.getReturnoriginal();
+
+    // Loop over [0, length of the result).
+    const int64_t lowerBound = 0;
+    const int64_t upperBound = resultTensorTy.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, lowerBound, upperBound, stride);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    auto iv = loop.getInductionVar();
+
+    // Loads for the current element and the two scalar parameters.
+    Value element = rewriter.create<AffineLoadOp>(loc, source, ValueRange{iv});
+    auto thresholdVal =
+        rewriter.create<AffineLoadOp>(loc, thresholdBuffer, ValueRange{});
+    auto returnOriginalVal =
+        rewriter.create<AffineLoadOp>(loc, returnOriginalBuffer, ValueRange{});
+
+    // element >= threshold
+    auto reachesThreshold = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, element, thresholdVal);
+    // returnoriginal == 1
+    auto keepOriginal = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, one, returnOriginalVal);
+
+    // Value stored when the threshold is reached: the element or 1.
+    auto passedValue =
+        rewriter.create<arith::SelectOp>(loc, keepOriginal, element, one);
+    // Final value: `passedValue` when the threshold is reached, 0 otherwise.
+    auto storedValue = rewriter.create<arith::SelectOp>(loc, reachesThreshold,
+                                                        passedValue, zero);
+
+    rewriter.create<AffineStoreOp>(loc, storedValue, resultBuffer,
+                                   ValueRange{iv});
+
+    // Continue after the loop and let the buffer stand in for the result.
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuffer);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GenerateDTMFOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::GenerateDTMFOp` to a loop that synthesises a DTMF tone.
+///
+/// Operand 0 (digit) and operand 2 (sampling frequency `fs`) must be produced
+/// by `dsp.constant`; their first element is read at compile time. With
+/// `(f1, f2)` the frequency pair selected by the digit, the generated code is
+///
+///   for i in [0, len(result)):
+///     t = i / fs
+///     result[i] = 10 * (sin(2*pi*f1*t) + sin(2*pi*f2*t))
+///
+/// The number of samples comes from the result type, so operand 1 (duration)
+/// is not read here. The pattern fails to match when a required operand is
+/// not a constant, the digit is outside [0, 9], the sampling frequency is not
+/// positive, or the result is not a statically shaped rank-1 tensor.
+struct GenerateDTMFOpLowering : public ConversionPattern {
+  GenerateDTMFOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GenerateDTMFOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Validate everything up front so that no IR is created on failure.
+    auto tensorType =
+        llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    if (!tensorType || tensorType.getRank() != 1 ||
+        !tensorType.hasStaticShape())
+      return rewriter.notifyMatchFailure(
+          op, "expected a statically shaped rank-1 result");
+
+    FailureOr<double> digit = getScalarConstant(op->getOperand(0));
+    FailureOr<double> sampleRate = getScalarConstant(op->getOperand(2));
+    if (failed(digit) || failed(sampleRate))
+      return rewriter.notifyMatchFailure(
+          op, "expected digit and sampling frequency to be floating-point "
+              "dsp.constant values");
+
+    // Row/column frequency pair (Hz) for the digits 0..9, indexed by digit.
+    static constexpr int64_t kDtmfFreqPairs[10][2] = {
+        {941, 1336}, {697, 1209}, {697, 1336}, {697, 1477}, {770, 1209},
+        {770, 1336}, {770, 1477}, {852, 1209}, {852, 1336}, {852, 1477}};
+
+    // Written as a positive range test so that NaN is rejected as well.
+    if (!(*digit >= 0.0 && *digit < 10.0))
+      return rewriter.notifyMatchFailure(op, "expected a digit in [0, 9]");
+    // The generated code divides by the sampling frequency.
+    if (!(*sampleRate > 0.0))
+      return rewriter.notifyMatchFailure(
+          op, "expected a positive sampling frequency");
+
+    // A fractional digit is truncated towards zero.
+    const int64_t *pair = kDtmfFreqPairs[static_cast<size_t>(*digit)];
+    const double f1 = static_cast<double>(pair[0]);
+    const double f2 = static_cast<double>(pair[1]);
+    const int64_t numSamples = tensorType.getShape()[0];
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Create constants
+    auto const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    auto const10 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(10));
+    auto constFs = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*sampleRate));
+    auto constF1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f1));
+    auto constF2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f2));
+
+    // Create a loop to generate the DTMF tone
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, numSamples);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    // Get the loop induction variable
+    auto iv = forOp.getInductionVar();
+
+    // Convert loop index to time: t = i / fs
+    auto indexToI64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto indexToFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), indexToI64);
+    auto time = rewriter.create<arith::DivFOp>(loc, indexToFloat, constFs);
+
+    // Generate sine wave for f1
+    auto mulFreqTime1 = rewriter.create<arith::MulFOp>(loc, constF1, time);
+    auto mul2Pi1 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime1);
+    auto sine1 = rewriter.create<math::SinOp>(loc, mul2Pi1);
+
+    // Generate sine wave for f2
+    auto mulFreqTime2 = rewriter.create<arith::MulFOp>(loc, constF2, time);
+    auto mul2Pi2 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime2);
+    auto sine2 = rewriter.create<math::SinOp>(loc, mul2Pi2);
+
+    // Combine the two sine waves and scale the sum by 10
+    auto sumSines = rewriter.create<arith::AddFOp>(loc, sine1, sine2);
+    auto scaledSum = rewriter.create<arith::MulFOp>(loc, const10, sumSines);
+
+    // Store the result in the allocated memref
+    rewriter.create<memref::StoreOp>(loc, scaledSum, alloc, iv);
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+
+private:
+  /// Returns the first element of the `dsp.constant` defining `operand`, or
+  /// failure when `operand` is not defined by such a constant or the constant
+  /// holds no floating-point element.
+  static FailureOr<double> getScalarConstant(Value operand) {
+    auto constantOp = operand.getDefiningOp<dsp::ConstantOp>();
+    if (!constantOp)
+      return failure();
+    DenseElementsAttr value = constantOp.getValue();
+    if (value.empty() || !llvm::isa<FloatType>(value.getElementType()))
+      return failure();
+    return value.getValues<FloatAttr>()[0].getValueAsDouble();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTFreqOpLowering operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::FFTFreqOp` to a loop that fills the result with sample
+/// frequencies.
+///
+/// Operand 0 (`n`) and operand 1 (`d`) must be produced by `dsp.constant`;
+/// their first element is read at compile time. The generated code is
+///
+///   for i in [0, len(result)):
+///     result[i] = (i <= (n - 1) / 2) ? i / (n * d) : (i - n) / (n * d)
+///
+/// The pattern fails to match when an operand is not a constant, when `n` or
+/// `d` is zero (the generated code divides by `n * d`), or when the result is
+/// not a statically shaped rank-1 tensor.
+struct FFTFreqOpLowering : public ConversionPattern {
+  FFTFreqOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTFreqOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Validate everything up front so that no IR is created on failure.
+    auto tensorType =
+        llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    if (!tensorType || tensorType.getRank() != 1 ||
+        !tensorType.hasStaticShape())
+      return rewriter.notifyMatchFailure(
+          op, "expected a statically shaped rank-1 result");
+
+    // Keep both values in double precision: they end up in f64 constants.
+    FailureOr<double> numSamples = getScalarConstant(op->getOperand(0));
+    FailureOr<double> spacing = getScalarConstant(op->getOperand(1));
+    if (failed(numSamples) || failed(spacing))
+      return rewriter.notifyMatchFailure(
+          op, "expected n and d to be floating-point dsp.constant values");
+    if (*numSamples == 0.0 || *spacing == 0.0)
+      return rewriter.notifyMatchFailure(op, "expected non-zero n and d");
+
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // Create constants
+    auto constN = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*numSamples));
+    auto constD = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(*spacing));
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub =
+        rewriter.create<arith::ConstantIndexOp>(loc, tensorType.getShape()[0]);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Loop-invariant values: n * d and the last non-negative bin (n - 1) / 2.
+    auto NtimesD = rewriter.create<arith::MulFOp>(loc, constN, constD);
+    auto half = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.5),
+                                                        rewriter.getF64Type());
+    auto one = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(1.0),
+                                                       rewriter.getF64Type());
+    auto nMinusOne = rewriter.create<arith::SubFOp>(loc, constN, one);
+    auto nMinusOneByTwo = rewriter.create<arith::MulFOp>(loc, nMinusOne, half);
+
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+    auto ivInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto ivFloat =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), ivInt);
+
+    // i <= (n - 1) / 2 selects the non-negative half of the spectrum.
+    auto ifCondition = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLE, ivFloat, nMinusOneByTwo);
+    auto ifOp = rewriter.create<scf::IfOp>(
+        loc, TypeRange{rewriter.getF64Type()}, ifCondition, true);
+
+    // Then: result[i] = i / (n * d)
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+    auto freq = rewriter.create<arith::DivFOp>(loc, ivFloat, NtimesD);
+    rewriter.create<memref::StoreOp>(loc, freq, alloc, ValueRange{iv});
+    rewriter.create<scf::YieldOp>(loc, ValueRange{freq});
+
+    // Else: result[i] = (i - n) / (n * d)
+    rewriter.setInsertionPointToStart(ifOp.elseBlock());
+    auto ivminusN = rewriter.create<arith::SubFOp>(loc, ivFloat, constN);
+    auto negfreq = rewriter.create<arith::DivFOp>(loc, ivminusN, NtimesD);
+    rewriter.create<memref::StoreOp>(loc, negfreq, alloc, ValueRange{iv});
+    rewriter.create<scf::YieldOp>(loc, ValueRange{negfreq});
+
+    // Leave the insertion point after the whole loop, not inside its body.
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+
+private:
+  /// Returns the first element of the `dsp.constant` defining `operand`, or
+  /// failure when `operand` is not defined by such a constant or the constant
+  /// holds no floating-point element.
+  static FailureOr<double> getScalarConstant(Value operand) {
+    auto constantOp = operand.getDefiningOp<dsp::ConstantOp>();
+    if (!constantOp)
+      return failure();
+    DenseElementsAttr value = constantOp.getValue();
+    if (value.empty() || !llvm::isa<FloatType>(value.getElementType()))
+      return failure();
+    return value.getValues<FloatAttr>()[0].getValueAsDouble();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FindDominantPeaksOpLowering operations
+//===----------------------------------------------------------------------===//
+
+struct FindDominantPeaksOpLowering : public ConversionPattern {
+  FindDominantPeaksOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FindDominantPeaksOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto frequencyOperand = op->getOperand(0);
+    auto frequenciesType =
+        llvm::dyn_cast<RankedTensorType>(frequencyOperand.getType());
+    auto frequenciesLength = frequenciesType.getNumElements();
+
+    auto frequenciesLengthIndex = rewriter.create<arith::ConstantIndexOp>(loc, frequenciesLength);
+    auto frequenciesLengthI64 = rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), frequenciesLengthIndex);
+
+    auto frequenciesLengthF64 = rewriter.create<arith::SIToFPOp>(loc, 
+    rewriter.getF64Type(), // frequenciesLength);
+    frequenciesLengthI64  
+    );
+
+    auto two = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(2.0));
+
+    auto frequenciesHalfLength = rewriter.create<arith::DivFOp>(loc, frequenciesLengthF64, two);
+
+    auto frequenciesHalfLengthI32 = rewriter.create<arith::FPToUIOp>(loc, rewriter.getIntegerType(32), frequenciesHalfLength);
+    auto frequenciesHalfLengthIndex = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(), frequenciesHalfLengthI32);
+    // Value length_ui = rewriter.create<arith::FPToUIOp>(
+    //     loc, rewriter.getIntegerType(32), loadedLength);
+    // Value length_index = rewriter.create<arith::IndexCastOp>(
+    //     loc, rewriter.getIndexType(), length_ui);
+
+    FindDominantPeaksOpAdaptor findDominantPeaksOpAdaptor(operands);
+    auto frequencies = findDominantPeaksOpAdaptor.getFrequencies();
+    auto magnitudes = findDominantPeaksOpAdaptor.getMagnitudes();
+
+    // Initialize variables to track the two highest magnitudes and their
+    // corresponding frequencies
+    auto max1 = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto max2 = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto freq1 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    auto freq2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = frequenciesHalfLengthIndex; // rewriter.create<arith::ConstantIndexOp>(loc, frequenciesLength);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto forOp = rewriter.create<scf::ForOp>(
+        loc, lb, ub, step, ValueRange{max1, max2, freq1, freq2});
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+    // Load current frequency and magnitude
+    auto currentFreq =
+        rewriter.create<memref::LoadOp>(loc, frequencies, ValueRange{iv});
+    auto currentMag =
+        rewriter.create<memref::LoadOp>(loc, magnitudes, ValueRange{iv});
+
+    // Check if frequency is positive
+    auto zero = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(0.0),
+                                                        rewriter.getF64Type());
+    auto isPositive = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, currentFreq, zero);
+
+    // Create if operation for positive frequency check
+    auto ifOp = rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(),
+                                           isPositive, true);
+    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    // Compare current magnitude with max1
+    auto cmpMax1 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, currentMag,
+        forOp.getRegionIterArgs()[0]);
+    auto ifMax1 =
+        rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(), cmpMax1, true);
+
+    rewriter.setInsertionPointToStart(&ifMax1.getThenRegion().front());
+    // Update max2 and freq2 with previous max1 and freq1
+    auto newMax2 = forOp.getRegionIterArgs()[0];
+    auto newFreq2 = forOp.getRegionIterArgs()[2];
+    // Update max1 and freq1 with current values
+    auto newMax1 = currentMag;
+    auto newFreq1 = currentFreq;
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange({newMax1, newMax2, newFreq1, newFreq2}));
+
+    rewriter.setInsertionPointToStart(&ifMax1.getElseRegion().front());
+    // Compare current magnitude with max2
+    auto cmpMax2 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, currentMag,
+        forOp.getRegionIterArgs()[1]);
+    auto ifMax2 =
+        rewriter.create<scf::IfOp>(loc, forOp.getResultTypes(), cmpMax2, true);
+
+    rewriter.setInsertionPointToStart(&ifMax2.getThenRegion().front());
+    // Update max2 and freq2 with current values
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{forOp.getRegionIterArgs()[0], currentMag,
+                        forOp.getRegionIterArgs()[2], currentFreq});
+
+    rewriter.setInsertionPointToStart(&ifMax2.getElseRegion().front());
+    // No update, yield original values
+    rewriter.create<scf::YieldOp>(loc, forOp.getRegionIterArgs());
+
+    rewriter.setInsertionPointAfter(ifMax2);
+    rewriter.create<scf::YieldOp>(loc, ifMax2.getResults());
+
+    rewriter.setInsertionPointAfter(ifMax1);
+    rewriter.create<scf::YieldOp>(loc, ifMax1.getResults());
+
+    rewriter.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    // No update for negative frequencies, yield original values
+    rewriter.create<scf::YieldOp>(loc, forOp.getRegionIterArgs());
+
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.create<scf::YieldOp>(loc, ifOp.getResults());
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    // Compare freq1 and freq2 to determine the order
+    auto cmpFreq = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLT, forOp.getResult(2), forOp.getResult(3));
+
+    auto ifFreq = rewriter.create<scf::IfOp>(
+        loc, TypeRange{rewriter.getF64Type(), rewriter.getF64Type()}, cmpFreq,
+        true);
+
+    rewriter.setInsertionPointToStart(&ifFreq.getThenRegion().front());
+    // freq1 < freq2, so keep the order
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{forOp.getResult(2), forOp.getResult(3)});
+
+    rewriter.setInsertionPointToStart(&ifFreq.getElseRegion().front());
+    // freq1 >= freq2, so swap the order
+    rewriter.create<scf::YieldOp>(
+        loc, ValueRange{forOp.getResult(3), forOp.getResult(2)});
+
+    rewriter.setInsertionPointAfter(ifFreq);
+
+    // Store the two highest peak frequencies in the result memref, now in the
+    // correct order
+    auto storeFreq1 = rewriter.create<memref::StoreOp>(
+        loc, ifFreq.getResult(0), alloc,
+        ValueRange{rewriter.create<arith::ConstantIndexOp>(loc, 0)});
+    auto storeFreq2 = rewriter.create<memref::StoreOp>(
+        loc, ifFreq.getResult(1), alloc,
+        ValueRange{rewriter.create<arith::ConstantIndexOp>(loc, 1)});
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: RecoverDTMFDigitOpLowering operations
+//===----------------------------------------------------------------------===//
+
+struct RecoverDTMFDigitOpLowering : public ConversionPattern {
+  RecoverDTMFDigitOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::RecoverDTMFDigitOp::getOperationName(), 1, ctx) {
+  }
+
+  /// Lowers dsp::RecoverDTMFDigitOp to an scf.for over rows 0..9 of the
+  /// `freqPairs` operand.
+  ///
+  /// Elements 0 and 1 of `frequencies` are compared against columns 0 and 1
+  /// of each row. A row matches when both absolute differences are <= 3.0.
+  /// The index of the last matching row (-1 when no row matches) is converted
+  /// to f64 and written to element 0 of the result buffer.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the tensor result of the op.
+    auto resultTensorTy =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultMemRefTy = convertTensorToMemRef(resultTensorTy);
+    auto resultBuf = insertAllocAndDealloc(resultMemRefTy, loc, rewriter);
+
+    // Rank-0 index cell that carries the matched row across loop iterations.
+    auto matchCellTy = MemRefType::get({}, rewriter.getIndexType());
+    auto matchCell = insertAllocAndDealloc(matchCellTy, loc, rewriter);
+
+    RecoverDTMFDigitOpAdaptor adaptor(operands);
+    Value measured = adaptor.getFrequencies();
+    Value table = adaptor.getFreqPairs();
+
+    // Column indices shared by the `frequencies` loads and the table loads.
+    Value col0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value col1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    Value measured0 =
+        rewriter.create<memref::LoadOp>(loc, measured, ValueRange{col0});
+    Value measured1 =
+        rewriter.create<memref::LoadOp>(loc, measured, ValueRange{col1});
+
+    // Seed the cell with -1, meaning "no row matched".
+    Value noMatch = rewriter.create<arith::ConstantIndexOp>(loc, -1);
+    rewriter.create<AffineStoreOp>(loc, noMatch, matchCell, ValueRange{});
+
+    // Largest absolute difference still accepted as a match.
+    Value maxDelta = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(3.0), rewriter.getF64Type());
+
+    Value firstRow = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value rowLimit = rewriter.create<arith::ConstantIndexOp>(loc, 10);
+    Value rowStride = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto rowLoop =
+        rewriter.create<scf::ForOp>(loc, firstRow, rowLimit, rowStride);
+    rewriter.setInsertionPointToStart(rowLoop.getBody());
+    Value row = rowLoop.getInductionVar();
+
+    // Row index selected by the previous iterations.
+    Value previous =
+        rewriter.create<memref::LoadOp>(loc, matchCell, ValueRange{});
+
+    Value expected0 =
+        rewriter.create<memref::LoadOp>(loc, table, ValueRange{row, col0});
+    Value expected1 =
+        rewriter.create<memref::LoadOp>(loc, table, ValueRange{row, col1});
+
+    Value delta0 = rewriter.create<arith::SubFOp>(loc, expected0, measured0);
+    Value delta1 = rewriter.create<arith::SubFOp>(loc, expected1, measured1);
+
+    Value absDelta0 = rewriter.create<math::AbsFOp>(loc, delta0);
+    Value absDelta1 = rewriter.create<math::AbsFOp>(loc, delta1);
+
+    Value close0 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLE, absDelta0, maxDelta);
+    Value close1 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OLE, absDelta1, maxDelta);
+    Value rowMatches = rewriter.create<arith::AndIOp>(loc, close0, close1);
+
+    // Keep the previous choice unless this row matches.
+    Value chosen =
+        rewriter.create<arith::SelectOp>(loc, rowMatches, row, previous);
+    rewriter.create<memref::StoreOp>(loc, chosen, matchCell, ValueRange{});
+
+    rewriter.setInsertionPointAfter(rowLoop);
+
+    // index -> i64 -> f64 so the row index fits the f64 result buffer.
+    Value matchedRow =
+        rewriter.create<memref::LoadOp>(loc, matchCell, ValueRange{});
+    Value matchedRowI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), matchedRow);
+    Value matchedRowF64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), matchedRowI64);
+
+    Value resultSlot = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    rewriter.create<memref::StoreOp>(loc, matchedRowF64, resultBuf,
+                                     ValueRange{resultSlot});
+
+    rewriter.replaceOp(op, resultBuf);
+
+    return success();
+  }
+};
+
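+// NOTE: leftover commented-out snippet. The same store is already emitted at
+// the end of RecoverDTMFDigitOpLowering::matchAndRewrite above:
+//   auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+//   rewriter.create<memref::StoreOp>(loc, finalMatchIndexF64, alloc,
+//                                    ValueRange{zero});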
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: GenerateVoiceSignatureOpLowering operations
+//===----------------------------------------------------------------------===//
+
+struct GenerateVoiceSignatureOpLowering : public ConversionPattern {
+  GenerateVoiceSignatureOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::GenerateVoiceSignatureOp::getOperationName(), 1,
+                          ctx) {}
+
+  /// Lowers dsp::GenerateVoiceSignatureOp to an scf.for that fills the result
+  /// buffer with sin(2*pi*f1*t) + sin(2*pi*f2*t), where t = i / fs for the
+  /// sample index i.
+  ///
+  /// f1, f2 and fs are read at compile time from element 0 of the constants
+  /// feeding operands 0, 1 and 3. If any of them is not produced by a
+  /// non-empty dsp::ConstantOp the pattern reports a match failure instead of
+  /// dereferencing a null op. Operand 2 is not read: the number of samples is
+  /// the first dimension of the result type.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Reads element 0 of the dsp::ConstantOp defining operand `index` into
+    // `result`. Returns false when there is no such constant or it is empty.
+    auto readConstant = [&](unsigned index, double &result) {
+      auto constantOp = op->getOperand(index).getDefiningOp<dsp::ConstantOp>();
+      if (!constantOp)
+        return false;
+      auto values = constantOp.getValue();
+      if (values.getNumElements() == 0)
+        return false;
+      result = values.getValues<FloatAttr>()[0].getValueAsDouble();
+      return true;
+    };
+
+    // Validate before emitting any IR. The values stay in double so they are
+    // not rounded through float on their way into f64 attributes.
+    double f1 = 0.0;
+    double f2 = 0.0;
+    double fs = 0.0;
+    if (!readConstant(0, f1) || !readConstant(1, f2) || !readConstant(3, fs))
+      return rewriter.notifyMatchFailure(
+          op, "expected operands 0, 1 and 3 to be non-empty dsp constants");
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub =
+        rewriter.create<arith::ConstantIndexOp>(loc, tensorType.getShape()[0]);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    // Loop-invariant constants (2*pi at full double precision).
+    auto const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(6.283185307179586));
+    auto constFs = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(fs));
+    auto constF1 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f1));
+    auto constF2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(f2));
+
+    // One iteration per output sample.
+    auto forOp = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+
+    // t = i / fs (index -> i64 -> f64 first).
+    auto indexToI64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), iv);
+    auto indexToFloat = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), indexToI64);
+    auto time = rewriter.create<arith::DivFOp>(loc, indexToFloat, constFs);
+
+    // sin(2*pi*f1*t)
+    auto mulFreqTime1 = rewriter.create<arith::MulFOp>(loc, constF1, time);
+    auto mul2Pi1 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime1);
+    auto sine1 = rewriter.create<math::SinOp>(loc, mul2Pi1);
+
+    // sin(2*pi*f2*t)
+    auto mulFreqTime2 = rewriter.create<arith::MulFOp>(loc, constF2, time);
+    auto mul2Pi2 = rewriter.create<arith::MulFOp>(loc, const2pi, mulFreqTime2);
+    auto sine2 = rewriter.create<math::SinOp>(loc, mul2Pi2);
+
+    // The two tones are summed without scaling.
+    auto sumSines = rewriter.create<arith::AddFOp>(loc, sine1, sine2);
+    rewriter.create<memref::StoreOp>(loc, sumSines, alloc, ValueRange{iv});
+
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTCombineOpLowering operations
+//===----------------------------------------------------------------------===//
+
+struct FFTCombineOpLowering : public ConversionPattern {
+  FFTCombineOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTCombineOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::FFTCombineOp to an scf.for that writes
+  /// sqrt(real[i]^2 + imag[i]^2) into element i of the result buffer, for
+  /// every i below the first dimension of the result type.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the tensor result of the op.
+    auto resultTensorTy =
+        llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuf = insertAllocAndDealloc(
+        convertTensorToMemRef(resultTensorTy), loc, rewriter);
+
+    FFTCombineOpAdaptor adaptor(operands);
+    Value realPart = adaptor.getReal();
+    Value imagPart = adaptor.getImag();
+
+    Value lower = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value upper = rewriter.create<arith::ConstantIndexOp>(
+        loc, resultTensorTy.getShape()[0]);
+    Value stride = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    auto binLoop = rewriter.create<scf::ForOp>(loc, lower, upper, stride);
+    rewriter.setInsertionPointToStart(binLoop.getBody());
+    Value bin = binLoop.getInductionVar();
+
+    // re^2 + im^2, then the square root.
+    Value re = rewriter.create<memref::LoadOp>(loc, realPart, ValueRange{bin});
+    Value im = rewriter.create<memref::LoadOp>(loc, imagPart, ValueRange{bin});
+    Value reSq = rewriter.create<arith::MulFOp>(loc, re, re);
+    Value imSq = rewriter.create<arith::MulFOp>(loc, im, im);
+    Value sumSq = rewriter.create<arith::AddFOp>(loc, reSq, imSq);
+    Value magnitude = rewriter.create<math::SqrtOp>(loc, sumSq);
+
+    rewriter.create<memref::StoreOp>(loc, magnitude, resultBuf,
+                                     ValueRange{bin});
+
+    rewriter.setInsertionPointAfter(binLoop);
+    rewriter.replaceOp(op, resultBuf);
+
+    return success();
+  }
+};
+
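+// NOTE: leftover commented-out snippet that refers to finalMatchIndexF64 from
+// RecoverDTMFDigitOpLowering; it is unrelated to the patterns around it:
+//   auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+//   rewriter.create<memref::StoreOp>(loc, finalMatchIndexF64, alloc,
+//                                    ValueRange{zero});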
+
+struct QamModulateRealOpLowering : public ConversionPattern {
+  QamModulateRealOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamModulateRealOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::QamModulateRealOp to an affine.for over the result.
+  ///
+  /// Output element i is derived from signal[2 * i]: -1.0 when that element
+  /// equals 0.0, +1.0 otherwise.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the tensor result of the op.
+    auto resultTy = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    QamModulateRealOpAdaptor adaptor(operands);
+    Value bits = adaptor.getSignal();
+
+    // Small factory for the f64 constants used below.
+    auto f64Const = [&](double v) -> Value {
+      return rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                rewriter.getF64FloatAttr(v));
+    };
+    Value minusOne = f64Const(-1);
+    Value zero = f64Const(0);
+    Value plusOne = f64Const(1);
+
+    // d0 -> 2 * d0 : the even-indexed input elements.
+    AffineMap evenMap =
+        AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) * 2);
+
+    const int64_t lowerBound = 0;
+    const int64_t upperBound = resultTy.getShape()[0];
+    const int64_t stride = 1;
+    AffineForOp symbolLoop =
+        rewriter.create<AffineForOp>(loc, lowerBound, upperBound, stride);
+    rewriter.setInsertionPointToStart(symbolLoop.getBody());
+    Value symbol = symbolLoop.getInductionVar();
+
+    Value bit =
+        rewriter.create<AffineLoadOp>(loc, bits, evenMap, ValueRange{symbol});
+    Value bitIsZero = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, bit, zero);
+    Value level =
+        rewriter.create<arith::SelectOp>(loc, bitIsZero, minusOne, plusOne);
+    rewriter.create<AffineStoreOp>(loc, level, resultBuf, ValueRange{symbol});
+
+    rewriter.setInsertionPointAfter(symbolLoop);
+    rewriter.replaceOp(op, resultBuf);
+
+    return success();
+  }
+};
+
+struct QamModulateImgOpLowering : public ConversionPattern {
+  QamModulateImgOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamModulateImgOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::QamModulateImgOp to an affine.for over the result.
+  ///
+  /// Output element i is derived from signal[2 * i + 1]: -1.0 when that
+  /// element equals 0.0, +1.0 otherwise.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // Buffer that replaces the tensor result of the op.
+    auto resultTy = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    QamModulateImgOpAdaptor adaptor(operands);
+    Value bits = adaptor.getSignal();
+
+    // Small factory for the f64 constants used below.
+    auto f64Const = [&](double v) -> Value {
+      return rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                rewriter.getF64FloatAttr(v));
+    };
+    Value minusOne = f64Const(-1);
+    Value zero = f64Const(0);
+    Value plusOne = f64Const(1);
+
+    // d0 -> 2 * d0 + 1 : the odd-indexed input elements.
+    AffineMap oddMap =
+        AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) * 2 + 1);
+
+    const int64_t lowerBound = 0;
+    const int64_t upperBound = resultTy.getShape()[0];
+    const int64_t stride = 1;
+    AffineForOp symbolLoop =
+        rewriter.create<AffineForOp>(loc, lowerBound, upperBound, stride);
+    rewriter.setInsertionPointToStart(symbolLoop.getBody());
+    Value symbol = symbolLoop.getInductionVar();
+
+    Value bit =
+        rewriter.create<AffineLoadOp>(loc, bits, oddMap, ValueRange{symbol});
+    Value bitIsZero = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, bit, zero);
+    Value level =
+        rewriter.create<arith::SelectOp>(loc, bitIsZero, minusOne, plusOne);
+    rewriter.create<AffineStoreOp>(loc, level, resultBuf, ValueRange{symbol});
+
+    rewriter.setInsertionPointAfter(symbolLoop);
+    rewriter.replaceOp(op, resultBuf);
+
+    return success();
+  }
+};
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: QAM demodulate operations
+//===----------------------------------------------------------------------===//
+// #define DUMP(x) llvm::errs() << x << "\n";
+
+struct QamDemodulateOpLowering : public ConversionPattern {
+  QamDemodulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::QamDemodulateOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::QamDemodulateOp to an affine.for that walks the output two
+  /// elements at a time.
+  ///
+  /// For every even output index i:
+  ///   out[i]     = (real[i / 2] == -1.0) ? 0.0 : 1.0
+  ///   out[i + 1] = (imag[i / 2] == -1.0) ? 0.0 : 1.0
+  ///
+  /// Only complete pairs that are backed by an input element are produced, so
+  /// an odd output length or a short input can no longer cause an
+  /// out-of-bounds access; elements outside that range are left unwritten.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    auto loc = op->getLoc();
+    // output mem alloc and dealloc
+    auto output = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    QamDemodulateOpAdaptor qamDemodulateAdaptor(operands);
+    Value realVal = qamDemodulateAdaptor.getReal();
+    Value imgVal = qamDemodulateAdaptor.getImagine();
+
+    // Number of (real, imag) pairs that fit in the output, clamped to the
+    // static length of each input when that length is known.
+    int64_t numPairs = output.getShape()[0] / 2;
+    for (Value input : {realVal, imgVal}) {
+      auto shaped = llvm::dyn_cast<ShapedType>(input.getType());
+      if (shaped && shaped.hasStaticShape() && shaped.getRank() >= 1 &&
+          shaped.getDimSize(0) < numPairs)
+        numPairs = shaped.getDimSize(0);
+    }
+
+    // constant vals;
+    Value negOneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // d0 -> d0 floordiv 2 : input element feeding output pair (d0, d0 + 1).
+    AffineExpr signalExpr = rewriter.getAffineDimExpr(0).floorDiv(2);
+    // d0 -> d0 + 1 : second slot of the output pair.
+    AffineExpr outputExpr =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineConstantExpr(1);
+
+    AffineMap signalMap = AffineMap::get(1, 0, signalExpr);
+    AffineMap outputMap = AffineMap::get(1, 0, outputExpr);
+
+    // The step of 2 visits the first slot of each output pair; the bound is
+    // even by construction, so i + 1 is always in range.
+    int64_t lb = 0, step = 2, ub = 2 * numPairs;
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+
+    Value realNum =
+        rewriter.create<AffineLoadOp>(loc, realVal, signalMap, ValueRange{ivI});
+    Value imgNum =
+        rewriter.create<AffineLoadOp>(loc, imgVal, signalMap, ValueRange{ivI});
+
+    Value negReal = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, realNum, negOneVal);
+    Value negImagine = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, imgNum, negOneVal);
+
+    // -1.0 maps to 0.0, every other value maps to 1.0.
+    Value out1 =
+        rewriter.create<arith::SelectOp>(loc, negReal, zeroVal, oneVal);
+    Value out2 =
+        rewriter.create<arith::SelectOp>(loc, negImagine, zeroVal, oneVal);
+
+    rewriter.create<AffineStoreOp>(loc, out1, alloc, ValueRange{ivI});
+    rewriter.create<AffineStoreOp>(loc, out2, alloc, outputMap, ValueRange{ivI});
+
+    rewriter.setInsertionPointAfter(forOpI);
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+}; // qam_demodulate op
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: BeamForm operations
+//===----------------------------------------------------------------------===//
+
+struct BeamFormOpLowering : public ConversionPattern {
+  BeamFormOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::BeamFormOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::BeamFormOp to two affine loop nests.
+  ///
+  /// 1. Signal generation into a temporary [antennas x timeDim] buffer:
+  ///      signal[a][t] = sin(time[t] * 2*pi*freq + a * pi/4)
+  /// 2. Weighted reduction over the antennas:
+  ///      out[t] = sum over a of signal[a][t] * weights[a]
+  ///
+  /// `antennas` and `freq` come from the op's accessors; `timeDim` is the
+  /// first dimension of the result type.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto beamFormOp = llvm::cast<mlir::dsp::BeamFormOp>(op);
+
+    // allocating space for output
+    auto output = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMemRefType = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMemRefType, loc, rewriter);
+
+    BeamFormOpAdaptor beamFormAdaptor(operands);
+    auto time = beamFormAdaptor.getTime();
+    auto weights = beamFormAdaptor.getWeights();
+
+    // allocating space for internal generated signals
+    int64_t timeDim = output.getShape()[0];
+    int64_t antennas = beamFormOp.getAntennas();
+    int64_t frequency = beamFormOp.getFreq();
+
+    llvm::SmallVector<int64_t, 2> signalShapeVec{antennas, timeDim};
+    llvm::ArrayRef<int64_t> signalShape(signalShapeVec);
+
+    auto signalType = output.clone(signalShape, output.getElementType());
+    auto signalMemRefType = convertTensorToMemRef(signalType);
+    auto allocSignal = insertAllocAndDealloc(signalMemRefType, loc, rewriter);
+
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+
+    // (d0, d1) -> (d1, d0): read the generated signal transposed.
+    AffineMap genInputMap =
+        AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef<AffineExpr>{d1, d0},
+                       rewriter.getContext());
+    // (d0, d1) -> (d1): index a 1-D buffer with the inner loop variable.
+    AffineMap timeMap =
+        AffineMap::get(2 /* dim */, 0 /* sym */, ArrayRef<AffineExpr>{d1},
+                       rewriter.getContext());
+
+    // pi at full double precision: the phase 2*pi*freq*t amplifies any error
+    // in the constant.
+    auto pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(3.141592653589793));
+    auto zero = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                   rewriter.getF64FloatAttr(0));
+    auto one = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                  rewriter.getF64FloatAttr(1));
+    auto two = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                  rewriter.getF64FloatAttr(2));
+    auto four = rewriter.create<arith::ConstantOp>(loc, rewriter.getF64Type(),
+                                                   rewriter.getF64FloatAttr(4));
+    auto two_pi = rewriter.create<arith::MulFOp>(loc, pi, two); // 2 * pi
+    auto freq_val = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(static_cast<double>(frequency)));
+    auto phase_var =
+        rewriter.create<arith::MulFOp>(loc, two_pi, freq_val); // 2*pi*freq
+
+    // ---- Loop nest 1: generate signal[a][t] -------------------------------
+    // Outer loop over antennas. The iter_arg carries the antenna index as an
+    // f64 so no index -> float cast is needed in the body.
+    int64_t lb = 0, ub = antennas, step = 1;
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{zero});
+    auto ivI = forOpI.getInductionVar(); // a : antenna
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+
+    // Block argument 0 is the induction variable, argument 1 the iter_arg.
+    auto floatI = forOpI.getBody()->getArgument(1);
+
+    auto iter_tmp = rewriter.create<arith::MulFOp>(loc, floatI, pi); // a * pi
+    auto iter_args =
+        rewriter.create<arith::DivFOp>(loc, iter_tmp, four); // a * pi / 4
+
+    // Inner loop over time samples.
+    ub = timeDim;
+    affine::AffineForOp forOpJ =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivJ = forOpJ.getInductionVar(); // t : time sample
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+
+    // signal[a][t] = sin(time[t] * 2*pi*freq + a*pi/4)
+    auto time_var =
+        rewriter.create<AffineLoadOp>(loc, time, timeMap, ValueRange{ivI, ivJ});
+    auto mul_var = rewriter.create<arith::MulFOp>(loc, time_var, phase_var);
+    auto sin_body = rewriter.create<arith::AddFOp>(loc, mul_var, iter_args);
+    auto result = rewriter.create<math::SinOp>(loc, sin_body);
+    rewriter.create<AffineStoreOp>(loc, result, allocSignal,
+                                   ValueRange{ivI, ivJ});
+
+    rewriter.setInsertionPointAfter(forOpJ); // end for loop: t
+
+    // Advance the f64 antenna counter; a loop with iter_args needs an
+    // explicit yield.
+    auto increFloatI = rewriter.create<arith::AddFOp>(loc, floatI, one);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{increFloatI});
+
+    rewriter.setInsertionPointAfter(forOpI); // end for loop: a
+
+    // ---- Loop nest 2: out[t] = sum_a signal[a][t] * weights[a] ------------
+    ub = timeDim;
+    affine::AffineForOp forOpIOut =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivIoutput = forOpIOut.getInductionVar(); // t : time sample
+    rewriter.setInsertionPointToStart(forOpIOut.getBody());
+
+    // Inner reduction over antennas; the iter_arg is the running sum.
+    ub = antennas;
+    affine::AffineForOp forOpJOut =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{zero});
+    auto ivJoutput = forOpJOut.getInductionVar(); // a : antenna
+    rewriter.setInsertionPointToStart(forOpJOut.getBody());
+
+    // genInputMap swaps the indices: (t, a) -> signal[a][t].
+    auto signalInput = rewriter.create<AffineLoadOp>(
+        loc, allocSignal, genInputMap, ValueRange{ivIoutput, ivJoutput});
+    // timeMap keeps only the inner index: (t, a) -> weights[a].
+    auto weight = rewriter.create<AffineLoadOp>(
+        loc, weights, timeMap, ValueRange{ivIoutput, ivJoutput});
+    auto intermediateVal =
+        rewriter.create<arith::MulFOp>(loc, signalInput, weight);
+
+    auto sumVal = forOpJOut.getBody()->getArgument(1);
+    auto beamOut = rewriter.create<arith::AddFOp>(loc, intermediateVal, sumVal);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{beamOut});
+
+    // Store the reduced value once, after the reduction, instead of writing
+    // every partial sum from inside the loop.
+    rewriter.setInsertionPointAfter(forOpJOut);
+    rewriter.create<AffineStoreOp>(loc, forOpJOut.getResult(0), alloc,
+                                   ValueRange{ivIoutput});
+
+    rewriter.setInsertionPointAfter(forOpIOut);
+
+    rewriter.replaceOp(op, alloc);
+
+    return mlir::success();
+  }
+};
+
+struct SpaceModulateOpLowering : public ConversionPattern {
+  SpaceModulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceModulateOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::SpaceModulateOp to an affine.for over the first dimension
+  /// of the signal operand: out[i] = (signal[i] == 1.0) ? 1.0 : -1.0.
+  ///
+  /// Reports a match failure, before any IR is created, when the result or
+  /// the signal operand is not a ranked tensor or the signal has rank 0.
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // dyn_cast returns a null type on mismatch, so check before using it.
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    if (!output || !signalType || signalType.getRank() < 1)
+      return rewriter.notifyMatchFailure(
+          op, "expected a ranked tensor result and a ranked signal operand "
+              "of rank >= 1");
+
+    // output
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+
+    SpaceModulateOpAdaptor spaceModAdaptor(operands);
+    Value signal = spaceModAdaptor.getSignal();
+    llvm::ArrayRef<int64_t> signalShape = signalType.getShape();
+
+    Value negOneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(-1));
+    Value oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+
+    // one dim loop
+    int64_t lb = 0, ub = signalShape[0], step = 1;
+    AffineForOp forOp = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto iv = forOp.getInductionVar();
+
+    Value bit = rewriter.create<AffineLoadOp>(loc, signal, ValueRange{iv});
+
+    // Exactly 1.0 maps to +1.0; every other value maps to -1.0.
+    Value isOne = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ,
+                                                 bit, oneVal);
+
+    auto out = rewriter.create<arith::SelectOp>(loc, isOne, oneVal, negOneVal);
+
+    rewriter.create<AffineStoreOp>(loc, out, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+}; // space modulate
+
+struct SpaceDemodulateOpLowering : public ConversionPattern {
+  SpaceDemodulateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceDemodulateOp::getOperationName(), 1, ctx) {}
+
+  /// Rewrites dsp::SpaceDemodulateOp into a single affine loop over the first
+  /// dimension of the input: an element that is >= 1.0 (ordered compare, so
+  /// never NaN) is written as 1.0, anything else as 0.0.
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto f64Ty = rewriter.getF64Type();
+
+    // Buffer that stands in for the op's tensor result.
+    auto resultTy = llvm::dyn_cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    // Already-converted input plus the static extent that is iterated over.
+    SpaceDemodulateOpAdaptor adaptor(operands);
+    Value input = adaptor.getBinary();
+    auto inputTy =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    const int64_t extent = inputTy.getShape()[0];
+
+    // Decision threshold (1.0, also the "set" output) and the "clear" output.
+    Value clearBit = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(0));
+    Value setBit = rewriter.create<arith::ConstantOp>(
+        loc, f64Ty, rewriter.getF64FloatAttr(1));
+
+    // for i in [0, extent): result[i] = (input[i] >= 1.0) ? 1.0 : 0.0
+    const int64_t lowerBound = 0, stride = 1;
+    auto loop = rewriter.create<AffineForOp>(loc, lowerBound, extent, stride);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    Value idx = loop.getInductionVar();
+    Value sample = rewriter.create<AffineLoadOp>(loc, input, ValueRange{idx});
+    Value atLeastOne = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGE, sample, setBit);
+    Value bit =
+        rewriter.create<arith::SelectOp>(loc, atLeastOne, setBit, clearBit);
+    rewriter.create<AffineStoreOp>(loc, bit, resultBuf, ValueRange{idx});
+    rewriter.setInsertionPointAfter(loop);
+    rewriter.replaceOp(op, resultBuf);
+    return mlir::success();
+  }
+}; // space demodulate
+
+struct SpaceErrCorrectionOpLowering : public ConversionPattern {
+  SpaceErrCorrectionOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SpaceErrCorrectionOp::getOperationName(), 1,
+                          ctx) {}
+  /// Lowers dsp::SpaceErrCorrectionOp by processing the input in groups of 8.
+  mlir::LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    // Per group: elements 1..7 are copied; element 0 becomes 0.0 when
+    // (sum of the group) rem 2 == 1 and keeps its input value otherwise.
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMem = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMem, loc, rewriter);
+    // Converted input buffer; the loop extent comes from the tensor type.
+    SpaceErrCorrectionOpAdaptor adaptor(operands);
+    Value signal = adaptor.getSignal();
+    auto signalType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+    llvm::ArrayRef<int64_t> signalShape = signalType.getShape();
+
+    Value zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    Value twoVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(2));
+    // first: (i, j) -> i (group head); index: (i, j) -> i + j (group member).
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineMap first =
+        AffineMap::get(2, 0, ArrayRef<AffineExpr>{d0}, rewriter.getContext());
+    AffineMap index = AffineMap::get(2, 0, ArrayRef<AffineExpr>{d0 + d1},
+                                     rewriter.getContext());
+    // Outer loop: i visits the first element of each group (0, 8, 16, ...).
+    int64_t lb = 0, ub = signalShape[0], step = 8;
+    AffineForOp forOpI = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto ivI = forOpI.getInductionVar();
+    // Seed the running sum kept in alloc[i] with signal[i].
+    auto firstVal = rewriter.create<AffineLoadOp>(
+        loc, signal, ValueRange{ivI}); // signal [0]
+    rewriter.create<AffineStoreOp>(
+        loc, firstVal, alloc, ValueRange{ivI}); // store signal[0] to alloc[0]
+    // NOTE(review): reads up to i + 7 - assumes length % 8 == 0; confirm.
+    int64_t inner_lb = 1, inner_ub = 8, inner_step = 1;
+    AffineForOp forOpJ =
+        rewriter.create<AffineForOp>(loc, inner_lb, inner_ub, inner_step);
+    rewriter.setInsertionPointToStart(forOpJ.getBody());
+    auto ivJ = forOpJ.getInductionVar();
+
+    auto stored = rewriter.create<AffineLoadOp>(
+        loc, alloc, first, ValueRange{ivI, ivJ}); // load alloc[0]
+    auto loaded = rewriter.create<AffineLoadOp>(
+        loc, signal, index, ValueRange{ivI, ivJ}); // load signal[1...7]
+
+    auto added = rewriter.create<arith::AddFOp>(loc, stored, loaded); // add
+    rewriter.create<AffineStoreOp>(loc, added, alloc,
+                                   ValueRange{ivI}); // store val to alloc[0]
+    rewriter.create<AffineStoreOp>(
+        loc, loaded, alloc, index,
+        ValueRange{ivI, ivJ}); // store val to alloc[1...7]
+
+    rewriter.setInsertionPointAfter(forOpJ);
+    // Back in the outer loop: decide the final value of the group head.
+    auto initVal = rewriter.create<AffineLoadOp>(
+        loc, signal, ValueRange{ivI}); // load signal[0]
+    auto oneCount = rewriter.create<AffineLoadOp>(
+        loc, alloc, ValueRange{ivI}); // load alloc[0]
+    auto parityCheck = rewriter.create<arith::RemFOp>(
+        loc, oneCount,
+        twoVal); // get remainder from oneCount / 2 -> either 1 or 0
+
+    auto oddParity =
+        rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ, oneVal,
+                                       parityCheck); // if paritycheck == 1
+    auto valToAlloc = rewriter.create<arith::SelectOp>(
+        loc, oddParity, zeroVal, initVal); // if true: valToAlloc = 0 else NC
+
+    rewriter.create<AffineStoreOp>(
+        loc, valToAlloc, alloc, ValueRange{ivI}); // store the value to alloc[0]
+
+    rewriter.setInsertionPointAfter(forOpI);
+
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+
+struct ArgMaxOpLowering : public ConversionPattern {
+  ArgMaxOpLowering(MLIRContext *context)
+      : ConversionPattern(dsp::ArgMaxOp::getOperationName(), 1, context) {}
+
+  /// Lowers dsp::ArgMaxOp to a scan over the first dimension of the input.
+  /// Element 0 of the result buffer receives the index (as f64) of the first
+  /// strictly greatest element; a scratch buffer of the same type tracks the
+  /// running maximum. Only rank-1 results are handled: for any other rank the
+  /// emitted scf.if takes its empty else branch and nothing is written.
+  /// Always returns success().
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    auto zeroVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    auto oneVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // Seed for the running maximum. -inf (rather than 0.0) keeps inputs whose
+    // elements are all <= 0 from losing against the seed in the `>` test.
+    const llvm::APFloat negInf =
+        llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(), /*Negative=*/true);
+    auto negInfVal = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getFloatAttr(rewriter.getF64Type(), negInf));
+
+    // Converted input buffer and the original (tensor) operand type.
+    ArgMaxOpAdaptor adaptor(operands);
+    auto input = adaptor.getInput();
+    auto inputType =
+        llvm::dyn_cast<RankedTensorType>(op->getOperand(0).getType());
+
+    // Result buffer (index of the maximum) and scratch buffer (maximum value).
+    auto output = llvm::dyn_cast<RankedTensorType>((*op->result_type_begin()));
+    auto outputMemRef = convertTensorToMemRef(output);
+    auto alloc = insertAllocAndDealloc(outputMemRef, loc, rewriter);
+    auto allocEle = insertAllocAndDealloc(outputMemRef, loc, rewriter);
+
+    // The result rank is known here, yet the rank == 1 test is emitted as a
+    // runtime f64 comparison that guards the scan.
+    auto outputShape = output.getShape();
+    auto outputSizeOp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(outputShape.size()));
+    auto sizeSwitch = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OEQ, outputSizeOp, oneVal);
+
+    // (d0) -> (0): the running maximum and its index live in element 0.
+    AffineExpr d0;
+    bindDims(rewriter.getContext(), d0);
+    AffineMap zeroIdx = AffineMap::get(1, 0, ArrayRef<AffineExpr>{d0 - d0},
+                                       rewriter.getContext());
+
+    auto ifOp = rewriter.create<scf::IfOp>(
+        loc, sizeSwitch,
+        true); // FIXME: else condition for 2 dimensional tensor input
+    rewriter.setInsertionPointToStart(ifOp.thenBlock());
+
+    // Initialise both slots before the scan; the scan loads the index slot in
+    // its first iteration, so it must not be left unwritten.
+    Value iv0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    rewriter.create<AffineStoreOp>(loc, negInfVal, allocEle, ValueRange{iv0});
+    rewriter.create<AffineStoreOp>(loc, zeroVal, alloc, ValueRange{iv0});
+
+    // Scan. The f64 iteration argument mirrors the induction variable so the
+    // winning index can be stored into the f64 result buffer.
+    int64_t lb = 0, ub = inputType.getShape()[0], step = 1;
+    auto forOp =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{zeroVal});
+    auto ivI = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto floatI = forOp.getBody()->getArgument(1);
+
+    auto curMax =
+        rewriter.create<AffineLoadOp>(loc, allocEle, zeroIdx, ValueRange{ivI});
+    auto curMaxIdx =
+        rewriter.create<AffineLoadOp>(loc, alloc, zeroIdx, ValueRange{ivI});
+    auto curEle = rewriter.create<AffineLoadOp>(loc, input, ivI);
+    auto cmpOp = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT,
+                                                curEle, curMax);
+    // Strictly greater wins, so ties keep the earliest index.
+    auto maxOp = rewriter.create<arith::SelectOp>(loc, cmpOp, curEle, curMax);
+    auto idxOp =
+        rewriter.create<arith::SelectOp>(loc, cmpOp, floatI, curMaxIdx);
+
+    rewriter.create<AffineStoreOp>(loc, maxOp, allocEle, zeroIdx,
+                                   ValueRange{ivI});
+    rewriter.create<AffineStoreOp>(loc, idxOp, alloc, zeroIdx, ValueRange{ivI});
+
+    // The loop carries an iteration argument: yield its next value.
+    auto increFloatI = rewriter.create<arith::AddFOp>(loc, floatI, oneVal);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{increFloatI});
+    rewriter.setInsertionPointAfter(forOp);
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Power operations
+//===----------------------------------------------------------------------===//
+
+struct PowOpLowering : public ConversionPattern {
+  PowOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::PowOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::PowOp to an affine loop computing result[i] = lhs[i] ^ rhs
+  /// with math::PowFOp. `rhs` is loaded with no indices, i.e. it is treated
+  /// as a rank-0 buffer holding the exponent. Always returns success().
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    dsp::PowOpAdaptor powerAdaptor(operands);
+    Value lhs = powerAdaptor.getLhs();
+    Value rhs = powerAdaptor.getRhs();
+
+    // `lhs` is a converted operand: it is loaded from below as a memref, so
+    // casting its type to RankedTensorType trips the cast assertion.
+    // ShapedType covers tensor and memref types and exposes the same shape.
+    auto inputType = llvm::cast<ShapedType>(lhs.getType());
+    auto resultType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocate space for result
+    auto memRefType = convertTensorToMemRef(resultType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // One loop over the first dimension of the base operand.
+    int64_t lb = 0;
+    int64_t ub = inputType.getShape()[0];
+    int64_t step = 1;
+    affine::AffineForOp forOp = rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+
+    Value loadLHS = rewriter.create<AffineLoadOp>(loc, lhs, ValueRange{iv});
+    Value loadRHS = rewriter.create<AffineLoadOp>(loc, rhs, ValueRange{});
+    Value power = rewriter.create<math::PowFOp>(loc, loadLHS, loadRHS);
+    rewriter.create<AffineStoreOp>(loc, power, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp);
+
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: Normalize operations
+//===----------------------------------------------------------------------===//
+
+struct NormalizeOpLowering : public ConversionPattern {
+  NormalizeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::NormalizeOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers dsp::NormalizeOp to min-max scaling over the first dimension:
+  ///   result[i] = (signal[i] - min) / (max - min)
+  /// A first affine loop reduces min and max through iteration arguments, a
+  /// second one writes the scaled values. Always returns success().
+  /// NOTE(review): max == min makes the divisor 0.0; no guard is emitted.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType =
+        llvm::dyn_cast<RankedTensorType>(*op->result_type_begin());
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto shape = tensorType.getShape()[0];
+    dsp::NormalizeOpAdaptor adaptor(operands);
+    Value signal = adaptor.getSignal();
+    // Seed the reduction with +inf / -inf. Integer limits (about +-9.2e18)
+    // are ordinary f64 values and would beat samples of larger magnitude.
+    auto infConst = [&](bool negative) -> Value {
+      auto inf = llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(), negative);
+      return rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getF64Type(),
+          rewriter.getFloatAttr(rewriter.getF64Type(), inf));
+    };
+    Value min = infConst(/*negative=*/false);
+    Value max = infConst(/*negative=*/true);
+
+    // Pass 1: running min/max carried as iteration arguments.
+    int64_t lb = 0, ub = shape, step = 1;
+    affine::AffineForOp forOp =
+        rewriter.create<AffineForOp>(loc, lb, ub, step, ValueRange{min, max});
+    auto iv = forOp.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp.getBody());
+    auto minVal = forOp.getBody()->getArgument(1);
+    auto maxVal = forOp.getBody()->getArgument(2);
+    auto cmpVal = rewriter.create<AffineLoadOp>(loc, signal, ValueRange{iv});
+    Value isMin = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLT,
+                                                 cmpVal, minVal);
+    Value isMax = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGT,
+                                                 cmpVal, maxVal);
+    auto minOut = rewriter.create<arith::SelectOp>(loc, isMin, cmpVal, minVal);
+    auto maxOut = rewriter.create<arith::SelectOp>(loc, isMax, cmpVal, maxVal);
+    rewriter.create<AffineYieldOp>(
+        loc, ValueRange{minOut.getResult(), maxOut.getResult()});
+    rewriter.setInsertionPointAfter(forOp);
+    auto minSignal = forOp.getResults()[0];
+    auto maxSignal = forOp.getResults()[1];
+    auto divisor = rewriter.create<arith::SubFOp>(loc, maxSignal, minSignal);
+
+    // Pass 2: element-wise scaling into the result buffer.
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivI = forOpI.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto loadedVal =
+        rewriter.create<AffineLoadOp>(loc, signal, ValueRange{ivI});
+    auto subVal = rewriter.create<arith::SubFOp>(loc, loadedVal, minSignal);
+    auto resultVal = rewriter.create<arith::DivFOp>(loc, subVal, divisor);
+    rewriter.create<AffineStoreOp>(loc, resultVal, alloc, ValueRange{ivI});
+    rewriter.setInsertionPointAfter(forOpI);
+    rewriter.replaceOp(op, alloc);
+    return mlir::success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: NormLMSFilterResponseOptimizeOp operations
+//===----------------------------------------------------------------------===//
+
+struct NormLMSFilterResponseOptimizeOpLowering : public ConversionPattern {
+  NormLMSFilterResponseOptimizeOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::NormLMSFilterResponseOptimizeOp::getOperationName(), 1, ctx) {}
+
+  /// Lowers the fused "LMS filter response + min-max normalize" op.
+  /// For every sample n (x = lhs, d = rhs, L = filter length, w = weights):
+  ///   y[n]  = sum of w[k] * x[n - k]   over k < L with k <= n
+  ///   e     = d[n] - y[n]
+  ///   w[k] += mu * e * x[n - k]        for k < L with k <= n
+  /// The running min/max of y are carried as loop iteration arguments and a
+  /// final loop rewrites y[n] as (y[n] - min) / (max - min).
+  /// Operands are read through LMSFilterOpAdaptor (lhs, rhs, mu); operand 3
+  /// must be produced by a dsp::ConstantOp whose first element is L.
+  /// Returns failure (before creating any IR) when it is not, else success.
+  /// NOTE(review): max == min makes the final divisor 0.0; no guard is emitted.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The filter length must be a constant; bail out before touching the IR.
+    auto filterLenOp = op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenOp)
+      return rewriter.notifyMatchFailure(
+          op, "filter length operand must be defined by a dsp::ConstantOp");
+    DenseElementsAttr filterLenAttr = filterLenOp.getValue();
+    const double filterLenVal =
+        filterLenAttr.getValues<FloatAttr>()[0].getValueAsDouble();
+    const auto FilterLength = static_cast<int64_t>(filterLenVal);
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    LMSFilterOpAdaptor lmsFilterAdaptor(operands);
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    Value mu = rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getMu());
+    int64_t lb = 0;
+    int64_t numSamples = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Adaptive weights. memref.alloc does not initialise memory, so start
+    // from all-zero weights explicitly.
+    auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto wAlloc = rewriter.create<memref::AllocOp>(loc, yMemRefType);
+    affine::AffineForOp initLoop =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    rewriter.setInsertionPointToStart(initLoop.getBody());
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc,
+                                   ValueRange{initLoop.getInductionVar()});
+    rewriter.setInsertionPointAfter(initLoop);
+
+    // +inf / -inf seeds: integer limits are finite f64 values, not neutral.
+    auto infConst = [&](bool negative) -> Value {
+      auto inf = llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(), negative);
+      return rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getF64Type(),
+          rewriter.getFloatAttr(rewriter.getF64Type(), inf));
+    };
+    Value min = infConst(/*negative=*/false);
+    Value max = infConst(/*negative=*/true);
+
+    affine::AffineForOp forOp1 = rewriter.create<AffineForOp>(
+        loc, lb, numSamples, step, ValueRange{min, max});
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    // (n, k) -> n - k is the x index; the set n - k >= 0 guards that index.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // y[n] = 0, then accumulate the filter response.
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+    Value inputX =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv2});
+    auto wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+    auto ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    auto sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
+    rewriter.setInsertionPointAfter(ifOp);
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // Fold y[n] into the running min/max.
+    auto cmpVal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value minVal = forOp1.getBody()->getArgument(1);
+    Value maxVal = forOp1.getBody()->getArgument(2);
+    auto minOut = rewriter.create<arith::MinNumFOp>(loc, cmpVal, minVal);
+    auto maxOut = rewriter.create<arith::MaxNumFOp>(loc, cmpVal, maxVal);
+
+    // e[n] = d[n] - y[n]
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    // Weight update: w[k] += mu * e[n] * x[n - k]
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+    Value inputX2 =
+        rewriter.create<AffineLoadOp>(loc, lmsFilterAdaptor.getLhs(),
+                                      addMapForLMSFilter, ValueRange{iv, iv3});
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+    rewriter.setInsertionPointAfter(ifOp2);
+    rewriter.setInsertionPointAfter(forOp3);
+
+    rewriter.create<AffineYieldOp>(
+        loc, ValueRange{minOut.getResult(), maxOut.getResult()});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    Value minSignal = forOp1.getResults()[0];
+    Value maxSignal = forOp1.getResults()[1];
+    Value divisor = rewriter.create<arith::SubFOp>(loc, maxSignal, minSignal);
+
+    // ele-wise normalize
+    affine::AffineForOp forOpI =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto ivI = forOpI.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpI.getBody());
+    auto loadedVal = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{ivI});
+    auto subVal = rewriter.create<arith::SubFOp>(loc, loadedVal, minSignal);
+    auto resultVal = rewriter.create<arith::DivFOp>(loc, subVal, divisor);
+    rewriter.create<AffineStoreOp>(loc, resultVal, alloc, ValueRange{ivI});
+    rewriter.setInsertionPointAfter(forOpI);
+
+    // The weights are not needed past this point; release them.
+    rewriter.create<memref::DeallocOp>(loc, wAlloc);
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+struct Median2SlidingOptimizedOpLowering : public ConversionPattern {
+  Median2SlidingOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Median2SlidingOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  /// Emits one affine loop over the first dimension of the result. For each
+  /// index i the loop body:
+  ///   1. loads in[i] .. in[i + 4];
+  ///   2. forms the median of the three overlapping 3-element windows
+  ///      {i, i+1, i+2}, {i+1, i+2, i+3}, {i+2, i+3, i+4}, each computed as
+  ///      sum - (min + max);
+  ///   3. stores the mean of those three medians in out[i].
+  /// Sums, minima and maxima of the pairs shared by two windows are emitted
+  /// once and reused.
+  ///
+  /// NOTE(review): the body reads up to in[i + 4] while i runs over the result
+  /// extent; presumably the result is at least 4 shorter than the input -
+  /// verify against the op's shape inference.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto resultTy = llvm::cast<RankedTensorType>(*op->result_type_begin());
+    auto resultBuf =
+        insertAllocAndDealloc(convertTensorToMemRef(resultTy), loc, rewriter);
+
+    // Divisor for the final mean.
+    Value three = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(3));
+
+    // Loop over every output position.
+    const int64_t first = 0;
+    const int64_t count = resultTy.getShape()[0];
+    const int64_t stride = 1;
+    affine::AffineForOp loop =
+        rewriter.create<AffineForOp>(loc, first, count, stride);
+    Value i = loop.getInductionVar();
+    rewriter.setInsertionPointToStart(loop.getBody());
+
+    typename dsp::Median2SlidingOptimizedOp::Adaptor adaptor(operands);
+    Value in = adaptor.getInput();
+
+    // Small emitters; every call appends exactly one op at the insertion
+    // point, so the order of the calls below is the order of the emitted IR.
+    auto loadAt = [&](int64_t offset) -> Value {
+      AffineExpr shifted = rewriter.getAffineDimExpr(0) +
+                           rewriter.getAffineConstantExpr(offset);
+      AffineMap shiftMap = AffineMap::get(1, 0, shifted);
+      return rewriter.create<AffineLoadOp>(loc, in, shiftMap, ValueRange{i});
+    };
+
+    // Binary f64 arithmetic emitters.
+    auto add = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::AddFOp>(loc, a, b);
+    };
+    auto sub = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::SubFOp>(loc, a, b);
+    };
+    auto lesser = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::MinimumFOp>(loc, a, b);
+    };
+    auto greater = [&](Value a, Value b) -> Value {
+      return rewriter.create<arith::MaximumFOp>(loc, a, b);
+    };
+
+    // Five consecutive samples starting at i.
+    Value s0 = rewriter.create<AffineLoadOp>(loc, in, ValueRange{i});
+    Value s1 = loadAt(1);
+    Value s2 = loadAt(2);
+    Value s3 = loadAt(3);
+    Value s4 = loadAt(4);
+
+    // Sums: the two shared pairs first, then one sum per window.
+    Value pairSum12 = add(s1, s2);
+    Value pairSum23 = add(s2, s3);
+    Value sumA = add(s0, pairSum12);
+    Value sumB = add(pairSum12, s3);
+    Value sumC = add(pairSum23, s4);
+
+    // Minima, same layout.
+    Value pairMin12 = lesser(s1, s2);
+    Value pairMin23 = lesser(s2, s3);
+    Value minA = lesser(s0, pairMin12);
+    Value minB = lesser(pairMin12, s3);
+    Value minC = lesser(pairMin23, s4);
+
+    // Maxima, same layout.
+    Value pairMax12 = greater(s1, s2);
+    Value pairMax23 = greater(s2, s3);
+    Value maxA = greater(s0, pairMax12);
+    Value maxB = greater(pairMax12, s3);
+    Value maxC = greater(pairMax23, s4);
+
+    // Median of three values = sum - (min + max).
+    Value extremesA = add(minA, maxA);
+    Value extremesB = add(minB, maxB);
+    Value extremesC = add(minC, maxC);
+
+    Value medianA = sub(sumA, extremesA);
+    Value medianB = sub(sumB, extremesB);
+    Value medianC = sub(sumC, extremesC);
+
+    // Mean of the three medians.
+    Value medianAB = add(medianA, medianB);
+    Value medianABC = add(medianAB, medianC);
+    Value mean = rewriter.create<arith::DivFOp>(loc, medianABC, three);
+
+    // Write the result and leave the loop body.
+    rewriter.create<AffineStoreOp>(loc, mean, resultBuf, ValueRange{i});
+    rewriter.setInsertionPointAfter(loop);
+
+    rewriter.replaceOp(op, resultBuf);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FIRFilterResSymmThresholdUpOptimizedOp
+// operations
+//===----------------------------------------------------------------------===//
+struct FIRFilterResSymmThresholdUpOptimizedOpLowering
+    : public ConversionPattern {
+  FIRFilterResSymmThresholdUpOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(
+            dsp::FIRFilterResSymmThresholdUpOptimizedOp::getOperationName(), 1,
+            ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // dsp.FIRFilterResSymmThresholdUpOptimizedOp has 2 operands -- both of type
+    // tensor f64
+
+    // Get the location of FIRFilterResSymmThresholdUpOptimizedOp
+    auto loc = op->getLoc();
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+    // Pseudo-Code
+    // y[n] = sum(h[k] .{ x[n-k] + x[n-(L-1-k)]}) + h[L-1/2].x[n-(L-1)/2] , k=0
+    // to L-1/2
+    //  N = lenY , M = lenX ,  L = lenH
+    // for n=0 to N
+    //  sum = 0, temp =0
+    //  for k = 0 to L-1/2
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    // middle-one
+    //  if 0 <= n - (L-1)/2 < M
+    //  sum2 = sum + h[L-1/2] . x[n-(n - (L-1)/2)]
+    // y[n] = sum2
+
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+    // DEBUG_PRINT_NO_ARGS();
+    affine::AffineForOp forOp1 =
+        rewriter.create<affine::AffineForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    auto iv = forOp1.getInductionVar();
+
+    // for n=0 to N
+    //  sum = 0, temp =0
+    // get filter len
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->getOperand(1))); //operand_type_end
+    //  auto tensorTypeFilter =
+    //  llvm::cast<RankedTensorType>((*op->operand_type_begin()));
+    auto operandIt = op->operand_type_begin();
+    auto tensorTypeInput = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForInput = tensorTypeInput.getShape()[0];
+    // get second operand
+    operandIt = operandIt + 1;
+
+    // auto tensorTypeFilter =
+    // llvm::cast<RankedTensorType>((*op->operand_type_begin())); //operandIt
+    auto tensorTypeFilter = llvm::cast<RankedTensorType>(*operandIt);
+    int64_t ubForFilter = tensorTypeFilter.getShape()[0];
+    // DEBUG_PRINT_NO_ARGS();
+    // llvm::errs() << "ubForFilter= " << ubForFilter << "\n";
+    // create a constant for sum
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    affine::AffineForOp forOp2 = rewriter.create<affine::AffineForOp>(
+        loc, lb, ubForFilter / 2, step, ValueRange{constant0});
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+    auto iv2 = forOp2.getInductionVar();
+
+    auto getIterArg =
+        forOp2.getBody()->getArgument(1); // forOp1.getIterOperands();
+    // DEBUG_PRINT_NO_ARGS();
+    FIRFilterResSymmThresholdUpOptimizedOpAdaptor
+        firFilterResSymmThresholdUpOpAdaptor(operands);
+
+    // if 0 <= n-k < M
+    // val1 = x[n-k] else, val1 = 0
+    // For n-k
+    // if 0 <= n-k < M or, 0 <= n-k <= M -1
+    AffineExpr d0, d1, s0, s1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprNMinusK = d0 - d1;
+    AffineMap mapNMinusK = AffineMap::get(2, 0, ExprNMinusK);
+    // n-k <= M -1 or, n-k-(M-1) <= 0
+    bindSymbols(rewriter.getContext(), s0, s1);
+    Value constantMMinus1Indx =
+        rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    AffineExpr ExprNMinusKMinusMPlus1 = s0 - d0 + d1;
+    IntegerSet setForIf = IntegerSet::get(
+        2, 1, {ExprNMinusK, ExprNMinusKMinusMPlus1}, {false, false});
+    // DEBUG_PRINT_NO_ARGS();
+
+    // if 0 <= n-k <= M -1
+    // use typeRange too:
+    Type floatType = rewriter.getF64Type();
+    //  if n-k >= 0 && n-k <= M -1 or, M-1 -n + k >= 0
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf,
+        ValueRange{iv, iv2, constantMMinus1Indx}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    // val1 = x[n-k] else, val1 = 0
+    // load x[n-k]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInput = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), mapNMinusK,
+        ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInput});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp.getElseBlock());
+    Value const0ForElse = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse});
+    rewriter.setInsertionPointAfter(ifOp);
+
+    // if 0 <= n+k - (L-1) < M
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    // val2 lower bound
+    //  AffineExpr ExprNMinKMinLPlus1 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    //  AffineExpr ExprLowerBoundVal2 = d0 - d1 - s0; //s0 = (L-1) => -s0 = -L+1
+    // Val2 LowerBound: n+k - (L-1) >= 0
+    AffineExpr ExprLowerBoundVal2 =
+        rewriter.getAffineDimExpr(0) + rewriter.getAffineDimExpr(1) -
+        rewriter.getAffineConstantExpr(ubForFilter - 1);
+    // Val2 UpperBound: n+k - (L-1) <= M -1 ie, M - 1 + L -1 -k -n >= 0 ie,
+    // (M+L-2) - k -n >= 0
+    //  AffineExpr ExprUpperBoundVal2 = s0 + s1 + d1 - d0; //s1 = M+L-2 = L-1 +
+    //  M -1
+    AffineExpr ExprUpperBoundVal2 =
+        rewriter.getAffineConstantExpr(ubForInput + ubForFilter - 2) -
+        rewriter.getAffineDimExpr(1) - rewriter.getAffineDimExpr(0);
+    // s0 = L -1
+    //  Value s0LMin1Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForFilter - 1); s1 = M + L -2 for val2 upperBound Value
+    //  s1MPlusLPlus2Indx = rewriter.create<arith::ConstantIndexOp>(loc,
+    //  ubForInput + ubForFilter - 2); Value s1MMin1Indx =
+    //  rewriter.create<arith::ConstantIndexOp>(loc, ubForInput - 1);
+
+    IntegerSet setForIf2 = IntegerSet::get(
+        2, 0, {ExprLowerBoundVal2, ExprUpperBoundVal2}, {false, false});
+
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf2, ValueRange{iv, iv2},
+        true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    // val2 = x[n+k-(L-1)] else, val2 = 0
+    AffineMap addMap2 = AffineMap::get(2, 0, ExprLowerBoundVal2);
+    // load x[n+k-(L-1)]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal2 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap2,
+        ValueRange{iv, iv2});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal2});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp2.getElseBlock());
+    Value const0ForElse2 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse2});
+    rewriter.setInsertionPointAfter(ifOp2);
+
+    // temp = val1 + val2
+    //  sum = sum + h[k] . temp
+
+    Value Val1Plus2 = rewriter.create<arith::AddFOp>(loc, ifOp.getResult(0),
+                                                     ifOp2.getResult(0));
+
+    // load filter and then mult and then sum
+    Value loadFilter = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), iv2);
+
+    Value filterMulInput =
+        rewriter.create<arith::MulFOp>(loc, Val1Plus2, loadFilter);
+    Value sumNext =
+        rewriter.create<arith::AddFOp>(loc, filterMulInput, getIterArg);
+    rewriter.create<AffineYieldOp>(loc, ValueRange{sumNext});
+    // rewriter.setInsertionPointToEnd(forOp2->getBlock());
+    rewriter.setInsertionPointAfter(forOp2);
+    // DEBUG_PRINT_NO_ARGS();
+    // Middle - point
+    // if 0 <= n - (L-1)/2 < M
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    // y[n] = sum2
+
+    // if 0 <= n - (L-1)/2 < M
+    // AffineExpr ExprLowerBoundVal3 = d0 - s0; //s0 = (L-1)/2
+    // AffineExpr ExprUpperBoundVal3 = d0 - s1; //s1 = M+ (L-1)/2
+    int64_t midFilterLen = (ubForFilter - 1) / 2;
+    AffineExpr ExprLowerBoundVal3 =
+        rewriter.getAffineDimExpr(0) -
+        rewriter.getAffineConstantExpr(midFilterLen);
+    // UpperBound: n - (L-1)/2 <= M - 1 ie, M-1 + mid - n
+    AffineExpr ExprUpperBoundVal3 =
+        rewriter.getAffineConstantExpr(ubForInput + midFilterLen - 1) -
+        rewriter.getAffineDimExpr(0);
+
+    AffineMap addMap3 = AffineMap::get(1, 0, ExprLowerBoundVal3);
+
+    IntegerSet setForIf3 = IntegerSet::get(
+        1, 0, {ExprLowerBoundVal3, ExprUpperBoundVal3}, {false, false});
+
+    auto ifOp3 = rewriter.create<affine::AffineIfOp>(
+        loc, TypeRange{floatType}, setForIf3, ValueRange{iv}, true /*else*/);
+    rewriter.setInsertionPointToStart(ifOp3.getThenBlock());
+
+    // val3 = x[n-(L-1)/2)] else, val3 = 0
+    // load x[n-(L-1)/2)]
+    // DEBUG_PRINT_NO_ARGS();
+    Value loadInputForVal3 = rewriter.create<AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getLhs(), addMap3,
+        ValueRange{iv});
+    rewriter.create<AffineYieldOp>(loc, ValueRange{loadInputForVal3});
+    // else block
+    rewriter.setInsertionPointToStart(ifOp3.getElseBlock());
+    Value const0ForElse3 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+    rewriter.create<AffineYieldOp>(loc, ValueRange{const0ForElse3});
+    rewriter.setInsertionPointAfter(ifOp3);
+
+    // sum2 = sum + h[L-1/2] . x[n-(L-1)/2)]
+    //  y[n] = sum2
+    // load filter and then mult and then sum
+    Value midFilterLenIndx =
+        rewriter.create<arith::ConstantIndexOp>(loc, midFilterLen);
+
+    Value loadFilterMid = rewriter.create<affine::AffineLoadOp>(
+        loc, firFilterResSymmThresholdUpOpAdaptor.getRhs(), midFilterLenIndx);
+    Value filterMulInput2 =
+        rewriter.create<arith::MulFOp>(loc, ifOp3.getResult(0), loadFilterMid);
+    Value sum2 = rewriter.create<arith::AddFOp>(loc, filterMulInput2,
+                                                forOp2.getResult(0));
+    // rewriter.create<AffineStoreOp>(loc, forOp2.getResult(0) , alloc, iv);
+
+    // Optimize here, compare with threshold, then if returnoriginal then store
+    // same value else 1
+
+    auto thresholdMemRef = firFilterResSymmThresholdUpOpAdaptor.getThreshold();
+    auto returnOriginalMemRef =
+        firFilterResSymmThresholdUpOpAdaptor.getReturnoriginal();
+
+    auto threshold =
+        rewriter.create<AffineLoadOp>(loc, thresholdMemRef, ValueRange{});
+    auto returnOriginal =
+        rewriter.create<AffineLoadOp>(loc, returnOriginalMemRef, ValueRange{});
+    Value constant00 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    Value constant11 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // Compare a[i] >= threshold
+    auto cmp1 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
+                                               sum2, threshold);
+    // Compare if return original is true or false and return 1 or original
+    // value
+    auto cmpro = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ,
+                                                constant11, returnOriginal);
+
+    // Use select to choose between inputX and 1
+    auto selectreturn =
+        rewriter.create<arith::SelectOp>(loc, cmpro, sum2, constant11);
+
+    // Use select to choose between 0 and selectreturn
+    auto selectOp =
+        rewriter.create<arith::SelectOp>(loc, cmp1, selectreturn, constant00);
+
+    // Store the result
+    rewriter.create<AffineStoreOp>(loc, selectOp, alloc, iv);
+
+    // rewriter.create<AffineStoreOp>(loc, sum2, alloc, iv);
+    rewriter.setInsertionPointAfter(forOp1);
+    // DEBUG_PRINT_NO_ARGS();
+    // ifOp->dump();
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::FFTOp` to an iterative, in-place radix-2 Cooley-Tukey FFT
+/// expressed with `scf.for` loops over two buffers, one holding the real
+/// parts and one holding the imaginary parts. Both buffers use the memref
+/// type derived from the op's first result type.
+///
+/// The generated IR has two phases:
+///   1. Bit-reversal permutation: input element `i` is written to the slot
+///      whose index is `i` with its low log2(N) bits reversed; the imaginary
+///      buffer receives 0.0 in the same slot.
+///   2. log2(N) butterfly stages that update both buffers in place.
+///
+/// The op is replaced by two values: the real buffer followed by the
+/// imaginary buffer.
+///
+/// The pattern reports a match failure (and emits nothing) unless the first
+/// result is a rank-1 tensor whose length is a static power of two. For any
+/// other length the butterfly stages would address elements past the end of
+/// the buffers.
+struct FFTOpLowering : public ConversionPattern {
+  FFTOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Validate the shape before any IR is created. Every load/store below
+    // uses exactly one index, and the radix-2 index arithmetic is only in
+    // bounds when the length is a power of two. `N <= 0` also rejects a
+    // dynamic extent, which is encoded as a negative sentinel.
+    if (tensorType.getRank() != 1)
+      return rewriter.notifyMatchFailure(
+          op, "FFT lowering expects a rank-1 result tensor");
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(
+          op, "FFT lowering expects a static power-of-two length");
+
+    // log2(N), computed with integer arithmetic. This is both the number of
+    // index bits to reverse and the number of butterfly stages.
+    int64_t stages = 0;
+    for (int64_t len = N; len > 1; len >>= 1)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+
+    FFTRealOpAdaptor fftRealOpAdaptor(operands);
+    auto input = fftRealOpAdaptor.getLhs();
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Working buffers: they receive the bit-reversed input and are then
+    // transformed in place; they are also the values the op is replaced with.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    //===------------------------------------------------------------------===//
+    // Phase 1: bit-reversal permutation.
+    //===------------------------------------------------------------------===//
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // The inner loop walks bit positions j = 0 .. log2(N)-1 and carries the
+    // partially built reversed index as its single iteration argument.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // Test bit j of i.
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+
+    // Bit j of i maps to bit (log2(N) - j - 1) of the reversed index.
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, stagesValue, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // OR the mirrored bit into the running index only when bit j was set.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // A loop with iteration arguments needs an explicit yield.
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // The input is real-valued: copy it to the permuted slot and zero the
+    // matching imaginary slot.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    //===------------------------------------------------------------------===//
+    // Phase 2: Cooley-Tukey butterfly stages.
+    //===------------------------------------------------------------------===//
+
+    // Constants for the twiddle angle -2 * pi * k / full_size.
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    // One iteration per stage: half_size = 1 << stage, full_size = 2 *
+    // half_size.
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    // Visit each group of full_size consecutive elements.
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    // Visit each butterfly inside the group.
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // The butterfly pairs element start+k with element start+k+half_size.
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor cos(a) + i*sin(a) with a = -2 * pi * k / full_size.
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load the odd element.
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // t = odd * twiddle (complex multiplication).
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load the even element.
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+
+    // Butterfly: even' = even + t, odd' = even - t.
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Write both halves back in place.
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    // Replace the op with the (real, imaginary) buffer pair.
+    rewriter.replaceOp(op,
+                       ValueRange{alloc_reversed_real, alloc_reversed_imag});
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: FFTAbsOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers `dsp::FFTAbsOp` to an iterative, in-place radix-2 Cooley-Tukey FFT
+/// followed by a magnitude pass, all expressed with `scf.for` loops. Three
+/// buffers of the memref type derived from the op's first result type are
+/// used: real parts, imaginary parts, and the resulting magnitudes.
+///
+/// The generated IR has three phases:
+///   1. Bit-reversal permutation: input element `i` is written to the slot
+///      whose index is `i` with its low log2(N) bits reversed; the imaginary
+///      buffer receives 0.0 in the same slot.
+///   2. log2(N) butterfly stages that update the real/imaginary buffers in
+///      place.
+///   3. One pass storing sqrt(re^2 + im^2) for every element into the
+///      magnitude buffer.
+///
+/// The op is replaced by the magnitude buffer. The magnitude is computed
+/// after the last stage rather than inside every stage, so it is evaluated
+/// once per element and is also written when N == 1 (no butterfly stage).
+///
+/// The pattern reports a match failure (and emits nothing) unless the first
+/// result is a rank-1 tensor whose length is a static power of two. For any
+/// other length the butterfly stages would address elements past the end of
+/// the buffers.
+struct FFTAbsOpLowering : public ConversionPattern {
+  FFTAbsOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::FFTAbsOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Validate the shape before any IR is created. Every load/store below
+    // uses exactly one index, and the radix-2 index arithmetic is only in
+    // bounds when the length is a power of two. `N <= 0` also rejects a
+    // dynamic extent, which is encoded as a negative sentinel.
+    if (tensorType.getRank() != 1)
+      return rewriter.notifyMatchFailure(
+          op, "FFT lowering expects a rank-1 result tensor");
+    const int64_t N = tensorType.getShape()[0];
+    if (N <= 0 || (N & (N - 1)) != 0)
+      return rewriter.notifyMatchFailure(
+          op, "FFT lowering expects a static power-of-two length");
+
+    // log2(N), computed with integer arithmetic. This is both the number of
+    // index bits to reverse and the number of butterfly stages.
+    int64_t stages = 0;
+    for (int64_t len = N; len > 1; len >>= 1)
+      ++stages;
+
+    auto memrefType = convertTensorToMemRef(tensorType);
+
+    FFTAbsOpAdaptor fftAbsOpAdaptor(operands);
+    auto input = fftAbsOpAdaptor.getInput();
+
+    auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    auto stagesValue = rewriter.create<arith::ConstantIndexOp>(loc, stages);
+
+    // Working buffers for the transform plus the buffer that replaces the op.
+    auto alloc_reversed_real = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_reversed_imag = insertAllocAndDealloc(memrefType, loc, rewriter);
+    auto alloc_amplitude = insertAllocAndDealloc(memrefType, loc, rewriter);
+
+    //===------------------------------------------------------------------===//
+    // Phase 1: bit-reversal permutation.
+    //===------------------------------------------------------------------===//
+    auto bitReversalLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(bitReversalLoop.getBody());
+    auto i = bitReversalLoop.getInductionVar();
+    auto iInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), i);
+
+    // The inner loop walks bit positions j = 0 .. log2(N)-1 and carries the
+    // partially built reversed index as its single iteration argument.
+    auto initialRevIndex = rewriter.create<arith::ConstantIntOp>(loc, 0, 64);
+    auto innerLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step,
+                                                 ValueRange{initialRevIndex});
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+    auto j = innerLoop.getInductionVar();
+    auto jInt =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), j);
+    auto carriedRevIndex = innerLoop.getRegionIterArgs()[0];
+
+    // Test bit j of i.
+    auto bitMask = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), jInt);
+    auto iAndMask = rewriter.create<arith::AndIOp>(loc, iInt, bitMask);
+    auto isNonZero = rewriter.create<arith::CmpIOp>(
+        loc, arith::CmpIPredicate::ne, iAndMask,
+        rewriter.create<arith::ConstantIntOp>(loc, 0, 64));
+
+    // Bit j of i maps to bit (log2(N) - j - 1) of the reversed index.
+    auto shiftAmount = rewriter.create<arith::SubIOp>(
+        loc, rewriter.create<arith::SubIOp>(loc, stagesValue, j),
+        rewriter.create<arith::ConstantIndexOp>(loc, 1));
+    auto shiftAmountI64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), shiftAmount);
+    auto bitToSet = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIntOp>(loc, 1, 64), shiftAmountI64);
+
+    // OR the mirrored bit into the running index only when bit j was set.
+    auto updatedRevIndex = rewriter.create<arith::OrIOp>(
+        loc, carriedRevIndex,
+        rewriter.create<arith::SelectOp>(
+            loc, isNonZero, bitToSet,
+            rewriter.create<arith::ConstantIntOp>(loc, 0, 64)));
+
+    // A loop with iteration arguments needs an explicit yield.
+    rewriter.create<scf::YieldOp>(loc, ValueRange{updatedRevIndex});
+
+    rewriter.setInsertionPointAfter(innerLoop);
+
+    auto finalRevIndex = innerLoop.getResult(0);
+    auto revIndex = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getIndexType(), finalRevIndex);
+
+    // The input is real-valued: copy it to the permuted slot and zero the
+    // matching imaginary slot.
+    auto realValue = rewriter.create<memref::LoadOp>(loc, input, ValueRange{i});
+    auto imagValue = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(0.0), rewriter.getF64Type());
+    rewriter.create<memref::StoreOp>(loc, realValue, alloc_reversed_real,
+                                     ValueRange{revIndex});
+    rewriter.create<memref::StoreOp>(loc, imagValue, alloc_reversed_imag,
+                                     ValueRange{revIndex});
+
+    rewriter.setInsertionPointAfter(bitReversalLoop);
+
+    //===------------------------------------------------------------------===//
+    // Phase 2: Cooley-Tukey butterfly stages.
+    //===------------------------------------------------------------------===//
+
+    // Constants for the twiddle angle -2 * pi * k / full_size.
+    auto pi = rewriter.create<arith::ConstantFloatOp>(loc, llvm::APFloat(M_PI),
+                                                      rewriter.getF64Type());
+    auto neg2 = rewriter.create<arith::ConstantFloatOp>(
+        loc, llvm::APFloat(-2.0), rewriter.getF64Type());
+
+    // One iteration per stage: half_size = 1 << stage, full_size = 2 *
+    // half_size.
+    auto fftLoop = rewriter.create<scf::ForOp>(loc, lb, stagesValue, step);
+    rewriter.setInsertionPointToStart(fftLoop.getBody());
+    auto stage = fftLoop.getInductionVar();
+    auto half_size = rewriter.create<arith::ShLIOp>(
+        loc, rewriter.create<arith::ConstantIndexOp>(loc, 1), stage);
+    auto full_size = rewriter.create<arith::ShLIOp>(
+        loc, half_size, rewriter.create<arith::ConstantIndexOp>(loc, 1));
+
+    // Visit each group of full_size consecutive elements.
+    auto outerLoop = rewriter.create<scf::ForOp>(loc, lb, ub, full_size);
+    rewriter.setInsertionPointToStart(outerLoop.getBody());
+    auto start = outerLoop.getInductionVar();
+
+    // Visit each butterfly inside the group.
+    auto butterflyLoop = rewriter.create<scf::ForOp>(loc, lb, half_size, step);
+    rewriter.setInsertionPointToStart(butterflyLoop.getBody());
+    auto k = butterflyLoop.getInductionVar();
+
+    // The butterfly pairs element start+k with element start+k+half_size.
+    auto even_index = rewriter.create<arith::AddIOp>(loc, start, k);
+    auto odd_index = rewriter.create<arith::AddIOp>(loc, even_index, half_size);
+
+    // Twiddle factor cos(a) + i*sin(a) with a = -2 * pi * k / full_size.
+    auto k_i64 =
+        rewriter.create<arith::IndexCastOp>(loc, rewriter.getI64Type(), k);
+    auto k_f64 =
+        rewriter.create<arith::SIToFPOp>(loc, rewriter.getF64Type(), k_i64);
+    auto full_size_i64 = rewriter.create<arith::IndexCastOp>(
+        loc, rewriter.getI64Type(), full_size);
+    auto full_size_f64 = rewriter.create<arith::SIToFPOp>(
+        loc, rewriter.getF64Type(), full_size_i64);
+    auto angle_div = rewriter.create<arith::DivFOp>(loc, k_f64, full_size_f64);
+    auto angle_mul = rewriter.create<arith::MulFOp>(loc, neg2, angle_div);
+    auto angle_final = rewriter.create<arith::MulFOp>(loc, pi, angle_mul);
+    auto cos = rewriter.create<math::CosOp>(loc, angle_final);
+    auto sin = rewriter.create<math::SinOp>(loc, angle_final);
+
+    // Load the odd element.
+    auto odd_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{odd_index});
+    auto odd_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{odd_index});
+
+    // t = odd * twiddle (complex multiplication).
+    auto odd_real_cos = rewriter.create<arith::MulFOp>(loc, odd_real, cos);
+    auto odd_imag_sin = rewriter.create<arith::MulFOp>(loc, odd_imag, sin);
+    auto t_real =
+        rewriter.create<arith::SubFOp>(loc, odd_real_cos, odd_imag_sin);
+
+    auto odd_real_sin = rewriter.create<arith::MulFOp>(loc, odd_real, sin);
+    auto odd_imag_cos = rewriter.create<arith::MulFOp>(loc, odd_imag, cos);
+    auto t_imag =
+        rewriter.create<arith::AddFOp>(loc, odd_real_sin, odd_imag_cos);
+
+    // Load the even element.
+    auto even_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                     ValueRange{even_index});
+    auto even_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                     ValueRange{even_index});
+
+    // Butterfly: even' = even + t, odd' = even - t.
+    auto new_even_real = rewriter.create<arith::AddFOp>(loc, even_real, t_real);
+    auto new_even_imag = rewriter.create<arith::AddFOp>(loc, even_imag, t_imag);
+    auto new_odd_real = rewriter.create<arith::SubFOp>(loc, even_real, t_real);
+    auto new_odd_imag = rewriter.create<arith::SubFOp>(loc, even_imag, t_imag);
+
+    // Write both halves back in place.
+    rewriter.create<memref::StoreOp>(loc, new_even_real, alloc_reversed_real,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_even_imag, alloc_reversed_imag,
+                                     ValueRange{even_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_real, alloc_reversed_real,
+                                     ValueRange{odd_index});
+    rewriter.create<memref::StoreOp>(loc, new_odd_imag, alloc_reversed_imag,
+                                     ValueRange{odd_index});
+
+    //===------------------------------------------------------------------===//
+    // Phase 3: magnitude of every transformed element.
+    //===------------------------------------------------------------------===//
+    rewriter.setInsertionPointAfter(fftLoop);
+    auto magnitudeLoop = rewriter.create<scf::ForOp>(loc, lb, ub, step);
+    rewriter.setInsertionPointToStart(magnitudeLoop.getBody());
+    auto bin = magnitudeLoop.getInductionVar();
+
+    auto bin_real = rewriter.create<memref::LoadOp>(loc, alloc_reversed_real,
+                                                    ValueRange{bin});
+    auto bin_imag = rewriter.create<memref::LoadOp>(loc, alloc_reversed_imag,
+                                                    ValueRange{bin});
+    auto bin_real_squared =
+        rewriter.create<arith::MulFOp>(loc, bin_real, bin_real);
+    auto bin_imag_squared =
+        rewriter.create<arith::MulFOp>(loc, bin_imag, bin_imag);
+    auto bin_power = rewriter.create<arith::AddFOp>(loc, bin_real_squared,
+                                                    bin_imag_squared);
+    auto bin_magnitude = rewriter.create<math::SqrtOp>(loc, bin_power);
+    rewriter.create<memref::StoreOp>(loc, bin_magnitude, alloc_amplitude,
+                                     ValueRange{bin});
+
+    // Replace the op with the magnitude buffer.
+    rewriter.replaceOp(op, alloc_amplitude);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: DFTAbsOp operations
+//===----------------------------------------------------------------------===//
+
+struct DFTAbsOpLowering : public ConversionPattern {
+  DFTAbsOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DFTAbsOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+
+    auto loc = op->getLoc();
+
+    // Pseudo-code:
+    //   y[k] = y_real[k] + j *y_img[k]
+    //  y_real = sumOver_n(x[n]*cos[2*pi * k *n/N ]
+    //  y_img = sumOver_n(x[n]*sin[2*pi * k *n/N ] * -1
+    // init  output mem for y_real & y_img as 0
+    // iterate for output from k=0 to last
+    // iterate for all x from n=0 to last
+    // perform the calculations : ie x[n] * cos[2*pi * k *n/N ] and sum and
+    // store them at y[k]
+    //
+    // replace this upsampling op with the output_mem_allocation op
+
+    // DEBUG_PRINT_NO_ARGS() ;
+
+    // output for result type
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    // iterate to result1 --not needed for now but for future reference
+    //  auto tensorType1 =
+    //  llvm::cast<RankedTensorType>(*std::next(op->result_type_begin(), 1));
+
+    // DEBUG_PRINT_NO_ARGS() ;
+    // tensorType.getShape()[0]
+    // llvm::errs() << "tensorType1.getShape()[0] " << tensorType1.getShape()[0]
+    // << " func= " << __func__ << "\n";
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    // auto memRefType2 = convertTensorToMemRef(tensorType1);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_mag = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    // construct affine loops for the input
+    SmallVector<int64_t, 4> lowerBounds(tensorType.getRank(), /*Value*/ 0);
+    SmallVector<int64_t, 4> steps(tensorType.getRank(), /*Value=*/1);
+
+    // affine.for %y = 0 to 4 {
+    //     affine.store %cst_3, %alloc_real[%y] : memref<4xf64>
+    //     affine.store %cst_3, %alloc_img[%y] : memref<4xf64>
+    // }
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // For loop -- iterate from 1 to last
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_mag, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // loop for Y
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // loop for X
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // load from X, & y1 & y2
+    DFTAbsOpAdaptor fft1DAdaptor(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(loc, fft1DAdaptor.getInput(),
+                                                 ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // convert index to f64
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // get 2*pi * k * i / N
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // getOperand().getType()
+    // auto inputTensorType =
+    // llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    float LengthOfInput = (float)ub;
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(LengthOfInput));
+    // Value N = inputTensorType.getShape()[0];
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Real part = Sum(x[i] * cos(div) )
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // Img part = -1 * Sum(x[i] * sin(div) )
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
+
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpX);
+    Value final_real =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value final_img =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // Calculate amplitude
+    auto real_squared =
+        rewriter.create<arith::MulFOp>(loc, final_real, final_real);
+    auto img_squared =
+        rewriter.create<arith::MulFOp>(loc, final_img, final_img);
+    auto sum_odd =
+        rewriter.create<arith::AddFOp>(loc, real_squared, img_squared);
+    auto amplitude = rewriter.create<math::SqrtOp>(loc, sum_odd);
+
+    // replace the operation with the final value
+    rewriter.create<AffineStoreOp>(loc, amplitude, alloc_mag, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+    rewriter.replaceOp(op, alloc_mag);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: DFTAbsThresholdUpOp operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::DFTAbsThresholdUpOp to affine loops that compute the DFT
+/// magnitude of the input and threshold it inside the same loop nest.
+/// For each output bin k the generated IR accumulates
+///   re[k] =  sum_i x[i] * cos(2*pi*k*i / N)
+///   im[k] = -sum_i x[i] * sin(2*pi*k*i / N)
+/// and stores `mag >= threshold ? (returnoriginal == 1.0 ? mag : 1.0) : 0.0`
+/// with mag = sqrt(re[k]^2 + im[k]^2).
+struct DFTAbsThresholdUpOpLowering : public ConversionPattern {
+  DFTAbsThresholdUpOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::DFTAbsThresholdUpOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // Accumulators for the real/imaginary parts plus the result buffer, all
+    // shaped like the op result.
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_real = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_img = insertAllocAndDealloc(memRefType, loc, rewriter);
+    auto alloc_mag = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value constant0 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Every loop below iterates over [0, N), N being the leading dimension of
+    // the result; the input is indexed with the same bound.
+    int64_t lb = 0;
+    int64_t ub = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Zero-initialise the three buffers.
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto iv = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_real, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_img, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, constant0, alloc_mag, ValueRange{iv});
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Outer loop over the output bin k.
+    affine::AffineForOp forOpY =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivY = forOpY.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpY.getBody());
+
+    // Inner loop over the input sample i.
+    affine::AffineForOp forOpX =
+        rewriter.create<AffineForOp>(loc, lb, ub, step);
+    auto ivX = forOpX.getInductionVar();
+    rewriter.setInsertionPointToStart(forOpX.getBody());
+
+    // Load x[i] and the running sums re[k], im[k].
+    DFTAbsThresholdUpOpAdaptor dftAbsThresholdUpOp(operands);
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, dftAbsThresholdUpOp.getInput(), ValueRange{ivX});
+    Value loadYReal =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value loadYImg =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // Convert both induction variables index -> i32 -> f64.
+    Value IndxY = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivY);
+    Value k =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxY);
+
+    Value IndxX = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIntegerType(32), ivX);
+    Value i =
+        rewriter.create<arith::UIToFPOp>(loc, rewriter.getF64Type(), IndxX);
+
+    // Phase angle 2*pi * k * i / N.
+    Value muli_k = rewriter.create<arith::MulFOp>(loc, k, i);
+    Value const2pi = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(6.28318530718));
+    Value mul2piKI = rewriter.create<arith::MulFOp>(loc, const2pi, muli_k);
+
+    // N is materialised as an f64 constant. Convert straight to double: going
+    // through `float` would round lengths above 2^24.
+    Value N = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getF64FloatAttr(static_cast<double>(ub)));
+
+    Value divIndxByN = rewriter.create<arith::DivFOp>(loc, mul2piKI, N);
+
+    // Real part: re[k] += x[i] * cos(angle)
+    Value GetCos = rewriter.create<math::CosOp>(loc, divIndxByN);
+    Value xMulCos = rewriter.create<arith::MulFOp>(loc, inputX, GetCos);
+    Value realSum = rewriter.create<arith::AddFOp>(loc, loadYReal, xMulCos);
+    rewriter.create<AffineStoreOp>(loc, realSum, alloc_real, ValueRange{ivY});
+
+    // Imaginary part: im[k] -= x[i] * sin(angle)
+    Value GetSin = rewriter.create<math::SinOp>(loc, divIndxByN);
+    Value xMulSin = rewriter.create<arith::MulFOp>(loc, inputX, GetSin);
+    Value imgSum = rewriter.create<arith::SubFOp>(loc, loadYImg, xMulSin);
+
+    rewriter.create<AffineStoreOp>(loc, imgSum, alloc_img, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpX);
+    Value final_real =
+        rewriter.create<AffineLoadOp>(loc, alloc_real, ValueRange{ivY});
+    Value final_img =
+        rewriter.create<AffineLoadOp>(loc, alloc_img, ValueRange{ivY});
+
+    // Magnitude: mag = sqrt(re[k]^2 + im[k]^2)
+    auto real_squared =
+        rewriter.create<arith::MulFOp>(loc, final_real, final_real);
+    auto img_squared =
+        rewriter.create<arith::MulFOp>(loc, final_img, final_img);
+    auto sum_odd =
+        rewriter.create<arith::AddFOp>(loc, real_squared, img_squared);
+    auto amplitude = rewriter.create<math::SqrtOp>(loc, sum_odd);
+
+    auto thresholdMemRef = dftAbsThresholdUpOp.getThreshold();
+    auto returnOriginalMemRef = dftAbsThresholdUpOp.getReturnoriginal();
+
+    auto threshold =
+        rewriter.create<AffineLoadOp>(loc, thresholdMemRef, ValueRange{});
+    auto returnOriginal =
+        rewriter.create<AffineLoadOp>(loc, returnOriginalMemRef, ValueRange{});
+    Value constant00 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    Value constant11 = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(1));
+    // mag >= threshold (ordered compare)
+    auto cmp1 = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OGE,
+                                               amplitude, threshold);
+    // returnoriginal == 1.0 selects the magnitude itself; any other value
+    // selects the constant 1.0.
+    auto cmpro = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OEQ,
+                                                constant11, returnOriginal);
+
+    // Value reported for bins at or above the threshold: mag or 1.0.
+    auto selectreturn =
+        rewriter.create<arith::SelectOp>(loc, cmpro, amplitude, constant11);
+
+    // Bins below the threshold are reported as 0.0.
+    auto selectOp =
+        rewriter.create<arith::SelectOp>(loc, cmp1, selectreturn, constant00);
+
+    // Store the thresholded value for bin k, then replace the op.
+    rewriter.create<AffineStoreOp>(loc, selectOp, alloc_mag, ValueRange{ivY});
+    rewriter.setInsertionPointAfter(forOpY);
+    rewriter.replaceOp(op, alloc_mag);
+    return success();
+  }
+};
+
+
+/// Lowers dsp::CorrelateOp to scf loops computing the full cross-correlation
+/// of its two 1-D operands. With N the leading dimension of operand 0:
+///   out[k] = sum_{j=0}^{k}       lhs[j] * rhs[N-1-k+j]    for 0 <= k < N
+///   out[k] = sum_{j=k-N+1}^{N-1} lhs[j] * rhs[j-(k-N+1)]  for N <= k < 2N-1
+/// NOTE(review): rhs is indexed with bounds derived from operand 0 only, so
+/// both operands are presumably expected to have the same length -- confirm
+/// against the op verifier.
+struct CorrelateOpLowering : public ConversionPattern {
+  CorrelateOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::CorrelateOp::getOperationName(), 1, ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::CorrelateOp::Adaptor correlateOpAdaptor(operands);
+    Value lhs = correlateOpAdaptor.getLhs();
+    Value rhs = correlateOpAdaptor.getRhs();
+
+    // cast, not dyn_cast: the type is dereferenced unconditionally below.
+    auto inputType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t N = inputType.getShape()[0];
+
+    // Loop bounds shared by both halves of the output.
+    auto lb1 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub1 = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    Value constant_N_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(N - 1));
+
+    // Rank-0 scratch cell holding the running sum of the current lag.
+    auto floatMemRefType = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc_iter_sum =
+        insertAllocAndDealloc(floatMemRefType, loc, rewriter);
+
+    Value constant_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // First half: k in [0, N).
+    auto forOp1 = rewriter.create<scf::ForOp>(loc, lb1, ub1, step);
+    auto k1 = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, constant_zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // rhs index runs over [N-1-k, N); the matching lhs index starts at 0.
+    Value lb1_inner =
+        rewriter.create<arith::SubIOp>(loc, constant_N_minus_one, k1);
+
+    auto forOp1_1 = rewriter.create<scf::ForOp>(loc, lb1_inner, ub1, step);
+    auto iy1 = forOp1_1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1_1.getBody());
+
+    Value ix1 = rewriter.create<arith::SubIOp>(loc, iy1, lb1_inner);
+    Value loadedLhs =
+        rewriter.create<memref::LoadOp>(loc, lhs, ValueRange{ix1});
+    Value loadedRhs =
+        rewriter.create<memref::LoadOp>(loc, rhs, ValueRange{iy1});
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, loadedLhs, loadedRhs);
+
+    Value loaded_sum1 =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    Value inter_sum1 = rewriter.create<arith::AddFOp>(loc, loaded_sum1, mul1);
+    rewriter.create<memref::StoreOp>(loc, inter_sum1, alloc_iter_sum,
+                                     ValueRange{});
+
+    // Back in the outer body: publish the finished sum as out[k].
+    rewriter.setInsertionPointAfter(forOp1_1);
+    auto loaded_sum1_outer =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    rewriter.create<memref::StoreOp>(loc, loaded_sum1_outer, alloc_output,
+                                     ValueRange{k1});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Second half: k in [N, 2N-1); the lower bound reuses ub1 (= N).
+    auto ub2 = rewriter.create<arith::ConstantIndexOp>(loc, 2 * N - 1);
+    auto forOp2 = rewriter.create<scf::ForOp>(loc, ub1, ub2, step);
+    auto k2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, constant_zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // lhs index runs over [k-(N-1), N); the matching rhs index starts at 0.
+    Value lb2_inner =
+        rewriter.create<arith::SubIOp>(loc, k2, constant_N_minus_one);
+
+    auto forOp2_1 = rewriter.create<scf::ForOp>(loc, lb2_inner, ub1, step);
+    auto ix2 = forOp2_1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2_1.getBody());
+
+    Value iy2 = rewriter.create<arith::SubIOp>(loc, ix2, lb2_inner);
+    Value loadedLhs2 =
+        rewriter.create<memref::LoadOp>(loc, lhs, ValueRange{ix2});
+    Value loadedRhs2 =
+        rewriter.create<memref::LoadOp>(loc, rhs, ValueRange{iy2});
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, loadedLhs2, loadedRhs2);
+
+    Value loaded_sum2 =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    Value inter_sum2 = rewriter.create<arith::AddFOp>(loc, loaded_sum2, mul2);
+    rewriter.create<memref::StoreOp>(loc, inter_sum2, alloc_iter_sum,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp2_1);
+    auto loaded_sum2_outer =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    rewriter.create<memref::StoreOp>(loc, loaded_sum2_outer, alloc_output,
+                                     ValueRange{k2});
+    rewriter.setInsertionPointAfter(forOp2);
+    rewriter.replaceOp(op, alloc_output);
+    return success();
+  }
+};
+
+
+/// Lowers dsp::SetSingleElemAtIdxOp: stores the scalar `val` into `input` at
+/// the position given by the (f64) scalar `indx`.
+///
+/// NOTE(review): the store targets the input buffer in place, while the op is
+/// replaced by a freshly allocated result buffer that is never written here.
+/// Presumably the result is a dummy value -- confirm against the op definition.
+struct SetSingleElemAtIdxOpLowering : public ConversionPattern {
+  SetSingleElemAtIdxOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::SetSingleElemAtIdxOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    SetSingleElemAtIdxOpAdaptor setSingleElemAtIdxAdaptor(operands);
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    Value cst_idx_zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Index list addressing element 0 of `memref`: empty for a rank-0 buffer,
+    // one zero per dimension otherwise. The indices live in an owning
+    // SmallVector because ValueRange is a non-owning view; assigning it from
+    // a braced temporary leaves it dangling.
+    auto firstElemIndices = [&](Value memref) {
+      int64_t rank = llvm::cast<MemRefType>(memref.getType()).getRank();
+      return SmallVector<Value, 1>(rank, cst_idx_zero);
+    };
+
+    Value indxMemRef = setSingleElemAtIdxAdaptor.getIndx();
+    Value loadedIndx = rewriter.create<AffineLoadOp>(
+        loc, indxMemRef, firstElemIndices(indxMemRef));
+
+    // f64 -> i32 -> index; the unsigned index cast pairs with the unsigned
+    // float conversion.
+    Value indx_ui = rewriter.create<arith::FPToUIOp>(
+        loc, rewriter.getIntegerType(32), loadedIndx);
+    Value indx_index = rewriter.create<arith::IndexCastUIOp>(
+        loc, rewriter.getIndexType(), indx_ui);
+
+    // `val` is indexed according to its own rank, which need not match the
+    // rank of `indx`.
+    Value valMemRef = setSingleElemAtIdxAdaptor.getVal();
+    Value loadedVal = rewriter.create<AffineLoadOp>(
+        loc, valMemRef, firstElemIndices(valMemRef));
+
+    rewriter.create<AffineStoreOp>(loc, loadedVal,
+                                   setSingleElemAtIdxAdaptor.getInput(),
+                                   ValueRange{indx_index});
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+
+
+/// Lowers dsp::Correl2MaxOptimizedOp, the fusion of a cross-correlation with
+/// a max reduction: every lag is summed exactly as in CorrelateOpLowering,
+/// but only the largest sum is kept, in the rank-0 result buffer.
+///
+/// The running maximum starts at -infinity so that inputs whose correlation
+/// is negative at every lag still yield their true maximum rather than 0.
+struct Correl2MaxOptimizedOpLowering : public ConversionPattern {
+  Correl2MaxOptimizedOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::Correl2MaxOptimizedOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc_output = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    typename dsp::Correl2MaxOptimizedOp::Adaptor correl2MaxOpAdaptor(operands);
+    Value lhs = correl2MaxOpAdaptor.getLhs();
+    Value rhs = correl2MaxOpAdaptor.getRhs();
+
+    // cast, not dyn_cast: the type is dereferenced unconditionally below.
+    auto inputType = llvm::cast<RankedTensorType>(op->getOperand(0).getType());
+    int64_t N = inputType.getShape()[0];
+
+    // Loop bounds shared by both halves of the lag range.
+    auto lb1 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    auto ub1 = rewriter.create<arith::ConstantIndexOp>(loc, N);
+    auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+
+    Value constant_N_minus_one = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(N - 1));
+
+    // Rank-0 scratch cell holding the running sum of the current lag.
+    auto floatMemRefType = MemRefType::get({}, rewriter.getF64Type());
+    auto alloc_iter_sum =
+        insertAllocAndDealloc(floatMemRefType, loc, rewriter);
+
+    Value constant_zero = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // Seed the running maximum with -infinity (see the struct comment).
+    llvm::APFloat negInf =
+        llvm::APFloat::getInf(llvm::APFloat::IEEEdouble(), /*Negative=*/true);
+    Value constant_neg_inf = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(),
+        rewriter.getFloatAttr(rewriter.getF64Type(), negInf));
+    rewriter.create<memref::StoreOp>(loc, constant_neg_inf, alloc_output,
+                                     ValueRange{});
+
+    // First half: lags k in [0, N).
+    auto forOp1 = rewriter.create<scf::ForOp>(loc, lb1, ub1, step);
+    auto k1 = forOp1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, constant_zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // rhs index runs over [N-1-k, N); the matching lhs index starts at 0.
+    Value lb1_inner =
+        rewriter.create<arith::SubIOp>(loc, constant_N_minus_one, k1);
+
+    auto forOp1_1 = rewriter.create<scf::ForOp>(loc, lb1_inner, ub1, step);
+    auto iy1 = forOp1_1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp1_1.getBody());
+
+    Value ix1 = rewriter.create<arith::SubIOp>(loc, iy1, lb1_inner);
+    Value loadedLhs =
+        rewriter.create<memref::LoadOp>(loc, lhs, ValueRange{ix1});
+    Value loadedRhs =
+        rewriter.create<memref::LoadOp>(loc, rhs, ValueRange{iy1});
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, loadedLhs, loadedRhs);
+
+    Value loaded_sum1 =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    Value inter_sum1 = rewriter.create<arith::AddFOp>(loc, loaded_sum1, mul1);
+    rewriter.create<memref::StoreOp>(loc, inter_sum1, alloc_iter_sum,
+                                     ValueRange{});
+
+    // Back in the outer body: compare the finished sum with the current max.
+    rewriter.setInsertionPointAfter(forOp1_1);
+    auto loaded_sum1_outer =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    auto loaded_output1 =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+    auto compare_sum1_output1 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, loaded_sum1_outer, loaded_output1);
+
+    // scf.if without an else region: overwrite the max only when exceeded.
+    auto ifOp1 = rewriter.create<scf::IfOp>(loc, compare_sum1_output1, false);
+    rewriter.setInsertionPointToStart(ifOp1.thenBlock());
+    rewriter.create<memref::StoreOp>(loc, loaded_sum1_outer, alloc_output,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp1);
+
+    // Second half: lags k in [N, 2N-1); the lower bound reuses ub1 (= N).
+    auto ub2 = rewriter.create<arith::ConstantIndexOp>(loc, 2 * N - 1);
+    auto forOp2 = rewriter.create<scf::ForOp>(loc, ub1, ub2, step);
+    auto k2 = forOp2.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    rewriter.create<memref::StoreOp>(loc, constant_zero, alloc_iter_sum,
+                                     ValueRange{});
+
+    // lhs index runs over [k-(N-1), N); the matching rhs index starts at 0.
+    Value lb2_inner =
+        rewriter.create<arith::SubIOp>(loc, k2, constant_N_minus_one);
+
+    auto forOp2_1 = rewriter.create<scf::ForOp>(loc, lb2_inner, ub1, step);
+    auto ix2 = forOp2_1.getInductionVar();
+    rewriter.setInsertionPointToStart(forOp2_1.getBody());
+
+    Value iy2 = rewriter.create<arith::SubIOp>(loc, ix2, lb2_inner);
+    Value loadedLhs2 =
+        rewriter.create<memref::LoadOp>(loc, lhs, ValueRange{ix2});
+    Value loadedRhs2 =
+        rewriter.create<memref::LoadOp>(loc, rhs, ValueRange{iy2});
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, loadedLhs2, loadedRhs2);
+
+    Value loaded_sum2 =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    Value inter_sum2 = rewriter.create<arith::AddFOp>(loc, loaded_sum2, mul2);
+    rewriter.create<memref::StoreOp>(loc, inter_sum2, alloc_iter_sum,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp2_1);
+    auto loaded_sum2_outer =
+        rewriter.create<memref::LoadOp>(loc, alloc_iter_sum, ValueRange{});
+    auto loaded_output2 =
+        rewriter.create<memref::LoadOp>(loc, alloc_output, ValueRange{});
+    auto compare_sum2_output2 = rewriter.create<arith::CmpFOp>(
+        loc, arith::CmpFPredicate::OGT, loaded_sum2_outer, loaded_output2);
+
+    auto ifOp2 = rewriter.create<scf::IfOp>(loc, compare_sum2_output2, false);
+    rewriter.setInsertionPointToStart(ifOp2.thenBlock());
+    rewriter.create<memref::StoreOp>(loc, loaded_sum2_outer, alloc_output,
+                                     ValueRange{});
+
+    rewriter.setInsertionPointAfter(forOp2);
+    rewriter.replaceOp(op, alloc_output);
+    return success();
+  }
+};
+
+
+
+
+//===----------------------------------------------------------------------===//
+// ToyToAffine RewritePatterns: LMSFilterResponse2Gain operations
+//===----------------------------------------------------------------------===//
+
+/// Lowers dsp::LMSFilterResponse2GainOp: an LMS adaptive filter whose output
+/// is additionally scaled by `gain` inside the same loop nest.
+///
+/// Generated code, with x = lhs, d = rhs and L = filter length (operand 3):
+///
+///   for (n = 0; n < numSamples; n++) {
+///     w[n] = 0;
+///     y[n] = 0;
+///     for (i = 0; i < L; i++) {
+///       if (n - i >= 0)                 // affine.if
+///         y[n] = y[n] + w[i] * x[n - i];
+///     }
+///     e = d[n] - y[n];                  // error, from the unscaled output
+///     y[n] = y[n] * gain;
+///     for (i = 0; i < L; i++) {
+///       if (n - i >= 0)
+///         w[i] = w[i] + mu * e * x[n - i];
+///     }
+///   }
+///
+/// w[n] is cleared at the top of iteration n, which is right before its first
+/// use: the `n - i >= 0` guard keeps every earlier iteration away from it.
+struct LMSFilterResponse2GainOpLowering : public ConversionPattern {
+  LMSFilterResponse2GainOpLowering(MLIRContext *ctx)
+      : ConversionPattern(dsp::LMSFilterResponse2GainOp::getOperationName(), 1,
+                          ctx) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
+
+    // The filter length is read from the dsp.constant feeding operand 3, so
+    // it must be known at compile time. Bail out before any IR is created
+    // when it is not, instead of dereferencing a null defining op.
+    auto filterLenConst = op->getOperand(3).getDefiningOp<dsp::ConstantOp>();
+    if (!filterLenConst || filterLenConst.getValue().getNumElements() == 0)
+      return rewriter.notifyMatchFailure(
+          op, "filter length must come from a non-empty dsp.constant");
+
+    // Convert the f64 constant straight to int64_t, the type AffineForOp
+    // takes for its bounds; no detour through `float` or `uint64_t`.
+    DenseElementsAttr filterLenAttr = filterLenConst.getValue();
+    auto FilterLength = static_cast<int64_t>(
+        filterLenAttr.getValues<FloatAttr>()[0].getValueAsDouble());
+
+    auto tensorType = llvm::cast<RankedTensorType>((*op->result_type_begin()));
+
+    // allocation & deallocation for the result of this operation
+    auto memRefType = convertTensorToMemRef(tensorType);
+    auto alloc = insertAllocAndDealloc(memRefType, loc, rewriter);
+
+    LMSFilterResponse2GainOpAdaptor lmsFilterResponse2GainAdaptor(operands);
+
+    // Shared f64 zero used to clear w[n] and y[n].
+    Value zeroval = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+
+    // mu and gain are loaded once, before the loop nest.
+    Value mu = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterResponse2GainAdaptor.getMu());
+    Value gain = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterResponse2GainAdaptor.getGain());
+
+    // Sample loop bounds: [0, numSamples) with unit step.
+    int64_t lb = 0;
+    int64_t numSamples = tensorType.getShape()[0];
+    int64_t step = 1;
+
+    // Weight buffer w. It goes through insertAllocAndDealloc like the result
+    // buffer above, which (per its name) pairs the allocation with a
+    // deallocation; a bare memref.alloc here would never be freed.
+    auto yMemRefType = MemRefType::get({numSamples}, rewriter.getF64Type());
+    auto wAlloc = insertAllocAndDealloc(yMemRefType, loc, rewriter);
+
+    affine::AffineForOp forOp1 =
+        rewriter.create<AffineForOp>(loc, lb, numSamples, step);
+    auto iv = forOp1.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp1.getBody());
+
+    // d0 = n (sample index), d1 = i (tap index). The expression d0 - d1 is
+    // used twice: as the access map for x[n - i], and as the constraint
+    // `d0 - d1 >= 0` (the `false` flag marks it as an inequality) guarding
+    // the affine.if ops below.
+    AffineExpr d0, d1;
+    bindDims(rewriter.getContext(), d0, d1);
+    AffineExpr ExprForXSlice = d0 - d1;
+    AffineMap addMapForLMSFilter = AffineMap::get(2, 0, ExprForXSlice);
+    IntegerSet set1 = IntegerSet::get(2, 0, {ExprForXSlice}, {false});
+
+    // w[n] = 0; y[n] = 0
+    rewriter.create<AffineStoreOp>(loc, zeroval, wAlloc, ValueRange{iv});
+    rewriter.create<AffineStoreOp>(loc, zeroval, alloc, ValueRange{iv});
+
+    // Filter output: y[n] += w[i] * x[n - i] for every tap i.
+    affine::AffineForOp forOp2 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv2 = forOp2.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp2.getBody());
+
+    // Guard: skip taps that would read x at a negative index.
+    auto ifOp = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv2}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp.getThenBlock());
+
+    // Load x[n - i] through the access map, and the current weight w[i].
+    Value inputX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterResponse2GainAdaptor.getLhs(), addMapForLMSFilter,
+        ValueRange{iv, iv2});
+    Value w = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv2});
+
+    Value wmulx = rewriter.create<arith::MulFOp>(loc, inputX, w);
+    Value ybefore = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+    Value sumNext = rewriter.create<arith::AddFOp>(loc, wmulx, ybefore);
+    rewriter.create<AffineStoreOp>(loc, sumNext, alloc, ValueRange{iv});
+
+    // Leave the affine.if and the tap loop; continue in the sample loop body.
+    rewriter.setInsertionPointAfter(forOp2);
+
+    // Error e[n] = d[n] - y[n]. It is taken from the unscaled y[n], i.e.
+    // before the gain is applied below.
+    Value desiredX = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterResponse2GainAdaptor.getRhs(), ValueRange{iv});
+    Value ynew = rewriter.create<AffineLoadOp>(loc, alloc, ValueRange{iv});
+
+    Value err = rewriter.create<arith::SubFOp>(loc, desiredX, ynew);
+
+    // y[n] = y[n] * gain (the fused gain stage)
+    Value ynewGain = rewriter.create<arith::MulFOp>(loc, ynew, gain);
+    rewriter.create<AffineStoreOp>(loc, ynewGain, alloc, ValueRange{iv});
+
+    // Weight update: w[i] += mu * e[n] * x[n - i] for every tap i.
+    affine::AffineForOp forOp3 =
+        rewriter.create<AffineForOp>(loc, lb, FilterLength, step);
+    auto iv3 = forOp3.getInductionVar();
+
+    rewriter.setInsertionPointToStart(forOp3.getBody());
+
+    // Same guard as above: only taps with n - i >= 0 are updated.
+    auto ifOp2 = rewriter.create<affine::AffineIfOp>(
+        loc, set1, ValueRange{iv, iv3}, false /*no else*/);
+    rewriter.setInsertionPointToStart(ifOp2.getThenBlock());
+
+    Value inputX2 = rewriter.create<AffineLoadOp>(
+        loc, lmsFilterResponse2GainAdaptor.getLhs(), addMapForLMSFilter,
+        ValueRange{iv, iv3});
+    Value Prevw2 = rewriter.create<AffineLoadOp>(loc, wAlloc, ValueRange{iv3});
+
+    // LMS update term: mu * e[n] * x[n - i]
+    Value mul1 = rewriter.create<arith::MulFOp>(loc, err, inputX2);
+    Value mul2 = rewriter.create<arith::MulFOp>(loc, mu, mul1);
+
+    // New weight w[i]
+    Value answer = rewriter.create<arith::AddFOp>(loc, Prevw2, mul2);
+    rewriter.create<AffineStoreOp>(loc, answer, wAlloc, ValueRange{iv3});
+
+    // The loop nest is complete; leave the sample loop before replacing op.
+    rewriter.setInsertionPointAfter(forOp1);
+
+    rewriter.replaceOp(op, alloc);
+
+    return success();
+  }
+};
+
+
+
+//===----------------------------------------------------------------------===//
+// Pattern population
+//===----------------------------------------------------------------------===//
+
+/// Adds the DSP lowering patterns listed below to `patterns`, using the
+/// context owned by the pattern set. Each pattern is listed exactly once.
+void mlir::dsp::populateDSPToAffineConversionPatterns(RewritePatternSet &patterns) {
+  // clang-format off
+  patterns.add<AddOpLowering, ModuloOpLowering, ConstantOpLowering, FuncOpLowering,
+  MulOpLowering, PrintOpLowering, ReturnOpLowering, TransposeOpLowering,
+  DelayOpLowering, GainOpLowering, SubOpLowering,
+  FIRFilterResponseOpLowering, SlidingWindowAvgOpLowering,
+  DownSamplingOpLowering, UpSamplingOpLowering,
+  LowPassFilter1stOrderOpLowering, HighPassFilterOpLowering,
+  FFT1DOpLowering, IFFT1DOpLowering, HammingWindowOpLowering, DCTOpLowering,
+  filterOpLowering, DivOpLowering, BitwiseAndOpLowering, PowOpLowering,
+  zeroCrossCountOpLowering, SumOpLowering, SinOpLowering, CosOpLowering,
+  SquareOpLowering, FFT1DRealOpLowering, FFT1DImgOpLowering, SincOpLowering,
+  GetElemAtIndxOpLowering, SetElemAtIndxOpLowering,
+  LowPassFIRFilterOpLowering, HighPassFIRFilterOpLowering,
+  GetRangeOfVectorOpLowering, FIRFilterHammingOptimizedOpLowering,
+  HighPassFIRHammingOptimizedOpLowering, LMSFilterOpLowering,
+  ThresholdOpLowering, QuantizationOpLowering, LMSFilterResponseOpLowering,
+  RunLenEncodingOpLowering, FIRFilterResSymmOptimizedOpLowering,
+  LengthOpLowering, ReverseInputOpLowering, PaddingOpLowering,
+  FIRFilterYSymmOptimizedOpLowering, FFT1DRealSymmOpLowering,
+  FFT1DImgConjSymmOpLowering, FFTRealOpLowering, FFTImagOpLowering,
+  Conv2DOpLowering, ShiftRightOpLowering, MatmulOpLowering,
+  ThresholdUpOpLowering, QamModulateRealOpLowering,
+  QamModulateImgOpLowering, QamDemodulateOpLowering, FindPeaksOpLowering,
+  BeamFormOpLowering, SpaceModulateOpLowering, SpaceDemodulateOpLowering,
+  SpaceErrCorrectionOpLowering, MaxOpLowering,
+  MeanOpLowering, DiffOpLowering, GetSingleElemAtIdxOpLowering,
+  Diff2MeanOptimizedOpLowering, Median2SlidingOptimizedOpLowering,
+  NormalizeOpLowering, AbsOpLowering, MedianFilterOpLowering,
+  LMS2FindPeaksOptimizedOpLowering, FindPeaks2Diff2MeanOptimizedOpLowering,
+  NormLMSFilterResponseOptimizeOpLowering,
+  FIRFilterResSymmThresholdUpOptimizedOpLowering, FFTCombineOpLowering,
+  GenerateDTMFOpLowering, GenerateVoiceSignatureOpLowering, SqrtOpLowering,
+  FFTFreqOpLowering, FindDominantPeaksOpLowering,
+  RecoverDTMFDigitOpLowering, FFTOpLowering, FFTAbsOpLowering,
+  DFTAbsOpLowering, DFTAbsThresholdUpOpLowering, ArgMaxOpLowering, CorrelateOpLowering,
+  SetSingleElemAtIdxOpLowering, Correl2MaxOptimizedOpLowering, LMSFilterResponse2GainOpLowering>(patterns.getContext());
+  // clang-format on
+}
diff --git a/mlir/lib/Conversion/DSPToAffine/DSPToAffinePass.cpp b/mlir/lib/Conversion/DSPToAffine/DSPToAffinePass.cpp
new file mode 100644
index 000000000000..861c3c6b59bc
--- /dev/null
+++ b/mlir/lib/Conversion/DSPToAffine/DSPToAffinePass.cpp
@@ -0,0 +1,73 @@
+//===- DSPToAffinePass.cpp - Lowering DSP to Affine Dialect -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to convert the DSP dialect to the Affine dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/DSPToAffine/DSPToAffinePass.h"
+#include "mlir/Conversion/DSPToAffine/DSPToAffine.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+
+
+namespace mlir {
+#define GEN_PASS_DEF_CONVERTDSPTOAFFINE
+#include "mlir/Conversion/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+
+namespace {
+class ConvertDSPToAffinePass
+    : public impl::ConvertDSPToAffineBase<ConvertDSPToAffinePass> {
+  /// Lowers DSP dialect operations using the DSP-to-Affine conversion patterns.
+  void runOnOperation() override {
+    auto &context = getContext();
+    ConversionTarget target(context);
+    // Operations of these dialects are allowed to remain after the conversion.
+    target.addLegalDialect<mlir::affine::AffineDialect, mlir::BuiltinDialect,
+                           mlir::arith::ArithDialect,  mlir::func::FuncDialect,
+                           mlir::memref::MemRefDialect,  mlir::math::MathDialect,
+                           mlir::scf::SCFDialect, mlir::LLVM::LLVMDialect>();
+    // Everything from the DSP dialect is illegal, with one exception below.
+    target.addIllegalDialect<dsp::DSPDialect>();
+    target.addDynamicallyLegalOp<dsp::PrintOp>([](dsp::PrintOp op) {
+      // `dsp::PrintOp` is legal only once none of its operands is tensor-typed;
+      // a print that still takes a tensor operand has to be rewritten by one of
+      // the conversion patterns.
+      return llvm::none_of(op->getOperandTypes(), [](Type type) {
+        return llvm::isa<TensorType>(type);
+      });
+
+    });
+    RewritePatternSet patterns(&context);
+    dsp::populateDSPToAffineConversionPatterns(patterns);
+    // Partial conversion: ops not marked legal or illegal are left untouched.
+    if (failed(applyPartialConversion(getOperation(), target,
+                                      std::move(patterns))))
+      return signalPassFailure();
+  }
+};
+} // namespace
+/// Creates the pass that lowers the DSP dialect; it operates on a `ModuleOp`.
+std::unique_ptr<OperationPass<ModuleOp>> mlir::createConvertDSPToAffinePass() {
+  return std::make_unique<ConvertDSPToAffinePass>();
+}
\ No newline at end of file
diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt
index a324ce7f9b19..864073edf123 100644
--- a/mlir/lib/Dialect/CMakeLists.txt
+++ b/mlir/lib/Dialect/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(Async)
 add_subdirectory(Bufferization)
 add_subdirectory(Complex)
 add_subdirectory(ControlFlow)
+add_subdirectory(DSP)
 add_subdirectory(DLTI)
 add_subdirectory(EmitC)
 add_subdirectory(Func)
diff --git a/mlir/lib/Dialect/DSP/CMakeLists.txt b/mlir/lib/Dialect/DSP/CMakeLists.txt
new file mode 100644
index 000000000000..b5ae13163fa7
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory(IR)
+add_subdirectory(Transforms)
+add_subdirectory(Pipelines)
diff --git a/mlir/lib/Dialect/DSP/IR/CMakeLists.txt b/mlir/lib/Dialect/DSP/IR/CMakeLists.txt
new file mode 100644
index 000000000000..197dcac3f898
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/IR/CMakeLists.txt
@@ -0,0 +1,34 @@
+set(LLVM_OPTIONAL_SOURCES
+  DSPDialect.cpp
+  ToyCombine.cpp
+  )
+
+set(LLVM_TARGET_DEFINITIONS ToyCombine.td)
+mlir_tablegen(ToyCombine.inc -gen-rewriters)
+add_public_tablegen_target(MLIRToyCombineIncGen)
+
+add_mlir_dialect_library(MLIRDSPDialect
+  DSPDialect.cpp
+  ToyCombine.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/DSP
+
+  DEPENDS
+  MLIRShapeInferenceOpInterfacesIncGen
+  MLIRToyCombineIncGen # generate ToyCombine.inc before compiling the sources
+  #DspSimpleBlocksOpsIncGen
+  #DspSimpleBlocksCombineIncGen
+
+  # MLIRDSPEnumsGen
+  # MLIRDSPAttributesIncGen
+  # MLIRDSPIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRArithDialect
+  MLIRIR
+  MLIRSideEffectInterfaces
+  MLIRShapedOpInterfaces
+
+  # MLIRToyCombineIncGen is a tablegen target: it belongs in DEPENDS, not here.
+  )
diff --git a/mlir/lib/Dialect/DSP/IR/DSPDialect.cpp b/mlir/lib/Dialect/DSP/IR/DSPDialect.cpp
new file mode 100644
index 000000000000..9b5cf5beb75a
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/IR/DSPDialect.cpp
@@ -0,0 +1,3760 @@
+//===- Dialect.cpp - DSP IR Dialect registration in MLIR ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the dialect for the DSP IR: custom type parsing and
+// operation verification.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+#include <iostream>
+
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/OperationSupport.h"
+#include "mlir/IR/ValueRange.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Interfaces/FunctionImplementation.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/InliningUtils.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+
+using namespace mlir;
+using namespace mlir::dsp;
+using namespace std;
+
+#include "mlir/Dialect/DSP/IR/DSPDialect.cpp.inc"
+
+#include "llvm/Support/CommandLine.h"
+
+
+//===----------------------------------------------------------------------===//
+// DSP Dialect-specific flags    
+//===----------------------------------------------------------------------===//
+
+/// Command-line flags of the DSP dialect.
+struct DSPOptions {
+  /// `dsp-canonicalize`: off unless explicitly requested.
+  llvm::cl::opt<bool> enableCanonicalOpt{
+      "dsp-canonicalize",
+      llvm::cl::desc("Enables canonicalization for DSP dialect operations."),
+      llvm::cl::init(false)};
+};
+
+static llvm::ManagedStatic<DSPOptions> dspOptions;
+
+
+namespace mlir {
+/// Touches the lazily constructed DSP option set so its flags get registered.
+void registerDSPOptions() {
+  // Make sure that the options struct has been initialized.
+  *dspOptions;
+}
+
+/// Returns the current value of the `dsp-canonicalize` flag.
+bool getEnableCanonicalOpt() {
+    return dspOptions->enableCanonicalOpt;
+}
+
+} // namespace mlir
+
+
+
+//bool getEnableCanonicalOpt() {
+    //return enableCanonicalOpt;
+//      return false;
+//}
+
+
+
+
+//===----------------------------------------------------------------------===//
+// DSPInlinerInterface
+//===----------------------------------------------------------------------===//
+
+/// Inlining hooks for the DSP dialect. Every legality query answers "yes", so
+/// the inliner is never blocked by this dialect.
+struct DSPInlinerInterface : public DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+
+  //===--------------------------------------------------------------------===//
+  // Analysis Hooks
+  //===--------------------------------------------------------------------===//
+
+  /// Call-site query: any call may be inlined into any callable.
+  bool isLegalToInline(Operation *call, Operation *callable,
+                       bool wouldBeCloned) const final {
+    return true;
+  }
+
+  /// Operation query: any operation may be inlined into any region.
+  bool isLegalToInline(Operation *, Region *, bool, IRMapping &) const final {
+    return true;
+  }
+
+  /// Region query: any region may be inlined into any other region.
+  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
+    return true;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Transformation Hooks
+  //===--------------------------------------------------------------------===//
+
+  /// Handle the given inlined terminator (dsp.return) by forwarding each of its
+  /// operands to the corresponding entry of `valuesToRepl`.
+  void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
+    // Only "dsp.return" is expected here; the cast asserts on anything else.
+    auto returnOp = cast<ReturnOp>(op);
+
+    // One returned operand per value to replace, matched up by position.
+    assert(returnOp.getNumOperands() == valuesToRepl.size());
+    for (const auto &it : llvm::enumerate(returnOp.getOperands()))
+      valuesToRepl[it.index()].replaceAllUsesWith(it.value());
+  }
+
+  /// Materializes a conversion for a type mismatch between a call from this
+  /// dialect and a callable region. The generated operation takes 'input' as
+  /// its only operand and produces a single result of 'resultType'. This
+  /// implementation always creates a `CastOp` at 'conversionLoc' and never
+  /// returns nullptr.
+  Operation *materializeCallConversion(OpBuilder &builder, Value input,
+                                       Type resultType,
+                                       Location conversionLoc) const final {
+    return builder.create<CastOp>(conversionLoc, resultType, input);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// DSPDialect
+//===----------------------------------------------------------------------===//
+
+void DSPDialect::initialize() {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/Dialect/DSP/IR/DSP.cpp.inc"
+      >();
+  addInterfaces<DSPInlinerInterface>();
+}
+
+//===----------------------------------------------------------------------===//
+// DSP Operations
+//===----------------------------------------------------------------------===//
+
+/// Shared parser for the two-operand ops. It accepts both textual forms
+/// emitted by 'printBinaryOp' below.
+static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
+                                       mlir::OperationState &result) {
+  SmallVector<mlir::OpAsmParser::UnresolvedOperand, 2> parsedOperands;
+  Type parsedType;
+  SMLoc operandsLoc = parser.getCurrentLocation();
+  if (parser.parseOperandList(parsedOperands, /*requiredOperandCount=*/2) ||
+      parser.parseOptionalAttrDict(result.attributes) ||
+      parser.parseColonType(parsedType))
+    return mlir::failure();
+
+  // Short form: a single type shared by both operands and the result.
+  auto funcType = llvm::dyn_cast<FunctionType>(parsedType);
+  if (!funcType) {
+    if (parser.resolveOperands(parsedOperands, parsedType, result.operands))
+      return mlir::failure();
+    result.addTypes(parsedType);
+    return mlir::success();
+  }
+
+  // Long form: a function type carries operand and result types separately.
+  if (parser.resolveOperands(parsedOperands, funcType.getInputs(), operandsLoc,
+                             result.operands))
+    return mlir::failure();
+  result.addTypes(funcType.getResults());
+  return mlir::success();
+}
+
+/// Shared printer for the two-operand ops. The trailing type is printed in
+/// short form when every operand type equals the result type.
+static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) {
+  printer << " " << op->getOperands();
+  printer.printOptionalAttrDict(op->getAttrs());
+  printer << " : ";
+
+  // Mixed types need the full functional type; otherwise the single shared
+  // type is enough.
+  Type resultType = *op->result_type_begin();
+  auto differsFromResult = [=](Type type) { return type != resultType; };
+  if (llvm::any_of(op->getOperandTypes(), differsFromResult)) {
+    printer.printFunctionalType(op->getOperandTypes(), op->getResultTypes());
+    return;
+  }
+
+  printer << resultType;
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantOp
+//===----------------------------------------------------------------------===//
+
+/// Build a scalar constant operation from a single double.
+/// The value is wrapped in a rank-0 f64 tensor attribute; the same tensor type
+/// is forwarded as the result type of the operation.
+void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                       double value) {
+  auto scalarType = RankedTensorType::get({}, builder.getF64Type());
+  ConstantOp::build(builder, state, scalarType,
+                    DenseElementsAttr::get(scalarType, value));
+}
+
+// void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+//                        int value) {
+//   auto dataType = RankedTensorType::get({}, builder.getI64Type());
+//   auto dataAttribute = DenseElementsAttr::get(dataType, value);
+//   ConstantOp::build(builder, state, dataType, dataAttribute);
+// }
+
+/// The 'OpAsmParser' class provides a collection of methods for parsing
+/// various punctuation, as well as attributes, operands, types, etc. Each of
+/// these methods returns a `ParseResult`. This class is a wrapper around
+/// `LogicalResult` that can be converted to a boolean `true` value on failure,
+/// or `false` on success. This allows for easily chaining together a set of
+/// parser rules. These rules are used to populate an `mlir::OperationState`
+/// similarly to the `build` methods described above.
+mlir::ParseResult ConstantOp::parse(mlir::OpAsmParser &parser,
+                                    mlir::OperationState &result) {
+  mlir::DenseElementsAttr value;
+  if (parser.parseOptionalAttrDict(result.attributes) ||
+      parser.parseAttribute(value, "value", result.attributes))
+    return failure();
+
+  result.addTypes(value.getType());
+  return success();
+}
+
+/// The 'OpAsmPrinter' class is a stream that allows for formatting
+/// strings, attributes, operands, types, etc.
+void ConstantOp::print(mlir::OpAsmPrinter &printer) {
+  printer << " ";
+  printer.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{"value"});
+  printer << getValue();
+}
+
+/// Verifier for the constant operation, enabled by `let hasVerifier = 1` in
+/// the op definition.
+mlir::LogicalResult ConstantOp::verify() {
+  // An unranked result places no constraint on the attached data.
+  auto rankedResult =
+      llvm::dyn_cast<mlir::RankedTensorType>(getResult().getType());
+  if (!rankedResult)
+    return success();
+
+  // The attribute and the result must agree on rank first ...
+  auto valueType = llvm::cast<mlir::RankedTensorType>(getValue().getType());
+  const int64_t valueRank = valueType.getRank();
+  const int64_t resultRank = rankedResult.getRank();
+  if (valueRank != resultRank)
+    return emitOpError("return type must match the one of the attached value "
+                       "attribute: ")
+           << valueRank << " != " << resultRank;
+
+  // ... and then on the extent of every dimension.
+  auto valueShape = valueType.getShape();
+  auto resultShape = rankedResult.getShape();
+  for (int dim = 0, dimE = valueRank; dim < dimE; ++dim) {
+    if (valueShape[dim] == resultShape[dim])
+      continue;
+    return emitOpError(
+               "return type shape mismatches its attribute at dimension ")
+           << dim << ": " << valueShape[dim] << " != " << resultShape[dim];
+  }
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloOp
+//===----------------------------------------------------------------------===//
+
+void ModuloOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+void ModuloOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// AddOp
+//===----------------------------------------------------------------------===//
+
+void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+mlir::ParseResult AddOp::parse(mlir::OpAsmParser &parser,
+                               mlir::OperationState &result) {
+  return parseBinaryOp(parser, result);
+}
+
+void AddOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the AddOp, this is required by the shape inference
+/// interface.
+void AddOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// CastOp
+//===----------------------------------------------------------------------===//
+
+/// Infer the output shape of the CastOp, this is required by the shape
+/// inference interface.
+void CastOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Decides whether a cast from `inputs` to `outputs` is well-formed: exactly
+/// one tensor on each side, identical element types, and identical types
+/// whenever both sides are ranked. Required by the `CastOpInterface`.
+bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
+  if (inputs.size() != 1 || outputs.size() != 1)
+    return false;
+  auto source = llvm::dyn_cast<TensorType>(inputs.front());
+  auto target = llvm::dyn_cast<TensorType>(outputs.front());
+  if (!source || !target)
+    return false;
+  if (source.getElementType() != target.getElementType())
+    return false;
+  return !(source.hasRank() && target.hasRank()) || source == target;
+}
+
+//===----------------------------------------------------------------------===//
+// FuncOp
+//===----------------------------------------------------------------------===//
+
+void FuncOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   llvm::StringRef name, mlir::FunctionType type,
+                   llvm::ArrayRef<mlir::NamedAttribute> attrs) {
+  // FunctionOpInterface provides a convenient `build` method that will populate
+  // the state of our FuncOp, and create an entry block.
+  buildWithEntryBlock(builder, state, name, type, attrs, type.getInputs());
+}
+
+mlir::ParseResult FuncOp::parse(mlir::OpAsmParser &parser,
+                                mlir::OperationState &result) {
+  // Dispatch to the FunctionOpInterface provided utility method that parses the
+  // function operation.
+  auto buildFuncType =
+      [](mlir::Builder &builder, llvm::ArrayRef<mlir::Type> argTypes,
+         llvm::ArrayRef<mlir::Type> results,
+         mlir::function_interface_impl::VariadicFlag,
+         std::string &) { return builder.getFunctionType(argTypes, results); };
+
+  return mlir::function_interface_impl::parseFunctionOp(
+      parser, result, /*allowVariadic=*/false,
+      getFunctionTypeAttrName(result.name), buildFuncType,
+      getArgAttrsAttrName(result.name), getResAttrsAttrName(result.name));
+}
+
+void FuncOp::print(mlir::OpAsmPrinter &p) {
+  // Dispatch to the FunctionOpInterface provided utility method that prints the
+  // function operation.
+  mlir::function_interface_impl::printFunctionOp(
+      p, *this, /*isVariadic=*/false, getFunctionTypeAttrName(),
+      getArgAttrsAttrName(), getResAttrsAttrName());
+}
+
+//===----------------------------------------------------------------------===//
+// GenericCallOp
+//===----------------------------------------------------------------------===//
+
+void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                          StringRef callee, ArrayRef<mlir::Value> arguments) {
+  // Generic call always returns an unranked Tensor initially.
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands(arguments);
+  state.addAttribute("callee",
+                     mlir::SymbolRefAttr::get(builder.getContext(), callee));
+}
+
+/// Return the callee of the generic call operation, this is required by the
+/// call interface.
+CallInterfaceCallable GenericCallOp::getCallableForCallee() {
+  return (*this)->getAttrOfType<SymbolRefAttr>("callee");
+}
+
+/// Set the callee for the generic call operation, this is required by the call
+/// interface.
+void GenericCallOp::setCalleeFromCallable(CallInterfaceCallable callee) {
+  (*this)->setAttr("callee", callee.get<SymbolRefAttr>());
+}
+
+/// Get the argument operands to the called function, this is required by the
+/// call interface.
+Operation::operand_range GenericCallOp::getArgOperands() { return getInputs(); }
+
+/// Get the argument operands to the called function as a mutable range, this is
+/// required by the call interface.
+MutableOperandRange GenericCallOp::getArgOperandsMutable() {
+  return getInputsMutable();
+}
+
+//===----------------------------------------------------------------------===//
+// MulOp
+//===----------------------------------------------------------------------===//
+
+void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+mlir::ParseResult MulOp::parse(mlir::OpAsmParser &parser,
+                               mlir::OperationState &result) {
+  return parseBinaryOp(parser, result);
+}
+
+void MulOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the MulOp, this is required by the shape inference
+/// interface.
+void MulOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// DivOp
+//===----------------------------------------------------------------------===//
+
+void DivOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+mlir::ParseResult DivOp::parse(mlir::OpAsmParser &parser,
+                               mlir::OperationState &result) {
+  return parseBinaryOp(parser, result);
+}
+
+void DivOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the DivOp, this is required by the shape inference
+/// interface.
+void DivOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// BitwiseAndOp
+//===----------------------------------------------------------------------===//
+
+void BitwiseAndOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+mlir::ParseResult BitwiseAndOp::parse(mlir::OpAsmParser &parser,
+                                      mlir::OperationState &result) {
+  return parseBinaryOp(parser, result);
+}
+
+void BitwiseAndOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the BitwiseAndOp, this is required by the shape
+/// inference interface.
+void BitwiseAndOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// ReturnOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult ReturnOp::verify() {
+  // The 'HasParent' trait guarantees that the parent operation is a function.
+  auto enclosingFunc = cast<FuncOp>((*this)->getParentOp());
+
+  // ReturnOps can only have a single optional operand.
+  if (getNumOperands() > 1)
+    return emitOpError() << "expects at most 1 return operand";
+
+  // The number of operands must match the function signature.
+  const auto &declaredResults = enclosingFunc.getFunctionType().getResults();
+  if (getNumOperands() != declaredResults.size())
+    return emitOpError() << "does not return the same number of values ("
+                         << getNumOperands() << ") as the enclosing function ("
+                         << declaredResults.size() << ")";
+
+  // A return without an operand has nothing further to check.
+  if (!hasOperand())
+    return mlir::success();
+
+  auto operandType = *operand_type_begin();
+  auto declaredType = declaredResults.front();
+
+  // The types must be identical unless either side is still unranked.
+  const bool typesAgree = operandType == declaredType ||
+                          llvm::isa<mlir::UnrankedTensorType>(operandType) ||
+                          llvm::isa<mlir::UnrankedTensorType>(declaredType);
+  if (typesAgree)
+    return mlir::success();
+
+  return emitError() << "type of return operand (" << operandType
+                     << ") doesn't match function result type ("
+                     << declaredType << ")";
+}
+
+//===----------------------------------------------------------------------===//
+// TransposeOp
+//===----------------------------------------------------------------------===//
+
+void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value value) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands(value);
+}
+
+void TransposeOp::inferShapes() {
+  auto arrayTy = llvm::cast<RankedTensorType>(getOperand().getType());
+  SmallVector<int64_t, 2> dims(llvm::reverse(arrayTy.getShape()));
+  getResult().setType(RankedTensorType::get(dims, arrayTy.getElementType()));
+}
+
+mlir::LogicalResult TransposeOp::verify() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
+  if (!inputType || !resultType)
+    return mlir::success();
+  // Compare full ranges so that a rank mismatch cannot read out of bounds.
+  auto inputShape = inputType.getShape();
+  auto resultShape = resultType.getShape();
+  if (!std::equal(inputShape.begin(), inputShape.end(), resultShape.rbegin(),
+                  resultShape.rend()))
+    return emitError()
+           << "expected result shape to be a transpose of the input";
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// DelayOp
+//===----------------------------------------------------------------------===//
+/// Builds a DelayOp from two SSA values: the input `lhs` and `rhs` (presumably
+/// the delay amount -- confirm in the op definition). `rhs` is an operand.
+void DelayOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                    mlir::Value lhs, mlir::Value rhs) {
+  // The result shape is unknown at build time, so the single result starts
+  // out as an unranked f64 tensor; DelayOp::inferShapes() later replaces it
+  // with the type of `lhs`.
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+  // Both values are attached as operands, in the order (lhs, rhs).
+}
+
+mlir::LogicalResult DelayOp::verify() {
+  // No op-specific invariant is checked here: every DelayOp verifies
+  // successfully.
+  //
+  // TODO(review): an earlier, disabled draft cast both operands and the
+  // result to RankedTensorType; decide which operand/result constraints
+  // should actually be enforced and implement them here.
+
+  return mlir::success();
+}
+
+/// Infer the output shape of the DelayOp, this is required by the shape
+/// inference interface.
+///
+/// The result adopts the type of the first operand (`getLhs()`) unchanged, so
+/// a ranked input produces a result of the same shape and element type.
+///
+/// The second operand does not take part in the inferred type.
+void DelayOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// GainOp
+//===----------------------------------------------------------------------===//
+/// Builds a GainOp from two SSA values: the input `lhs` and the value `rhs`
+/// (presumably the gain factor -- confirm in the op definition). Earlier,
+/// now-disabled signatures took `rhs` as `unsigned` or `mlir::Float64Type`
+/// instead of an `mlir::Value`.
+void GainOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value lhs, mlir::Value rhs) {
+  // The result shape is unknown at build time, so the single result starts
+  // out as an unranked f64 tensor; GainOp::inferShapes() later replaces it
+  // with the type of `lhs`.
+  //
+  // Both `lhs` and `rhs` are attached as SSA operands.
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+  // NOTE(review): disabled experiments that used to live here, kept as a
+  // summary only:
+  //   - declaring extra result types (i32 / f64) next to the tensor result;
+  //   - attaching `rhs` as an attribute via
+  //     state.addAttribute("rhs", builder.getFloatAttr(builder.getF64Type(),
+  //     rhs)).
+  // None of them is active; `rhs` is a plain operand.
+}
+
+//  mlir::LogicalResult GainOp::verify(){
+//     auto inputType1 =
+//     llvm::dyn_cast<RankedTensorType>(getOperand(0).getType()); auto
+//     inputType2 = llvm::dyn_cast<Float64Type>(getOperand(1).getType());
+//     // auto inputType2 =
+//     llvm::dyn_cast<RankedTensorType>(getOperand(1).getType());
+//     // auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
+//     // if(!inputType || !resultType)
+//     //   return mlir::success();
+
+//     return mlir::success();
+//  }
+
+/// Infer the output shape of the GainOp, this is required by the shape
+/// inference interface.
+///
+/// The result adopts the type of the first operand (`getLhs()`) unchanged, so
+/// a ranked input produces a result of the same shape and element type.
+///
+/// The second operand does not take part in the inferred type.
+void GainOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// SubOp
+//===----------------------------------------------------------------------===//
+
+void SubOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+  state.addOperands({lhs, rhs});
+}
+
+// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the SubOp, this is required by the shape inference
+/// interface.
+void SubOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// FFTRealOp
+//===----------------------------------------------------------------------===//
+
+void FFTRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value lhs) {
+  state.addTypes(lhs.getType());
+  state.addOperands({lhs});
+}
+
+void FFTRealOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// FFTImagOp
+//===----------------------------------------------------------------------===//
+
+void FFTImagOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value lhs) {
+  state.addTypes(lhs.getType());
+  state.addOperands({lhs});
+}
+
+void FFTImagOp::inferShapes() { getResult().setType(getLhs().getType()); }
+
+//===----------------------------------------------------------------------===//
+// MatmulOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a MatmulOp over `lhs` and `rhs`. The result starts out as an
+/// unranked f64 tensor; MatmulOp::inferShapes() assigns the ranked type.
+void MatmulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value lhs, mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+// mlir::ParseResult MatmulOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void MatmulOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Verifies that the operands are compatible for a matrix product: the second
+/// dimension of the LHS must equal the first dimension of the RHS.
+///
+/// The check is skipped while either operand is still unranked, because no
+/// shape is available to compare yet. Operands that are ranked but too small
+/// to be indexed are rejected instead of being read out of bounds.
+mlir::LogicalResult MatmulOp::verify() {
+  auto lhsType = llvm::dyn_cast<mlir::RankedTensorType>(getLhs().getType());
+  auto rhsType = llvm::dyn_cast<mlir::RankedTensorType>(getRhs().getType());
+
+  // Unranked operands have no usable shape yet, so there is nothing to compare.
+  if (!lhsType || !rhsType)
+    return mlir::success();
+
+  // Guard the indexing below: LHS[1] and RHS[0] must exist.
+  if (lhsType.getRank() < 2 || rhsType.getRank() < 1)
+    return emitOpError("Matmul: LHS must have at least two dimensions and "
+                       "RHS at least one.");
+
+  if (lhsType.getShape()[1] != rhsType.getShape()[0])
+    return emitOpError("Matmul: the second dimension of LHS should be equal to "
+                       "the first dimention of RHS.");
+  return mlir::success();
+}
+
+/// Infer the output shape of the MatmulOp, this is required by the shape
+/// inference interface.
+///
+/// The result is a 2-D tensor of shape [LHS dim 0, RHS dim 1] with the element
+/// type of the LHS. If the operands do not have enough dimensions for that, an
+/// error is emitted and the result type is left unchanged.
+void MatmulOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  auto lhsShape = lhsType.getShape();
+  auto rhsShape = getRhs().getType().getShape();
+
+  // Guard the indexing below: LHS[0] and RHS[1] must exist.
+  if (lhsShape.empty() || rhsShape.size() < 2) {
+    emitOpError("Matmul: cannot infer the result shape, LHS needs at least one "
+                "dimension and RHS at least two.");
+    return;
+  }
+
+  std::vector<int64_t> resultShape{lhsShape[0], rhsShape[1]};
+  getResult().setType(
+      mlir::RankedTensorType::get(resultShape, lhsType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaksOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FindPeaksOp from the `signal`, `height` and `distance` operands (in
+/// that order). The result starts out as an unranked f64 tensor.
+void FindPeaksOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value signal, mlir::Value height,
+                        mlir::Value distance) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({signal, height, distance});
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of FindPeaksOp.
+///
+/// The result is a 1-D tensor of (signal length - 1) / distance + 2 elements:
+/// room for the maximum possible number of peaks,
+/// (signal length - 1) / distance + 1, plus one last slot that provides the
+/// number of peaks. The distance is read from the first element of the
+/// dsp::ConstantOp defining operand 2. When that operand is not defined by a
+/// dsp::ConstantOp, or the distance is not positive, an error is emitted and
+/// the result type is left unchanged.
+void FindPeaksOp::inferShapes() {
+  auto signalType = getSignal().getType();
+  const int64_t signalLength = signalType.getShape()[0];
+
+  // The distance must be a compile-time constant to size the result.
+  auto distanceConstant = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!distanceConstant) {
+    emitOpError("FindPeaks: the distance operand must be defined by a "
+                "constant operation.");
+    return;
+  }
+
+  DenseElementsAttr distanceAttr = distanceConstant.getValue();
+  auto distanceValues = distanceAttr.getValues<FloatAttr>();
+  const float distance =
+      static_cast<float>(distanceValues[0].getValueAsDouble());
+
+  // A zero, negative or NaN distance cannot yield a valid tensor length.
+  if (!(distance > 0)) {
+    emitOpError("FindPeaks: the distance must be a positive value.");
+    return;
+  }
+
+  const int64_t outputLength =
+      static_cast<int64_t>((signalLength - 1) / distance + 2);
+
+  std::vector<int64_t> outputShape{outputLength};
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a MaxOp over `input`; the result starts out as an unranked f64
+/// tensor.
+void MaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value input) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference hook for MaxOp (required by the shape inference
+/// interface): the result becomes a rank-0 tensor with the input's element
+/// type.
+void MaxOp::inferShapes() {
+  auto elementType = getInput().getType().getElementType();
+
+  // An empty shape list describes a rank-0 tensor.
+  std::vector<int64_t> rankZeroShape;
+  getResult().setType(mlir::RankedTensorType::get(rankZeroShape, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a MeanOp from the `input` and `length` operands (in that order). The
+/// result starts out as an unranked f64 tensor.
+void MeanOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value input, mlir::Value length) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, length});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for MeanOp: the result becomes a rank-0 tensor with the
+/// input's element type.
+void MeanOp::inferShapes() {
+  auto elementType = getInput().getType().getElementType();
+
+  // An empty shape list describes a rank-0 tensor.
+  std::vector<int64_t> rankZeroShape;
+  getResult().setType(mlir::RankedTensorType::get(rankZeroShape, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// DiffOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a DiffOp from the `input` and `length` operands (in that order). The
+/// result starts out as an unranked f64 tensor.
+void DiffOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value input, mlir::Value length) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, length});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for DiffOp: the result is a 1-D tensor that is one element
+/// shorter than the input's first dimension and shares its element type.
+void DiffOp::inferShapes() {
+  auto inputType = getInput().getType();
+  const int64_t outputLength = inputType.getShape()[0] - 1;
+
+  std::vector<int64_t> outputShape{outputLength};
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, inputType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// AbsOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an AbsOp over `input`; the result starts out as an unranked f64
+/// tensor.
+void AbsOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value input) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for AbsOp: the result type mirrors the input operand.
+void AbsOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+//===----------------------------------------------------------------------===//
+// ArgMaxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an ArgMaxOp over `input`, storing `axis` as the 64-bit integer
+/// attribute "axis". The result starts out as an unranked f64 tensor.
+void ArgMaxOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input, int64_t axis) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  auto axisAttr = builder.getI64IntegerAttr(axis);
+
+  state.addTypes(unrankedF64);
+  state.addAttribute("axis", axisAttr);
+  state.addOperands({input});
+}
+
+/// Shape inference for ArgMaxOp.
+///
+/// A rank-1 input yields a one-element 1-D result. Otherwise the result is a
+/// 1-D tensor sized by input dimension 0 when the axis attribute is 1, and by
+/// input dimension 1 for any other axis value. The element type always
+/// follows the input.
+void ArgMaxOp::inferShapes() {
+  auto inputType = getInput().getType();
+  auto inputRank = inputType.getRank();
+  auto inputShape = inputType.getShape();
+  auto elementType = inputType.getElementType();
+
+  std::vector<int64_t> outputShape;
+  if (inputRank == 1) {
+    outputShape.push_back(1);
+  } else {
+    int64_t axis = getAxis();
+    // The result length comes from the dimension that is not being reduced.
+    const int64_t keptDim = (axis == 1) ? 0 : 1;
+    outputShape.push_back(inputShape[keptDim]);
+  }
+
+  getResult().setType(mlir::RankedTensorType::get(outputShape, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// PowOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a PowOp over `lhs` and `rhs`; the result starts out as an unranked
+/// f64 tensor.
+void PowOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value lhs, mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for PowOp: the result type mirrors the lhs operand.
+void PowOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+/// Verifies that the result shape equals the shape of the lhs operand.
+///
+/// The check only applies once both types are ranked; while either is still
+/// unranked the op is accepted as-is.
+mlir::LogicalResult PowOp::verify() {
+  auto lhsType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  auto resultType = llvm::dyn_cast<RankedTensorType>(getType());
+
+  if (!lhsType || !resultType)
+    return mlir::success();
+
+  // Compare the dimensions in order. The previous code walked the result
+  // shape backwards (rbegin), which rejected valid non-square 2-D tensors and
+  // did not account for a rank mismatch. ArrayRef comparison covers both the
+  // length and every element.
+  if (lhsType.getShape() != resultType.getShape()) {
+    return emitError()
+           << "expected result shape to be the same as the lhs input operand.";
+  }
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// zeroCrossCountOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a zeroCrossCountOp over `lhs`; the result starts out as an unranked
+/// f64 tensor.
+// NOTE: plain f64 and i64 result types were tried here earlier and are not in
+// use.
+void zeroCrossCountOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value lhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference hook for zeroCrossCountOp (required by the shape inference
+/// interface): the result takes on the type of the lhs operand.
+void zeroCrossCountOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResponseOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FIRFilterResponseOp over `lhs` and `rhs`; the result starts out as
+/// an unranked f64 tensor.
+void FIRFilterResponseOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value lhs,
+                                mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Infer the output shape of the FIRFilterResponseOp, this is required by the
+/// shape inference interface.
+///
+/// For every dimension i of the lhs (input) operand the result dimension is
+/// input[i] + filter[i] - 1, where the filter is the rhs operand; the element
+/// type follows the lhs. If the filter has fewer dimensions than the input,
+/// an error is emitted and the result type is left unchanged.
+void FIRFilterResponseOp::inferShapes() {
+  auto inputType = getLhs().getType();
+  auto inputShape = inputType.getShape();
+  auto filterShape = getRhs().getType().getShape();
+
+  // The loop below reads filterShape[i] for every input dimension.
+  if (filterShape.size() < inputShape.size()) {
+    emitOpError("FIRFilterResponse: the filter must have at least as many "
+                "dimensions as the input.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape;
+  outputShape.reserve(inputShape.size());
+  for (size_t i = 0; i < inputShape.size(); i++)
+    outputShape.push_back(inputShape[i] + filterShape[i] - 1);
+
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, inputType.getElementType()));
+}
+
+/// Verifier for FIRFilterResponseOp. It currently performs no checks and
+/// always succeeds.
+// TODO: the check that used to live here required both operands (input and
+// filter) to be ranked tensors of rank 1 and reported
+// "expected rank of input & filter is 1" otherwise. It is disabled.
+mlir::LogicalResult FIRFilterResponseOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// MedianFilterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a MedianFilterOp over `value`; the result starts out as an unranked
+/// f64 tensor.
+void MedianFilterOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of MedianFilterOp: every dimension of the input is
+/// shortened by 2 and the element type is kept.
+///
+/// An error is emitted, and the result type left unchanged, when the operand
+/// is not a ranked tensor or when a dimension is smaller than 2 (which would
+/// give a negative result size).
+void MedianFilterOp::inferShapes() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  if (!inputType) {
+    emitOpError("MedianFilter: expected a ranked tensor operand.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape;
+  outputShape.reserve(inputType.getRank());
+
+  // One entry per dimension, e.g. tensor<1x2x3x2> has four of them.
+  for (int64_t dimSize : inputType.getShape()) {
+    if (dimSize < 2) {
+      emitOpError("MedianFilter: every input dimension must have at least 2 "
+                  "elements.");
+      return;
+    }
+    outputShape.push_back(dimSize - 2);
+  }
+
+  getResult().setType(mlir::RankedTensorType::get(
+      outputShape, getInput().getType().getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SlidingWindowAvgOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SlidingWindowAvgOp over `value`; the result starts out as an
+/// unranked f64 tensor.
+void SlidingWindowAvgOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of SlidingWindowAvgOp: every dimension of the input
+/// is shortened by 2 and the element type is kept.
+///
+/// An error is emitted, and the result type left unchanged, when the operand
+/// is not a ranked tensor or when a dimension is smaller than 2 (which would
+/// give a negative result size).
+void SlidingWindowAvgOp::inferShapes() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  if (!inputType) {
+    emitOpError("SlidingWindowAvg: expected a ranked tensor operand.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape;
+  outputShape.reserve(inputType.getRank());
+
+  // One entry per dimension, e.g. tensor<1x2x3x2> has four of them.
+  for (int64_t dimSize : inputType.getShape()) {
+    if (dimSize < 2) {
+      emitOpError("SlidingWindowAvg: every input dimension must have at least "
+                  "2 elements.");
+      return;
+    }
+    outputShape.push_back(dimSize - 2);
+  }
+
+  getResult().setType(mlir::RankedTensorType::get(
+      outputShape, getInput().getType().getElementType()));
+}
+
+/// Verifier for SlidingWindowAvgOp. It currently performs no checks and always
+/// succeeds.
+// TODO: the disabled code failed verification for operands that are not
+// ranked tensors and printed a warning for every input dimension smaller
+// than 3.
+mlir::LogicalResult SlidingWindowAvgOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// DownsamplingOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a DownsamplingOp over `lhs` and `rhs`; the result starts out as an
+/// unranked f64 tensor.
+void DownsamplingOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value lhs,
+                           mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Infer the output shape of the DownsamplingOp, this is required by the shape
+/// inference interface.
+///
+/// Every dimension of length L of the lhs operand becomes ceil(L / rate). The
+/// rate is the first element of the dsp::ConstantOp that defines operand 1,
+/// truncated to an integer. When operand 1 is not defined by a dsp::ConstantOp
+/// or the rate is not positive, an error is emitted and the result type is
+/// left unchanged.
+void DownsamplingOp::inferShapes() {
+  auto inputType = getLhs().getType();
+
+  // The rate must be a compile-time constant to size the result.
+  auto rateConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!rateConstant) {
+    emitOpError("Downsampling: the sampling rate must be defined by a "
+                "constant operation.");
+    return;
+  }
+
+  // Constant op -> elements attribute -> first element as float -> integer.
+  DenseElementsAttr rateAttr = rateConstant.getValue();
+  auto rateValues = rateAttr.getValues<FloatAttr>();
+  const float rateAsFloat =
+      static_cast<float>(rateValues[0].getValueAsDouble());
+  const int64_t rate = static_cast<int64_t>(rateAsFloat);
+
+  // Dividing by zero (or a negative rate) cannot give a valid dimension.
+  if (rate <= 0) {
+    emitOpError("Downsampling: the sampling rate must be a positive integer.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape;
+  for (int64_t dimSize : inputType.getShape()) {
+    // Round up so a trailing partial group still produces one sample.
+    const int64_t outputLength =
+        static_cast<int64_t>(ceil(static_cast<double>(dimSize) / rate));
+    // NOTE(review): unconditional stderr output kept from the original code.
+    llvm::errs() << "Downsampling: OutlenInt: " << outputLength << " \n";
+    outputShape.push_back(outputLength);
+  }
+
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, inputType.getElementType()));
+}
+
+/// Verifier for DownsamplingOp. It currently performs no checks and always
+/// succeeds.
+// TODO: the disabled code required operand 0 to have rank 1 and operand 1
+// (the sampling rate) to have rank 0, reporting
+// "expected rank of input & Downsampling is 1" otherwise.
+mlir::LogicalResult DownsamplingOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// UpsamplingOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an UpsamplingOp over `lhs` and `rhs`; the result starts out as an
+/// unranked f64 tensor.
+void UpsamplingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value lhs, mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Infer the output shape of the UpsamplingOp, this is required by the shape
+/// inference interface.
+///
+/// Every dimension of length L of the lhs operand becomes L * rate. The rate
+/// is the first element of the dsp::ConstantOp that defines operand 1,
+/// truncated to an integer. When operand 1 is not defined by a dsp::ConstantOp
+/// or the rate is negative, an error is emitted and the result type is left
+/// unchanged.
+void UpsamplingOp::inferShapes() {
+  auto inputType = getLhs().getType();
+
+  // The rate must be a compile-time constant to size the result.
+  auto rateConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!rateConstant) {
+    emitOpError("Upsampling: the sampling rate must be defined by a constant "
+                "operation.");
+    return;
+  }
+
+  // Constant op -> elements attribute -> first element as float -> integer.
+  DenseElementsAttr rateAttr = rateConstant.getValue();
+  auto rateValues = rateAttr.getValues<FloatAttr>();
+  const float rateAsFloat =
+      static_cast<float>(rateValues[0].getValueAsDouble());
+  const int64_t rate = static_cast<int64_t>(rateAsFloat);
+
+  // A negative rate would produce negative tensor dimensions.
+  if (rate < 0) {
+    emitOpError("Upsampling: the sampling rate must not be negative.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape;
+  for (int64_t dimSize : inputType.getShape()) {
+    const int64_t outputLength =
+        static_cast<int64_t>(static_cast<double>(dimSize) * rate);
+    // NOTE(review): unconditional stderr output kept from the original code.
+    llvm::errs() << "Upsampling: OutlenInt: " << outputLength << " \n";
+    outputShape.push_back(outputLength);
+  }
+
+  getResult().setType(
+      mlir::RankedTensorType::get(outputShape, inputType.getElementType()));
+}
+
+/// Verifier for UpsamplingOp. It currently performs no checks and always
+/// succeeds.
+// TODO: the disabled code required operand 0 to have rank 1 and operand 1
+// (the sampling rate) to have rank 0, reporting
+// "expected rank of input is 1 & Upsampling is 0" otherwise.
+mlir::LogicalResult UpsamplingOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// LowPassFilter1stOrderOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a LowPassFilter1stOrderOp over `lhs` and `rhs`; the result starts
+/// out as an unranked f64 tensor.
+void LowPassFilter1stOrderOp::build(mlir::OpBuilder &builder,
+                                    mlir::OperationState &state,
+                                    mlir::Value lhs, mlir::Value rhs) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference hook for LowPassFilter1stOrderOp (required by the shape
+/// inference interface): the result takes on the type of the lhs operand.
+void LowPassFilter1stOrderOp::inferShapes() {
+  getResult().setType(getLhs().getType());
+}
+
+/// Verifier for LowPassFilter1stOrderOp. It currently performs no checks and
+/// always succeeds.
+// TODO: the disabled code required operand 0 to have rank 1 and operand 1
+// (the alpha value) to have rank 0, reporting
+// "expected rank of input & Upsampling is 1" otherwise.
+mlir::LogicalResult LowPassFilter1stOrderOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// HighPassFilterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a HighPassFilterOp over `value`; the result starts out as an
+/// unranked f64 tensor.
+void HighPassFilterOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for HighPassFilterOp: the output has the same type (and
+/// therefore the same size) as the input operand.
+void HighPassFilterOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verifier for HighPassFilterOp. It currently performs no checks and always
+/// succeeds.
+// TODO: the disabled code required the operand to have rank 1 and reported
+// "expected rank of input  is 1" otherwise.
+mlir::LogicalResult HighPassFilterOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// FFT1DOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFT1DOp over `value`. The op has two results, both of which start
+/// out as unranked f64 tensors.
+void FFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                    mlir::Value value) {
+  mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addTypes({unrankedF64, unrankedF64});
+  state.addOperands(value);
+}
+
+/// Shape inference for FFT1DOp: both results take on the type (and therefore
+/// the size) of the input operand.
+void FFT1DOp::inferShapes() {
+  auto inputType = getInput().getType();
+  for (unsigned resultIdx = 0; resultIdx < 2; ++resultIdx)
+    getResult(resultIdx).setType(inputType);
+}
+
+/// Verifier for FFT1DOp. It currently performs no checks and always succeeds.
+// TODO: the disabled code required the operand to have rank 1 and reported
+// "expected rank of input  is 1" otherwise.
+mlir::LogicalResult FFT1DOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// IFFT1DOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an IFFT1DOp from the `real` and `img` operands (in that order). The
+/// single result starts out as an unranked f64 tensor.
+void IFFT1DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value real, mlir::Value img) {
+  mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addTypes({unrankedF64});
+  state.addOperands({real, img});
+}
+
+/// Shape inference for IFFT1DOp: the result takes on the type (and therefore
+/// the size) of the `real` operand.
+void IFFT1DOp::inferShapes() {
+  getResult().setType(getReal().getType());
+}
+
+/// Verifier for IFFT1DOp. It currently performs no checks and always succeeds.
+// TODO: the disabled code required the operand to have rank 1 and reported
+// "expected rank of input  is 1" otherwise.
+mlir::LogicalResult IFFT1DOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// HammingWindowOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a HammingWindowOp over `value`; the result starts out as an unranked
+/// f64 tensor.
+void HammingWindowOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of HammingWindowOp: a 1-D tensor whose length is the
+/// first element (truncated to an integer) of the dsp::ConstantOp defining the
+/// operand, and whose element type is the operand's element type.
+///
+/// When the operand is not defined by a dsp::ConstantOp or the length is
+/// negative, an error is emitted and the result type is left unchanged.
+void HammingWindowOp::inferShapes() {
+  // The window length must be a compile-time constant to size the result.
+  auto lengthConstant = getOperand().getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant) {
+    emitOpError("HammingWindow: the window length must be defined by a "
+                "constant operation.");
+    return;
+  }
+
+  // Constant op -> elements attribute -> first element as float -> integer.
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  const float lengthAsFloat =
+      static_cast<float>(lengthValues[0].getValueAsDouble());
+  const int64_t windowLength = static_cast<int64_t>(lengthAsFloat);
+
+  if (windowLength < 0) {
+    emitOpError("HammingWindow: the window length must not be negative.");
+    return;
+  }
+
+  std::vector<int64_t> outputShape{windowLength};
+  getResult().setType(mlir::RankedTensorType::get(
+      outputShape, getInput().getType().getElementType()));
+}
+
+/// Verifier for HammingWindowOp. It currently performs no checks and always
+/// succeeds.
+// TODO: the disabled code failed verification for operands that are not
+// ranked tensors and printed a warning for every input dimension smaller
+// than 3.
+mlir::LogicalResult HammingWindowOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// DCTOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a DCTOp over `value`; the result starts out as an unranked f64
+/// tensor.
+void DCTOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference for DCTOp: the output has the same type (and therefore the
+/// same size) as the input operand.
+void DCTOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verifies that a ranked operand of DCTOp has rank 1.
+///
+/// Operands that are not (yet) ranked tensors are accepted: the build()
+/// methods in this file create results typed as unranked tensors, so an
+/// operand can legitimately be unranked when verification runs.
+mlir::LogicalResult DCTOp::verify() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+
+  // dyn_cast yields a null type for unranked operands; querying the rank of
+  // a null type would crash, so defer the check instead.
+  if (!inputType)
+    return mlir::success();
+
+  auto inputRank = inputType.getRank();
+  if (inputRank != 1) {
+    llvm::errs() << "inputRank: " << inputRank << "\n";
+    return emitError() << "expected rank of input  is 1";
+  }
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// filterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a filterOp from the operands `b`, `a` and `x` (in that order). The
+/// result starts out as an unranked f64 tensor.
+void filterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value b, mlir::Value a, mlir::Value x) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({b, a, x});
+  state.addTypes(unrankedF64);
+}
+
+/// Shape inference hook for filterOp (required by the shape inference
+/// interface): the result takes on the type of the `x` operand.
+void filterOp::inferShapes() {
+  getResult().setType(getX().getType());
+}
+
+/// Verifier for filterOp. It currently performs no checks and always succeeds.
+// TODO: the disabled code required operands 0 and 1 to be rank-1 tensors and
+// reported "expected rank of input & filter is 1" otherwise.
+mlir::LogicalResult filterOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// SumOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SumOp over `value`; the result starts out as an unranked f64
+/// tensor.
+void SumOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  auto unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of SumOp: a rank-1 tensor holding a single element,
+/// with the element type of the input.
+void SumOp::inferShapes() {
+  const mlir::Type elementType = getInput().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({1}, elementType));
+}
+
+/// Verifier for SumOp.
+///
+/// No constraint is enforced at the moment, so every SumOp is accepted.
+mlir::LogicalResult SumOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// CosOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a CosOp with `value` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void CosOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of CosOp: the result takes exactly the type of the
+/// input operand.
+void CosOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifier for CosOp.
+///
+/// No constraint is enforced at the moment, so every CosOp is accepted.
+/// A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult CosOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SinOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SinOp with `value` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void SinOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                  mlir::Value value) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of SinOp: the result takes exactly the type of the
+/// input operand.
+void SinOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifier for SinOp.
+///
+/// No constraint is enforced at the moment, so every SinOp is accepted.
+/// A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult SinOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SquareOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SquareOp with `value` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void SquareOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value value) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of SquareOp: the result takes exactly the type of
+/// the input operand.
+void SquareOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifier for SquareOp.
+///
+/// No constraint is enforced at the moment, so every SquareOp is accepted.
+mlir::LogicalResult SquareOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FFT1DRealOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFT1DRealOp with `value` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void FFT1DRealOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value value) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of FFT1DRealOp: the result takes exactly the type
+/// of the input operand.
+void FFT1DRealOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifier for FFT1DRealOp.
+///
+/// No constraint is enforced at the moment, so every FFT1DRealOp is accepted.
+/// A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult FFT1DRealOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FFT1DImgOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFT1DImgOp with `value` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void FFT1DImgOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                       mlir::Value value) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(value);
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of FFT1DImgOp: the result takes exactly the type of
+/// the input operand.
+void FFT1DImgOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifier for FFT1DImgOp.
+///
+/// No constraint is enforced at the moment, so every FFT1DImgOp is accepted.
+/// A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult FFT1DImgOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SincOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SincOp from the operands `wc` and `n`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void SincOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                   mlir::Value wc, mlir::Value n) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({wc, n});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of SincOp, as required by the shape inference
+/// interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `wc`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void SincOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getWc().getType().getElementType()));
+}
+
+/// Verifier for SincOp.
+///
+/// No constraint is enforced at the moment, so every SincOp is accepted.
+/// A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult SincOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// GetElemAtIndxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a GetElemAtIndxOp from the operands `input` and `indx`, in that
+/// order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void GetElemAtIndxOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value input,
+                            mlir::Value indx) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, indx});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of GetElemAtIndxOp: a rank-1 tensor holding a single
+/// element, with the element type of `input`.
+void GetElemAtIndxOp::inferShapes() {
+  const mlir::Type elementType = getInput().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({1}, elementType));
+}
+
+/// Verifier for GetElemAtIndxOp.
+///
+/// No constraint is enforced at the moment, so every GetElemAtIndxOp is
+/// accepted.
+mlir::LogicalResult GetElemAtIndxOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// GetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a GetSingleElemAtIdxOp from the operands `input` and `indx`, in that
+/// order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void GetSingleElemAtIdxOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state, mlir::Value input,
+                                 mlir::Value indx) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, indx});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of GetSingleElemAtIdxOp: a rank-0 tensor (empty
+/// shape) with the element type of `input`.
+void GetSingleElemAtIdxOp::inferShapes() {
+  const mlir::Type elementType = getInput().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({}, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a Diff2MeanOptimizedOp from the operands `input` and `length`, in
+/// that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder,
+                                 mlir::OperationState &state, mlir::Value input,
+                                 mlir::Value length) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, length});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of Diff2MeanOptimizedOp: a rank-0 tensor (empty
+/// shape) with the element type of `input`.
+void Diff2MeanOptimizedOp::inferShapes() {
+  const mlir::Type elementType = getInput().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({}, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// FindPeaks2Diff2MeanOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FindPeaks2Diff2MeanOptimizedOp from the operands `signal`,
+/// `height` and `distance`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void FindPeaks2Diff2MeanOptimizedOp::build(mlir::OpBuilder &builder,
+                                           mlir::OperationState &state,
+                                           mlir::Value signal,
+                                           mlir::Value height,
+                                           mlir::Value distance) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({signal, height, distance});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of FindPeaks2Diff2MeanOptimizedOp: a rank-0 tensor
+/// (empty shape) with the element type of `signal`.
+void FindPeaks2Diff2MeanOptimizedOp::inferShapes() {
+  const mlir::Type elementType = getSignal().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({}, elementType));
+}
+
+//===----------------------------------------------------------------------===//
+// Median2SlidingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a Median2SlidingOptimizedOp with `input` as its only operand.
+///
+/// The single result is declared as an unranked tensor of f64.
+void Median2SlidingOptimizedOp::build(mlir::OpBuilder &builder,
+                                      mlir::OperationState &state,
+                                      mlir::Value input) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands(input);
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of Median2SlidingOptimizedOp.
+///
+/// The result has the rank and element type of the input. Every static
+/// dimension is 4 smaller than the matching input dimension; dynamic
+/// dimensions stay dynamic.
+///
+/// An error is emitted and the result type is left untouched when the input
+/// is not a ranked tensor or has a static dimension smaller than 4.
+void Median2SlidingOptimizedOp::inferShapes() {
+  // dyn_cast yields a null type for an unranked input; check before use.
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getOperand().getType());
+  if (!inputType) {
+    emitOpError("expected a ranked tensor input");
+    return;
+  }
+
+  std::vector<int64_t> resultShape;
+  resultShape.reserve(inputType.getRank());
+  // One entry per rank, e.g. tensor<1x2x3x2> has rank 4.
+  for (int64_t dim : inputType.getShape()) {
+    // The dynamic marker is not a size; subtracting from it would overflow.
+    if (mlir::ShapedType::isDynamic(dim)) {
+      resultShape.push_back(dim);
+      continue;
+    }
+    // A smaller dimension would yield a negative, invalid tensor size.
+    if (dim < 4) {
+      emitOpError() << "expected every input dimension to be at least 4 but "
+                    << "got " << dim;
+      return;
+    }
+    resultShape.push_back(dim - 4);
+  }
+
+  getResult().setType(mlir::RankedTensorType::get(
+      resultShape, getInput().getType().getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// LMS2FindPeaksOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an LMS2FindPeaksOptimizedOp from the operands `lhs`, `rhs`, `mu`,
+/// `filterLen`, `height` and `distance`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void LMS2FindPeaksOptimizedOp::build(mlir::OpBuilder &builder,
+                                     mlir::OperationState &state,
+                                     mlir::Value lhs, mlir::Value rhs,
+                                     mlir::Value mu, mlir::Value filterLen,
+                                     mlir::Value height, mlir::Value distance) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs, mu, filterLen, height, distance});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of LMS2FindPeaksOptimizedOp.
+///
+/// The result is a rank-1 tensor of (len - 1) / distance + 2 elements, where
+/// `len` is the size of the first dimension of `lhs` and `distance` is the
+/// first element of the dsp::ConstantOp defining operand #5. That is the
+/// maximum possible number of peaks, (len - 1) / distance + 1, plus one
+/// trailing slot that provides the number of peaks. The element type is taken
+/// from `lhs`.
+///
+/// An error is emitted and the result type is left untouched when `lhs` has no
+/// static first dimension, when operand #5 is not a non-empty constant, or
+/// when the distance is not positive.
+void LMS2FindPeaksOptimizedOp::inferShapes() {
+  // The signal length is only known for a ranked tensor with a static first
+  // dimension; dyn_cast yields a null type for unranked tensors.
+  auto signalType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  if (!signalType || signalType.getRank() == 0 ||
+      signalType.isDynamicDim(0)) {
+    emitOpError("expected lhs to be a ranked tensor with a static length");
+    return;
+  }
+  const int64_t signalLength = signalType.getShape()[0];
+
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto distanceConstant = getOperand(5).getDefiningOp<dsp::ConstantOp>();
+  if (!distanceConstant ||
+      distanceConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #5 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr distanceAttr = distanceConstant.getValue();
+  auto distanceValues = distanceAttr.getValues<FloatAttr>();
+  const float distance = distanceValues[0].getValueAsDouble();
+  // Zero would divide by zero below and a negative distance would produce a
+  // negative size; the negated comparison also rejects NaN.
+  if (!(distance > 0)) {
+    emitOpError() << "expected a positive distance but got " << distance;
+    return;
+  }
+
+  // The division is carried out in float and the sum is truncated.
+  const auto resultLength =
+      static_cast<int64_t>((signalLength - 1) / distance + 2);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {resultLength}, signalType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SetElemAtIndxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a SetElemAtIndxOp from the operands `input`, `indx` and `val`, in
+/// that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void SetElemAtIndxOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value input,
+                            mlir::Value indx, mlir::Value val) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, indx, val});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of SetElemAtIndxOp: a rank-1 tensor holding a single
+/// element, with the element type of `input`.
+void SetElemAtIndxOp::inferShapes() {
+  const mlir::Type elementType = getInput().getType().getElementType();
+  getResult().setType(mlir::RankedTensorType::get({1}, elementType));
+}
+
+/// Verifier for SetElemAtIndxOp.
+///
+/// No constraint is enforced, so every SetElemAtIndxOp is accepted.
+mlir::LogicalResult SetElemAtIndxOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// LowPassFIRFilterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a LowPassFIRFilterOp from the operands `wc` and `n`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void LowPassFIRFilterOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value wc,
+                               mlir::Value n) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({wc, n});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of LowPassFIRFilterOp, as required by the shape
+/// inference interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `wc`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void LowPassFIRFilterOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getWc().getType().getElementType()));
+}
+
+/// Verifies LowPassFIRFilterOp: the filter length N must be odd.
+///
+/// N is the first element of the dsp::ConstantOp that defines operand #1,
+/// truncated to an integer. Verification fails with a diagnostic when operand
+/// #1 is not such a constant, when the constant is empty, or when N is even.
+mlir::LogicalResult LowPassFIRFilterOp::verify() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0)
+    return emitOpError(
+        "expected operand #1 to be defined by a non-empty constant");
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  // Even filter lengths are not supported. The failure goes through the
+  // diagnostic engine so that it is attached to this operation.
+  if (length % 2 == 0)
+    return emitOpError() << "N for lowPassFilter must be odd but is "
+                         << length;
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// LMSFilterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an LMSFilterOp from the operands `lhs`, `rhs`, `mu`, `filterLen`
+/// and `iters`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void LMSFilterOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value lhs, mlir::Value rhs, mlir::Value mu,
+                        mlir::Value filterLen, mlir::Value iters) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({lhs, rhs, mu, filterLen, iters});
+  state.addTypes(unrankedF64);
+}
+
+/// Infers the result type of LMSFilterOp: the result takes exactly the type of
+/// the `lhs` operand.
+void LMSFilterOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+/// Verifier for LMSFilterOp.
+///
+/// No constraint is enforced at the moment, so every LMSFilterOp is accepted.
+/// A rank-1 check on operands #0 and #1 was sketched here but is not active.
+mlir::LogicalResult LMSFilterOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// HighPassFIRFilterOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a HighPassFIRFilterOp from the operands `wc` and `n`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void HighPassFIRFilterOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value wc,
+                                mlir::Value n) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({wc, n});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of HighPassFIRFilterOp, as required by the shape
+/// inference interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `wc`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void HighPassFIRFilterOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getWc().getType().getElementType()));
+}
+
+/// Verifier for HighPassFIRFilterOp.
+///
+/// No constraint is enforced at the moment, so every HighPassFIRFilterOp is
+/// accepted. A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult HighPassFIRFilterOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// GetRangeOfVectorOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a GetRangeOfVectorOp from the operands `first`, `N` and `step`, in
+/// that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void GetRangeOfVectorOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value first,
+                               mlir::Value N, mlir::Value step) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({first, N, step});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of GetRangeOfVectorOp, as required by the shape
+/// inference interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `first`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void GetRangeOfVectorOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getFirst().getType().getElementType()));
+}
+
+/// Verifier for GetRangeOfVectorOp.
+///
+/// No constraint is enforced at the moment, so every GetRangeOfVectorOp is
+/// accepted. A rank-1 check on the input was sketched here but is not active.
+mlir::LogicalResult GetRangeOfVectorOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FIRFilterHammingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FIRFilterHammingOptimizedOp from the operands `wc` and `n`, in
+/// that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void FIRFilterHammingOptimizedOp::build(mlir::OpBuilder &builder,
+                                        mlir::OperationState &state,
+                                        mlir::Value wc, mlir::Value n) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({wc, n});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of FIRFilterHammingOptimizedOp, as required by the
+/// shape inference interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `wc`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void FIRFilterHammingOptimizedOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getWc().getType().getElementType()));
+}
+
+/// Verifies FIRFilterHammingOptimizedOp: the filter length N must be odd.
+///
+/// N is the first element of the dsp::ConstantOp that defines operand #1,
+/// truncated to an integer. Verification fails with a diagnostic when operand
+/// #1 is not such a constant, when the constant is empty, or when N is even.
+mlir::LogicalResult FIRFilterHammingOptimizedOp::verify() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0)
+    return emitOpError(
+        "expected operand #1 to be defined by a non-empty constant");
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  // Even filter lengths are not supported. The failure goes through the
+  // diagnostic engine, which already prefixes the message with the op name.
+  if (length % 2 == 0)
+    return emitOpError() << "N must be odd but is " << length;
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// HighPassFIRHammingOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a HighPassFIRHammingOptimizedOp from the operands `wc` and `n`, in
+/// that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void HighPassFIRHammingOptimizedOp::build(mlir::OpBuilder &builder,
+                                          mlir::OperationState &state,
+                                          mlir::Value wc, mlir::Value n) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({wc, n});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of HighPassFIRHammingOptimizedOp, as required by the
+/// shape inference interface.
+///
+/// The result is a rank-1 tensor. Its length is the first element of the
+/// dsp::ConstantOp that defines operand #1, truncated to an integer, and its
+/// element type is the element type of `wc`.
+///
+/// If operand #1 is not produced by a dsp::ConstantOp, or that constant holds
+/// no elements, an error is emitted and the result type is left untouched.
+void HighPassFIRHammingOptimizedOp::inferShapes() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0) {
+    emitOpError("expected operand #1 to be defined by a non-empty constant");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  // The value goes through float before truncation, so the inferred length is
+  // only exact for magnitudes a float can represent.
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  getResult().setType(mlir::RankedTensorType::get(
+      {length}, getWc().getType().getElementType()));
+}
+
+/// Verifies HighPassFIRHammingOptimizedOp: the filter length N must be odd.
+///
+/// N is the first element of the dsp::ConstantOp that defines operand #1,
+/// truncated to an integer. Verification fails with a diagnostic when operand
+/// #1 is not such a constant, when the constant is empty, or when N is even.
+mlir::LogicalResult HighPassFIRHammingOptimizedOp::verify() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto lengthConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConstant || lengthConstant.getValue().getNumElements() == 0)
+    return emitOpError(
+        "expected operand #1 to be defined by a non-empty constant");
+
+  DenseElementsAttr lengthAttr = lengthConstant.getValue();
+  auto lengthValues = lengthAttr.getValues<FloatAttr>();
+  const float lengthAsFloat = lengthValues[0].getValueAsDouble();
+  const auto length = static_cast<int64_t>(lengthAsFloat);
+
+  // Even filter lengths are not supported. The failure goes through the
+  // diagnostic engine, which already prefixes the message with the op name.
+  if (length % 2 == 0)
+    return emitOpError() << "N must be odd but is " << length;
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a ThresholdOp from the operands `input` and `threshld`, in that
+/// order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void ThresholdOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                        mlir::Value input, mlir::Value threshld) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, threshld});
+  state.addTypes({unrankedF64});
+}
+
+/// Infers the result type of ThresholdOp: the result takes exactly the type of
+/// the `input` operand.
+void ThresholdOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+/// Verifies ThresholdOp: the threshold must be strictly positive.
+///
+/// The threshold is the first element of the dsp::ConstantOp that defines
+/// operand #1. Verification fails with a diagnostic when operand #1 is not
+/// such a constant, when the constant is empty, or when the threshold is
+/// zero or negative.
+mlir::LogicalResult ThresholdOp::verify() {
+  // getDefiningOp yields a null op for block arguments and for values produced
+  // by any other operation, so it must be checked before it is dereferenced.
+  auto thresholdConstant = getOperand(1).getDefiningOp<dsp::ConstantOp>();
+  if (!thresholdConstant ||
+      thresholdConstant.getValue().getNumElements() == 0)
+    return emitOpError(
+        "expected operand #1 to be defined by a non-empty constant");
+
+  DenseElementsAttr thresholdAttr = thresholdConstant.getValue();
+  auto thresholdValues = thresholdAttr.getValues<FloatAttr>();
+  const float threshold = thresholdValues[0].getValueAsDouble();
+
+  // Zero is rejected as well, so the diagnostic states "> 0".
+  if (threshold <= 0)
+    return emitOpError() << "threshold value must be > 0 but got: "
+                         << threshold;
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// QuantizationOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a QuantizationOp from the operands `input`, `nLevels`, `max` and
+/// `min`, in that order.
+///
+/// The single result is declared as an unranked tensor of f64.
+void QuantizationOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value input,
+                           mlir::Value nLevels, mlir::Value max,
+                           mlir::Value min) {
+  const mlir::Type unrankedF64 = UnrankedTensorType::get(builder.getF64Type());
+  state.addOperands({input, nLevels, max, min});
+  state.addTypes({unrankedF64});
+}
+
+/// Shape inference hook: the result simply adopts the input operand's type,
+/// i.e. the output has the same shape and element type as the input.
+void QuantizationOp::inferShapes() {
+  mlir::Value result = getResult();
+  result.setType(getInput().getType());
+}
+
+/// Verifies the constant bounds of the quantizer: operand 2 (max) and
+/// operand 3 (min) must both be produced by a dsp::ConstantOp holding at
+/// least one element, and max must not be smaller than min.
+///
+/// Unlike the previous version, a non-constant max operand is reported as a
+/// verification failure rather than dereferencing a null op.
+///
+/// The number of levels (operand 1) is not validated here.
+mlir::LogicalResult QuantizationOp::verify() {
+  // Reads element 0 of the constant feeding operand `idx`; returns failure
+  // after emitting a diagnostic when no such constant value is available.
+  auto readConstant = [&](unsigned idx, llvm::StringRef role,
+                          double &out) -> mlir::LogicalResult {
+    auto constOp = getOperand(idx).getDefiningOp<dsp::ConstantOp>();
+    if (!constOp)
+      return emitOpError("unable to get constant for ")
+             << role << " operand (operand #" << idx << ")";
+    DenseElementsAttr attr = constOp.getValue();
+    if (attr.getNumElements() == 0)
+      return emitOpError("constant for ") << role << " operand is empty";
+    auto elems = attr.getValues<FloatAttr>();
+    out = elems[0].getValueAsDouble();
+    return mlir::success();
+  };
+
+  // Operand layout: 0 = input, 1 = nLevels, 2 = max, 3 = min.
+  double maxVal = 0.0;
+  double minVal = 0.0;
+  if (mlir::failed(readConstant(2, "max", maxVal)) ||
+      mlir::failed(readConstant(3, "min", minVal)))
+    return mlir::failure();
+
+  // Only max < min is rejected; max == min is accepted as before.
+  if (maxVal < minVal)
+    return emitOpError("max < min -- max: ")
+           << maxVal << " min: " << minVal;
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponseOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an LMSFilterResponseOp with an unranked f64 tensor result.
+void LMSFilterResponseOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state, mlir::Value lhs,
+                                mlir::Value rhs, mlir::Value mu,
+                                mlir::Value filterLen) {
+  state.addOperands({lhs, rhs, mu, filterLen});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+/// Shape inference hook: the result type mirrors the type of `lhs`.
+void LMSFilterResponseOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// instance of the op is accepted.
+// TODO: a rank check (input and filter both of rank 1) was sketched here but
+// is currently disabled.
+mlir::LogicalResult LMSFilterResponseOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// RunLenEncodingOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a RunLenEncodingOp over `input`; the result is declared as an
+/// unranked f64 tensor.
+void RunLenEncodingOp::build(mlir::OpBuilder &builder,
+                             mlir::OperationState &state, mlir::Value input) {
+  state.addOperands(input);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: produces a rank-1 result whose length is twice the
+/// extent of the input's dimension 0, with the input's element type.
+void RunLenEncodingOp::inferShapes() {
+  auto inputType = getInput().getType();
+
+  // Only dimension 0 of the input is consulted.
+  const int64_t inputLen = inputType.getShape()[0];
+  const std::vector<int64_t> resultDims = {2 * inputLen};
+
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(resultDims, inputType.getElementType());
+  getResult().setType(resultType);
+}
+
+/// Verifier hook. Nothing is checked: every RunLenEncodingOp is accepted.
+mlir::LogicalResult RunLenEncodingOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FIRFilterResSymmOptimizedOp from the `lhs` and `rhs` operands; the
+/// result is declared as an unranked f64 tensor.
+void FIRFilterResSymmOptimizedOp::build(mlir::OpBuilder &builder,
+                                        mlir::OperationState &state,
+                                        mlir::Value lhs, mlir::Value rhs) {
+  state.addOperands({lhs, rhs});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+/// Infer the output shape of the FIRFilterResSymmOptimizedOp, as required by
+/// the shape inference interface. For every dimension of `lhs` the result
+/// extent is lhs_dim + rhs_dim - 1; the element type is taken from `lhs`.
+void FIRFilterResSymmOptimizedOp::inferShapes() {
+  auto inputType = getLhs().getType();
+  auto filterType = getRhs().getType();
+  auto inputDims = inputType.getShape();
+  auto filterDims = filterType.getShape();
+
+  // NOTE(review): filterDims is indexed with the input's dimension indices,
+  // so this presumably expects rhs to have at least the rank of lhs.
+  std::vector<int64_t> resultDims;
+  for (size_t dim = 0, rank = inputDims.size(); dim < rank; ++dim) {
+    const int64_t extent = inputDims[dim] + filterDims[dim] - 1;
+    resultDims.push_back(extent);
+  }
+
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(resultDims, inputType.getElementType());
+  getResult().setType(resultType);
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// instance of the op is accepted.
+// TODO: a rank check (input and filter both of rank 1) was sketched here but
+// is currently disabled.
+mlir::LogicalResult FIRFilterResSymmOptimizedOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// LengthOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a LengthOp over `input`; the result is declared as an unranked f64
+/// tensor.
+void LengthOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input) {
+  state.addOperands(input);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the result is always a one-element rank-1 tensor
+/// (shape [1]) carrying the input's element type.
+void LengthOp::inferShapes() {
+  const std::vector<int64_t> resultDims(1, 1);
+  auto elementType = getInput().getType().getElementType();
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(resultDims, elementType);
+  getResult().setType(resultType);
+}
+
+/// Verifier hook. Nothing is checked: every LengthOp is accepted.
+mlir::LogicalResult LengthOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// ReverseInputOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a ReverseInputOp over `input`; the result is declared as an unranked
+/// f64 tensor.
+void ReverseInputOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value input) {
+  state.addOperands({input});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+/// Shape inference hook: the result adopts the input operand's type
+/// unchanged.
+void ReverseInputOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verifier hook. Nothing is checked: every ReverseInputOp is accepted.
+mlir::LogicalResult ReverseInputOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// PaddingOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a PaddingOp with the operands `input`, `PadValue` and `PadLen` (in
+/// that order); the result is declared as an unranked f64 tensor.
+void PaddingOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value input, mlir::Value PadValue,
+                      mlir::Value PadLen) {
+  state.addOperands({input, PadValue, PadLen});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+/// Infer the output shape of the PaddingOp, as required by the shape inference
+/// interface.
+///
+/// Every dimension of the input is extended by the pad length, which is read
+/// from element 0 of the dsp::ConstantOp defining operand 2 and truncated to
+/// an integer. The element type is taken from the input.
+///
+/// If operand 2 is not defined by a dsp::ConstantOp, or the constant is empty,
+/// an error is emitted and the result type is left untouched (the previous
+/// code dereferenced a null op in that case).
+void PaddingOp::inferShapes() {
+  auto padLenConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!padLenConst) {
+    emitOpError("expects the pad length (operand 2) to be defined by a "
+                "dsp::ConstantOp");
+    return;
+  }
+
+  DenseElementsAttr padLenAttr = padLenConst.getValue();
+  if (padLenAttr.getNumElements() == 0) {
+    emitOpError("expects a non-empty pad length constant");
+    return;
+  }
+
+  // Convert straight from double: going through float first would lose
+  // precision for pad lengths above 2^24.
+  auto padLenElems = padLenAttr.getValues<FloatAttr>();
+  const auto padLen = static_cast<int64_t>(padLenElems[0].getValueAsDouble());
+
+  auto inputType = getInput().getType();
+  std::vector<int64_t> resultDims;
+  for (int64_t inputExtent : inputType.getShape())
+    resultDims.push_back(inputExtent + padLen);
+
+  getResult().setType(
+      mlir::RankedTensorType::get(resultDims, inputType.getElementType()));
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// PaddingOp is accepted.
+// TODO: a rank check on the operands was sketched here but is currently
+// disabled.
+mlir::LogicalResult PaddingOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FIRFilterYSymmOptimizedOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FIRFilterYSymmOptimizedOp from the `lhs` and `rhs` operands; the
+/// result is declared as an unranked f64 tensor.
+void FIRFilterYSymmOptimizedOp::build(mlir::OpBuilder &builder,
+                                      mlir::OperationState &state,
+                                      mlir::Value lhs, mlir::Value rhs) {
+  state.addOperands({lhs, rhs});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+/// Infer the output shape of the FIRFilterYSymmOptimizedOp, as required by
+/// the shape inference interface. For every dimension of `lhs` the result
+/// extent is lhs_dim + rhs_dim - 1; the element type is taken from `lhs`.
+void FIRFilterYSymmOptimizedOp::inferShapes() {
+  auto inputType = getLhs().getType();
+  auto filterType = getRhs().getType();
+  auto inputDims = inputType.getShape();
+  auto filterDims = filterType.getShape();
+
+  // NOTE(review): filterDims is indexed with the input's dimension indices,
+  // so this presumably expects rhs to have at least the rank of lhs.
+  std::vector<int64_t> resultDims;
+  for (size_t dim = 0, rank = inputDims.size(); dim < rank; ++dim) {
+    const int64_t extent = inputDims[dim] + filterDims[dim] - 1;
+    resultDims.push_back(extent);
+  }
+
+  mlir::TensorType resultType =
+      mlir::RankedTensorType::get(resultDims, inputType.getElementType());
+  getResult().setType(resultType);
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// instance of the op is accepted.
+// TODO: a rank check (input and filter both of rank 1) was sketched here but
+// is currently disabled.
+mlir::LogicalResult FIRFilterYSymmOptimizedOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// FFT1DRealSymmOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFT1DRealSymmOp over `value`; the result is declared as an
+/// unranked f64 tensor.
+void FFT1DRealSymmOp::build(mlir::OpBuilder &builder,
+                            mlir::OperationState &state, mlir::Value value) {
+  state.addOperands({value});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the output has the same size as the input, so the
+/// result adopts the input operand's type unchanged.
+void FFT1DRealSymmOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// FFT1DRealSymmOp is accepted.
+// TODO: a rank-1 check on the input was sketched here but is currently
+// disabled.
+mlir::LogicalResult FFT1DRealSymmOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FFT1DImgConjSymmOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFT1DImgConjSymmOp over `value`; the result is declared as an
+/// unranked f64 tensor.
+void FFT1DImgConjSymmOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state, mlir::Value value) {
+  state.addOperands({value});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the output has the same size as the input, so the
+/// result adopts the input operand's type unchanged.
+void FFT1DImgConjSymmOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verifier hook. No constraints are enforced at the moment, so every
+/// FFT1DImgConjSymmOp is accepted.
+// TODO: a rank-1 check on the input was sketched here but is currently
+// disabled.
+mlir::LogicalResult FFT1DImgConjSymmOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// ShiftRightOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a ShiftRightOp from the `lhs` and `rhs` operands; the result is
+/// declared as an unranked f64 tensor.
+void ShiftRightOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value lhs, mlir::Value rhs) {
+  state.addOperands({lhs, rhs});
+  state.addTypes({UnrankedTensorType::get(builder.getF64Type())});
+}
+
+// mlir::ParseResult SubOp::parse(mlir::OpAsmParser &parser,
+//                                mlir::OperationState &result) {
+//   return parseBinaryOp(parser, result);
+// }
+
+// void SubOp::print(mlir::OpAsmPrinter &p) { printBinaryOp(p, *this); }
+
+/// Infer the output shape of the ShiftRightOp, as required by the shape
+/// inference interface: the result takes the type of `lhs`.
+void ShiftRightOp::inferShapes() {
+  auto lhsType = getLhs().getType();
+  getResult().setType(lhsType);
+}
+
+//===----------------------------------------------------------------------===//
+// Conv2DOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a Conv2DOp with the operands `input`, `weight` and `bias` (in that
+/// order); the result is declared as an unranked f64 tensor.
+void Conv2DOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                     mlir::Value input, mlir::Value weight, mlir::Value bias) {
+  state.addOperands({input, weight, bias});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Shape inference hook: given an IH x IW input and a KH x KW kernel, the
+/// result is (IH - KH + 1) x (IW - KW + 1) with the input's element type.
+///
+/// Both operands must already be ranked tensors (Conv2DOp::verify rejects
+/// anything else), hence llvm::cast rather than an unchecked llvm::dyn_cast
+/// whose null result would be dereferenced.
+void Conv2DOp::inferShapes() {
+  auto inputType = llvm::cast<RankedTensorType>(getInput().getType());
+  auto kernelType = llvm::cast<RankedTensorType>(getKernel().getType());
+
+  llvm::ArrayRef<int64_t> inputDims = inputType.getShape();
+  llvm::ArrayRef<int64_t> kernelDims = kernelType.getShape();
+
+  SmallVector<int64_t, 2> resultDims = {inputDims[0] - kernelDims[0] + 1,
+                                        inputDims[1] - kernelDims[1] + 1};
+  getResult().setType(
+      RankedTensorType::get(resultDims, inputType.getElementType()));
+}
+
+/// Verifies the operand types of Conv2DOp:
+///  * input, kernel and bias must all be ranked tensors;
+///  * input and kernel must both be rank 2;
+///  * the kernel must not exceed the input in either dimension.
+///
+/// Failures are reported through emitOpError so they are attached to the op,
+/// and the messages describe the rank-2 layout that is actually checked.
+mlir::LogicalResult Conv2DOp::verify() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getInput().getType());
+  auto kernelType = llvm::dyn_cast<RankedTensorType>(getKernel().getType());
+  auto biasType = llvm::dyn_cast<RankedTensorType>(getBias().getType());
+
+  if (!inputType)
+    return emitOpError("expects a ranked tensor for input, got ")
+           << getInput().getType();
+  if (!kernelType)
+    return emitOpError("expects a ranked tensor for kernel, got ")
+           << getKernel().getType();
+  // The bias is only required to be ranked; its rank is not checked.
+  if (!biasType)
+    return emitOpError("expects a ranked tensor for bias, got ")
+           << getBias().getType();
+
+  if (inputType.getRank() != 2)
+    return emitOpError("expects a 2 dimensional input (IH x IW), got rank ")
+           << inputType.getRank();
+  if (kernelType.getRank() != 2)
+    return emitOpError("expects a 2 dimensional kernel (KH x KW), got rank ")
+           << kernelType.getRank();
+
+  if (inputType.getShape()[0] < kernelType.getShape()[0])
+    return emitOpError("input shape < kernel shape at 1st dimension");
+  if (inputType.getShape()[1] < kernelType.getShape()[1])
+    return emitOpError("input shape < kernel shape at 2nd dimension");
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// ThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+/// Verifies the `returnoriginal` flag (operand 2): it must be defined by a
+/// dsp::ConstantOp whose first element, truncated towards zero, is 0 or 1.
+///
+/// Every failure now carries a diagnostic; previously an out-of-range flag
+/// failed silently and a non-constant operand dereferenced a null op.
+mlir::LogicalResult ThresholdUpOp::verify() {
+  auto flagConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  if (!flagConst)
+    return emitOpError("expects returnoriginal (operand 2) to be defined by a "
+                       "dsp::ConstantOp");
+
+  DenseElementsAttr flagAttr = flagConst.getValue();
+  if (flagAttr.getNumElements() == 0)
+    return emitOpError("expects a non-empty returnoriginal constant");
+
+  auto flagElems = flagAttr.getValues<FloatAttr>();
+  const auto flag = static_cast<int64_t>(flagElems[0].getValueAsDouble());
+  if (flag != 0 && flag != 1)
+    return emitOpError("returnoriginal must be 0 or 1 but got: ") << flag;
+  return mlir::success();
+}
+
+/// Builds a ThresholdUpOp with the operands `input`, `threshold` and
+/// `returnoriginal` (in that order); the result is declared as an unranked
+/// f64 tensor.
+void ThresholdUpOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                          mlir::Value input, mlir::Value threshold,
+                          mlir::Value returnoriginal) {
+  state.addOperands({input, threshold, returnoriginal});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the result adopts the input operand's type.
+void ThresholdUpOp::inferShapes() {
+  auto inputType = getInput().getType();
+  getResult().setType(inputType);
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateDTMFOp
+//===----------------------------------------------------------------------===//
+
+/// Verifies GenerateDTMFOp operands:
+///  * digit, duration and fs must be ranked tensors;
+///  * each of them must hold exactly one element;
+///  * digit must be defined by a dsp::ConstantOp whose value is one of the
+///    integers 0..9.
+///
+/// The checks run in that order, so the first violated rule is reported.
+mlir::LogicalResult GenerateDTMFOp::verify() {
+  struct NamedOperand {
+    llvm::StringRef name;
+    mlir::Type type;
+  };
+  const NamedOperand operands[] = {{"Digit", getDigit().getType()},
+                                   {"Duration", getDuration().getType()},
+                                   {"Frequency", getFs().getType()}};
+
+  for (const NamedOperand &operand : operands)
+    if (!llvm::isa<RankedTensorType>(operand.type))
+      return emitError() << operand.name << " must be a ranked tensor";
+
+  for (const NamedOperand &operand : operands)
+    if (llvm::cast<RankedTensorType>(operand.type).getNumElements() != 1)
+      return emitError() << operand.name
+                         << " must contain exactly one element";
+
+  // Guard against a digit that is not produced by a constant; the value
+  // cannot be inspected in that case.
+  auto digitConst = getDigit().getDefiningOp<dsp::ConstantOp>();
+  if (!digitConst)
+    return emitError() << "Digit must be defined by a dsp::ConstantOp";
+
+  DenseElementsAttr digitAttr = digitConst.getValue();
+  auto digitElems = digitAttr.getValues<FloatAttr>();
+  const double dig = digitElems[0].getValueAsDouble();
+
+  // Accept only the exact values 0, 1, ..., 9. The range test runs first, so
+  // the integer round trip below never sees NaN or an out-of-range value.
+  const bool inRange = dig >= 0 && dig <= 9;
+  if (!inRange || static_cast<double>(static_cast<int64_t>(dig)) != dig)
+    return emitError() << "Digit can only take one of the following values: "
+                          "0,1,2,3,4,5,6,7,8,9";
+
+  return mlir::success();
+}
+
+/// Builds a GenerateDTMFOp with the operands `digit`, `duration` and `fs` (in
+/// that order); the result is declared as an unranked f64 tensor.
+void GenerateDTMFOp::build(mlir::OpBuilder &builder,
+                           mlir::OperationState &state, mlir::Value digit,
+                           mlir::Value duration, mlir::Value fs) {
+  state.addOperands({digit, duration, fs});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the result is a rank-1 tensor whose length is
+/// duration * fs truncated to an integer, with the digit's element type.
+/// duration and fs are read from element 0 of their defining
+/// dsp::ConstantOp.
+///
+/// If duration or fs is not defined by a dsp::ConstantOp, or digit is not a
+/// ranked tensor, an error is emitted and the result type is left untouched
+/// (the previous code dereferenced a null value in those cases).
+void GenerateDTMFOp::inferShapes() {
+  auto digitType = llvm::dyn_cast<RankedTensorType>(getDigit().getType());
+  if (!digitType) {
+    emitOpError("expects a ranked tensor for digit");
+    return;
+  }
+
+  auto durationConst = getDuration().getDefiningOp<dsp::ConstantOp>();
+  auto fsConst = getFs().getDefiningOp<dsp::ConstantOp>();
+  if (!durationConst || !fsConst) {
+    emitOpError("expects duration and fs to be defined by a dsp::ConstantOp");
+    return;
+  }
+
+  DenseElementsAttr durationAttr = durationConst.getValue();
+  DenseElementsAttr fsAttr = fsConst.getValue();
+  auto durationElems = durationAttr.getValues<FloatAttr>();
+  auto fsElems = fsAttr.getValues<FloatAttr>();
+
+  const double dur = durationElems[0].getValueAsDouble();
+  const double freq = fsElems[0].getValueAsDouble();
+  const auto outputLen = static_cast<int64_t>(dur * freq);
+
+  getResult().setType(
+      RankedTensorType::get(outputLen, digitType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FFTFreqOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFTFreqOp from the `length` and `distance` operands; the result
+/// is declared as an unranked f64 tensor.
+void FFTFreqOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                      mlir::Value length, mlir::Value distance) {
+  state.addOperands({length, distance});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Verifier hook. Nothing is checked: every FFTFreqOp is accepted.
+mlir::LogicalResult FFTFreqOp::verify() {
+  return mlir::success();
+}
+
+/// Shape inference hook: the result is a rank-1 tensor whose length is the
+/// value of the `length` operand (element 0 of its defining dsp::ConstantOp,
+/// truncated to an integer), with the element type of `length`.
+///
+/// If `length` is not a ranked tensor or is not defined by a
+/// dsp::ConstantOp, an error is emitted and the result type is left untouched
+/// (the previous code dereferenced a null value in those cases).
+void FFTFreqOp::inferShapes() {
+  auto lengthType = llvm::dyn_cast<RankedTensorType>(getLength().getType());
+  if (!lengthType) {
+    emitOpError("expects a ranked tensor for length");
+    return;
+  }
+
+  auto lengthConst = getLength().getDefiningOp<dsp::ConstantOp>();
+  if (!lengthConst) {
+    emitOpError("expects length to be defined by a dsp::ConstantOp");
+    return;
+  }
+
+  DenseElementsAttr lengthAttr = lengthConst.getValue();
+  auto lengthElems = lengthAttr.getValues<FloatAttr>();
+  const auto outputLen =
+      static_cast<int64_t>(lengthElems[0].getValueAsDouble());
+
+  getResult().setType(
+      RankedTensorType::get(outputLen, lengthType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FindDominantPeaksOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a FindDominantPeaksOp from the `frequencies` and `magnitudes`
+/// operands; the result is declared as an unranked f64 tensor.
+void FindDominantPeaksOp::build(mlir::OpBuilder &builder,
+                                mlir::OperationState &state,
+                                mlir::Value frequencies,
+                                mlir::Value magnitudes) {
+  state.addOperands({frequencies, magnitudes});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the result is a rank-1 tensor of two elements with
+/// the element type of the `frequencies` operand.
+void FindDominantPeaksOp::inferShapes() {
+  auto freqType = llvm::dyn_cast<RankedTensorType>(getFrequencies().getType());
+  const int64_t resultDims[] = {2};
+  getResult().setType(
+      RankedTensorType::get(resultDims, freqType.getElementType()));
+}
+
+/// Verifier hook. Nothing is checked: every FindDominantPeaksOp is accepted.
+/// (The operand types were previously fetched into unused locals.)
+mlir::LogicalResult FindDominantPeaksOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// RecoverDTMFDigitOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a RecoverDTMFDigitOp from the `frequencies` and `freqPairs`
+/// operands; the result is declared as an unranked f64 tensor.
+void RecoverDTMFDigitOp::build(mlir::OpBuilder &builder,
+                               mlir::OperationState &state,
+                               mlir::Value frequencies, mlir::Value freqPairs) {
+  state.addOperands({frequencies, freqPairs});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference hook: the result is a rank-1 tensor of one element with
+/// the element type of the `frequencies` operand.
+void RecoverDTMFDigitOp::inferShapes() {
+  auto freqType = llvm::dyn_cast<RankedTensorType>(getFrequencies().getType());
+  const int64_t resultDims[] = {1};
+  getResult().setType(
+      RankedTensorType::get(resultDims, freqType.getElementType()));
+}
+
+/// Verifier hook. Nothing is checked: every RecoverDTMFDigitOp is accepted.
+/// (The operand types were previously fetched into unused locals.)
+mlir::LogicalResult RecoverDTMFDigitOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// FFTCombineOp
+//===----------------------------------------------------------------------===//
+
+/// Builds an FFTCombineOp from the `real` and `imag` operands; the result is
+/// declared as an unranked f64 tensor.
+void FFTCombineOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
+                         mlir::Value real, mlir::Value imag) {
+  state.addOperands({real, imag});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Verifies that the real and imaginary operands hold the same number of
+/// elements. The check is skipped (and verification succeeds) while either
+/// operand is not a statically shaped ranked tensor, since the element count
+/// cannot be queried in that state; the previous code dereferenced a null
+/// type there.
+mlir::LogicalResult FFTCombineOp::verify() {
+  auto realType = llvm::dyn_cast<RankedTensorType>(getReal().getType());
+  auto imagType = llvm::dyn_cast<RankedTensorType>(getImag().getType());
+  if (!realType || !imagType || !realType.hasStaticShape() ||
+      !imagType.hasStaticShape())
+    return mlir::success();
+
+  if (realType.getNumElements() != imagType.getNumElements())
+    return emitError()
+           << "Real and Imaginary parts should have same number of elements.";
+  return mlir::success();
+}
+
+/// Shape inference hook: the result adopts the type of the `real` operand.
+void FFTCombineOp::inferShapes() {
+  auto realType = getReal().getType();
+  getResult().setType(realType);
+}
+
+//===----------------------------------------------------------------------===//
+// GenerateVoiceSignatureOp
+//===----------------------------------------------------------------------===//
+
+/// Builds a GenerateVoiceSignatureOp with the operands `f1`, `f2`, `duration`
+/// and `fs` (in that order); the result is declared as an unranked f64 tensor.
+void GenerateVoiceSignatureOp::build(mlir::OpBuilder &builder,
+                                     mlir::OperationState &state,
+                                     mlir::Value f1, mlir::Value f2,
+                                     mlir::Value duration, mlir::Value fs) {
+  state.addOperands({f1, f2, duration, fs});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Verifies GenerateVoiceSignatureOp operands: f1, f2, duration and fs must
+/// all be ranked tensors, and each must hold exactly one element.
+///
+/// All rank checks run before any element-count check, so the first violated
+/// rule is reported with the same message as before. The unreachable
+/// `return failure()` statements that followed every diagnostic are gone.
+mlir::LogicalResult GenerateVoiceSignatureOp::verify() {
+  struct NamedOperand {
+    llvm::StringRef name;
+    mlir::Type type;
+  };
+  const NamedOperand operands[] = {{"f1", getF1().getType()},
+                                   {"f2", getF2().getType()},
+                                   {"Duration", getDuration().getType()},
+                                   {"Frequency", getFs().getType()}};
+
+  for (const NamedOperand &operand : operands)
+    if (!llvm::isa<RankedTensorType>(operand.type))
+      return emitError() << operand.name << " must be a ranked tensor";
+
+  for (const NamedOperand &operand : operands)
+    if (llvm::cast<RankedTensorType>(operand.type).getNumElements() != 1)
+      return emitError() << operand.name
+                         << " must contain exactly one element";
+
+  return mlir::success();
+}
+
+/// Shape inference hook: the result is a rank-1 tensor whose length is
+/// duration * fs truncated to an integer, with the element type of `fs`.
+/// duration and fs are read from element 0 of their defining
+/// dsp::ConstantOp.
+///
+/// If duration or fs is not defined by a dsp::ConstantOp, or fs is not a
+/// ranked tensor, an error is emitted and the result type is left untouched
+/// (the previous code dereferenced a null value in those cases).
+void GenerateVoiceSignatureOp::inferShapes() {
+  auto fsType = llvm::dyn_cast<RankedTensorType>(getFs().getType());
+  if (!fsType) {
+    emitOpError("expects a ranked tensor for fs");
+    return;
+  }
+
+  auto durationConst = getDuration().getDefiningOp<dsp::ConstantOp>();
+  auto fsConst = getFs().getDefiningOp<dsp::ConstantOp>();
+  if (!durationConst || !fsConst) {
+    emitOpError("expects duration and fs to be defined by a dsp::ConstantOp");
+    return;
+  }
+
+  DenseElementsAttr durationAttr = durationConst.getValue();
+  DenseElementsAttr fsAttr = fsConst.getValue();
+  auto durationElems = durationAttr.getValues<FloatAttr>();
+  auto fsElems = fsAttr.getValues<FloatAttr>();
+
+  const double dur = durationElems[0].getValueAsDouble();
+  const double freq = fsElems[0].getValueAsDouble();
+  const auto outputLen = static_cast<int64_t>(dur * freq);
+
+  getResult().setType(
+      RankedTensorType::get(outputLen, fsType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a placeholder unranked f64 result type.
+void SqrtOp::build(OpBuilder &builder, OperationState &state, Value input) {
+  state.addOperands(input);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// No operand constraints are enforced yet, so verification always succeeds.
+mlir::LogicalResult SqrtOp::verify() {
+  return mlir::success();
+}
+/// Shape inference: the result takes on the type of the `input` operand.
+void SqrtOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+//===----------------------------------------------------------------------===//
+// QamDemodulateOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a placeholder unranked f64 result type.
+void QamDemodulateOp::build(OpBuilder &builder, OperationState &state,
+                            Value real, Value imagine) {
+  state.addOperands({real, imagine});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Infer the result type: each extent of `real` doubled, same element type.
+void QamDemodulateOp::inferShapes() {
+  auto realType = llvm::dyn_cast<RankedTensorType>(getReal().getType());
+  if (!realType) // dyn_cast yields null for an unranked operand
+    return;
+  SmallVector<int64_t, 2> outputShape; // int64_t: what RankedTensorType takes
+  for (int64_t dim : realType.getShape()) // dynamic extents stay dynamic
+    outputShape.push_back(ShapedType::isDynamic(dim) ? dim : dim * 2);
+  getResult().setType(
+      RankedTensorType::get(outputShape, realType.getElementType()));
+}
+
+/// Verification hook. No operand constraints are enforced at present, so it
+/// always succeeds.
+// TODO: check that `real` and `imagine` are ranked tensors of equal shape.
+mlir::LogicalResult QamDemodulateOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateRealOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op over `signal` with a placeholder unranked f64 result type;
+/// inferShapes() later replaces it with a ranked type.
+void QamModulateRealOp::build(OpBuilder &builder, OperationState &state,
+                              Value signal) {
+  state.addOperands(signal);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Infer the result type: each extent of `signal` halved (integer division),
+/// same element type.
+void QamModulateRealOp::inferShapes() {
+  auto signalType = llvm::dyn_cast<RankedTensorType>(getSignal().getType());
+  if (!signalType) // dyn_cast yields null for an unranked operand
+    return;
+  SmallVector<int64_t, 8> outputShape; // int64_t: what RankedTensorType takes
+  for (int64_t dim : signalType.getShape()) // dynamic extents stay dynamic
+    outputShape.push_back(ShapedType::isDynamic(dim) ? dim : dim / 2);
+  getResult().setType(
+      RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+mlir::LogicalResult QamModulateRealOp::verify() {
+  // Always succeeds: the ranked / 1-D checks below are currently disabled.
+  // auto signalType = llvm::dyn_cast<RankedTensorType>(getSignal().getType());
+  //
+  // if(!signalType) {
+  // llvm::errs() << "expect a ranked tensor for signal input, get " <<
+  // getSignal(); return mlir::failure();
+  //}
+  //
+  // auto signalRank = signalType.getRank();
+  //
+  // if(signalRank != 1 ) {
+  // llvm::errs() << "expect 1 dimensional signal, get " << signalRank;
+  // return mlir::failure();
+  //}
+  //
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// QamModulateImgOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op over `signal` with a placeholder unranked f64 result type;
+/// inferShapes() later replaces it with a ranked type.
+void QamModulateImgOp::build(OpBuilder &builder, OperationState &state,
+                             Value signal) {
+  state.addOperands(signal);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Infer the result type: each extent of `signal` halved (integer division),
+/// same element type.
+void QamModulateImgOp::inferShapes() {
+  auto signalType = llvm::dyn_cast<RankedTensorType>(getSignal().getType());
+  if (!signalType) // dyn_cast yields null for an unranked operand
+    return;
+  SmallVector<int64_t, 8> outputShape; // int64_t: what RankedTensorType takes
+  for (int64_t dim : signalType.getShape()) // dynamic extents stay dynamic
+    outputShape.push_back(ShapedType::isDynamic(dim) ? dim : dim / 2);
+  getResult().setType(
+      RankedTensorType::get(outputShape, signalType.getElementType()));
+}
+
+mlir::LogicalResult QamModulateImgOp::verify() {
+  // No constraints are checked for this op yet, so verification always passes.
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// BeamFormOp
+//===----------------------------------------------------------------------===//
+
+void BeamFormOp::build(OpBuilder &builder, OperationState &state,
+                       int64_t antennas, int64_t freq, Value time,
+                       Value weights) {
+  state.addOperands({time, weights});
+  state.addAttribute("antennas", builder.getI64IntegerAttr(antennas));
+  state.addAttribute("freq", builder.getI64IntegerAttr(freq));
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+}
+/// Shape inference: the result takes on the type of the `time` operand.
+void BeamFormOp::inferShapes() { getResult().setType(getTime().getType()); }
+
+/// Verification hook; no operand or attribute constraints are enforced yet,
+/// so it always succeeds.
+mlir::LogicalResult BeamFormOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SpaceModulateOp
+//===----------------------------------------------------------------------===//
+
+void SpaceModulateOp::build(OpBuilder &builder, OperationState &state,
+                            Value signals) {
+  state.addOperands(signals);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+}
+/// Shape inference: the result takes on the type of the `signal` operand.
+void SpaceModulateOp::inferShapes() {
+  getResult().setType(getSignal().getType());
+}
+/// Verification hook; it checks nothing and always reports success.
+mlir::LogicalResult SpaceModulateOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SpaceDemodulateOp
+//===----------------------------------------------------------------------===//
+
+void SpaceDemodulateOp::build(OpBuilder &builder, OperationState &state,
+                              Value binary) {
+  state.addOperands(binary);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+}
+/// Shape inference: the result takes on the type of the `binary` operand.
+void SpaceDemodulateOp::inferShapes() {
+  getResult().setType(getBinary().getType());
+}
+/// Verification hook; it checks nothing and always reports success.
+mlir::LogicalResult SpaceDemodulateOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// SpaceErrCorrectionOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a placeholder unranked f64 result type.
+void SpaceErrCorrectionOp::build(OpBuilder &builder, OperationState &state,
+                                 Value signal) {
+  state.addOperands(signal);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Shape inference: the result takes on the type of the `signal` operand.
+void SpaceErrCorrectionOp::inferShapes() {
+  getResult().setType(getSignal().getType());
+}
+/// Verification hook; it checks nothing and always reports success.
+mlir::LogicalResult SpaceErrCorrectionOp::verify() { return mlir::success(); }
+
+//===----------------------------------------------------------------------===//
+// NormalizeOp
+//===----------------------------------------------------------------------===//
+
+void NormalizeOp::build(OpBuilder &builder, OperationState &state,
+                        Value signal) {
+  state.addOperands(signal);
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+}
+/// Shape inference: the result takes on the type of the `signal` operand.
+void NormalizeOp::inferShapes() { getResult().setType(getSignal().getType()); }
+
+//===----------------------------------------------------------------------===//
+// NormLMSFilterResponseOptimizeOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op over (`lhs`, `rhs`, `mu`, `filterLen`) with a placeholder
+/// unranked f64 result type.
+void NormLMSFilterResponseOptimizeOp::build(OpBuilder &builder,
+                                            OperationState &state, Value lhs,
+                                            Value rhs, Value mu,
+                                            Value filterLen) {
+  state.addOperands({lhs, rhs, mu, filterLen});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Shape inference: the result takes on the type of the `lhs` operand.
+void NormLMSFilterResponseOptimizeOp::inferShapes() {
+  getResult().setType(getLhs().getType());
+}
+/// Verification hook; it checks nothing and always reports success.
+mlir::LogicalResult NormLMSFilterResponseOptimizeOp::verify() {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// FIRFilterResSymmThresholdUpOptimizedOp
+//===----------------------------------------------------------------------===//
+
+void FIRFilterResSymmThresholdUpOptimizedOp::build(
+    OpBuilder &builder, OperationState &state, Value lhs, Value rhs,
+    Value threshold, Value returnoriginal) {
+  state.addOperands({lhs, rhs, threshold, returnoriginal});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+}
+
+/// Infer the output shape of the FIRFilterResSymmThresholdUpOptimizedOp, this
+/// is required by the shape inference interface. Each result extent is the
+/// matching lhs extent + rhs extent - 1; the element type comes from lhs.
+void FIRFilterResSymmThresholdUpOptimizedOp::inferShapes() {
+  auto inputType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  auto filterType = llvm::dyn_cast<RankedTensorType>(getRhs().getType());
+  // Extents can only be read from ranked tensors; leave the result type
+  // untouched while either operand is still unranked.
+  if (!inputType || !filterType)
+    return;
+
+  auto shapeOfInput = inputType.getShape();
+  auto shapeOfFilter = filterType.getShape();
+  // The loop below indexes both shapes in lock-step over the input's rank, so
+  // a filter of lower rank would be read past its end.
+  if (shapeOfFilter.size() < shapeOfInput.size())
+    return;
+
+  SmallVector<int64_t, 2> shapeForOutput;
+  for (size_t i = 0; i < shapeOfInput.size(); i++)
+    shapeForOutput.push_back(shapeOfInput[i] + shapeOfFilter[i] - 1);
+  getResult().setType(
+      RankedTensorType::get(shapeForOutput, inputType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// FFTOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with two results, each typed like the `lhs` operand.
+void FFTOp::build(OpBuilder &builder, OperationState &state, Value lhs) {
+  state.addOperands(lhs);
+  state.addTypes({lhs.getType(), lhs.getType()});
+}
+/// Shape inference: both results take on the type of the `lhs` operand.
+void FFTOp::inferShapes() {
+  for (unsigned idx = 0; idx < 2; ++idx)
+    getResult(idx).setType(getLhs().getType());
+}
+
+//===----------------------------------------------------------------------===//
+// FFTAbsOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a result typed like the `input` operand.
+void FFTAbsOp::build(OpBuilder &builder, OperationState &state, Value input) {
+  state.addOperands(input);
+  state.addTypes(input.getType());
+}
+/// Shape inference: the result takes on the type of the `input` operand.
+void FFTAbsOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+//===----------------------------------------------------------------------===//
+// DFTAbsOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a result typed like the `input` operand.
+void DFTAbsOp::build(OpBuilder &builder, OperationState &state, Value input) {
+  state.addOperands(input);
+  state.addTypes(input.getType());
+}
+/// Shape inference: the result takes on the type of the `input` operand.
+void DFTAbsOp::inferShapes() { getResult().setType(getInput().getType()); }
+
+//===----------------------------------------------------------------------===//
+// DFTAbsThresholdUpOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a result typed like the `input` operand.
+void DFTAbsThresholdUpOp::build(OpBuilder &builder, OperationState &state,
+                                Value input, Value threshold,
+                                Value returnoriginal) {
+  state.addOperands({input, threshold, returnoriginal});
+  state.addTypes(input.getType());
+}
+/// Shape inference: the result takes on the type of the `input` operand.
+void DFTAbsThresholdUpOp::inferShapes() {
+  getResult().setType(getInput().getType());
+}
+
+/// Verify that `returnoriginal` (operand 2) is produced by a dsp::ConstantOp
+/// whose first element, truncated to an integer, is the flag value 0 or 1.
+mlir::LogicalResult DFTAbsThresholdUpOp::verify() {
+  auto flagConst = getOperand(2).getDefiningOp<dsp::ConstantOp>();
+  // getDefiningOp yields a null handle for block arguments and for values
+  // produced by other ops; calling getValue() on it would crash.
+  if (!flagConst)
+    return emitOpError("expects 'returnoriginal' to be a constant");
+
+  auto elements = flagConst.getValue().getValues<FloatAttr>();
+  auto returnOriginal = static_cast<int64_t>(elements[0].getValueAsDouble());
+  if (returnOriginal != 0 && returnOriginal != 1)
+    return emitOpError("expects 'returnoriginal' to be 0 or 1, got ")
+           << returnOriginal;
+  return mlir::success();
+}
+
+
+//===----------------------------------------------------------------------===//
+ // CorrelateOp
+ //===----------------------------------------------------------------------===//
+
+ void CorrelateOp::build(OpBuilder &builder, OperationState &state, Value lhs,
+                         Value rhs) {
+   state.addOperands({lhs, rhs});
+   state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+ }
+
+ void CorrelateOp::inferShapes() {
+  // Result: 1-D, 2 * N - 1 elements where N is the first extent of lhs.
+  auto lhsType = llvm::dyn_cast<RankedTensorType>(getLhs().getType());
+  // Reading extent 0 needs a ranked operand with at least one dimension.
+  if (!lhsType || lhsType.getRank() < 1)
+    return;
+  int64_t lhsLen = lhsType.getShape()[0];
+  // A dynamic extent carries no length to scale, so it is passed through.
+  int64_t outLen = ShapedType::isDynamic(lhsLen) ? lhsLen : lhsLen * 2 - 1;
+  getResult().setType(
+      RankedTensorType::get({outLen}, lhsType.getElementType()));
+}
+
+//===----------------------------------------------------------------------===//
+// SetSingleElemAtIdxOp
+//===----------------------------------------------------------------------===//
+
+/// Builds the op with a placeholder unranked f64 result type.
+void SetSingleElemAtIdxOp::build(OpBuilder &builder, OperationState &state,
+                                 Value input, Value indx, Value val) {
+  state.addOperands({input, indx, val});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+
+/// Shape inference: the result is a rank-0 tensor (empty shape) carrying the
+/// element type of the `input` operand.
+void SetSingleElemAtIdxOp::inferShapes() {
+  auto elementType = getInput().getType().getElementType();
+  // An empty extent list yields a rank-0 tensor type.
+  getResult().setType(RankedTensorType::get({}, elementType));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Correl2MaxOptimizedOp
+//===----------------------------------------------------------------------===//
+
+ void Correl2MaxOptimizedOp::build(OpBuilder &builder, OperationState &state,
+                                   Value lhs, Value rhs) {
+   state.addOperands({lhs, rhs});
+   state.addTypes(UnrankedTensorType::get(builder.getF64Type())); // placeholder
+ }
+
+ void Correl2MaxOptimizedOp::inferShapes() {
+  // An empty extent list makes the result a rank-0 tensor; only the element
+  // type is taken from the `lhs` operand.
+  auto elementType = getLhs().getType().getElementType();
+
+  mlir::TensorType resultType = mlir::RankedTensorType::get({}, elementType);
+
+  getResult().setType(resultType);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LMSFilterResponse2GainOp
+//===----------------------------------------------------------------------===//
+                   
+
+/// Builds the op over (`lhs`, `rhs`, `mu`, `filterLen`, `gain`) with a
+/// placeholder unranked f64 result type.
+void LMSFilterResponse2GainOp::build(OpBuilder &builder, OperationState &state,
+                                     Value lhs, Value rhs, Value mu,
+                                     Value filterLen, Value gain) {
+  state.addOperands({lhs, rhs, mu, filterLen, gain});
+  state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
+}
+/// Shape inference: the result takes on the type of the `lhs` operand.
+void LMSFilterResponse2GainOp::inferShapes() {
+  getResult().setType(getLhs().getType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// TableGen'd op method definitions
+//===----------------------------------------------------------------------===//
+                  
+#define GET_OP_CLASSES
+#include "mlir/Dialect/DSP/IR/DSP.cpp.inc"
diff --git a/mlir/lib/Dialect/DSP/IR/ToyCombine.cpp b/mlir/lib/Dialect/DSP/IR/ToyCombine.cpp
new file mode 100644
index 000000000000..977b3380b1ec
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/IR/ToyCombine.cpp
@@ -0,0 +1,1608 @@
+//===- ToyCombine.cpp - Toy High Level Optimizer --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a set of simple combiners for optimizing operations in
+// the DSP dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/LogicalResult.h"
+//#include "toy/DebugConfig.h"
+//#include "toy/Dialect.h"
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+#include <numeric>
+
+#include "mlir/Tools/mlir-opt/MlirOptMain.h"
+
+
+#include "llvm/Support/CommandLine.h"
+
+using namespace mlir;
+using namespace dsp;
+using namespace std;
+
+
+
+namespace {
+/// Include the patterns defined in the Declarative Rewrite framework.
+#include "ToyCombine.inc"
+} // namespace
+
+// Declare the function to get the option value
+//extern bool mlir::getEnableCanonicalOpt();
+						
+						
+
+/// C++ rewrite pattern that folds a double transpose:
+///   transpose(transpose(x)) -> x
+struct SimplifyRedundantTranspose : public mlir::OpRewritePattern<TransposeOp> {
+  /// Register the pattern on every TransposeOp in the IR. The benefit (1 here)
+  /// is what the framework uses to rank patterns by profitability.
+  SimplifyRedundantTranspose(mlir::MLIRContext *context)
+      : OpRewritePattern<TransposeOp>(context, /*benefit=*/1) {}
+
+  /// Try to match the pattern on `op` and rewrite it. All IR changes go
+  /// through `rewriter`.
+  ///
+  /// Returns success() when `op` was replaced, failure() when its operand is
+  /// not produced by another TransposeOp.
+  mlir::LogicalResult
+  matchAndRewrite(TransposeOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Peek at the producer of the outer transpose's operand.
+    auto innerTranspose = op.getOperand().getDefiningOp<TransposeOp>();
+
+    if (innerTranspose) {
+      // Two transposes in a row cancel out: forward the innermost value to
+      // every user of the outer transpose.
+      rewriter.replaceOp(op, {innerTranspose.getOperand()});
+      return success();
+    }
+    return failure();
+  }
+};
+
+// Pseudo-Code
+// Find an upsampling that feeds directly into a downsampling
+//  result1 = upsampling(input1, rate1)
+//  result2 = downsampling(result1, rate2)
+// if rate1 == rate2 then result2 = input1
+// if rate1 is a larger multiple of rate2 then
+//  result2 = upsampling(input1, rate1 / rate2)
+// replaceOp
+struct SimplifyUpsamplingDownsampling
+    : public mlir::OpRewritePattern<DownsamplingOp> {
+  /// We register this pattern to match every dsp.downsampling in the IR.
+  /// The "benefit" is used by the framework to order the patterns and process
+  /// them in order of profitability.
+  SimplifyUpsamplingDownsampling(mlir::MLIRContext *context)
+      : OpRewritePattern<DownsamplingOp>(context, /*benefit=*/1) {}
+
+  /// Read the sampling rate carried by `rate`.
+  ///
+  /// Returns the first element of the defining dsp::ConstantOp truncated to
+  /// an integer, or 0 when `rate` is not produced by a dsp::ConstantOp or the
+  /// value is not positive. 0 therefore always means "no usable rate".
+  static int64_t getConstantRate(mlir::Value rate) {
+    auto constantOp = rate.getDefiningOp<dsp::ConstantOp>();
+    // getDefiningOp yields a null handle for block arguments and for values
+    // produced by any other op; calling getValue() on it would crash.
+    if (!constantOp)
+      return 0;
+
+    DenseElementsAttr denseValue = constantOp.getValue();
+    auto elements = denseValue.getValues<FloatAttr>();
+    auto value = static_cast<int64_t>(elements[0].getValueAsDouble());
+    return value > 0 ? value : 0;
+  }
+
+  /// This method attempts to match a pattern and rewrite it. The rewriter
+  /// argument is the orchestrator of the sequence of rewrites. The pattern is
+  /// expected to interact with it to perform any changes to the IR from here.
+  ///
+  /// Returns success() only when `op` was replaced; every failure() path
+  /// leaves the IR untouched.
+  ///
+  /// Nothing is printed to llvm::errs(); the rejected-rate case is reported
+  /// through notifyMatchFailure() instead.
+  mlir::LogicalResult
+  matchAndRewrite(DownsamplingOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Look through the input of the current downsampling.
+    mlir::Value downsamplingInput = op.getOperand(0);
+    auto prevUpsamplingOp = downsamplingInput.getDefiningOp<UpsamplingOp>();
+
+    // Input defined by an upsampling? If not, no match.
+    if (!prevUpsamplingOp)
+      return failure();
+
+    mlir::Value upsamplingInput = prevUpsamplingOp.getOperand(0);
+    int64_t downsamplingRate = getConstantRate(op.getOperand(1));
+    int64_t upsamplingRate = getConstantRate(prevUpsamplingOp.getOperand(1));
+
+    // Both rates must be known and positive before they are compared. This
+    // check also has to come before the modulo and the division below, which
+    // would otherwise divide by zero.
+    if (downsamplingRate == 0 || upsamplingRate == 0)
+      return rewriter.notifyMatchFailure(
+          op, "sampling rates must be positive constants");
+
+    if (downsamplingRate == upsamplingRate) {
+      // The two ops cancel out: forward the upsampling's input.
+      rewriter.replaceOp(op, upsamplingInput);
+      return success();
+    }
+
+    // Only an upsampling rate that is an exact multiple of the downsampling
+    // rate can be folded into a single upsampling.
+    if (upsamplingRate < downsamplingRate ||
+        upsamplingRate % downsamplingRate != 0)
+      return failure();
+
+    // Here upsamplingRate > downsamplingRate, so the quotient is >= 2.
+    double finalUpSamplingRate =
+        static_cast<double>(upsamplingRate) / downsamplingRate;
+
+    auto constOp_finalSamplingRate =
+        rewriter.create<ConstantOp>(op.getLoc(), finalUpSamplingRate);
+
+    auto finalUpSamplingOp = rewriter.create<UpsamplingOp>(
+        op.getLoc(), upsamplingInput, constOp_finalSamplingRate);
+
+    rewriter.replaceOp(op, finalUpSamplingOp);
+    // The IR has been modified, so this must be reported as success();
+    // returning failure() after a rewrite breaks the pattern driver contract.
+    return success();
+  }
+};
+
+// Pseudo-Code
+// Find back to back gain operations
+//  result1 = gain(input1, gain1)
+//  result2 = gain(result1, gain2)
+// if result1 is coming from another gain operation
+// result2 will be now gain(input1, gain1 * gain2)
+// replaceOp
+struct SimplifyBack2BackGain : public mlir::OpRewritePattern<GainOp> {
+  /// Register the pattern on every GainOp with a benefit of 1.
+  SimplifyBack2BackGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, /*benefit=*/1) {}
+
+  /// Fold gain(gain(x, g1), g2) into gain(x, g1 * g2).
+  ///
+  /// Returns failure() when the first operand of `op` is not produced by
+  /// another GainOp, success() once `op` has been replaced.
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+    // Only a gain that is fed directly by another gain can be folded.
+    auto innerGain = op.getOperand(0).getDefiningOp<GainOp>();
+    if (!innerGain)
+      return failure();
+
+    mlir::Location loc = op.getLoc();
+    mlir::Value source = innerGain.getOperand(0);
+    mlir::Value innerFactor = innerGain.getOperand(1);
+    mlir::Value outerFactor = op.getOperand(1);
+
+    // The two factors are combined with a MulOp (inner first, then outer)
+    // and applied to the original source in a single GainOp.
+    auto combinedFactor = rewriter.create<MulOp>(loc, innerFactor, outerFactor);
+    auto foldedGain =
+        rewriter.create<GainOp>(loc, source, combinedFactor.getResult());
+
+    // Every use of the outer gain now reads the folded gain instead.
+    rewriter.replaceOp(op, foldedGain.getResult());
+    return mlir::success();
+  }
+};
+
+// Pseudo-Code
+//  The mean of a diff telescopes to (input[-1] - input[0]) / len(diff).
+//  For example, for array (a, b, c, d, e)
+//  diff(array) = (b-a, c-b, d-c, e-d)
+//  mean(diff(array)) = ((b-a) + (c-b) + (d-c) + (e-d))/4 = (e-a)/4
+//  result1 = diff(input1, diff_length)
+//    NOTE: len(result1) == diff_length-1 virtually (the tensor size is fixed
+//    as len(input)-1).
+//  result2 = mean(result1, mean_length)
+// if mean_length <= (diff_length-1),
+// result2 will be now (input1[mean_length] - input1[0])/mean_length
+// replaceOp
+struct SimplifyDiff2Mean : public mlir::OpRewritePattern<MeanOp> {
+  /// Register the pattern on every MeanOp with a benefit of 1.
+  SimplifyDiff2Mean(mlir::MLIRContext *context)
+      : OpRewritePattern<MeanOp>(context, /*benefit=*/1) {}
+
+  /// Replace mean(diff(x, ...), n) with Diff2MeanOptimizedOp(x, n). Fails
+  /// when the first operand of `op` is not produced by a DiffOp.
+  // NOTE(review): the mean_length <= diff_length-1 condition from the
+  // pseudo-code is not checked here -- confirm the fused op handles it.
+  mlir::LogicalResult
+  matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override {
+    // The mean must consume the result of a diff for the rewrite to apply.
+    auto diffOp = op.getOperand(0).getDefiningOp<DiffOp>();
+    if (!diffOp)
+      return failure();
+
+    // Fused op inputs: the diff's source and the mean's second operand.
+    mlir::Value diffSource = diffOp.getOperand(0);
+    mlir::Value meanLength = op.getOperand(1);
+    auto fusedOp = rewriter.create<dsp::Diff2MeanOptimizedOp>(
+        op.getLoc(), diffSource, meanLength);
+
+    // Every use of the mean now reads the fused op instead.
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyLMS2FindPeaks : public mlir::OpRewritePattern<FindPeaksOp> {
+  /// Register the pattern on every FindPeaksOp with a benefit of 1.
+  SimplifyLMS2FindPeaks(mlir::MLIRContext *context)
+      : OpRewritePattern<FindPeaksOp>(context, /*benefit=*/1) {}
+
+  /// Fuse a FindPeaksOp whose first operand is produced by an
+  /// LMSFilterResponseOp into one LMS2FindPeaksOptimizedOp.
+  ///
+  /// The fused op receives the four LMSFilterResponseOp operands followed by
+  /// operands 1 and 2 of the FindPeaksOp, in that order.
+  ///
+  /// Returns failure() when the first operand of `op` does not come from an
+  /// LMSFilterResponseOp, success() once `op` has been replaced.
+  mlir::LogicalResult
+  matchAndRewrite(FindPeaksOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Check that operand 0 is the result of an LMSFilterResponseOp.
+    auto lmsOp = op.getOperand(0).getDefiningOp<LMSFilterResponseOp>();
+    if (!lmsOp)
+      return failure();
+
+    // Operands forwarded from the LMS filter response.
+    mlir::Value lmsArg0 = lmsOp.getOperand(0);
+    mlir::Value lmsArg1 = lmsOp.getOperand(1);
+    mlir::Value lmsArg2 = lmsOp.getOperand(2);
+    mlir::Value lmsArg3 = lmsOp.getOperand(3);
+
+    // Remaining FindPeaksOp operands (operand 0 was the LMS result itself).
+    mlir::Value peaksArg1 = op.getOperand(1);
+    mlir::Value peaksArg2 = op.getOperand(2);
+
+    auto fusedOp = rewriter.create<dsp::LMS2FindPeaksOptimizedOp>(
+        op.getLoc(), lmsArg0, lmsArg1, lmsArg2, lmsArg3, peaksArg1,
+        peaksArg2);
+
+    // Every use of the FindPeaksOp now reads the fused op instead.
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyFindPeaks2Diff2Mean : public mlir::OpRewritePattern<MeanOp> {
+  /// Register the pattern on every MeanOp with a benefit of 1.
+  SimplifyFindPeaks2Diff2Mean(mlir::MLIRContext *context)
+      : OpRewritePattern<MeanOp>(context, /*benefit=*/1) {}
+
+  /// Fuse the chain MeanOp(DiffOp(FindPeaksOp(a, b, c), ...), ...) into one
+  /// FindPeaks2Diff2MeanOptimizedOp(a, b, c).
+  ///
+  /// Only the three FindPeaksOp operands are forwarded; the remaining DiffOp
+  /// and MeanOp operands are not passed to the fused op.
+  ///
+  /// Returns failure() unless operand 0 of `op` comes from a DiffOp whose own
+  /// operand 0 comes from a FindPeaksOp; success() once `op` was replaced.
+  mlir::LogicalResult
+  matchAndRewrite(MeanOp op, mlir::PatternRewriter &rewriter) const override {
+    // First link of the chain: the mean must consume a diff.
+    auto diffOp = op.getOperand(0).getDefiningOp<DiffOp>();
+    if (!diffOp)
+      return failure();
+
+    // Second link: the diff must consume the result of a FindPeaksOp.
+    auto peaksOp = diffOp.getOperand(0).getDefiningOp<FindPeaksOp>();
+    if (!peaksOp)
+      return failure();
+
+    mlir::Value peaksArg0 = peaksOp.getOperand(0);
+    mlir::Value peaksArg1 = peaksOp.getOperand(1);
+    mlir::Value peaksArg2 = peaksOp.getOperand(2);
+
+    auto fusedOp = rewriter.create<dsp::FindPeaks2Diff2MeanOptimizedOp>(
+        op.getLoc(), peaksArg0, peaksArg1, peaksArg2);
+
+    // Every use of the mean now reads the fused op instead.
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyMedian2Sliding
+    : public mlir::OpRewritePattern<SlidingWindowAvgOp> {
+  /// Register the pattern on every SlidingWindowAvgOp with a benefit of 1.
+  SimplifyMedian2Sliding(mlir::MLIRContext *context)
+      : OpRewritePattern<SlidingWindowAvgOp>(context, /*benefit=*/1) {}
+
+  /// Fuse SlidingWindowAvgOp(MedianFilterOp(x)) into a single
+  /// Median2SlidingOptimizedOp(x).
+  ///
+  /// Returns failure() when the operand of `op` is not produced by a
+  /// MedianFilterOp, success() once `op` has been replaced.
+  mlir::LogicalResult
+  matchAndRewrite(SlidingWindowAvgOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    // The sliding average must consume the result of a MedianFilterOp.
+    auto medianOp = op.getOperand().getDefiningOp<MedianFilterOp>();
+    if (!medianOp)
+      return failure();
+
+    // The fused op is created directly on the median filter's operand.
+    mlir::Value medianSource = medianOp.getOperand();
+    auto fusedOp = rewriter.create<dsp::Median2SlidingOptimizedOp>(
+        op.getLoc(), medianSource);
+
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+struct SimplifyBack2BackDelay : public mlir::OpRewritePattern<DelayOp> {
+  /// Register the pattern on every DelayOp with a benefit of 1.
+  SimplifyBack2BackDelay(mlir::MLIRContext *context)
+      : OpRewritePattern<DelayOp>(context, /*benefit=*/1) {}
+
+  /// Fold delay(delay(x, d1), d2) into delay(x, d1 + d2).
+  ///
+  /// Returns failure() when the first operand of `op` is not produced by
+  /// another DelayOp, success() once `op` has been replaced.
+  mlir::LogicalResult
+  matchAndRewrite(DelayOp op, mlir::PatternRewriter &rewriter) const override {
+    // Only a delay that is fed directly by another delay can be folded.
+    auto innerDelay = op.getOperand(0).getDefiningOp<DelayOp>();
+    if (!innerDelay)
+      return failure();
+
+    mlir::Location loc = op.getLoc();
+    mlir::Value source = innerDelay.getOperand(0);
+    mlir::Value innerAmount = innerDelay.getOperand(1);
+    mlir::Value outerAmount = op.getOperand(1);
+
+    // The two delay amounts are summed with an AddOp (inner first, then
+    // outer) and applied to the original source in a single DelayOp.
+    auto totalAmount = rewriter.create<AddOp>(loc, innerAmount, outerAmount);
+    auto foldedDelay =
+        rewriter.create<DelayOp>(loc, source, totalAmount.getResult());
+
+    // Every use of the outer delay now reads the folded delay instead.
+    rewriter.replaceOp(op, foldedDelay.getResult());
+    return mlir::success();
+  }
+};
+
+// Pseudo-code
+// if operand of square is coming from real part of fft1d
+// replace fft1d with fft1dreal
+// still squareOp will remain same
+struct SimplifyFFTSquare : public mlir::OpRewritePattern<SquareOp> {
+  /// We register this pattern to match every SquareOp in the IR.
+  /// The "benefit" is used by the framework to order the patterns and process
+  /// them in order of profitability.
+  SimplifyFFTSquare(mlir::MLIRContext *context)
+      : OpRewritePattern<SquareOp>(context, /*benefit=*/1) {}
+
+  /// Rewrite square(fft1d(x)) as square(fft1dreal(x)).
+  ///
+  /// Returns failure() when the input of `op` is not produced by an FFT1DOp,
+  /// success() once `op` has been replaced.
+  // NOTE(review): the match does not look at which FFT1DOp result feeds the
+  // square; if FFT1DOp has more than one result, confirm that swapping in
+  // FFT1DRealOp is valid for each of them.
+  mlir::LogicalResult
+  matchAndRewrite(SquareOp op, mlir::PatternRewriter &rewriter) const override {
+    // Look through the input of the current square.
+    auto fftOp = op.getInput().getDefiningOp<FFT1DOp>();
+
+    // Input defined by an FFT1D? If not, no match.
+    if (!fftOp)
+      return failure();
+
+    mlir::Location loc = op.getLoc();
+
+    // Recompute the transform with FFT1DRealOp on the FFT1DOp's own input,
+    // then square that result with a fresh SquareOp.
+    mlir::Value fftSource = fftOp.getInput();
+    auto realFftOp = rewriter.create<FFT1DRealOp>(loc, fftSource);
+    auto newSquareOp = rewriter.create<SquareOp>(loc, realFftOp);
+
+    rewriter.replaceOp(op, newSquareOp);
+    return mlir::success();
+  }
+};
+
+/// Folds `gain(constant_input, constant_zero)` into an all-zero constant.
+struct SimplifyGainwZero : public mlir::OpRewritePattern<GainOp> {
+  SimplifyGainwZero(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  /// Matches a GainOp whose gain (operand 1) is a dsp.constant whose first
+  /// element is exactly zero and whose input (operand 0) is also a
+  /// dsp.constant. The op is replaced by a 1-D f64 constant of zeros with as
+  /// many elements as the input constant. Returns failure otherwise.
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+    // The gain must be a compile-time constant; anything else cannot be
+    // inspected here.
+    dsp::ConstantOp gainConstant =
+        op.getOperand(1).getDefiningOp<dsp::ConstantOp>();
+    if (!gainConstant)
+      return failure();
+
+    DenseElementsAttr gainAttr = gainConstant.getValue();
+    if (gainAttr.empty())
+      return failure();
+
+    // Compare the floating-point value itself. Truncating it to an integer
+    // first would treat every gain in (-1, 1), e.g. 0.5, as zero.
+    auto gainElements = gainAttr.getValues<FloatAttr>();
+    const double gain = gainElements[0].getValueAsDouble();
+    if (gain != 0.0)
+      return failure();
+
+    // The input must be a constant as well so that its element count is known.
+    dsp::ConstantOp inputConstant =
+        op.getOperand(0).getDefiningOp<dsp::ConstantOp>();
+    if (!inputConstant)
+      return failure();
+
+    DenseElementsAttr inputAttr = inputConstant.getValue();
+    int64_t inputSize = inputAttr.size();
+
+    // Result type: tensor<inputSize x f64>.
+    RankedTensorType tensorType =
+        RankedTensorType::get({inputSize}, rewriter.getF64Type());
+
+    // Splat 0.0 over the whole tensor and materialise it as a constant.
+    DenseElementsAttr zerovalue = DenseElementsAttr::get(tensorType, 0.0);
+    Operation *constantOp = rewriter.create<ConstantOp>(op.getLoc(), zerovalue);
+
+    rewriter.replaceOp(op, constantOp);
+    return mlir::success();
+  }
+};
+
+// Pseudo-code
+// if operands of MulOp are coming from lowPassFIRFilter & hamming
+// then replace the MulOp with the symmetrical operation
+struct SimplifyFilterMulHamming : public mlir::OpRewritePattern<MulOp> {
+  /// Registers this pattern on dsp MulOp with a benefit of 1.
+  SimplifyFilterMulHamming(mlir::MLIRContext *context)
+      : OpRewritePattern<MulOp>(context, /*benefit=*/1) {}
+
+  /// Matches `mul(lowPassFIRFilter(wc, n), hammingWindow(...))` -- the filter
+  /// must be the left operand and the window the right one -- and replaces
+  /// the multiplication with `FIRFilterHammingOptimized(wc, n)`.
+  /// Only the filter's operands are forwarded; the window's operands are not
+  /// inspected.
+  mlir::LogicalResult
+  matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override {
+    auto lowPassFilter = op.getLhs().getDefiningOp<LowPassFIRFilterOp>();
+    auto hammingWindow = op.getRhs().getDefiningOp<HammingWindowOp>();
+
+    // Both producers are required, otherwise there is nothing to fuse.
+    if (!(lowPassFilter && hammingWindow))
+      return failure();
+
+    mlir::Value wcOperand = lowPassFilter.getWc();
+    mlir::Value nOperand = lowPassFilter.getN();
+    auto fusedFilter = rewriter.create<FIRFilterHammingOptimizedOp>(
+        op.getLoc(), wcOperand, nOperand);
+
+    rewriter.replaceOp(op, fusedFilter);
+    return mlir::success();
+  }
+};
+
+// Pseudo-code
+// if operands of MulOp are coming from highPassFIRFilter & hamming
+// then replace the MulOp with the symmetrical operation
+struct SimplifyHighPassFIRHamming : public mlir::OpRewritePattern<MulOp> {
+  /// Registers this pattern on dsp MulOp with a benefit of 1.
+  SimplifyHighPassFIRHamming(mlir::MLIRContext *context)
+      : OpRewritePattern<MulOp>(context, /*benefit=*/1) {}
+
+  /// Matches `mul(highPassFIRFilter(wc, n), hammingWindow(...))` -- the filter
+  /// must be the left operand and the window the right one -- and replaces
+  /// the multiplication with `HighPassFIRHammingOptimized(wc, n)`.
+  /// Only the filter's operands are forwarded; the window's operands are not
+  /// inspected.
+  mlir::LogicalResult
+  matchAndRewrite(MulOp op, mlir::PatternRewriter &rewriter) const override {
+    auto highPassFilter = op.getLhs().getDefiningOp<HighPassFIRFilterOp>();
+    auto hammingWindow = op.getRhs().getDefiningOp<HammingWindowOp>();
+
+    // Both producers are required, otherwise there is nothing to fuse.
+    if (!(highPassFilter && hammingWindow))
+      return failure();
+
+    mlir::Value wcOperand = highPassFilter.getWc();
+    mlir::Value nOperand = highPassFilter.getN();
+    auto fusedFilter = rewriter.create<HighPassFIRHammingOptimizedOp>(
+        op.getLoc(), wcOperand, nOperand);
+
+    rewriter.replaceOp(op, fusedFilter);
+    return mlir::success();
+  }
+};
+
+// Pseudo-Code
+// Find a FIRFilterResponse that consumes a FIRFilterHammingOptimized operation:
+//  result1 = dsp.FIRFilterHammingOptimized(input1, rate1) // filter and hamming
+//  result2 = dsp.FIRFilterResponse(result1, rate2) // FilterResponse
+// For the above pattern, replace dsp.FIRFilterResponse with
+// FIRFilterResSymmOptimized:
+//  result1 = dsp.FIRFilterHammingOptimized(input1, rate1)
+//  result2 = dsp.FIRFilterResSymmOptimized(result1, rate2)
+struct SimplifyFIRFilterRespnseWithSymmFilter
+    : public mlir::OpRewritePattern<FIRFilterResponseOp> {
+  /// Registers this pattern on dsp FIRFilterResponseOp with a benefit of 1.
+  SimplifyFIRFilterRespnseWithSymmFilter(mlir::MLIRContext *context)
+      : OpRewritePattern<FIRFilterResponseOp>(context, /*benefit=*/1) {}
+
+  /// Replaces a FIRFilterResponseOp whose operand 1 is produced by a
+  /// FIRFilterHammingOptimizedOp with a FIRFilterResSymmOptimizedOp taking
+  /// the same two operands. Returns failure when operand 1 has another
+  /// producer.
+  mlir::LogicalResult
+  matchAndRewrite(FIRFilterResponseOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Value filterOperand = op.getOperand(1);
+    mlir::Value signalOperand = op.getOperand(0);
+
+    // Only a filter built by FIRFilterHammingOptimizedOp qualifies.
+    auto symmFilter = filterOperand.getDefiningOp<FIRFilterHammingOptimizedOp>();
+    if (!symmFilter)
+      return failure();
+
+    auto symmResponse = rewriter.create<FIRFilterResSymmOptimizedOp>(
+        op.getLoc(), signalOperand, filterOperand);
+    rewriter.replaceOp(op, symmResponse);
+    return mlir::success();
+  }
+};
+
+// label: pass 1st
+// Pseudo code:
+//  If an FFT1DRealOp and the FFT1DImgOp right after it have the same input,
+//  replace both with a single FFT1DOp:
+//  %4 = "dsp.fft1dreal"(%3) : (tensor<10xf64>) -> tensor<10xf64>
+//  %5 = "dsp.fft1dimg"(%3) : (tensor<10xf64>) -> tensor<10xf64>
+//  becomes
+//  %4, %5 = "dsp.fft1d"(%3) : (tensor<10xf64>) -> (tensor<10xf64>,
+//  tensor<10xf64>)
+//
+//  Define the canonicalization pattern.
+struct SimplifyFFTRealAndImg : public OpRewritePattern<FFT1DRealOp> {
+  /// Registers this pattern on dsp FFT1DRealOp with a benefit of 1.
+  SimplifyFFTRealAndImg(MLIRContext *context)
+      : OpRewritePattern<FFT1DRealOp>(context, /*benefit=*/1) {}
+
+  /// Fuses an FFT1DRealOp and the FFT1DImgOp that immediately follows it in
+  /// the block into one FFT1DOp, provided both read the same input. Result 0
+  /// of the fused op replaces the real part and result 1 the imaginary part.
+  LogicalResult matchAndRewrite(FFT1DRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // Only the very next operation is considered as the imaginary partner.
+    Operation *follower = realOp->getNextNode();
+    if (!follower)
+      return failure();
+    if (!isa<FFT1DImgOp>(follower))
+      return failure();
+
+    auto imgOp = cast<FFT1DImgOp>(follower);
+    if (realOp.getInput() != imgOp.getInput())
+      return failure();
+
+    auto fusedFFT =
+        rewriter.create<FFT1DOp>(realOp.getLoc(), realOp.getInput());
+    rewriter.replaceOp(realOp, fusedFFT.getResult(0));
+    rewriter.replaceOp(imgOp, fusedFFT.getResult(1));
+
+    return success();
+  }
+};
+
+// Pseudo-Code
+// Find a FIRFilterResponse whose second operand comes from reverseInput:
+//  %1 = "dsp.reverseInput"(%0) : (tensor<4xf64>) -> tensor<*xf64>
+//  %2 = "dsp.FIRFilterResponse"(%0, %1) : (tensor<4xf64>, tensor<*xf64>) ->
+//  tensor<*xf64>
+// For the above pattern, replace dsp.FIRFilterResponse with
+// FIRFilterYSymmOptimized:
+//  %1 = "dsp.reverseInput"(%0)
+//  %2 = dsp.FIRFilterYSymmOptimized(%0, %1)
+struct SimplifyFilterRespX_ReverseXYSymmFilter
+    : public mlir::OpRewritePattern<FIRFilterResponseOp> {
+  /// Registers this pattern on dsp FIRFilterResponseOp with a benefit of 1.
+  SimplifyFilterRespX_ReverseXYSymmFilter(mlir::MLIRContext *context)
+      : OpRewritePattern<FIRFilterResponseOp>(context, /*benefit=*/1) {}
+
+  /// Matches `FIRFilterResponse(x, reverseInput(x))` and replaces it with
+  /// `FIRFilterYSymmOptimized(x, reverseInput(x))`.
+  ///
+  /// Returns failure when operand 1 is not produced by a ReverseInputOp, or
+  /// when that ReverseInputOp reverses a value other than operand 0.
+  mlir::LogicalResult
+  matchAndRewrite(FIRFilterResponseOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Value Operand1_forFIRFilterResp = op.getOperand(1);
+    mlir::Value Operand0_forFIRFilterResp = op.getOperand(0);
+    dsp::ReverseInputOp prev_ReverseOp =
+        Operand1_forFIRFilterResp.getDefiningOp<ReverseInputOp>();
+
+    // Operand1 defined by a ReverseInputOp? If not, no match.
+    if (!prev_ReverseOp) {
+      return failure();
+    }
+
+    // The documented pattern is the response of x with its own reversal.
+    // Reversing some other tensor is a different computation, so do not
+    // rewrite it to the Y-symmetric form.
+    if (prev_ReverseOp->getOperand(0) != Operand0_forFIRFilterResp) {
+      return failure();
+    }
+
+    // Create FIRFilterYSymmOptimizedOp with the current operands.
+    auto firFilterResYSymmOptimizedOp =
+        rewriter.create<FIRFilterYSymmOptimizedOp>(
+            op.getLoc(), Operand0_forFIRFilterResp, Operand1_forFIRFilterResp);
+
+    rewriter.replaceOp(op, firFilterResYSymmOptimizedOp);
+
+    return mlir::success();
+  }
+};
+
+// Pseudo code:
+//  if the  input of FFT1DRealOp = FIRFilterYSymmOptimizedOp then replace it
+//  with FFT1DRealSymmOp Define the canonicalization pattern.
+struct SimplifyFFTRealAtInputRealSymm : public OpRewritePattern<FFT1DRealOp> {
+  /// Registers this pattern on dsp FFT1DRealOp with a benefit of 1.
+  SimplifyFFTRealAtInputRealSymm(MLIRContext *context)
+      : OpRewritePattern<FFT1DRealOp>(context, /*benefit=*/1) {}
+
+  /// Replaces an FFT1DRealOp whose input is produced by a
+  /// FIRFilterYSymmOptimizedOp with an FFT1DRealSymmOp on the same input.
+  /// Returns failure for any other producer.
+  LogicalResult matchAndRewrite(FFT1DRealOp Op,
+                                PatternRewriter &rewriter) const override {
+    auto symmProducer =
+        Op.getInput().getDefiningOp<FIRFilterYSymmOptimizedOp>();
+    if (!symmProducer)
+      return failure();
+
+    // Same input, specialised op.
+    auto symmFFT = rewriter.create<FFT1DRealSymmOp>(Op.getLoc(), Op.getInput());
+    rewriter.replaceOp(Op, symmFFT);
+    return success();
+  }
+};
+
+// Pseudo code:
+//  if the  input of FFT1DImgOp = FIRFilterYSymmOptimizedOp then replace it with
+//  FFT1DImgConjSymmOp Define the canonicalization pattern.
+struct SimplifyFFTImgAtInputRealSymm : public OpRewritePattern<FFT1DImgOp> {
+  /// Registers this pattern on dsp FFT1DImgOp with a benefit of 1.
+  SimplifyFFTImgAtInputRealSymm(MLIRContext *context)
+      : OpRewritePattern<FFT1DImgOp>(context, /*benefit=*/1) {}
+
+  /// Replaces an FFT1DImgOp whose input is produced by a
+  /// FIRFilterYSymmOptimizedOp with an FFT1DImgConjSymmOp on the same input.
+  /// Returns failure for any other producer.
+  LogicalResult matchAndRewrite(FFT1DImgOp Op,
+                                PatternRewriter &rewriter) const override {
+    auto symmProducer =
+        Op.getInput().getDefiningOp<FIRFilterYSymmOptimizedOp>();
+    if (!symmProducer)
+      return failure();
+
+    // Same input, specialised op.
+    auto conjSymmFFT =
+        rewriter.create<FFT1DImgConjSymmOp>(Op.getLoc(), Op.getInput());
+    rewriter.replaceOp(Op, conjSymmFFT);
+    return success();
+  }
+};
+
+// Pseudo-Code
+// Find lmsFIlter with gain operation
+//  result1 = lmsFilter(noisy_sig, clean_sig, mu, filterSize, iter);
+//  result2 = gain(result1, G1)
+// result2 will be now lmsFilter(noisy_sig, clean_sig, mu*g1,
+// filterSize, iter); replaceOp
+struct SimplifyLMSFilterwithGain
+    : public mlir::OpRewritePattern<GainOp> {
+  /// Registers this pattern on dsp GainOp with a benefit of 1.
+  SimplifyLMSFilterwithGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  /// Rewrites `gain(lmsFilter(a, b, mu, c, d), g)` into
+  /// `lmsFilter(a, b, mul(mu, g), c, d)`. Returns failure when the gain's
+  /// first operand is not produced by an LMSFilterOp.
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+    auto lmsFilter = op.getOperand(0).getDefiningOp<LMSFilterOp>();
+    if (!lmsFilter)
+      return failure();
+
+    mlir::Location loc = op.getLoc();
+    mlir::Value gainFactor = op.getOperand(1);
+    mlir::Value mu = lmsFilter.getOperand(2);
+
+    // Fold the gain into the filter's third operand (mu).
+    auto scaledMu = rewriter.create<MulOp>(loc, mu, gainFactor);
+
+    // Rebuild the filter with every other operand unchanged.
+    auto rebuiltFilter = rewriter.create<LMSFilterOp>(
+        loc, lmsFilter.getOperand(0), lmsFilter.getOperand(1),
+        scaledMu.getResult(), lmsFilter.getOperand(3),
+        lmsFilter.getOperand(4));
+
+    rewriter.replaceOp(op, rebuiltFilter.getResult());
+    return mlir::success();
+  }
+};
+
+
+
+
+
+
+
+
+// Pseudo-Code
+// Find lmsFIlterResponse with gain operation
+//  result1 = lmsFilterResponse(noisy_sig, clean_sig, mu, filterSize);
+//  result2 = gain(result1, G1)
+// result2 will be now lmsFilterResponse(noisy_sig, clean_sig, mu*g1,
+// filterSize); replaceOp
+struct SimplifyLMSFilterResponsewithGain
+    : public mlir::OpRewritePattern<GainOp> {
+  /// Registers this pattern on dsp GainOp with a benefit of 1.
+  SimplifyLMSFilterResponsewithGain(mlir::MLIRContext *context)
+      : OpRewritePattern<GainOp>(context, 1) {}
+
+  /// Rewrites `gain(lmsFilterResponse(a, b, c, d), g)` into
+  /// `LMSFilterResponse2Gain(a, b, c, d, g)`. Returns failure when the gain's
+  /// first operand is not produced by an LMSFilterResponseOp.
+  mlir::LogicalResult
+  matchAndRewrite(GainOp op, mlir::PatternRewriter &rewriter) const override {
+    auto lmsResponse = op.getOperand(0).getDefiningOp<LMSFilterResponseOp>();
+    if (!lmsResponse)
+      return failure();
+
+    mlir::Value gainFactor = op.getOperand(1);
+
+    // Forward the four filter operands and append the gain as the fifth.
+    auto fusedOp = rewriter.create<LMSFilterResponse2GainOp>(
+        op.getLoc(), lmsResponse.getOperand(0), lmsResponse.getOperand(1),
+        lmsResponse.getOperand(2), lmsResponse.getOperand(3), gainFactor);
+
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+
+
+
+
+
+
+
+
+
+/// Cancels a SpaceDemodulateOp against a SpaceModulateOp found upstream.
+struct SimplifySpaceModDemodulate
+    : public mlir::OpRewritePattern<SpaceDemodulateOp> {
+  SimplifySpaceModDemodulate(mlir::MLIRContext *context)
+      : OpRewritePattern<SpaceDemodulateOp>(context, 1) {}
+
+  /// Walks backwards from the demodulator's operand, following operand 0 of
+  /// each defining op, until a SpaceModulateOp is found. On success the
+  /// demodulator is replaced by that modulator's input value. Returns failure
+  /// when the walk ends (block argument or operand-less op) without finding
+  /// a modulator.
+  mlir::LogicalResult
+  matchAndRewrite(SpaceDemodulateOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+
+    SpaceModulateOp prev_mod;
+    mlir::Value iter = op.getOperand();
+    while (Operation *pred = iter.getDefiningOp()) {
+      if (auto mod = llvm::dyn_cast<SpaceModulateOp>(pred)) {
+        prev_mod = mod;
+        break;
+      }
+      // Ops without operands (e.g. constants) end the chain; asking them for
+      // operand 0 would be out of bounds.
+      if (pred->getNumOperands() == 0)
+        break;
+      iter = pred->getOperand(0);
+    }
+
+    if (!prev_mod)
+      return failure();
+
+    // Replace with the modulator's input value itself rather than its
+    // defining op: the value may be a block argument, which has no defining
+    // op at all.
+    rewriter.replaceOp(op, prev_mod.getOperand());
+    return mlir::success();
+  }
+};
+
+struct SimplifyNormLMSFilterResponse
+    : public mlir::OpRewritePattern<NormalizeOp> {
+  /// Registers this pattern on dsp NormalizeOp with a benefit of 1.
+  SimplifyNormLMSFilterResponse(mlir::MLIRContext *ctx)
+      : OpRewritePattern<NormalizeOp>(ctx, 1) {}
+
+  /// Rewrites `normalize(lmsFilterResponse(a, b, c, d))` into
+  /// `NormLMSFilterResponseOptimize(a, b, c, d)` and erases the original
+  /// filter op if nothing else uses it afterwards.
+  mlir::LogicalResult
+  matchAndRewrite(NormalizeOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+
+    Operation *lmsResponse =
+        op.getOperand().getDefiningOp<LMSFilterResponseOp>();
+    if (!lmsResponse)
+      return failure();
+
+    Value arg0 = lmsResponse->getOperand(0);
+    Value arg1 = lmsResponse->getOperand(1);
+    Value arg2 = lmsResponse->getOperand(2);
+    Value arg3 = lmsResponse->getOperand(3);
+
+    auto fusedOp = rewriter.create<NormLMSFilterResponseOptimizeOp>(
+        op.getLoc(), arg0, arg1, arg2, arg3);
+    rewriter.replaceOp(op, fusedOp);
+
+    // The filter may have had the normalize op as its only user.
+    if (lmsResponse->use_empty())
+      rewriter.eraseOp(lmsResponse);
+
+    return mlir::success();
+  }
+};
+
+/// Collapses `div(sum(add(square(fft1dreal(x)), square(fft1dreal(x)))), _)`
+/// into `sum(square(x))`.
+struct SimplifyDSSDPass : public mlir::OpRewritePattern<DivOp> {
+  SimplifyDSSDPass(mlir::MLIRContext *ctx) : OpRewritePattern<DivOp>(ctx, 1) {}
+
+  /// Matches a DivOp whose first operand is the chain
+  /// sum <- add <- (square, square) <- (fft1dreal, fft1dreal) with both FFT
+  /// ops reading the same input, and replaces the DivOp with
+  /// `sum(square(input))`. Producers left without users are erased.
+  ///
+  /// NOTE(review): the divisor (operand 1 of the DivOp) is never inspected;
+  /// confirm that callers guarantee it is the length of the input.
+  mlir::LogicalResult
+  matchAndRewrite(DivOp op, mlir::PatternRewriter &rewriter) const override {
+
+// NOTE: these helper macros are not #undef'd and therefore stay visible for
+// the rest of the translation unit. They are kept unchanged because code
+// further down the file may rely on them.
+#define CHECK(x)                                                               \
+  if (!x)                                                                      \
+    return failure();
+#define REMOVE(x)                                                              \
+  if (x->use_empty())                                                          \
+    rewriter.eraseOp(x);
+#define DEBUG(x)                                                               \
+  { llvm::errs() << "check for " << x << "\n"; }
+#define PASS llvm::errs() << "pass\n";
+
+    auto loc = op.getLoc();
+
+    // pattern -> CHECK()
+    Operation *sumOp = op.getOperand(0).getDefiningOp<SumOp>();
+    CHECK(sumOp);
+
+    Operation *addOp = sumOp->getOperand(0).getDefiningOp<AddOp>();
+    CHECK(addOp);
+
+    // Despite the "sqrt" in their names, these are the two SquareOps.
+    Operation *sqrtOp0 = addOp->getOperand(0).getDefiningOp<SquareOp>();
+    CHECK(sqrtOp0);
+
+    Operation *sqrtOp1 = addOp->getOperand(1).getDefiningOp<SquareOp>();
+    CHECK(sqrtOp1);
+
+    Operation *fftRealOp = sqrtOp0->getOperand(0).getDefiningOp<FFT1DRealOp>();
+    CHECK(fftRealOp);
+
+    // See defining op: supposed to be fftImg, but modified beforehand by the
+    // <label> pass 1st, so an FFT1DRealOp is expected here as well.
+    Operation *fftImgOp = sqrtOp1->getOperand(0).getDefiningOp<FFT1DRealOp>();
+    CHECK(fftImgOp);
+
+    // Both FFT ops must read the same input.
+    Value input1 = fftRealOp->getOperand(0);
+    Value input2 = fftImgOp->getOperand(0);
+    CHECK((input1 == input2));
+
+    auto newSqrt = rewriter.create<SquareOp>(loc, input1);
+    auto newResult = rewriter.create<SumOp>(loc, newSqrt);
+
+    rewriter.replaceOp(op, newResult);
+
+    // Clean up consumers before their producers: a producer's use list only
+    // becomes empty once its consumer is gone. The two squares (and the two
+    // FFT ops) may be the same operation, so never visit one twice.
+    REMOVE(sumOp);
+    REMOVE(addOp);
+    REMOVE(sqrtOp0);
+    if (sqrtOp1 != sqrtOp0) {
+      REMOVE(sqrtOp1);
+    }
+    REMOVE(fftRealOp);
+    if (fftImgOp != fftRealOp) {
+      REMOVE(fftImgOp);
+    }
+
+    return mlir::success();
+  }
+};
+
+struct SimplifyFIRFilterHammingThreholdUpOptimized
+    : public mlir::OpRewritePattern<ThresholdUpOp> {
+  /// Registers this pattern on dsp ThresholdUpOp with a benefit of 1.
+  SimplifyFIRFilterHammingThreholdUpOptimized(mlir::MLIRContext *context)
+      : OpRewritePattern<ThresholdUpOp>(context, /*benefit=*/1) {}
+
+  /// Rewrites `thresholdUp(FIRFilterResSymmOptimized(a, b), t1, t2)` into
+  /// `FIRFilterResSymmThresholdUpOptimized(a, b, t1, t2)`. Returns failure
+  /// when operand 0 is not produced by a FIRFilterResSymmOptimizedOp.
+  mlir::LogicalResult
+  matchAndRewrite(ThresholdUpOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto symmResponse =
+        op.getOperand(0).getDefiningOp<FIRFilterResSymmOptimizedOp>();
+    if (!symmResponse)
+      return failure();
+
+    // Operands 1 and 2 of the threshold op are carried over unchanged.
+    mlir::Value thresholdArg1 = op.getOperand(1);
+    mlir::Value thresholdArg2 = op.getOperand(2);
+    Value filterArg0 = symmResponse->getOperand(0);
+    Value filterArg1 = symmResponse->getOperand(1);
+
+    auto fusedOp = rewriter.create<FIRFilterResSymmThresholdUpOptimizedOp>(
+        op.getLoc(), filterArg0, filterArg1, thresholdArg1, thresholdArg2);
+    rewriter.replaceOp(op, fusedOp);
+
+    return mlir::success();
+  }
+};
+
+//  Define the canonicalization pattern.
+struct SimplifyFFTAbs : public OpRewritePattern<FFTRealOp> {
+  SimplifyFFTAbs(MLIRContext *context)
+      : OpRewritePattern<FFTRealOp>(context, 1) {}
+
+  /// Matches six consecutive operations in a block:
+  ///   fftReal(x), fftImag(x), square(real), square(imag), add, sqrt(add)
+  /// and replaces the sqrt with the amplitude result of a single FFTAbsOp on
+  /// x. Intermediate ops are erased only when nothing else uses them.
+  /// Returns failure as soon as one link of the chain does not match.
+  LogicalResult matchAndRewrite(FFTRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // The next op must be the imaginary part of the same input.
+    Operation *nextofFFTRealOp = realOp->getNextNode();
+    if (!nextofFFTRealOp || !isa<FFTImagOp>(nextofFFTRealOp))
+      return failure();
+
+    auto fftImagOp = cast<FFTImagOp>(nextofFFTRealOp);
+    if (realOp.getLhs() != fftImagOp.getLhs())
+      return failure();
+
+    // Then the square of the real part.
+    Operation *nextofFFTImagOp = fftImagOp->getNextNode();
+    if (!nextofFFTImagOp || !isa<SquareOp>(nextofFFTImagOp))
+      return failure();
+
+    auto square1Op = cast<SquareOp>(nextofFFTImagOp);
+    if (realOp.getResult() != square1Op.getInput())
+      return failure();
+
+    // Then the square of the imaginary part.
+    Operation *nextofSquare1Op = square1Op->getNextNode();
+    if (!nextofSquare1Op || !isa<SquareOp>(nextofSquare1Op))
+      return failure();
+
+    auto square2Op = cast<SquareOp>(nextofSquare1Op);
+    if (fftImagOp.getResult() != square2Op.getInput())
+      return failure();
+
+    // Then the sum of both squares, in either operand order.
+    Operation *nextofSquare2Op = square2Op->getNextNode();
+    if (!nextofSquare2Op || !isa<AddOp>(nextofSquare2Op))
+      return failure();
+
+    auto addOp = cast<AddOp>(nextofSquare2Op);
+    if ((addOp.getLhs() != square1Op.getResult() ||
+         addOp.getRhs() != square2Op.getResult()) &&
+        (addOp.getRhs() != square1Op.getResult() ||
+         addOp.getLhs() != square2Op.getResult()))
+      return failure();
+
+    // Finally the square root of that sum.
+    Operation *nextofAddOp = addOp->getNextNode();
+    if (!nextofAddOp || !isa<SqrtOp>(nextofAddOp))
+      return failure();
+
+    auto sqrtOp = cast<SqrtOp>(nextofAddOp);
+    if (sqrtOp.getInput() != addOp.getResult())
+      return failure();
+
+    auto combinedOp =
+        rewriter.create<FFTAbsOp>(realOp.getLoc(), realOp.getLhs());
+    rewriter.replaceOp(sqrtOp, combinedOp.getAmplitude());
+
+    // Any intermediate value may have users outside the matched chain, and
+    // erasing an op that still has uses is invalid. Walk from consumer to
+    // producer and drop only what has become dead.
+    auto eraseIfUnused = [&rewriter](Operation *candidate) {
+      if (candidate->use_empty())
+        rewriter.eraseOp(candidate);
+    };
+    eraseIfUnused(addOp);
+    eraseIfUnused(square2Op);
+    eraseIfUnused(square1Op);
+    eraseIfUnused(fftImagOp);
+    eraseIfUnused(realOp);
+
+    return success();
+  }
+};
+
+struct SimplifyDFTAbs : public OpRewritePattern<FFT1DRealOp> {
+  SimplifyDFTAbs(MLIRContext *context)
+      : OpRewritePattern<FFT1DRealOp>(context, 1) {}
+
+  /// Matches six consecutive operations in a block:
+  ///   fft1dreal(x), fft1dimg(x), square(real), square(img), add, sqrt(add)
+  /// and replaces the sqrt with the amplitude result of a single DFTAbsOp on
+  /// x. Intermediate ops are erased only when nothing else uses them.
+  /// Returns failure as soon as one link of the chain does not match.
+  LogicalResult matchAndRewrite(FFT1DRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // The next op must be the imaginary part of the same input.
+    Operation *nextofFFTRealOp = realOp->getNextNode();
+    if (!nextofFFTRealOp || !isa<FFT1DImgOp>(nextofFFTRealOp))
+      return failure();
+
+    auto fftImagOp = cast<FFT1DImgOp>(nextofFFTRealOp);
+    if (realOp.getInput() != fftImagOp.getInput())
+      return failure();
+
+    // Then the square of the real part.
+    Operation *nextofFFTImagOp = fftImagOp->getNextNode();
+    if (!nextofFFTImagOp || !isa<SquareOp>(nextofFFTImagOp))
+      return failure();
+
+    auto square1Op = cast<SquareOp>(nextofFFTImagOp);
+    if (realOp.getResult() != square1Op.getInput())
+      return failure();
+
+    // Then the square of the imaginary part.
+    Operation *nextofSquare1Op = square1Op->getNextNode();
+    if (!nextofSquare1Op || !isa<SquareOp>(nextofSquare1Op))
+      return failure();
+
+    auto square2Op = cast<SquareOp>(nextofSquare1Op);
+    if (fftImagOp.getResult() != square2Op.getInput())
+      return failure();
+
+    // Then the sum of both squares, in either operand order.
+    Operation *nextofSquare2Op = square2Op->getNextNode();
+    if (!nextofSquare2Op || !isa<AddOp>(nextofSquare2Op))
+      return failure();
+
+    auto addOp = cast<AddOp>(nextofSquare2Op);
+    if ((addOp.getLhs() != square1Op.getResult() ||
+         addOp.getRhs() != square2Op.getResult()) &&
+        (addOp.getRhs() != square1Op.getResult() ||
+         addOp.getLhs() != square2Op.getResult()))
+      return failure();
+
+    // Finally the square root of that sum.
+    Operation *nextofAddOp = addOp->getNextNode();
+    if (!nextofAddOp || !isa<SqrtOp>(nextofAddOp))
+      return failure();
+
+    auto sqrtOp = cast<SqrtOp>(nextofAddOp);
+    if (sqrtOp.getInput() != addOp.getResult())
+      return failure();
+
+    auto combinedOp =
+        rewriter.create<DFTAbsOp>(realOp.getLoc(), realOp.getInput());
+    rewriter.replaceOp(sqrtOp, combinedOp.getAmplitude());
+
+    // Any intermediate value may have users outside the matched chain, and
+    // erasing an op that still has uses is invalid. Walk from consumer to
+    // producer and drop only what has become dead.
+    auto eraseIfUnused = [&rewriter](Operation *candidate) {
+      if (candidate->use_empty())
+        rewriter.eraseOp(candidate);
+    };
+    eraseIfUnused(addOp);
+    eraseIfUnused(square2Op);
+    eraseIfUnused(square1Op);
+    eraseIfUnused(fftImagOp);
+    eraseIfUnused(realOp);
+
+    return success();
+  }
+};
+
+struct SimplifyDFTAbsThreshold : public mlir::OpRewritePattern<ThresholdUpOp> {
+  /// Registers this pattern on dsp ThresholdUpOp with a benefit of 1.
+  SimplifyDFTAbsThreshold(mlir::MLIRContext *context)
+      : OpRewritePattern<ThresholdUpOp>(context, /*benefit=*/1) {}
+
+  /// Rewrites `thresholdUp(dftAbs(x), t1, t2)` into
+  /// `DFTAbsThresholdUp(x, t1, t2)`. Returns failure when operand 0 is not
+  /// produced by a DFTAbsOp.
+  mlir::LogicalResult
+  matchAndRewrite(ThresholdUpOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto dftAbs = op.getOperand(0).getDefiningOp<DFTAbsOp>();
+    if (!dftAbs)
+      return failure();
+
+    // Operands 1 and 2 of the threshold op are carried over unchanged.
+    mlir::Value thresholdArg1 = op.getOperand(1);
+    mlir::Value thresholdArg2 = op.getOperand(2);
+    Value dftInput = dftAbs->getOperand(0);
+
+    auto fusedOp = rewriter.create<DFTAbsThresholdUpOp>(
+        op.getLoc(), dftInput, thresholdArg1, thresholdArg2);
+    rewriter.replaceOp(op, fusedOp);
+
+    return mlir::success();
+  }
+};
+
+//  Define the canonicalization pattern.
+struct SimplifyFFTRealAndImagToFFT : public OpRewritePattern<FFTRealOp> {
+  /// Registers this pattern on dsp FFTRealOp with a benefit of 1.
+  SimplifyFFTRealAndImagToFFT(MLIRContext *context)
+      : OpRewritePattern<FFTRealOp>(context, /*benefit=*/1) {}
+
+  /// Fuses an FFTRealOp and the FFTImagOp that immediately follows it in the
+  /// block into one FFTOp, provided both read the same input. Result 0 of
+  /// the fused op replaces the real part and result 1 the imaginary part.
+  LogicalResult matchAndRewrite(FFTRealOp realOp,
+                                PatternRewriter &rewriter) const override {
+    // Only the very next operation is considered as the imaginary partner.
+    Operation *follower = realOp->getNextNode();
+    if (!follower)
+      return failure();
+    if (!isa<FFTImagOp>(follower))
+      return failure();
+
+    auto imgOp = cast<FFTImagOp>(follower);
+    if (realOp.getLhs() != imgOp.getLhs())
+      return failure();
+
+    auto fusedFFT = rewriter.create<FFTOp>(realOp.getLoc(), realOp.getLhs());
+    rewriter.replaceOp(realOp, fusedFFT.getResult(0));
+    rewriter.replaceOp(imgOp, fusedFFT.getResult(1));
+
+    return success();
+  }
+};
+
+
+struct SimplifyCorrel2Max : public mlir::OpRewritePattern<MaxOp> {
+  /// Registers this pattern on dsp MaxOp with a benefit of 1.
+  SimplifyCorrel2Max(mlir::MLIRContext *context)
+      : OpRewritePattern<MaxOp>(context, 1) {}
+
+  /// Rewrites `max(correlate(a, b))` into `Correl2MaxOptimized(a, b)`.
+  /// Returns failure when the operand is not produced by a CorrelateOp.
+  mlir::LogicalResult
+  matchAndRewrite(MaxOp op, mlir::PatternRewriter &rewriter) const override {
+    auto correlate = op.getOperand().getDefiningOp<CorrelateOp>();
+    if (!correlate)
+      return failure();
+
+    mlir::Value correlateArg0 = correlate.getOperand(0);
+    mlir::Value correlateArg1 = correlate.getOperand(1);
+
+    auto fusedOp = rewriter.create<dsp::Correl2MaxOptimizedOp>(
+        op.getLoc(), correlateArg0, correlateArg1);
+
+    // The fused op's result takes over every use of the max op.
+    rewriter.replaceOp(op, fusedOp.getResult());
+    return mlir::success();
+  }
+};
+
+// Pseudo-Code
+// Find pattern on DivOp
+//  %3 = "dsp.getRangeOfVector"(%0, %1, %2) : (tensor<f64>, tensor<f64>, tensor<f64>) -> tensor<*xf64>
+//  %4 = "dsp.fft1dreal"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %5 = "dsp.fft1dimg"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %6 = dsp.square(%4 : tensor<*xf64>) to tensor<*xf64>
+//  %7 = dsp.square(%5 : tensor<*xf64>) to tensor<*xf64>
+//  %8 = dsp.add %6, %7 : tensor<*xf64>
+//  %9 = dsp.sum(%8 : tensor<*xf64>) to tensor<*xf64>
+//  %10 = "dsp.len"(%3) : (tensor<*xf64>) -> tensor<*xf64>
+//  %11 = dsp.div %9, %10 : tensor<*xf64> 
+//  fft_real = fft1dreal(input); fft_img = fft1dimg(input)
+//  sq1 = square(fft_real)
+//  sq_abs = AddOp(sq1, square(fft_img)) // i.e. the `+` operator
+//  result1 = sum(sq_abs)
+//  len1 = len(input)
+//  result2 = DivOp(result1, len1)
+//
+// When a DivOp heads the chain above, the whole computation is replaced by
+//  sq = square(input)
+//  ans = sum(sq)
+
+/// Rewrites the frequency-domain form of a signal's energy,
+///   sum(square(fft1dreal(x)) + square(fft1dimg(x))) / len(x),
+/// into the time-domain form sum(square(x)). The two forms agree by
+/// Parseval's theorem (NOTE(review): assumes the FFT ops are unnormalized).
+struct SimplifyEnergyOfSignal : public mlir::OpRewritePattern<DivOp> {
+  SimplifyEnergyOfSignal(mlir::MLIRContext *context)
+      : OpRewritePattern<DivOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(DivOp op, mlir::PatternRewriter &rewriter) const override {
+    // The dividend must be a SumOp and the divisor a LengthOp.
+    SumOp prev_SumOp = op.getOperand(0).getDefiningOp<SumOp>();
+    LengthOp prev_LenOp = op.getOperand(1).getDefiningOp<LengthOp>();
+    if (!prev_SumOp || !prev_LenOp)
+      return failure();
+
+    // The summed value must be produced by an AddOp ...
+    AddOp prev_AddOp = prev_SumOp.getOperand().getDefiningOp<AddOp>();
+    if (!prev_AddOp)
+      return failure();
+
+    // ... whose two operands are both SquareOps.
+    SquareOp prev_SqOp0 = prev_AddOp.getOperand(0).getDefiningOp<SquareOp>();
+    SquareOp prev_SqOp1 = prev_AddOp.getOperand(1).getDefiningOp<SquareOp>();
+    if (!prev_SqOp0 || !prev_SqOp1)
+      return failure();
+
+    // The first square must take the real FFT part and the second one the
+    // imaginary part; the swapped operand order is not matched.
+    FFT1DRealOp prev_fftRealOp =
+        prev_SqOp0.getOperand().getDefiningOp<FFT1DRealOp>();
+    FFT1DImgOp prev_fftImgOp =
+        prev_SqOp1.getOperand().getDefiningOp<FFT1DImgOp>();
+    if (!prev_fftRealOp || !prev_fftImgOp)
+      return failure();
+
+    // The identity only holds when the real part, the imaginary part and the
+    // length are all taken from the same signal. Without this check an
+    // unrelated expression such as
+    //   sum(square(fft1dreal(a)) + square(fft1dimg(b))) / len(c)
+    // would wrongly be rewritten to sum(square(a)).
+    mlir::Value input = prev_fftRealOp.getOperand();
+    if (prev_fftImgOp->getOperand(0) != input ||
+        prev_LenOp->getOperand(0) != input)
+      return failure();
+
+    // Emit sum(square(input)) ...
+    auto ansSqOp = rewriter.create<SquareOp>(op.getLoc(), input);
+    auto ansSumOp = rewriter.create<SumOp>(op.getLoc(), ansSqOp.getResult());
+
+    // ... and let it take over every use of the original DivOp.
+    rewriter.replaceOp(op, ansSumOp.getResult());
+    return mlir::success();
+  }
+};
+
+/// Matches a spectrum product that is transformed back with IFFT1DOp and
+/// replaces the whole computation with one FIRFilterResponseOp.
+///
+/// The matched shape is:
+///   * operand 0 of the IFFT1DOp is a SubOp of two MulOps,
+///   * operand 1 of the IFFT1DOp is an AddOp of two MulOps,
+///   * the left factors of the two SubOp products come from one FFT1DOp and
+///     their right factors from one (second) FFT1DOp,
+///   * both FFT1DOps read the result of a PaddingOp.
+/// The FIRFilterResponseOp is applied to the two signals as they were before
+/// padding.
+///
+/// NOTE(review): which FFT1DOp result feeds each multiply is not checked, and
+/// the factors of the two AddOp products are not traced back to the FFTs at
+/// all. Confirm that this is strict enough for every producer of this IR.
+struct SimplifyConvolutionThm : public mlir::OpRewritePattern<IFFT1DOp> {
+  SimplifyConvolutionThm(mlir::MLIRContext *context)
+      : OpRewritePattern<IFFT1DOp>(context, 1) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(IFFT1DOp op, mlir::PatternRewriter &rewriter) const override {
+    // Operand 0 must be a difference and operand 1 a sum.
+    Operation *subOp = op.getOperand(0).getDefiningOp<SubOp>();
+    if (!subOp)
+      return failure();
+
+    Operation *addOp = op->getOperand(1).getDefiningOp<AddOp>();
+    if (!addOp)
+      return failure();
+
+    // All four operands of the difference and the sum must be products. Each
+    // producer is validated itself before it is dereferenced further down.
+    Operation *mulLreal = subOp->getOperand(0).getDefiningOp<MulOp>();
+    if (!mulLreal)
+      return failure();
+
+    Operation *mulRreal = subOp->getOperand(1).getDefiningOp<MulOp>();
+    if (!mulRreal)
+      return failure();
+
+    Operation *mulLImg = addOp->getOperand(0).getDefiningOp<MulOp>();
+    if (!mulLImg)
+      return failure();
+
+    Operation *mulRImg = addOp->getOperand(1).getDefiningOp<MulOp>();
+    if (!mulRImg)
+      return failure();
+
+    // The left factors of both difference products must come from the same
+    // FFT1DOp (fft1dreal/fft1dimg pairs are merged into FFT1DOp by another
+    // canonicalization).
+    Operation *fft1DOp_a1 = mulLreal->getOperand(0).getDefiningOp<FFT1DOp>();
+    if (!fft1DOp_a1)
+      return failure();
+    Operation *fft1DOp_a2 = mulRreal->getOperand(0).getDefiningOp<FFT1DOp>();
+    if (fft1DOp_a1 != fft1DOp_a2)
+      return failure();
+
+    // Likewise, their right factors must come from one FFT1DOp.
+    Operation *fft1DOp_b1 = mulLreal->getOperand(1).getDefiningOp<FFT1DOp>();
+    if (!fft1DOp_b1)
+      return failure();
+    Operation *fft1DOp_b2 = mulRreal->getOperand(1).getDefiningOp<FFT1DOp>();
+    if (fft1DOp_b1 != fft1DOp_b2)
+      return failure();
+
+    // Both transforms must read padded signals.
+    Operation *padOp_1 = fft1DOp_a1->getOperand(0).getDefiningOp<PaddingOp>();
+    if (!padOp_1)
+      return failure();
+
+    Operation *padOp_2 = fft1DOp_b1->getOperand(0).getDefiningOp<PaddingOp>();
+    if (!padOp_2)
+      return failure();
+
+    // The replacement works on the signals as they were before padding.
+    Value input1 = padOp_1->getOperand(0);
+    Value input2 = padOp_2->getOperand(0);
+    if (!input1 || !input2)
+      return failure();
+
+    auto newResult =
+        rewriter.create<FIRFilterResponseOp>(op.getLoc(), input1, input2);
+    rewriter.replaceOp(op, newResult.getResult());
+
+    // Replacing `op` removed its uses of the sum and the difference, so those
+    // two are erased when nothing else reads them. Every other matched
+    // operation is left in place.
+    if (addOp->use_empty())
+      rewriter.eraseOp(addOp);
+    if (subOp->use_empty())
+      rewriter.eraseOp(subOp);
+
+    return mlir::success();
+  }
+};
+
+
+// ===================================
+// ===================================
+// ===================================
+// ===================================
+// =====Registration of Patterns =====
+// ===================================
+// ===================================
+// ===================================
+// ===================================
+// ===================================
+/// Canonicalization hook of FFT1DImgOp: its pattern is handed to the
+/// Canonicalization framework only when getEnableCanonicalOpt() is set.
+void FFT1DImgOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                             MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<SimplifyFFTImgAtInputRealSymm>(context);
+}
+
+void FFT1DRealOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyDFTAbs, SimplifyFFTRealAndImg,
+              SimplifyFFTRealAtInputRealSymm>(context);
+}
+
+void FIRFilterResponseOp::getCanonicalizationPatterns(
+    RewritePatternSet &results, MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyFIRFilterRespnseWithSymmFilter,
+              SimplifyFilterRespX_ReverseXYSymmFilter>(context);
+}
+
+void MulOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                        MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyFilterMulHamming, SimplifyHighPassFIRHamming>(context);
+}
+
+void SquareOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                           MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyFFTSquare>(context);
+}
+
+/// Canonicalization hook of DownsamplingOp. SimplifyUpsamplingDownsampling
+/// is handed to the Canonicalization framework only when
+/// getEnableCanonicalOpt() is set.
+void DownsamplingOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                 MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<SimplifyUpsamplingDownsampling>(context);
+}
+
+void TransposeOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyRedundantTranspose>(context);
+}
+
+/// Registers SimplifyBack2BackDelay for DelayOp, but only when
+/// getEnableCanonicalOpt() is set.
+void DelayOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                          MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+
+  results.add<SimplifyBack2BackDelay>(context);
+}
+
+/// Registers the GainOp patterns (SimplifyGainwZero is currently left out).
+void GainOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<SimplifyBack2BackGain, SimplifyLMSFilterwithGain,
+              SimplifyLMSFilterResponsewithGain>(context);
+}
+
+/// Registers the MeanOp patterns (SimplifyDiff2Mean is currently left out).
+void MeanOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<SimplifyFindPeaks2Diff2Mean>(context);
+}
+
+void FindPeaksOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyLMS2FindPeaks>(context);
+}
+
+void SlidingWindowAvgOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                     MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyMedian2Sliding>(context);
+}
+
+/// Canonicalization hook of ReshapeOp: its three reshape patterns are handed
+/// to the Canonicalization framework only when getEnableCanonicalOpt() is set.
+void ReshapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                            MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<ReshapeReshapeOptPattern, RedundantReshapeOptPattern,
+              FoldConstantReshapeOptPattern>(context);
+}
+
+void SpaceDemodulateOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                    MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifySpaceModDemodulate>(context);
+}
+
+void NormalizeOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                              MLIRContext *ctx) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyNormLMSFilterResponse>(ctx);
+}
+
+/// Registers SimplifyDSSDPass for DivOp when getEnableCanonicalOpt() is set.
+/// SimplifyEnergyOfSignal is currently not registered.
+void DivOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                        MLIRContext *ctx) {
+  if (!mlir::getEnableCanonicalOpt())
+    return;
+  results.add<SimplifyDSSDPass>(ctx);
+}
+
+void ThresholdUpOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                MLIRContext *ctx) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyFIRFilterHammingThreholdUpOptimized,
+              SimplifyDFTAbsThreshold>(ctx);
+}
+
+void FFTRealOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                            MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyFFTAbs, SimplifyFFTRealAndImagToFFT>(context);
+}
+
+void MaxOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                        MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyCorrel2Max>(context);
+}
+
+void IFFT1DOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                           MLIRContext *context) {
+  if (!mlir::getEnableCanonicalOpt())
+    return; // registration is gated by getEnableCanonicalOpt()
+  results.add<SimplifyConvolutionThm>(context);
+}
diff --git a/mlir/lib/Dialect/DSP/IR/ToyCombine.td b/mlir/lib/Dialect/DSP/IR/ToyCombine.td
new file mode 100644
index 000000000000..a877cd0a0a8d
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/IR/ToyCombine.td
@@ -0,0 +1,63 @@
+//===- ToyCombine.td - Pattern Match Optimizations for DSP -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines language-specific pattern match optimizations for the DSP dialect
+// using Declarative Rewrite Rules (DRR) specified using TableGen records.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TOY_COMBINE
+#define TOY_COMBINE
+
+include "mlir/IR/PatternBase.td"
+include "mlir/Dialect/DSP/IR/DSP.td"
+
+/// Note: The DRR definition used for defining patterns is shown below:
+///
+/// class Pattern<
+///    dag sourcePattern, list<dag> resultPatterns,
+///    list<dag> additionalConstraints = [],
+///    dag benefitsAdded = (addBenefit 0)
+/// >;
+
+//===----------------------------------------------------------------------===//
+// Basic Pattern-Match and Rewrite
+//===----------------------------------------------------------------------===//
+
+// Reshape(Reshape(x)) = Reshape(x): the inner reshape is dropped.
+def ReshapeReshapeOptPattern : Pat<(ReshapeOp(ReshapeOp $arg)),
+                                   (ReshapeOp $arg)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern-Match and Rewrite using Native Code Call
+//===----------------------------------------------------------------------===//
+
+// Native Code Calls may be used for more complex transformations using inline
+// C++ and C++ helper functions.
+
+// Reshape(Constant(x)) = x': the constant is reshaped to the result type.
+def ReshapeConstant :
+  NativeCodeCall<"$0.reshape(::llvm::cast<ShapedType>($1.getType()))">;
+def FoldConstantReshapeOptPattern : Pat<
+  (ReshapeOp:$res (ConstantOp $arg)),
+  (ConstantOp (ReshapeConstant $arg, $res))>;
+
+//===----------------------------------------------------------------------===//
+// Pattern-Match and Rewrite with Constraints
+//===----------------------------------------------------------------------===//
+
+// DRR allows for constraint checking when the transformation is conditional
+// on operand properties.
+
+// Reshape(x) = x when the result type of the reshape equals the type of x.
+def TypesAreIdentical : Constraint<CPred<"$0.getType() == $1.getType()">>;
+def RedundantReshapeOptPattern : Pat<
+  (ReshapeOp:$res $arg), (replaceWithValue $arg),
+  [(TypesAreIdentical $res, $arg)]>;
+
+#endif // TOY_COMBINE
diff --git a/mlir/lib/Dialect/DSP/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/DSP/Pipelines/CMakeLists.txt
new file mode 100644
index 000000000000..95e99f576580
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/Pipelines/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_mlir_dialect_library(MLIRDSPPipelines
+  DSPPipelines.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/DSP
+
+  LINK_LIBS PUBLIC
+  MLIRDSPDialect
+  #MLIRDSPTransforms
+  MLIRMemRefTransforms
+  MLIRFuncDialect
+  MLIRPass
+  MLIRTransforms
+)
diff --git a/mlir/lib/Dialect/DSP/Pipelines/DSPPipelines.cpp b/mlir/lib/Dialect/DSP/Pipelines/DSPPipelines.cpp
new file mode 100644
index 000000000000..cda12a4dfbd3
--- /dev/null
+++ b/mlir/lib/Dialect/DSP/Pipelines/DSPPipelines.cpp
@@ -0,0 +1,155 @@
+//===- DSPPipelines.cpp - Pipelines for the DSP dialect ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/DSP/Pipelines/Passes.h"
+
+#include "mlir/Dialect/DSP/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Types.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/TypeID.h"
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+//#include "toy/Passes.h"
+//#include "toy/ShapeInferenceInterface.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+#include "mlir/Interfaces/ShapeInferenceOpInterfaces.cpp.inc"
+
+//===----------------------------------------------------------------------===//
+// Pipeline implementation.
+//===----------------------------------------------------------------------===//
+
+/// Populates `pm` with the DSP pipeline: the inliner first, then shape
+/// inference on every dsp::FuncOp. `options` is currently not consulted.
+void mlir::dsp::buildDSPPipeline(
+    OpPassManager &pm, const DSPPipelineOptions &options) {
+  // Inlining comes first: once only one function is left, the shape of each
+  // operation can be inferred.
+  pm.addPass(mlir::createInlinerPass());
+
+  // Shape inference runs per function, in a pass manager nested under `pm`
+  // on dsp::FuncOp.
+  pm.nest<mlir::dsp::FuncOp>().addPass(
+      mlir::dsp::createShapeInferencePass());
+}
+
+//===----------------------------------------------------------------------===//
+// Pipeline registration.
+//===----------------------------------------------------------------------===//
+
+/// Registers the "dsp-shapeinference" pass pipeline built by buildDSPPipeline.
+void mlir::dsp::registerDSPPipelines() {
+  PassPipelineRegistration<DSPPipelineOptions> shapeInferencePipeline(
+      "dsp-shapeinference",
+      "Implements Shape Inference and Inlining for DSP dialect operations.",
+      buildDSPPipeline);
+}
+
+
+using namespace mlir;
+using namespace dsp;
+
+namespace {
+	
+/// Intra-procedural shape inference for one dsp::FuncOp.
+/// The pass is created by createShapeInferencePass().
+///
+///    Algorithm:
+///
+///   1) Collect every operation that has at least one result which is not a
+///      ranked tensor: these still need their shape inferred.
+///   2) While the collection is not empty:
+///     a) look for an operation whose operands are all ranked tensors,
+///     b) stop if there is none,
+///     c) take that operation out of the collection,
+///     d) let it infer its result shapes through the ShapeInference
+///        interface; an operation without the interface fails the pass.
+///   3) Operations left in the collection at the end (none of them ever
+///      became ready) also fail the pass.
+///
+struct ShapeInferencePass
+    : public mlir::PassWrapper<ShapeInferencePass, OperationPass<dsp::FuncOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ShapeInferencePass)
+
+  void runOnOperation() override {
+    auto function = getOperation();
+
+    // Seed the pending set with every operation that returns a dynamic shape.
+    llvm::SmallPtrSet<mlir::Operation *, 16> pending;
+    function.walk([&](mlir::Operation *candidate) {
+      if (returnsDynamicShape(candidate))
+        pending.insert(candidate);
+    });
+
+    // Resolve one ready operation per iteration, until nothing is pending or
+    // nothing is ready any more.
+    while (!pending.empty()) {
+      auto ready = llvm::find_if(pending, allOperandsInferred);
+      if (ready == pending.end())
+        break;
+
+      // Take it out of the set first so that it is never picked again.
+      Operation *current = *ready;
+      pending.erase(current);
+
+      // The inference itself is delegated to the operation.
+      auto shapeOp = dyn_cast<ShapeInference>(current);
+      if (!shapeOp) {
+        current->emitError("unable to infer shape of operation without shape "
+                           "inference interface");
+        return signalPassFailure();
+      }
+      shapeOp.inferShapes();
+    }
+
+    // An empty set means that every shape was inferred.
+    if (pending.empty())
+      return;
+
+    function.emitError("Shape inference failed, ")
+        << pending.size() << " operations couldn't be inferred\n";
+    signalPassFailure();
+  }
+
+  /// A utility method that tells whether every operand of `op` is a ranked
+  /// tensor, i.e. already has an inferred shape.
+  static bool allOperandsInferred(Operation *op) {
+    auto isRanked = [](Type operandType) {
+      return llvm::isa<RankedTensorType>(operandType);
+    };
+    return llvm::all_of(op->getOperandTypes(), isRanked);
+  }
+
+  /// A utility method that tells whether at least one result of `op` is not
+  /// a ranked tensor.
+  static bool returnsDynamicShape(Operation *op) {
+    auto isRanked = [](Type resultType) {
+      return llvm::isa<RankedTensorType>(resultType);
+    };
+    return !llvm::all_of(op->getResultTypes(), isRanked);
+  }
+};
+} // namespace
+
+/// Factory for the DSP ShapeInferencePass; returns a newly allocated instance.
+std::unique_ptr<mlir::Pass> mlir::dsp::createShapeInferencePass() {
+  return std::make_unique<ShapeInferencePass>();
+}
+
+
diff --git a/mlir/lib/Dialect/DSP/Transforms/CMakeLists.txt b/mlir/lib/Dialect/DSP/Transforms/CMakeLists.txt
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index e0657c895e8a..8ab802b86130 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -21,6 +21,16 @@ add_mlir_library(MLIRTargetLLVM
   MLIRTargetLLVMIRExport
 )
 
+if ("Hexagon" IN_LIST LLVM_TARGETS_TO_BUILD)
+  set(Hexagon_LIBS
+    HexagonAsmParser
+    HexagonCodeGen
+    HexagonDesc
+    HexagonDisassembler
+    HexagonInfo
+  )
+endif()
+
 if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   set(NVPTX_LIBS
     NVPTXCodeGen
@@ -37,6 +47,7 @@ add_mlir_dialect_library(MLIRNVVMTarget
 
   LINK_COMPONENTS
   ${NVPTX_LIBS}
+  ${Hexagon_LIBS}
 
   LINK_LIBS PUBLIC
   MLIRIR
diff --git a/mlir/lib/Tools/mlir-opt/CMakeLists.txt b/mlir/lib/Tools/mlir-opt/CMakeLists.txt
index f24d4c60174e..5c0c0044029e 100644
--- a/mlir/lib/Tools/mlir-opt/CMakeLists.txt
+++ b/mlir/lib/Tools/mlir-opt/CMakeLists.txt
@@ -13,4 +13,5 @@ add_mlir_library(MLIROptLib
   MLIRPluginsLib
   MLIRSupport
   MLIRIRDL
+  MLIRDSPDialect
   )
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index 44c5e9826f3b..ca27c690dc89 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -49,6 +49,8 @@
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/ToolOutputFile.h"
 
+#include "mlir/Dialect/DSP/IR/DSPDialect.h"
+
 using namespace mlir;
 using namespace llvm;
 
@@ -69,6 +71,8 @@ class BytecodeVersionParser : public cl::parser<std::optional<int64_t>> {
   }
 };
 
+
+
 /// This class is intended to manage the handling of command line options for
 /// creating a *-opt config. This is a singleton.
 struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
@@ -172,6 +176,7 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
             cl::location(generateReproducerFileFlag), cl::init(""),
             cl::value_desc("filename"));
 
+
     /// Set the callback to load a pass plugin.
     passPlugins.setCallback([&](const std::string &pluginPath) {
       auto plugin = PassPlugin::load(pluginPath);
@@ -501,6 +506,7 @@ mlir::registerAndParseCLIOptions(int argc, char **argv,
   registerPassManagerCLOptions();
   registerDefaultTimingManagerCLOptions();
   tracing::DebugCounter::registerCLOptions();
+  registerDSPOptions();
 
   // Build the list of dialects as a header for the --help message.
   std::string helpHeader = (toolName + "\nAvailable Dialects: ").str();
diff --git a/mlir/test/Examples/DspExample/conv2d/README.md b/mlir/test/Examples/DspExample/conv2d/README.md
new file mode 100644
index 000000000000..978311729bb0
--- /dev/null
+++ b/mlir/test/Examples/DspExample/conv2d/README.md
@@ -0,0 +1,5 @@
+Convolution formula (valid mode: a `k x k` kernel over an `n x n` input gives an `(n-k+1) x (n-k+1)` output):
+$$
+output(i, j) = \sum^{k-1}_{p=0}\sum^{k-1}_{q=0}input(i+p, j+q) * kernel(p, q)
+$$
+
diff --git a/mlir/test/Examples/DspExample/conv2d/conv2d b/mlir/test/Examples/DspExample/conv2d/conv2d
new file mode 100755
index 000000000000..7a7ce664e382
Binary files /dev/null and b/mlir/test/Examples/DspExample/conv2d/conv2d differ
diff --git a/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.c b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.c
new file mode 100644
index 000000000000..e5462e61c038
--- /dev/null
+++ b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.c
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Releases a jagged array of `size` rows: each row, then the row table. */
+void freeArr(int size, int** arr) {
+    for(int i=0; arr != NULL && i<size; ++i) free(arr[i]);
+    free(arr);
+}
+
+/* Valid-mode 2-D convolution of the square INPUTSIZE x INPUTSIZE matrix `a`
+ * with the square KERNELSIZE x KERNELSIZE kernel `b`, written to `c`. */
+void conv2d(int INPUTSIZE, int KERNELSIZE, int** a, int **b, int **c) {
+    const int span = INPUTSIZE - KERNELSIZE + 1;
+    for(int row=0; row<span; ++row) {
+        for(int col=0; col<span; ++col) {
+            int* cell = &c[row][col];
+            *cell = 0;
+            /* row+p and col+q stay below INPUTSIZE, so no bounds test. */
+            for(int p=0; p<KERNELSIZE; ++p)
+                for(int q=0; q<KERNELSIZE; ++q)
+                    *cell += a[row+p][col+q] * b[p][q];
+        }
+    }
+}
+
+/* Allocates a rows x cols matrix and fills it with integers read from `file`.
+ * Returns NULL when an allocation fails or a value cannot be parsed. */
+static int** readMatrix(FILE* file, int rows, int cols) {
+    int** m = (int**) calloc((size_t)rows, sizeof(int*));
+    for(int i=0; m != NULL && i<rows; ++i) {
+        m[i] = (int*) malloc((size_t)cols*sizeof(int));
+        if(m[i] == NULL) return NULL;
+        for(int j=0; j<cols; ++j) {
+            if(fscanf(file, "%d", &m[i][j]) != 1) return NULL;
+        }
+    }
+    return m;
+}
+
+/* Prints `message` on stderr and terminates with a failure status. */
+static void die(const char* message) {
+    fprintf(stderr, "%s\n", message);
+    exit(1);
+}
+
+/* Reads a square input and a square kernel from input.txt, convolves them
+ * and prints the result. */
+int main() {
+    const char* filename = "input.txt";
+    FILE *file = fopen(filename, "r");
+    if(file == NULL) {
+        printf("error opening file");
+        exit(1);
+    }
+
+    /* conv2d takes a single size per matrix, so both must be square. */
+    int rows=0, cols=0;
+    if(fscanf(file, "%d %d", &rows, &cols) != 2 || rows <= 0 || rows != cols)
+        die("input size must be two equal positive integers");
+    printf("input size %d, %d\n", rows, cols);
+    int** a = readMatrix(file, rows, cols);
+    if(a == NULL) die("could not read the input matrix");
+
+    int krows=0, kcols=0;
+    if(fscanf(file, "%d %d", &krows, &kcols) != 2 || krows <= 0
+       || krows != kcols || krows > rows)
+        die("kernel size must be two equal positive integers, at most the input size");
+    printf("kernel size %d, %d\n", krows, kcols);
+    int** b = readMatrix(file, krows, kcols);
+    if(b == NULL) die("could not read the kernel matrix");
+
+    fclose(file);
+
+    int outputrows = rows - krows + 1;
+    int outputcols = cols - kcols + 1;
+    int** c = (int**) malloc((size_t)outputrows*sizeof(int*));
+    if(c == NULL) die("out of memory");
+    for(int i=0; i<outputrows; ++i){
+        c[i] = (int*) malloc((size_t)outputcols*sizeof(int));
+        if(c[i] == NULL) die("out of memory");
+    }
+    printf("output size %d, %d\n", outputrows, outputcols);
+
+    conv2d(rows, krows, a, b, c);
+
+    for(int i=0; i<outputrows; ++i) {
+        for(int j=0; j<outputcols; ++j) {
+            printf("%d ", c[i][j]);
+        }
+        printf("\n");
+    }
+
+    freeArr(rows, a);
+    freeArr(krows, b);
+    freeArr(outputrows, c);
+    return 0;
+}
diff --git a/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.mlir b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.mlir
new file mode 100644
index 000000000000..46133202aa25
--- /dev/null
+++ b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d.mlir
@@ -0,0 +1,5 @@
+module {
+    dsp.func @main() {
+
+    }
+}
diff --git a/mlir/test/Examples/DspExample/conv2d/dsp_conv2d_op.py b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d_op.py
new file mode 100644
index 000000000000..ccdec232eedd
--- /dev/null
+++ b/mlir/test/Examples/DspExample/conv2d/dsp_conv2d_op.py
@@ -0,0 +1,18 @@
+def main() {
+        var a = [[1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],
+                 [1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],
+                 [1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],[1,2,3,4,5,6,5,4,3,2],
+                 [1,2,3,4,5,6,5,4,3,2]];
+        # a: ten identical rows of ten values (10x10 input).
+
+        var b = [[1,0,1],
+                 [0,2,4],
+                 [3,6,8]];
+        # b: 3x3 kernel.
+        var c = [0];
+        # c: third conv2d argument - presumably the bias (TODO confirm).
+        var d = conv2d(a, b, c);
+        print(d);
+}
+
+# For reference, tosa.conv2d takes: input, weight, bias : pad, stride, dilation, quant, local_bound
diff --git a/mlir/test/Examples/DspExample/conv2d/input.txt b/mlir/test/Examples/DspExample/conv2d/input.txt
new file mode 100644
index 000000000000..e484861cc482
--- /dev/null
+++ b/mlir/test/Examples/DspExample/conv2d/input.txt
@@ -0,0 +1,10 @@
+10 10
+1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2
+1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2
+1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2 1 2 3 4 5 6 5 4 3 2
+1 2 3 4 5 6 5 4 3 2
+
+3 3
+1 0 1
+0 2 4
+3 6 8
diff --git a/mlir/test/Examples/DspExample/dsp_abs_argmax.py b/mlir/test/Examples/DspExample/dsp_abs_argmax.py
new file mode 100644
index 000000000000..bd8673fd1aed
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_abs_argmax.py
@@ -0,0 +1,14 @@
+def main() {
+        # Alternative short input: var time = [0.0, -0.25, 0.5, -0.75, 1.0];
+        var time = getRangeOfVector(0, 100, 0.01);
+        var antennas = 4;
+        var freq = 5;
+        var weights = [1, 7, 6, -7];
+
+        var signal = beam_form(antennas, freq, time, weights);
+        var abs_signal = abs(signal);
+        var power_abs_signal= abs_signal * abs_signal;
+        # The peak index is taken from the power, not from the raw signal.
+        var max_power_angle_idx = argmax(power_abs_signal, 0);
+        print(max_power_angle_idx);
+}
diff --git a/mlir/test/Examples/DspExample/dsp_beam_form.py b/mlir/test/Examples/DspExample/dsp_beam_form.py
new file mode 100644
index 000000000000..bf4094812185
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_beam_form.py
@@ -0,0 +1,10 @@
+def main() {
+        # Finer-grained alternative input: var time = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.];
+        var time = [0.0, 0.25, 0.5, 0.75, 1.0];
+        var antennas = 4;
+        var freq = 5;
+        var weights = [1,2,3,4];
+
+        var signal = beam_form(antennas, freq, time, weights);
+        print(signal);
+}
diff --git a/mlir/test/Examples/DspExample/dsp_biomedical.py b/mlir/test/Examples/DspExample/dsp_biomedical.py
new file mode 100644
index 000000000000..cbb8ca24af17
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_biomedical.py
@@ -0,0 +1,64 @@
+def main() {
+  var input_signal = [0.09391447818349865639, -0.64124217719704934559, -0.62692484054281838457, 0.01275379584596916457, 0.54796006228603910682, -0.78928997929850563953, 0.66584321589266248775, -0.27261286333223494482, 0.37735143885667149499, -0.04032548100483641929, 0.34760746338597903193, 0.08750261364008603271, -0.39336237038669169541, -0.13992208725293370231, -0.03000789137945208962, -0.66949358566426153683, 0.39060269895958293906, -0.55498474667461572540, -0.88563294524087721182, 0.00673558649672073573, -0.29968009344275092776, -0.37167151746433341186, 1.07130673806743748067, 0.38586275933191216403, 0.77598317195055210860, -0.79294992706261857585, -0.78001185531780892202, 0.11210388464987396107, 1.05424296277589624182, 0.76981354956633185616, 0.25523734290301064442, -0.33809204260204811510, 0.95315166684373753281, 0.26008564468881201215, 0.58876648378181939414, -0.28087402961904706089, 1.19144681461698231928, 0.29609750057489769848, 0.57464455208245968088, 1.02435938662426684331, -0.45253993595583219545, -0.95427439150047754790, 0.46238011484697150166, 0.63183510683658594687, 0.53634500920042937899, -0.03109995126536169163, 0.89267964646365993708, -0.82939704833142924301, -0.85373890824349696338, 0.02342685250975734546, 1.57212071365485273056, 0.87342602291143256465, 0.08831370586921122312, -0.00522271222803732948, 1.03672540114726974281, 1.20585550366192695293, 0.41987842758305826685, -0.04249863204181153487, 0.52099665030295705392, 0.75058470154504597360, 0.58531608098388143713, -0.00357873971817451730, 0.59643153109790347788, -0.30127231636450024777, -0.68453395622814872112, 0.09085843420893024991, 0.89552565900418334977, 1.03352745353178310239, 0.03549419301161099716, 0.13406804741697736505, 1.07174666641970217640, 0.44930092518113484701, 0.82884702462989623406, 0.93881425701790854887, 1.07278612567036213399, 1.05200572436050299174, 1.41523562096756005957, -0.30215587638575314156, 0.99853990681816440222, 0.22830091801084728687, 0.69366683328045930512, 
0.54950026084627878742, -0.08387356113840860328, 1.35760915527749470577, -0.35237766809126924716, 1.25736877180850781244, 0.60846883126887585380, 0.23451043722395237268, 0.86880181953028123853, 0.36388595731515288589, -0.41257475901782725280, 0.71332566240772687927, 0.01032871329030404794, 0.66221095583151512454, -0.16580117687027862150, 1.96599902259062897514, 0.86107091313818795442, 1.11338758436425022680, 0.16692826424335627244, 0.24917397082510700512, 1.18135909673451511281, -0.20480523217480139930, 0.79124737211770845846, 0.74473823029637753468, 0.92235363553196125785, 0.04658385900091366327, 0.37422271257771638764, 0.30246172702415369260, 0.24478546810998436101, 2.13083118288844275412, 0.40090204635422022061, 0.57312477551478979709, 1.00684232460216649407, 0.96751420028080148406, -0.00274662496819366719, -0.08841483691441620607, 0.27238515446718297719, 0.53945213787503276670, 0.35216609373673479766, 1.00940046570138730608, 0.18925861730019849505, 0.74898446646354943645, 0.88221219656386962171, 0.66230951624880241013, 0.84452427375177152591, 0.07435668420638441578, -0.40286633344518696376, 1.07555891840149397964, 0.27408743073695868153, 0.70833585831952705636, -0.01359718554712519012, 0.64373137902516897046, 0.18041402857681132499, 1.07600669754191957672, 0.01254815239008155725, 0.50956270613537224534, 0.26091430357024164577, 0.40485342256217338175, 0.60233666990861567392, -0.06443275514685653516, 0.72206666860667678698, 0.47224775392542356345, 0.56810948893010992933, 0.63542653848075181244, 0.85181893332911007288, 1.52499833767253267780, 1.20240311020337342640, 1.75257181193744404624, 1.08048609612313439854, 1.67772856282504001513, 0.21610969109458477710, 0.71221576711241718627, 0.79498695985871081771, 1.35693571799292733715, 0.66977105345721210661, 0.34462686256106334071, 1.14407258429036895464, -0.02033206662019215738, 0.10743833106353362083, 0.49076436204561485477, 0.04174717139575079283, 0.69375763570386850265, 0.43035171410453459950, 
-0.16812265217504418491, 1.33093470069918273779, 0.94152996852788573001, 0.27877102412528109721, 0.82036503853332320979, -0.18371902877937484888, 0.67018735843625520943, 1.11429317127647697916, 0.07638535349681990771, 1.03073958473393467727, 0.42073128705144374750, 0.30674532050030628438, 0.15055276546732965226, 1.18099050232762214918, 0.53042461540071383919, 0.98830245141143269194, 0.89188839391060403816, 0.06308057398534394622, 0.30607413082411127903, 1.51947758474885685764, 0.46117930529971040698, 0.21490151600731086101, 1.16866646449455546808, 0.38310345186209809887, 0.99245619194688372566, 0.29338498199476659156, 1.04983466056790231136, -0.29417527307010643556, 0.42320932380766290715, 0.91148161047995879258, 0.89083923211745941195, 0.57708418406216699381, 0.13524234825948835970, 1.38350585633078604353, 0.35012429855462162687, 0.12492784301101086952, 0.86948955446786446721, -0.23363142145282583817, 0.13006214044203068259, -0.04237612600026541276, 1.29637329899679976108, 0.06511784322398822544, -0.05396327078496410135, 0.08649095343532944846, -0.03019685673215311006, 0.12869637596860863948, 0.87757529019843394380, 0.66307910175066420155, 0.40068018218527812202, 0.74495813212544859105, 0.52641988883382118125, 0.04587903441341781297, 1.55445277963515593456, 0.21795354956207602104, 0.66220727253425359926, 1.19330925522208097789, 0.56162748354815572593, -0.03637749633366196833, -0.11473777255928022112, 0.49850689042882989632, 0.48450914417960433411, 0.48006054513872209455, -0.09103515636425568713, -0.44671221471309074946, 0.03883974102923495875, -0.09902453878002623333, 0.22665978819822624191, -0.14781589853391158274, 0.12814687056024970380, 0.74779135850382782991, 0.28311537345191273296, 0.26213848869667094599, -0.49957813537844081297, 0.21023316934907945730, 0.64669440801895716753, 0.82290947709401607302, 0.68280624182231619557, -1.00393170190676528364, 0.67513776050600649015, 0.69036228407902866522, 0.11649958830362899809, 0.69222583131505177612, 
-0.09959226938600301837, -0.18366668033768301926, -0.34718225118477052948, 0.86918378471540724828, 0.10457675545881890689, -0.08112797434002502750, -0.30121694535291232420, -0.36331682540476500920, -0.01402990854082046546, 0.34988315584042600781, 0.47426224451741366694, -0.18238113027436977931, 0.08356985576051237374, -0.90990068695001502519, 0.10139169785108301247, -0.38097649980263087466, 0.07659981868350396184, -0.13459034341996067852, -0.39976968194571321780, -0.15335282338983952100, -0.96380372499736699510, -0.08126735377012267447, -0.66339548960312155756, 0.40780227006908020826, 0.21545625563132370095, -0.81790616421192274821, -0.41973790158210566581, -0.21577637826951218125, -0.31702579590228352480, -0.53829852397056754398, 0.13593242682932568655, 0.16686764477995624789, -0.27671306106853527096, 0.23329674564842900475, -0.64777511546045407620, 0.78655689880155288396, -0.09916109332330380610, -1.18157251114269135073, -0.53737476591643684731, 0.63558511687284013902, 0.12734842675935503786, -0.29440659726853202205, -0.35299739792147272199, -0.83958229257499272791, -0.41558192538630339685, -0.40064488065156012020, -0.65295643709666051357, -0.22025448222512514218, 0.56061229515359678466, -1.36527202576251771582, -0.29156694772510516334, -0.88987509089879268664, 0.01180144258537074720, 0.26937606131898178541, -1.25642984477524022324, -0.95022992934930905484, -0.62239154318497691420, -0.38126083433599333716, -0.73552201540328532303, -0.02369511384384270780, -0.14674273530927686715, -0.20938454299185485508, 0.21499373480923594792, 0.03746983275381166045, 0.20980920849842521481, -0.54573564261711282608, -1.11676615004888324378, -0.88545084578605559589, -0.35611253732666503513, -0.83390145297159468107, 0.48746053767095731679, 0.03577465011813768525, 0.00064389610326015534, -1.00140152908365243078, 0.06069516585161383038, -0.37820149287207854849, -1.19940629878695803612, 0.05636693293195671473, -0.01751036819897971997, 0.31880741543437030039, -0.71248848916092244465, 
-1.28804130204737976584, -1.54816235848471395187, 0.54880538881778428983, -1.46043065834484209198, 0.69559126826421291412, -0.21283142192574650009, 0.16289665298283795281, -0.47662645479827542294, -2.25434984166571927844, -0.88873151768656477589, -0.59401620321138248748, -0.81795366125250534850, 0.47629367630143915058, -0.35857899725000974556, 0.18517240533072298891, -0.49564855820955827648, -0.81292404086702330090, -0.32544371466954902239, -0.44049054672524179388, 0.30361834048874125624, -0.48822245864726809828, -0.89163372212238756198, -1.04082507957091241479, -0.68596155648419088191, -0.01242381969128125707, -0.32096073816043774185, 0.20368801378914525024, -1.41738513979201719017, 0.29666789496311263274, -0.31327344976759824435, -0.92424562580202329620, -0.26613836235289278775, -0.73971838195852235298, -0.90555462004205422577, -0.59034415382356575375, -0.89232121241320894178, -0.30538880163070247775, -0.87041535127760816248, -1.19016738674721911373, -1.41287769516728523023, -0.50391634614517799573, -0.36750881117098466166, -0.63368398514766988150, -0.60479368924704279831, -0.17734427699328669803, -0.81015618196224004066, -1.21869277783517127034, -0.81623748824757369480, -1.73566455574227251901, -0.73717351758885385493, -0.82531839118386907117, -1.22154483751334375086, -1.24855964808297348334, -0.44002199156843546657, -1.22448876377036564023, -0.22977659797195010283, -0.62096768146173564329, -0.31639236618947585988, 0.25187626212062874664, -0.05298191473342050983, 0.39146477260910061968, -0.68455484858757209565, -1.24844962881476728711, -1.46467419566822787402, -1.33500745061883518972, -0.90838296252963157684, -0.02397821508985376049, -0.61091235595626802724, -1.00514814907315286341, -0.10597556387550471912, -0.96061128866199330112, -0.71216561614460516250, -0.57519660622536372685, 0.44731217162472369786, -0.67635375106683826019, -0.30873008080153191424, -0.59738703293825223817, -0.20080833788644053550, -0.73784628421693565503, 0.26333665419794693996, 
-0.87165557031953389000, -1.46836077827615874014, -0.59544305595777169415, -0.69719097849295363911, -0.26503783515772022517, -0.09828769484448379723, -1.15363125001043331785, 0.48624669319997126937, 0.16109926301438781682, -1.15772019929848890563, -0.78430583908825224349, -0.93514655927017831250, -0.97152047788574935616, -0.62783203634544915150, 0.51877683006390518106, -0.63313658485377166674, -0.18544261249061677610, -0.44739587023791765885, -0.39615786539984731984, -0.50967405481174798965, -0.87069351788568827288, -0.20651260136271382128, -0.59619216152806697728, 0.10837015868786969985, -0.46777181362743069926, -0.68654777888832185795, -1.31268762105612846192, -0.08497503165402225944, -0.79088415747031937109, -0.34625378271578322043, -0.60146657373573242378, -0.21891836744752238286, -0.35167484038966401183, 0.18207642738929680215, 0.13492392088026150532, -0.63194331858864094542, -0.55971156886414497045, 0.09916598098646317583, -0.62479185334231746030, -0.43798309168051197116, -0.37637712780106136190, -0.24987530928236750816, -0.26614567524735260884, -0.32561367632572751907, -0.34756741137191243807, 0.24780539951632141182, 0.06774691059140852678, -0.44253184142155788106, -1.03969481897534365977, 0.31472695855053045566, -0.41071929870994539069, -0.24836871646688729598, -0.23909019647305618239, -0.80411663944202338428, -0.04676479567365990353, -0.50618658450337150612, 0.47565640770866507436, -0.85575052766018355754, 0.16634943731885259988, 0.04267977706960690565, -0.19243319151588628158, -0.09119799423208369005, 0.03689509285176600928, -0.16856707697524808687, 0.33190162337593343000, -0.02069330709524677681, 0.57947880919016048207, 0.33866322403833648558, 0.33204546002693191564, 0.72386485601748906671, -0.11025426153583406386, -0.27637873310954774730, -0.65127404759752138830, -0.02634955325616653021, 0.03698357634568788521, 0.12154623620586629817, -0.65698312732799490909, 0.23389685342379037958, -0.06842344392905776174, -0.12181049313552021196, 
0.16792284051384387400, -0.54731101866909803721, -0.99040774674814424650, 0.45100178560647108394, -0.34767172826770059713, 0.13675660748668322375, -0.29145016654095212694, 0.52255676395979155924, -0.23657162543300502833, -0.29056485455079772429, 0.09959233359893603088, 0.25138937361859053921, -0.31911367026500114807, -0.02309901535220839838, -0.57136546661336995445, 0.32965074638705621490, -0.03020931513326359855, -0.46786731483690480182, 0.35501428706646243061, 0.15231646898751408092, -0.31895366568708266586, 0.15129929704293607062, -0.65603816215470944773, 0.75352720782284587919, -0.25418491906215479048, 0.29370172539024125280, 0.03072742359411854973, -0.23518486234546398528, 0.26393147004952749457, 0.37146726223233272890, -0.04605017166034018616, -0.11026459070937078000, 0.65285145882287509700, 0.24608731352028911932, 0.16128893313020575828, 0.32767366619754650925, -0.11556779579503723143, 1.22094072723894697674, 0.03288541195173211307, -0.38220816145034663958, 0.15930392061996195840, -0.46343697010506323952, -0.37930264910590577587, 0.02049424196119797870, 0.49648268963256059205, 1.16901458762458365648, 0.76381001003125315041, 1.05059169075396674486, 0.58274384296521430926, 1.29957383063246645349, -0.39081944153130132591, -0.00828338870021660956, 0.67888149100840178285, 0.16805235729229656139, 0.30195193985438301132, 0.63050119820604666465, 0.63733704537240387644, -0.57749374530399000260, -0.35385722793035584433, 0.75668345869658870662, 0.05556119566548189947, 0.60285281734999629499, 0.15614089160276509172, 0.96114926205869277709, 0.93851453738296464202, 0.40280429267986600506, 0.85497180310963405425, 0.74969166145720367300, 0.27537987923550882874, -0.23647886335430295945, 1.65182193613133287968, 0.63383095095227726112, 0.28560325628648525198, -0.30990561385303128095, -0.10786250782120571090, 0.95908988385452542680, 0.37736624132653573405, 1.09052721780124262096, -0.10660060041120339447, -0.32892447107336503231, 0.60817493296617686216, 0.43021505517392788498, 
1.61821035276514990642, 0.54477951722555317815, 0.35666666174014777235, 0.78430076702777129505, 0.16815778845823142751, 0.11874999127484686667, -0.10161527010270526850, 1.67704098849149940698, 0.32471279022608268150, 0.97906593283073506395, 0.91398049191643604416, 0.21323403690008690958, -0.27088573294525453150, 0.74837498606452812488, 0.58580153921460897948, 1.07919759964903771987, 0.96527893727244351574, 0.65470892361208399279, 1.06094344481275126846, 0.47974289775389694634, -0.57465329651194774208, -0.06726551645793121814, -0.40384525149856453918, 1.13319172168718207416, 0.81854368092370854981, 0.43107006058438906981, 1.94257837461359317288, 0.39715216454709162397, -0.31250108601998904945, 0.50068921075657857322, 0.51267154616787924404, 0.72290581264439679465, 0.08486945858838118584, -0.29663651966683368677, 0.85171121658293813539, 1.25979348611680652681, 1.53508784300984890692, 0.70814832538912309001, 1.40882928623625147679, 0.90698698962461810069, 0.06520128622007892449, 0.60243418152429972778, 1.08521232950244739257, 0.43887975755868213756, 0.09468144750091883610, 0.43382060006497535909, -0.32870353696341714222, -0.72342562422811018674, 0.10286397936580260470, -0.15393787968756500462, 0.51179539391266559711, -0.76212200713161559751, 0.56815672534823347117, 1.16462610045649039847, 0.77420827708620698626, 0.47295443544213877640, 0.66362198552546070029, 0.90618235864173979355, 0.20587654685706341384, 1.58637571532921528572, 0.50304274480733468522, 0.01932089536424286269, 1.47509003244507819197, 0.88361088382580499356, 0.68279106244174592177, 0.56418963565966717688, 0.70457189924840935014, -0.17819825776596964761, -0.11070632123069457098, 0.11371389598443987223, 0.71008653044706404600, 0.19359997299505299351, 1.01412949649275740960, 1.20064450529159705283, 0.68307093713609878805, 0.44608459653411147716, 0.40939474607096004721, -0.21849120511986164228, 1.07705145700775362094, 1.32399648254871982189, 0.48518921060314901261, 0.26290570979670868645, 
1.09455144274684057493, 0.66329832939853605733, 0.51502487241697270104, 1.84952501983567363375, 0.63412405532441706857, -0.58498638934369362463, -0.12720178496631617815, 0.21784823048977508542, 0.71785313033006459271, 0.69398488965166227338, 0.22843215428123697652, -0.24661699415875149022, 0.63324764415332412959, 1.16666455731365914517, 0.37103271662991066382, 1.62854185534315965000, 0.97837761064670292210, 0.53433772871772389212, 1.30298468587362359372, 0.14984775239147868353, 0.16680046538045983917, 0.04624082996253969791, -0.09838926588846697019, 0.67423353174909239627, 0.44833469547295246027, 0.94870278554381082259, 0.09011477412278590116, 0.35872783269724967115, 0.74597927445065370655, 0.88906412208582941137, 1.20223147682499797995, 0.83889857334782325093, 0.19639857280995798305, 0.29633108661910012582, 0.13394872813537017642, 0.89930537007226507562, 0.28223772771017485717, 0.06507541202294986338, 0.12513048146999022903, -0.51420080474851326269, 0.82669183979348526137, 0.18407706116731195611, 0.76683410423421682900, 0.27236159250375457930, 0.59352672470201062183, 0.12175177304518453036, 0.49682314357094914392, 0.31933862959148190397, -0.15814531699753264782, 0.82633775945555643094, 0.46511523815113831049, 0.08511086890516156300, -0.38175802102994244036, 0.09903809495245480266, 0.67708302151144195147, 1.06541520173828807572, 0.23364913546208304629, -0.24003830930518754716, 0.54917244085180938473, 0.46317942795325894867, 0.62779123745540765800, 0.03865105035465379868, 0.25848677198291847290, 0.00155442282629963957, -0.65499230361440929915, 0.89744372030487995495, 0.13647508950874137623, 0.41221601238139271572, 0.27702923652431316048, -0.35816066673235491535, 0.75864850069989042680, 0.67907857540806526586, 0.28038372471750511172, 0.08699457210465372237, 0.11588773954575359859, -0.18435272959900173007, -0.23813710840376914324, 0.40616067166699021396, 0.71812990191932279949, 0.87202171480210743937, -0.00394196304526492347, 0.45949421208669183336, 
0.11399441515224424670, 0.01578544977855123044, -0.76977634304948838739, 0.59493053657595895700, 1.05804457408179142242, -0.10965460345403242604, 0.08914082612150435458, -0.09201204880445953971, -0.15708021790070347823, -0.25149571930419306609, -1.43216396822243230424, -0.54739332001712270870, 0.01703196914610338383, 0.51339111362239320258, 0.43673143448093820762, 0.98468695364015501603, -0.51921001504189967513, -0.22850151833314996663, -0.26664513715991156051, -0.32553967882401441125, 0.83671086338679157013, 0.51563421542179765922, 0.29816415795593559368, 0.50117053632075792002, 0.84962569460146808442, -0.03267741466316731458, -0.39945376151391703834, 0.07388418970518892404, -0.87598255738672736737, 0.19340106816875124451, 0.20517324112231416500, -0.09085207951360414758, -0.04427508635881688415, -1.33341759575567975205, 0.83102654649241824991, -0.18790478792572853917, -0.15770166589326092832, -0.45054830173095539170, 0.18488056411850076199, 0.08322096258840833150, -0.42664303977515938282, -0.53787289887621847484, -0.19051767632382013429, -0.49444347588070913790, 0.27849542027323925808, -0.46507611317334673906, -0.78022820949513504996, -0.78648474213990393622, -0.53315855927644606460, -0.58105499980969166351, -0.17333224228235188935, 0.51222466108513375893, -0.57659240367564823515, -0.11984300847394518952, -0.04788045976587446129, -0.20051220226401705871, -0.01083274783704307742, -0.31255905382773557788, -0.99381352665865285712, -0.31084569334385603323, -1.27498316092423547019, 0.10297921465603160351, -0.55042974238424013933, 0.60773084135225385882, 0.50904106416120264988, 0.13646592906296600711, -1.21188659797317210831, -0.24886077549182333835, -0.12449904954125545764, -0.18584442292001071562, -0.83518412859788238034, -1.14853672897300196354, -0.63928607167665618682, -0.29133646858613371222, 0.04412143231880161220, -0.28308176206361435057, -0.96521488844779201877, -1.17540718283114764553, -0.84594334961251183014, 0.01338573217506372481, -1.02404735143833680588, 
-0.16557617856716433380, -0.63615437043058176148, 0.55070920451357818770, -0.38258787859814979981, -0.65367739893604626111, -0.57694885957002395305, -0.52716767464042402214, -0.36928737910911046960, -0.88984956730060593699, -0.68897822848007184149, -0.38993291082631675870, -0.13056741986821612800, -0.45585873287463196668, -0.36360557668993753744, -0.44754929370200391414, -0.80088641268058213818, -0.95025630431723484559, -0.06416838811955027921, -0.83735830334533134511, -0.41267254068397934974, 0.05041642421019609799, -0.21498168180080301504, -0.75025452531453495908, -1.43426249868931687814, -1.04688160199016833118, -0.41544211185661900076, -0.51859832862969068579, -0.24453283056657942884, -0.83145529444733456970, 0.05613704187140888813, -0.27058269219285879803, -0.66052516057898391644, -0.04967363310500094720, -0.33263230602671656211, -0.66402953361687944156, -1.27254415735487036443, 0.01893368184837762591, -0.92869422146093483228, -0.66732815041430093572, -0.33965456772052349255, -1.48758620046650724689, -0.04424553583197587958, -0.99397117822985392444, 0.59147907639285945969, -1.12643041123386766778, -0.73673691699023269308, 0.00375678375478749604, -1.09348318625038043450, -0.69539002553094930725, -0.24365605229906012186, -1.21367244273211460914, -0.49283693071272016706, -0.38284092767356758813, -0.50684889913463460065, -1.03979809095839992317, -0.19289905896394449192, -1.23591596626342292709, -0.09992896929193662015, -0.14470189404088135143, -0.64575705346167300291, -0.61485128052609239191, -0.63013209661839864051, -0.95266195469567915843, -0.63122134787105499409, -1.01105848706594603215, 0.09142568942557105505, -0.79557763252826063649, -1.42128572962394916779, -0.04969528838534675863, -0.66970398980508527575, -0.66346949878505201426, 0.42012805576946909980, -0.59236011640340546780, -1.01640648480558359879, -0.49855586069068164035, -0.45112471462941894362, 0.68481331647284937603, -1.15426191896366381329, -0.43903366705538526826, -0.70953963627570171280, 
-0.19234278438059781990, 0.05621003079617981069, -0.12639883920454220156, -0.64932571362080160338, 0.18127118652588747327, -0.93261186143519003711, -0.87452256849662068916, -0.27909031344600609970, -0.71355120992241216626, 0.18301867544720207270, -0.30318289804925618869, -0.89354496774281710891, -0.62062692027051213906, -0.23997589466504215538, 0.45529132361003654683, -1.28380248176209565969, -1.35630225180059293955, 0.19373940774267606191, -0.61701227638667655029, -0.22996907981293590595, 0.05634378817569174469, -0.42194633953715060537, -0.94948169559356454528, -0.07672660933748132184, 0.17694830393342064756, -0.69862722856941850136, 0.05720548092465838064, -0.80633341027926463962, -0.54886498837083474989, -0.38808228952331946804, -0.74949394631625421148, -0.92297612818782914790, -0.14140335901965322973, -0.49718279551560035090, -0.16903465149972229931, -0.46842226074622544951, -0.52317254041778182838, 0.11592033640718296672, -0.56015153876511480924, -0.34266840878899940126, -0.63566490022022070772, -0.72932252304392997111, -0.66433874166995543487, 0.17360289036878884250, -0.67486392124364169565, -0.87695946803756941179, -0.09249844471771012655, 0.81491004054074300811, -0.57092445801769753366, -0.61654172308179244766, -0.64463604493765325198, -0.78205312771126989091, 0.05979578029258814098, -0.75704473577607434009, -0.11453836023367375274, -0.23411117231350331291, 0.81753015221661273770, -0.24400262921861648469, -0.39544195615696625667, -0.18523537039471940635, 0.07209007023555058513, -0.28480757194405953436, -0.23178555909489118037, -0.53173334352921508561, -0.28882518748441543543, 0.36566886681460769593, -0.49086620968944177923, -0.00005392053749875281, -1.57820763416456855133, -0.38947989234944668802, -0.77623664625153510066, 0.55335154523015683825, 0.15081153974259831063, -0.40393068891410172094, -0.24297661283977109448, -0.42435551405553084692, -0.76159519721459278951, -0.06557676273038465231, -0.91619915326210832340, 0.01131345331409622235, 
0.15271380104717968695, 0.97208703428564247062, -0.47661547820231053851, 0.10496816928053179230, -0.55150210444442260282, -1.22540454332020942729, 0.09385423092486136487, 0.82136067668053747148, -0.21724391924306540869, -0.32919750351170340430, 0.32626385761997278578, -0.37614009163759382082, -0.13362758078851746935, 0.46769360868061649050, -0.00334504022973797221, 0.03603765742757472634, -0.20414388585299902967, 0.06446169025841284872, -0.71915608907066441713, 0.19580459991832444011, -0.32740313679898697119, 0.73762866233572765839, -0.14450339193391525949, -0.32662181933416767343, -0.44846920491641539819, -0.05760980789281087283, -0.25819903448095510301, -0.53911420270669396793, -0.54523032791233505545, 0.21185377552948567170, 0.02693456397615351383, -0.36877834599828718254, 0.09387714561014448567, -0.16700559220905045188, -0.82082847777071488160, 0.91161858323264743920, 0.26360911581053370067, -0.31687265675514292296, -0.07458261330753396257, -0.05694116819056579959, 0.25798506608792387551, 0.36791293538244335126, -0.47813373792787561278, 0.09639519907250117259, -0.03388707122239711933, 0.25218467910650355090, 0.68033995976983274723, 0.41675448146791416182, 0.62619243517912470676, 0.71441328522392488765, 1.15343297855284609987, 0.51534724884067351081, 0.65186615884426846179, 0.39007046169817050085, -0.06317371856025907895, -0.46191422309503837873, 0.43504044744580988580, 0.22758164444004441718, 1.54778754525527784125, -0.23674366908448696289, -0.37488542283948234068, 0.48636387138868264479, -0.78884965219652825041, 0.01745452624311394363, 0.75381168753276595051, 0.51654682418928243859, -0.39379316948140130616, 0.60869783863220405795, 0.15243255890833487531, -0.15973731870056717685, 0.63072755426084148311, 0.05212994331585985019, -0.15011668450727871926, 0.16416765869989008442, 1.17901053800343280642, 1.12589470516057543747, 0.63094159885586775793, 0.30481147688167636867, 0.49076857975218179941, 0.78079546553509315832, 0.15539206977474379068, 
0.07956815918981136049, -0.30710109432086807324, 0.63813219911271057416, 0.43687259765505970233, 0.78615195161894302345, 0.19315030781542022464, 0.33740941824875692046, -0.12865482487067608774, 0.00442710778412236383, 0.28929661359784769603, 1.02394159728092426498, -0.25958086663003465677, 0.68937707268967118868, 0.87191353582148689583, 0.58923372082643676872, -0.06724726667668345925, 0.23882627459495850419, 1.12977645872468102084, 0.44963721254531180405, 1.11980987103061657706, 0.52675639641114546663, 0.22627820377924751094, 0.04293899722825050835, 1.43270952852694910540, 0.23652037677530962645, 1.00709198143278477744, 0.11287430252886815341, 1.05651573523821284972, 0.83811952241456955104, -0.01216514098794108101, 0.14863058309102944454, 0.41293676473826912421, 0.21082991970806763682, 0.00177248042128508310, 0.48118503680154944835, 0.31367076040107544355, 0.21437085666620975255, 0.65192692092212600752, 0.43189686319640951950, 1.05468932048363273069, 0.56888317313309588741, 0.47888702159550000648, 0.53117827224122837748, 0.32799008857510569825, 1.17875010604274166326, 1.35174272181644550272, 0.54946278631913414436, -0.43225164704733021193, 0.80296906233219056404, 0.58758881795619744004, -0.32930681389262472791, 0.55338462738061322543, 1.03929362604454822439, 0.78538219877869586139, 0.96194333870300319234, 1.00377586167916343918, 0.67206868376318074088, 0.60575976019857058485, 0.05233043044891738660, 1.52281539724171288697, 0.63813194771949566242, 0.62460092294082258935, 0.96373069641613717806, 0.79260827151432267090, 1.05239242151947109960, -0.08767352212269741152, -0.16574057069494985139, 0.58893956654669621376, 1.51676214052649394048, 0.59537552789589243130, 1.03536927142030532600, 0.80327416334400147946, 0.94876679282516218628, 0.43151584811340426562, 0.61160070184242421032, 0.07608322915274556841, 1.09259809712105759871, 0.92460090201728906134, 1.65230351971014699508, -0.72930051332118706053, 0.72709370777205073466, 0.09939527441212492320, 
-0.16489324775952651780, 0.92778251212549633919, 1.27175623100549639588, 0.41078472156707662721, 1.19693753197186270043, 1.02324879248183964187, 0.61789989431107472662, 0.26564976973385567849, -0.12797984661437233900, 0.52707792625643568041, 1.05940871078140164130, 1.67277598165879037850, 1.03515750845628407006, 0.68732874440685720607, 0.74513630626716409466, 0.94783290615158055648, 1.26631984815221310114, 0.53888852383233543097, 0.67694217874555806524, 0.78340787151002322375, 0.56987404526379847880, 0.28199503965131522687, 0.70235276791920497885, 0.06840349690184094333, 0.40338357371530042528, 0.53103317875592404018, -0.52387842616838042886, 1.21285695827774420152, 0.57616131826865457644, 0.27284882105920060802, 0.29196557915481535783, 0.41099233769716542186, 0.53915102779018020396, 0.50372338475264266666, 1.21173061912820623220, 0.92773331480576626262, 0.49105493067709232724, 0.74892153167248420864, 0.15200520000596112746, -0.33296622370108897027, -0.68553740439086707781, 0.77214091866423695265, 0.06024273470930313623, 0.14816911102485530449, 0.56528697167416064318, 0.26469788875773403447, 0.89318627022512786517, 0.88391783387060174348, -0.66915565429000367637, -0.91834041731137716624, 0.52075821137667110960, 0.30294920471847114340, 0.45493739783697578760, 0.03779220149536999251, 0.33266528079369717119, -0.06351926868814211646, -0.08312714585297403813, -0.11834462868010992009, -0.29368151255126423438, 1.01510368885513857506, -0.03452246081878396211, 0.86044105743816190479, 0.27732296004555873070, -0.19446164106935587279, 0.22561016554881585527, 1.42976297528745743826, 0.06365633955873195582, -0.26838676159044044800, -0.32425551479796582699, 0.06921796254123513403, -0.02537958498860187762, 0.85873030180515241838, 0.23142192336660827312, -0.15426130014432859472, -0.30023716257286248155, 0.20229094109573447779, 0.55877864279599209762, 0.29733490080009172818, 0.09799010059426177532, 0.20178379093235315755, -0.30143384990358867981, 0.61408914196603769931, 
0.50511285167563191312, -0.46589716206946396593, 0.21274684825404460708, 0.09357839696721299838, 0.00367975678743007184, -0.59658358345794793021, 0.67457171370778146180, 0.32454154192345485708, 0.02164938185763609635, 0.11588512721007053485, -0.17860133355285667811, 0.11603113635092385247, -0.23664952898848332086, 0.24245022548086797576, 0.17225821314736428080, -0.03083987930021153678, -0.46370655799402515918, -0.34462336515864205877, 0.08015112746309818736, 0.48261220740902038839, -0.21215136197250142325, -0.20151246998271574173, -0.57224419883050647861, -0.13652571694367793276, -0.35310904952512056632, 0.31866194143162890340, 0.18037320963150493824, -0.46678188589944363818, -0.55957250782901346575, 0.06351989780122636875, -0.74950997742283576031, 0.05244553813869212711, 0.17306886663898246592, -0.19648616676385091684, 0.40576099131443688073, 0.35228846165031219018, -0.37924914411327326214, -0.56402713311905250393, -0.39780035458782470048, 0.35779400951339396242, -0.98104390186993539746, -0.04470034060478821070, 0.48168785902705935076, 1.16024637958765608481, -0.06206016338010470618, -0.80143145839080121018, 0.17028026911071397764, -0.15324748300046903138, -0.05521730106949093719, -0.20223505664319940678, 0.43182178364023215966, -0.46208399480219020106, 0.31069540295327513491, -0.16930378276956811745, -0.79004324925507996014, -0.96749386971590101147, 0.06610516771393903923, -0.28072618436283119925, 0.02402835131838354510, 0.39480545121644583517, -0.01872390408430696374, 0.19632699134204664726, -1.03262192889126414030, -0.12465954551508426351, -0.99044616704089682990, -0.42701464150207957271, -0.14242428402612061777, 0.42483243166512357769, 0.33076669633611416321, -0.53593227927545461498, -0.24334053320685608557, -0.42122182755724857373, -0.88675345892659052094, 0.83486593399852471187, -0.39822145327019525807, -0.72816984685331398452, -0.23254231536472164121, 0.31119489080373036183, -0.58198609816608981582, -0.26601844356328996355, -0.27430348357975192775, 
-0.04447301028532368861, -0.24964412152814469126, -0.13989758808180358107, -0.67990196296811755694, -0.54765500996401250688, -0.40357026597836431758, -1.30666353833579385046, -0.18156827021237001429, -0.46276894010405267288, -0.27303329226543693320, 0.06396538149128461193, 0.35284805367999133585, -0.86294987206660966894, -0.86068836639804369160, -0.70405876375652565180, 0.01589838635976426051, -0.60919814196034616227, -0.42187388547173676479, -0.88045131998471204415, -0.07582199193283312155, -0.14544859018034345111, -0.88990690764561142245, -0.36469225865207899329, -0.73571233513153622141, 0.20630655673036951692, -0.99934928381158316668, -0.44058107056843370808, -0.98337957671734677056, -0.38483104199185091954, 0.12413424277674844909, 0.42548009137919340361, -0.85629570484628447780, -0.54882464143893339159, -0.05577762292327337335, -0.67429430203769902263, 0.10806300376176136702, -0.84223424424139481381, -0.23531736205487668867, 0.05724240706712346860, -0.23722108080858195223, -0.09891437119118606791, -0.46004898888524881073, -0.79994224599148333610, -0.46161880704342272974, -0.23036219316614364727, -0.56435692424298866676, -0.05722261533142924961, -0.82277339516643266393, -0.75772497051321530126, -0.35491206084643467378, -0.89466177591600848373, -0.99021936112934927365, -0.64861274135492297255, -1.38494776572829692007, -0.26903546235157288491, -0.85908911088749817520, -0.81234954125447922380, -1.47714709751061046283, -0.65981507657065963102, -0.21136858047909989899, -0.76773967487306948865, -0.33843209561061082802, -0.93884242560177755266, -0.01763482333652288681, -0.36627509261754209735, -0.60684054560355971120, -0.82050794737775234466, -1.17289098050105922155, -0.59265286314611398399, -0.35172304828475370320, -0.53430451440528081708, -0.55815408217969608717, -0.40084861704557167572, -1.08410803301279301536, -0.02491021888747546598, -0.68285622422863268000, 0.21111589882712689104, -0.06544098431480449563, -1.35386211766877173623, -1.27833584097058206019, 
-0.61526781285623100004, -0.45200351230942853453, -0.13741448363341957073, -0.91356371675821357314, 0.08126051002060963313, -0.59716018877096943562, -0.70046551338838591860, -0.55486136140082731316, -1.04788947672598253824, -1.25525670854814408450, 0.26337704673977202496, 0.13545685945677299333, -0.32312583700570340906, -0.90170006432019689235, -0.27323573751916685470, -0.58940463960217370776, 0.12367218407082625209, -0.04017161262408475153, -0.37200002209172167156, 0.06207796011242894263, -0.47681498048264608913, -1.66286172317415026356, -1.30984578446686272812, -0.33483175901285977538, -0.02996586221722852272, -0.24720360639910171630, -0.28001137073350113038, 0.14599763132652210373, -0.46881335996769474272, -0.95029569144015701987, -0.83585697446965534496, 0.07179894130804775898, -0.49040708682098110849, -0.77401468043835730537, 0.01232646477994281398, -0.74066586232626185904, -1.01389625738282385115, -0.85120241972426380350, -0.44465705640694774825, 0.29350325799335708066, -0.96657462028117802078, 0.05923209361887715740, -0.79813081785773110077, -0.07576081045070393039, -0.64047786153656605102, -1.00919760336455643923, 0.54335947315022337101, -0.72417268296155179463, 0.34957848448830625143, -0.18600828784791595405, 0.16078401772625161570, 0.02479808865800497975, -0.22537335688654505650, -0.68573946108266470301, -1.17804071341415594887, -0.60226235775474290524, -0.06572245992584452301, -1.08588868214855693850, -0.64327248614103982316, -0.88492890159843073938, 0.09918660282685076712, -0.11747090715318980747, -0.21389331460586602507, -0.71329622115684143679, -0.41039702957492429913, -0.87311287726185360381, -0.30378474561924617703, -1.39939945250303598989, -0.18373379840477910285, -0.89967466105524529496, 0.74219782552596269554, -0.21109744608409386490, -0.67228733370689330862, -1.26668965515244535958, -0.18782845050874066861, -0.00604169236120988273, -1.00194374937913943668, -0.49511412877467836369, -0.58106972033857118287, -0.36155572575240457756, 
-0.51728983930371696953, -0.13936528541024473249, 0.51703716326666504521, -0.46599052428410880911, -0.31556988455345713618, -0.18843028247567225608, -0.95827931614665218252, -0.77939870638649566548, 0.41763350519060615795, 0.66771019539404941057, 0.50744138608564370863, 0.17405255412243067492, -1.58605709145354412115, 0.05677222937558284999, -0.45264221007980309786, -0.91994303636133234470, 0.05552787765234723505, -0.39547070392521277427, 0.16964199310590141745, -0.62645256259737769344, 0.58897271987692778605, -0.72357854340581839736, -0.61834641661313949346, -0.11610408628794915975, 0.73334983919828877763, -0.82206540384703419289, 0.08577714010697949643, -0.17923585085644522241, -0.76219865221422489210, -0.70994568675983282802, -0.67447982374085135859, 0.73929284432229480650, 0.60342658011423189190, -0.45180160036070809992, -0.85129161488759463872, -1.32259471689935614869, 0.11859399855184973860, -0.02089278914344876864, 0.14277795739041868051, -0.02173774417837680620, 0.14426546921739233365, -0.56588056345395920665, -0.80958550931292561081, -0.27628256433479669862, -0.29295471864413097363, -0.16389099540279433720, -0.37520705621996097712, 0.71457390961423972175, 0.82634090016487871111, 0.32761767196845975603, -0.21499999840207878465, -0.26257139762404402283, 0.23426422274665253953, 0.13672184264078030003, -0.03209761977437040892, -0.07605152808646388485, -0.07379717274291772156, 0.11372695349531633524, 0.22478933588796723431, -0.78701789045846348714, 0.09944376694902126723, 0.70346865489446375186, 0.32335604128254991085, -0.13542390924745581482, -0.16255261241221652391, 0.41022521787213356292, -0.03266574823573484276, 0.13681608970298489436, 0.19502939753570855408, 0.58983712271359101109, 0.72705556263018311647, 0.80491365491102362384, -1.19586810442335989002, -0.55802528131394013577, 0.55779489265010828714, 0.31386097140680302431, 0.18989281476863267839, 0.49194537549643146956, 0.30552209144061404089, 0.51557947118158242716, 0.53221110542138472255, 
0.49148375057772808461, 1.19985371683329411496, 0.80704492322327436860, 0.68557895586179051772, 0.62339900205564324764, 0.96936492016729502996, 0.37478618230468346839, 0.27730685631953033710, 0.52824305847309005468, 0.63398077648294948183, -0.06975744468697103740, -0.07112319941317607475, 0.99209244031617971338, 0.78697712695564647767, 0.25379108688831203411, 1.07503068065064155689, 0.04840990587987314964, 0.53697581930603421885, 0.36249361050276662777, -0.47800762769314025791, 0.84951584628797427445, 0.56080050040065310135, -0.21859051583024463605, 0.13345552319488257487, -0.72520409682586484124, 0.28953788893780008085, 0.86801140485305805505, 0.58166764647593927062, 0.06746492638892270799, 0.01593455592026238321, -0.03269514915520094211, 0.26775664727400655396, 0.06279817814172505841, 1.15606024080976332868, 0.05681302049138470256, -0.07196484666444802336, 0.26872285182630573086, 0.53244854624684034849, 0.93588603715477514111, 0.25166506345646305620, -0.29162624455293106873, 0.06818384236205943250, -0.00272854930977761700, 0.46537290275089737701, 0.47212003751861153500, 0.49164629663764730649, 0.18387303544697403135, 0.63733262408682667122, 0.55461410025882718067, 0.67210480214495227358, 0.66490317926310060770, 0.73434057168857713727, 0.40663718042169072486, 1.08877200326880041459, 0.23434856632368405993, 0.43946741605170075440, 0.35292689824542744503, 0.32923726417890297746, -0.23515582219854347379, 0.90606960517228207763, 0.68598919186582696383, 0.00365806727584827573, 0.01470479797291768254, 0.40203257479459453272, 0.08534061071748655358, 0.61277037456890237621, 1.31853156988430564311, 0.54475354626843475181, -0.06928222945031847768, 0.20988003982163960792, 0.26746795139832735266, 0.28852699985506580216, 0.28447968570803999766, 1.26997003894694726611, 0.54205340688356618006, 1.85728859597432571782, 0.01898592622296002563, 0.65763161812251402782, 0.68749799562738789227, 0.42259403581149868057, 1.29643662764656597552, -0.11543584813996643490, 
0.51835549613909193134, 0.34475086968681517563, 0.45864470117785094416, 0.86096955297726163181, 1.42678603130714187763, 0.05744702156111092251, 0.04637221617904274673, 0.35253436183915376478, 0.40608288150364479918, 0.28063690096379939609, 0.93218428259467789321, 1.18369172896719865307, 1.03638406132216953637, 0.17379746505606458173, -0.73733840841551556711, 0.77860414684674883468, 0.72503538458671656919, 1.32402517005365849556, -0.26629026298566349507, 1.00213965742861721431, 0.47679519191268304734, 1.15724060023364394567, 0.65155209759375642964, 0.49950442872958372709, 0.92013100665118230381, 0.10437259592236586281, 1.54774027368193212162, 0.89047713230443781285, 0.28679899841997585908, 0.16390695820375306146, 0.57285325931167396796, 1.99654547013740568673, 1.08524033623998250597, 0.02488887549185458958, 0.68557668399425408801, 0.50137353033476694808, 1.17526156277828652819, 1.03412184125869233142, 1.11804925458929593773, 0.22620875313159988229, 0.28365894996271168171, 1.09219452606212330537, 0.50841217653441628244, -0.39349973929880366352, 0.46126768753535501988, 1.16663617729228086262, 1.12474006325244224591, 0.61919904989592922107, 0.22383939814378583932, 0.09760166669731729083, 0.31194815760897443013, 0.52258537415636507895, 0.15297029992424188150, 0.88678458134298043802, 0.48883779182578174227, 0.60914309105903785557, 0.37557054478717671886, 0.06005654534581583714, 0.25358764803017941336, 0.34170362957894473421, 0.24711967526392902839, 0.37071340076327430335, 0.22634809413452669502, 0.19575658211183755153, 1.25554313924362537236, -0.34515820857510948194, 0.49693103559441059724, 0.85044948592555902334, 0.71204115177618243493, 0.10682179823135123931, -0.69618460646852065032, 0.60912882539465562637, 0.59357781711261903279, 1.22124745616878049859, 0.62506266661129294970, 0.44177255977818347121, -0.62598995780021171598, -0.26826640373320864708, 0.50905710598772235809, 0.41481064203395084933, 1.04316357703706263926, 0.42134826346159187649, 0.47195738394482089628, 
0.26302799260034376294, 0.51873977587689357183, 0.22077329716079036048, -0.06504374783474986543, 0.42555322722269145475, -0.30860781134177406315, -0.08815278141437055126, 1.07745176042835799635, 0.72261000425525701019, -0.17857423292535945514, 1.43846565726275277264, 0.16605437753689142322, 0.55927181175946261149, 0.18952822647143729995, 0.04464138195681935373, 0.04151053377360691687, 0.10959287894374268590, 0.32882490269412106842, 0.96886710120333841001, 0.01832098542699073240, -0.00814755411176298905, 0.64634685142991754603, 0.36278580337204247019, -0.34119937236498709687, 0.35495129194854319055, -0.65207991697424050326, -1.18531684901667500043, 0.31492226848349547108, 0.45425795031708526839, 0.22817654608489987278, 0.25182819524949207057, 0.48920134210900356742, -0.18234598902200260806, -0.23897952279156758904, 0.29174653520277871177, 0.48122199878913018978, -0.14640404431699347021, 0.60523177906932013048, -0.70898458226282057293, 0.27265410077866636396, 0.07123778748391920712, -0.46578955734387994525, 0.36633011434316425925, 0.55855940125443592770, 0.16681595918602168394, -1.52000553903962765112, -0.39044640428942978261, 0.17021265148387668131, 0.03892543510151335456, 0.07430784959314171156, 0.23218778881412663329, -0.46403160506127982821, 0.20929156064001580573, -1.47524129457931296727, 0.17907299169103785275, -0.49382241764693052266, 0.45230827072826323976, -0.94563447267385558614, 0.27652153485205155370, -0.28175574991082641407, -0.42598991759978188654, -0.63084436216297190558, 0.20154165498293924452, -0.40954123276889259353, 1.77923899735045365489, -0.73786174873831056331, -0.89646552322357031528, -0.12341118253516036862, 0.71868535615028994457, 0.10232478166910724537, 0.44671051198005473637, -0.74831011341973685536, 0.42898768723478503118, -0.68399924227535202625, 0.37654275500182565750, -0.20454732202400333074, -0.20716655816744486418, -0.45091772187347589629, -0.23466745599461560912, -0.83313812856590518940, 0.31559302878918005231, 
-0.97293766260215830410, 0.54253407937581110687, -0.21266159155136193926, 0.13481519477391040729, -0.66247053699990288145, 0.01727171911595837273, -0.74909467440450161480, -0.37457717915031152334, 0.92612961155198170182, 0.61333911516683370380, -0.43261277411299092766, -0.77964489022463978607, -0.73465238613458772043, -0.20847933032854223945, -0.11327795300548998414, -0.58658206993013328745, -0.96637592132685001811, -0.27737273552049868108, 0.26177343128438840258, -0.16784850526392680603, -0.25222571373984559173, -0.30061943954212622110, -0.40597126771418146074, -0.61479553918946672209, -0.13675895785544028849, -0.78512996781812649516, -0.24072142787082964621, 0.36055050323009374402, -0.26279899493461311710, 0.41322987037643643671, -0.10987922011180067949, -0.00681339201162295405, -0.70026548457276005522, -0.73507594892703864708, 0.17402330747967248437, 0.12615587395244115543, -0.37941480256890353084, -0.50058521965964963840, -0.03365095980561666478, -0.26345118893281460615, -0.38631634512506446422, -0.22545149826384319747, -0.67077215711617921290, -0.46879536593309151238, -1.05807056782024577757, -0.67742492436260626221, -1.00671440465981998358, -0.49524124548469938834, -0.98897191981327803312, -0.42253517095591131003, -0.23267582027507305309, -0.71255762071027162197, -1.05852820176401429819, -0.94175250938865451289, -0.90371214491212792375, -1.22332640971659989404, -0.43801119267503219046, 0.21822773954472840519, -0.11029014805428072332, -0.34343242695358833227, -0.38023291000808401296, -0.69854404884983556379, 0.75790030725799317768, -0.54625411125406542201, -0.65857373214094461211, 0.00637134131047079055, -1.12926753369536569949, -1.07447960361990935141, -0.64005225781011187980, -0.29118701869448376796, -0.12575747098560680071, -0.42587631131466863188, 0.07698924484405256230, -1.15178360535631796324, -0.45058563250871119799, -0.63447819757467827095, -1.01280649926530186278, -0.26049067593033847778, -0.15470450759387116557, -0.74191788113963552664, 
-1.15716075231603454654, -0.47932536540257753011, -0.14913606404157825347, 0.21214238372128646493, -1.18276156625526862598, -0.24958308908082593724, -1.90530241728302729420, 0.10679560138779220502, -0.31344818779045929835, -0.29894251017750722577, -1.19288488198313746835, 0.06141631502468536397, 0.42264826370647567977, -0.23519007556942411741, 0.15219420075530054604, -0.90413035711960132446, -0.40499911499573798546, -0.00356459229460981408, -0.92021342492310598260, -1.10162917797203263603, -0.83320273940282951752, -0.45245777981558499281, -1.23386492924998458776, -0.42641850057823660336, -0.53422691726549276225, -0.50601026157548933959, -0.48612041145777007412, -0.61846119464831683654, -0.99856082050144334694, -0.29203148395589861863, 0.64338465528490429435, -1.32045899264625621328, -0.12192721071727768978, -0.26184933231252704999, -0.36072488238407779404, -0.02661506079833453331, -0.39288669370494261335, 0.28833514178362318336, 0.14651656669761770058, -1.68410925397477395649, -0.57786326105753693660, -0.72362199687849670937, -0.27653554950571113125, -0.86441446582356329387, 0.24537917406640297191, -0.51907580245778450756, 0.20816612233608422500, 0.03145792081784837269, -1.15173693688325839979, 0.17545364183302136762, -0.50614108836963123128, -0.66529940741243676072, -0.69594072423293662499, -0.27824455958251098631, 0.16275935075232272897, -0.14831716516713305820, -0.70576177700029063544, -0.22414038858309720537, -0.44205801902524510805, -0.73690865202858457650, -0.70397878755875731294, 0.61803287969459219653, -1.06073561312011666047, -1.39663166731524812647, -0.27370680988549161983, -0.84548435136284505464, -0.84740391550912952390, -0.26309489938117824881, -1.08426015436129019598, 0.15281799727881401063, -0.12934857160605073290, -1.29883411018401639936, -1.09484644562065192019, -0.27635169767334599733, -0.31239665649660774971, -0.51165436180935164323, -0.35468191762027739822, -0.60486936029773585854, -1.05557480789569413382, -0.13185677412788016083, 
0.22612465508451823348, -0.57628622041137500531, 0.01695254757415892710, -0.49133795640918298542, 0.36393017363707325096, 0.18412235538378324273, -0.02529587391448340217, -0.91191442591036708798, -1.27833200294453708246, -0.33421098740212312750, 0.63762308769580422485, -0.67150259284760616119, 0.78042424036257496134, -0.46726246976247409748, 0.06209495275158732897, -0.49392788777268425982, 0.13705728649682108466, -0.45035111747297418283, -0.18689937394311861851, 0.08194575631634382074, -0.48101893291348557513, 0.04826437015835710609, -0.32522908126551247632, 0.13032747786515080923, -0.67178070499891417988, -0.18097758827078602728, 0.64892404967088479495, 0.41754301729200560267, -0.55107611374043630192, 0.53318516608261568024, -0.14764429300678472146, -0.26839410614066072647, -0.98134125606270750097, 0.08071336738302867242, 0.43001431840315856858, -0.82131202463919805723, -0.32141528912388772632, 0.43129499999519133047, -0.03001579594789419159, 0.02854397618398535341, 0.50881118717811324448, -0.12832441295878990739, -0.78854604707295872235, 0.77029746022352463264, 0.46810216573196267165, -0.05910862388816669893, -0.28987537612510777540, -1.12901742235124302738, -0.19150354063854524433, -0.28387599014656400565, -0.38510425730853764525, 0.19869697973667252433, -0.44840222884262936898, -0.09234883320199208101, -0.26312026154411227719, -0.35699985373010401091, 0.30407277316972614045, 0.62408471113819607989, 0.13587361554797333651, 0.34712785352799280236, -0.98575554991770186763, 0.34792912424419691897, -0.34843543126941417531, 0.03792673012872253141, 0.15639608035029844979, 0.50637349454278979266, 0.16839668335321272830, -0.75731324992434712051, 0.69307650933293640794, -0.06503648042868283297, 0.44952084649417389794, 0.36867159250774161894, 0.75269735417193606164, 0.28029954475163287864, -0.48957081945540681378, -0.34338193331512856243, 0.17140751494491798801, -0.23910087082515771306, -0.29674667688894745821, -0.68996875722852846202, -0.82667167118672180148, 
-0.08910339402552103216, 0.10745279330419771346, 0.81315770088113514813, -0.61352326964070891258, 0.70247340265876245802, 0.63893222575411301456, 0.44616167929429045858, -0.77152609834227792174, -0.06678848492282668525, 0.27321350529238075389, -0.30931329805468621430, 0.47812040216205953236, -0.26840019295708061886, 0.29593334159151041263, 0.28385819225852532144, -0.49794994732546021821, 0.93462094745566870468, 0.63869817152511798586, 0.33738833168861431799, 0.02260308381647624376, 0.30773794317354485894, 0.99492813936674373565, 1.31193743937484397222, 0.40197335947824541780, 0.62910012554327954248, 1.03348535588596268653, 0.51481301653649458316, 0.30092055364254582939, 0.17540721724754382294, 0.03156933275216089463, -0.55959497760402820976, -0.20704083738011419547, -0.33066216717563245986, 0.04639160010576809379, 0.22937026487934050745, 1.53715999495540689423, 0.44735849327560150979, 0.16635290292955390301, 0.60602948773428466644, 0.94525353712619220037, 0.52139392427825548282, 1.05972500108804035257, -0.25580820655816216558, 0.71259326078180729169, -0.06858125812463805593, 0.35657213892722500193, 1.09451373097149140534, 0.42448438120697168374, -0.32464117389470953379, 1.56327198803940214944, 0.98590261426182923543, 1.12038894774103336971, -0.15692115468091627495, 0.02378725583988672287, -0.22744744279582485014, 1.33421505474147217285, 0.13868294040009876422, 0.49180184313610841862, 0.43861674628883762228, 1.43409301808806066703, -0.06889451588493789025, 0.65120035261441155949, 0.19080573401148326340, 1.38003508777166716825, 0.98910630750781858289, -0.24561193638777700787, 0.00363215441380004833, 1.27600204860118537908, 0.85665604799737027353, 0.44436796480007045851, 0.03863071089793057578, -0.26519511349846203174, 1.32961482271779196651, 0.33771534585442442022, 1.25642963337897128895, 0.55560430167454100125, 0.97352072949809342894, -0.20113838769142433627, 1.18202683462076429954, 0.36466104852098069067, 1.06806019564706211611, 0.86304209706506807009, 
0.78872156473820820288, 0.36196167830117698117, -0.22173728699621630955, 0.72072405360808566854, 0.74708994935772055079, 1.03946094938894706416, -0.09782991918129646525, 1.13219012165110299861, 1.28806436913297317481, 1.41492165311369411107, 0.06783874107064280512, 1.45888121769918477355, 1.27345582326829931397, 0.45252157876384802027, 0.77859742339569759650, 0.29384417036840959625, 0.74483509775713563350, 0.20077119030491541452, 1.76029362610267003930, 0.93966216988834694313, 0.79426121463777188403, 0.70676776880864988772, 1.21640384009571844715, 0.17424906728732186822, 1.29326660399277604263, 0.78304686546440827044, -0.14867208671659515318, 0.08555729370825138069, 1.36126587212312255559, -0.00316187009500834559, -0.54383202308016642679, 0.23471251090906947168, 0.23296372808973042634, 0.59587405012825256367, 1.32492626070440699948, 0.66138105311899775263, 0.63861716267046131357, 1.31716733261947727129, 0.50295301940027370424, 0.29082322140506117192, 0.66938683125218245706, 0.94437746736750671772, -0.15520835392291598698, 0.35579131361665705668, 0.88964660623929647798, 0.57369431068737775092, 0.59105202496507480703, 0.48429897913381253494, 0.95803116150454525890, 0.72356435799539642328, 0.35155506947380549265, -0.05455611196564291721, 0.37398175488040441561, 1.49012343064488428368, 0.52489070221840983255, -0.85265668481053935590, -0.43288666997786950930, 0.40950489751272350913, 0.94575726777147006707, 0.68714779095071309989, -0.06206205821115473587, 0.51631927377915476374, 0.73501983356752165211, 0.74510452132927018631, 0.22668043754836159076, 0.67010199031469763753, 1.35141750692932172129, 0.67962560891057799406, -0.44851399592645924130, 1.37528090954953730041, 1.18640442122932721070, 0.71446164859046357165, 1.08430868365660248998, 0.94453992956372001544, -0.73207130518869378832, 0.56170993577163885746, 0.56044211263239862575, 0.20751314285535291115, 0.86492704676057630131, -0.07747345867850480294, 0.28208109388286861652, 0.47561644521171370092, 
0.48886050342220588316, 0.80693490416241975272, -0.14618132468410210967, -0.38637619556587560199, 0.73065395490471551909, -0.01657510931961803102, 0.39155599968852788928, -0.04603486200300860309, 0.10656988947500761755, 0.48509031030389920813, 0.63190110427320267394, 0.28192755789667817146, -0.68154769203649778753, 0.11114186605996018153, 0.05995814907816426897, 0.14322409366302679690, -0.02576152733513409343, 0.56496743023343420909, 0.33378737011846570937, -0.69442741858960654877, 0.01554113806072821014, -0.03600821139580406838, -0.16781449651393443023, -0.20291951353975440364, -0.64671451763825071879, 0.31565307004304460348, -0.50233317215299189140, -0.91432298896817898459, -0.11198666417288088981, 0.90588853499226995591, 0.05278647644172582143, -0.40103922152371351828, 0.51606480994414150043, 0.09087243856845719781, 0.43624094656973022577, 0.22310284508305169848, -0.25933734788221751710, 0.39592791481831945966, -0.52607669626680753083, 0.36380899468465649482, 0.88504107844600166111, -0.47156909708183020191, 0.44279861092636824127, -0.01727523114722301356, 0.09372418667571799644, -0.31601977405748721273, -0.91930276942310173993, -0.21505070341731061245, 0.54739123881397777627, -1.03299302084650723721, 0.39193431387560517809, 0.05952142755937335494, -0.01325013689036592252, 0.11571353567931272344, -0.51919823248230823065, -0.37490456811051525721, -0.14790545624014020398, -0.91785845347943884054, -0.11496990475782772889, -0.47630782810362554125, -0.21152302931524169516, 0.40797824337477306411, -0.07071402223828944511, 0.76838181476217481958, 0.36474672475725983256, 0.15673536889562778018, 0.36260371161439980403, 0.18134590280800400031, -0.16926352431833907586, 0.00972666540947945968, -0.80624465275018941668, 0.12746175382212021621, 0.91955453993725722039, -0.13118585554638417801, 0.10876634549331946900, -0.06004959982388856365, -1.33122350492212659923, -0.22988592192977055917, -0.51011841540017321961, -0.57526514313792365130, -0.30761546020388830236, 
-0.87432840631301211864, 0.32734549570285764730, 0.07182987053723297044, -0.08850214796837158060, 0.60677608492443868560, -0.37917874394345285616, 0.40645104837625178895, -1.01457982511606648046, 0.17699292043455039369, 0.26495081331485226261, -0.45474830500997981408, 0.30774356921029277778, 0.39401741392178879497, 0.22665575726082837216, -0.28881095514636545563, 0.01126874578850395325, -0.34634168051653291265, -0.27448779644462345528, -0.71883707835340582282, -0.11338795637649146109, -0.29774610901788794992, -0.46919690286831305936, 0.17050398621143214584, -0.26379949663141910587, -0.65533280279345296204, -0.62636651764860351577, -0.12615885873377330473, -0.31064233046394701399, -0.26063777618745692655, -0.58763335960234364119, 0.38404885394865917458, -0.27214151162144950558, -1.08337036771885308895, -0.61700062481566697592, -0.96502988731402061795, -0.03818724785451776960, -0.84846894887232937599, -0.73924056851716746230, -0.64650757087000843004, 0.15443246614173777864, -0.71485611247280345104, 0.53370314648082128883, -1.08136092630748770205, -1.69631568820195477443, -0.84763733052163270365, -0.46465593106505581611, -0.43116819992876664891, -0.34297405041502354583, -0.45061415384774428095, -1.07756876606595253243, -0.73336656496419871942, -1.15837764687508171235, 0.30778023927076403332, -0.35037518042413517971, -0.88168328795324013569, -1.02455244334704986819, -1.27514210418571960481, -0.03443337265248530565, -0.50367191096677377260, -1.01201694890649251946, -0.63430381645161904025, -0.45515637415543813704, -0.27304864917678800529, 0.18082669057423939041, -0.50610760160475176672, -1.02945891627439034544, 0.14813589405009530697, -0.75273529668568861517, -0.47780492540404784219, -0.27910824595945338267, 0.01928318733085510051, -1.07590070522957992338, -0.39023194355221962892, -0.66057007982875193264, 0.45054425709556888968, 0.30305157079164146694, 0.05480606925788533701, -0.40316307562256481489, -0.98780496008975082756, 0.15246269462252060034, 
0.00576137186251390876, 0.59375151925889990689, -0.01331190816625993367, -1.09783885778878120831, -0.81764773995084061298, -0.71805356390993346771, -1.09985223489712202749, -0.73192785031829843412, -0.56278654988197818199, -0.51946756868729315748, -0.07057790611149084015, -1.37399645297030126301, -1.03894657138560742027, 0.33085461719890219001, 0.31687936963048379813, -0.21162633179041706599, -0.39421960565826053013, -0.89143064804119642730, 0.22303778320110534406, -0.27415803542517858116, -0.21296627309785415294, -0.12914773632326931319, -0.08747503797385458579, -0.28238894641527567675, -1.62000266409334514606, -0.49289173404751723817, -0.16458098825443773228, -0.29714801244268618241, -0.71101459124237165188, -0.91649897656535328494, 0.28465319920509946083, -0.96579828990825400403, -0.02020711439599753856, -0.12140973286494305405, -0.18658789394418690000, -0.29096907543914796168, -0.14314503424822461186, -1.02562545507363811481, -0.53789703243979969383, -0.64172171846714365362, -0.78406389291529876573, -0.96152260991605786877, -0.44203676733266161758, -0.19309521785650601844, -0.46230167018793860700, 0.09618798274720818942, -0.40352368464418097993, -0.54456998475122264658, -0.34058418382018507131, -0.62224544448083818349, -0.55820149525782347677, -1.15049046627010298494, -1.24096053935011640945, -0.96544446712069531547, -0.31553321329062045120, 0.23987567803706766334, -0.60419523073869108032, -0.67172351588886503038, -0.44627969981765203134, 0.08375978841039932732, -0.59956577791208776063, -1.47009505778847815449, -0.16674578815027846224, -0.29548152972428110008, 0.85050977925879567287, -0.51949671865353830302, -0.29274502557404502578, -0.72084029684927664050, -0.25146401224336067948, -0.52103766716490118593, -0.71731651703491228211, -1.15304597682779785828, -0.07328300923106417786, -0.38222687800003479763, -1.55738220916802272420, -1.29687113365825723577, -0.25685309230803798908, -0.24976906113732214032, 0.52524599699670571074, -0.68904077193145840674, 
0.33169421933806469971, -0.68883327401067517837, -0.34315956323754398527, 0.35908241192473827619, -0.35114631055595579845, 0.15767430115320374417, 0.10134690448812616603, -0.59311459228594110016, -0.26977257586516656396, 0.22698688421574597651, -1.06854121392182310757, -1.07660906406201184460, -0.27958002716904684082, -0.26261532792578934359, 0.06185153168157053738, 0.71192654740958416504, -1.13355180564679081101, -0.26257078981649417670, -0.85879020509925618754, -0.94363944123965826094, -0.76888685089830710595, -0.14504664402614220187, -0.44006359592568294881, -0.16527072670101980445, -0.50976390487316858291, -0.01504830019187691770, -0.11326774158152094762, 0.50919264889302273680, -0.80850968043038906607, -0.80471764676559720542, 0.21887691024726585454, -0.04657190269351341172, 0.18809697753539661447, 0.56345289134336784542, -0.04933550018533727766, 0.34677712840687979767, 0.11036034704751654378, -0.13776585253879519710, -0.24507018595981580611, -0.68233910328791957944, 0.28937792614719271000, -0.00517159549209669778, -0.07478134642911232755, -0.44495198052713980097, 0.06570206855153493120, -0.20872696801226595098, 0.60458655095861357776, 0.05655270284786981239, -0.65862575041977977630, 0.90090984728709988527, 0.15427787489946157007, -1.80047287297689573293, -0.12886353004249914700, -1.26308353323951116920, 0.31937004593161488764, 0.04784723812169407176, -0.80542301214224099049, 0.73773999097564657568, 0.59071427192057879285, 1.37240884844418831179, 0.33323351247775939710, -0.15236453252341558562, -0.03944457031550152593, 0.14678772271092499491, 0.23376272714386212015, -0.54095703708927589393, -0.30318387133252688859, 0.02300025041755614924, -0.78032856904159608735, -0.28502543319659340293, 0.87385629694986421967, 0.64291949096780398598, 0.00591414773818852227, -0.37528070873629298942, 0.52034660232980833872, 0.18175735118191455042, 0.12083395852273584592, 0.71072587550327348538, 0.51020306932156833657, -0.01815537183607940364, 1.04408022829544355758, 
0.29774915964661397005, -0.07601463389088590483, -0.14237935969098397537, 0.50747499583518562716, 0.21708492673879600798, 0.29123078226692605819, 0.52076032735162869347, 0.67426440483606642218, -0.54502786700826733046, -0.02933075231832069085, 0.77319626892098458626, 0.14141384500403514357, 0.66589426457991029995, 0.35497086982868680805, 0.25112613238529285509, -0.42960577795938847956, -0.57764493074767220282, 0.10596514085456833210, 0.54907543862010954605, 0.69534332101949325455, 0.13830159065113820094, 0.48104450866014974997, 0.08041266312757563983, 0.47659038990087509990, -0.20462253886490616717, 0.91478000713491880447, 0.77942942266972981002, 1.20422466364599145550, -0.33072213481981804772, 0.08526028544147082333, 0.15247728119786596590, 0.51300812260050099312, -0.08048530599224384963, 1.46460476242846815431, 1.17253732898657592187, -0.19012198929076801202, 0.15019180477302074284, 0.80624952660461535192, -0.07919367649573333390, 0.02389169912819028019, 0.13543289998601820701, 0.15350285074466835100, 0.52082311386810054010, -0.70657208523538961309, -0.18066674538261723315, 0.41703693253451817879, -0.12297328142840130427, -0.17577663942491855709, -0.33242936555101015861, -0.06111873458378475066, 0.99094121347335650807, 0.55439311163153870421, 0.72497128885631545447, -0.08777523688642835387, 1.26468109040020237899, 1.33763304322642606614, -0.36384540955850869226, 0.50812865838944398789, 0.67307243840878117069, 0.52020308637370415994, 0.75379973344237216626, 0.65138244574071713089, 0.38831537876950855681, 0.14509770815565731228, 1.46142565812923086455, 0.53763204955200027957, 0.27878316362994087774, 0.01606418269011211652, 0.49747920179572924226, 0.44250344809259611889, 1.25065792617170701284, 0.31540406310058766159, 0.87070132846651748793, 0.16490649732728984711, 0.37342599689678690478, 0.71708990482773804498, 1.75686207827923390745, 0.54487142983130643081, 1.02704262837197912717, 0.74053836180854915039, -1.02432506878564444364, -0.22081319328007276148, 
-0.19816867868927989793, -0.20332182877297522161, 0.74925812452154816334, -0.41448455429322672572, 1.40627112457170566628, 1.52316307899528613490, 0.24106303095908887535, 0.72894200490218086230, -0.33145966823512351684, -0.14172849751549088104, 0.86052977729471136215, 0.61156667126855002170, 1.13727357232086800920, 0.44489004599039239096, -0.10699324513065222586, 0.56455005572604577413, 1.04175495773043813585, 1.02141098526806928248, 0.50759063342765786686, 1.37148959212523635642, 1.03319316631130120321, 0.81768211567137039086, 0.67015100844282604609, 0.72981649052830821489, 0.72665397864112091231, 0.93555919148226762694, 1.01191336425091482631, 0.21323075842938865598, 0.37251867181117248595, 0.92283095556239747026, 0.91620813462181627251, -0.27235545111370240701, 0.55885318847621090477, 0.32234580105850657583, 0.04169231434428866123, 0.13605890476106569098, 0.07857949175758316418, 0.13438815194241349182, 0.07649005138865494491, 1.07961019968133720148, 0.14551622632466504337, 0.50095836738235355323, 0.84696311646492705716, 0.88985683145058835919, 0.81207511022958067670, -0.29047941760174855741, 1.52154047648796098002, 1.18609259207615869158, 1.24147143249571478840, 1.32830110324834160807, 0.10599652429795269892, 1.39011659364140460760, 0.98247086829290442012, 0.06913215987691767594, 0.70715198246155519524, 0.00779946960670141465, 0.04766668375511051714, 0.25418417175439855793, 0.01989943256407444849, -0.05458969493066501144, 0.27180832484604677823, 1.41784285295022893791, 1.02839635456968681027, -0.37932235535564018392, 0.26704106147270828542, 0.84473163398931272461, 0.67123346605862632686, 0.21869796812593439439, 0.86987592771418742466, 0.54799659425472946328, 2.36320305302203781395, -0.22062169770541217861, 0.95758483701623009487, 0.89360676944037853442, 0.17211738299822976206, 0.94113083912150186094, 0.74673089961165195483, 0.19834014552043227608, 0.50031617911715220437, 0.74558544211606669894, 0.51743364622538301489, 0.31769814644764293732, 
0.68355752328478536217, 0.32151759908601496019, 1.02142076216734190020, 1.10437801285216341540, -0.38183529795019205411, 0.17120229039097506485, 0.67787340868866907684, 0.72480975925500257251, 0.66052403917969060299, -0.11520845591626077686, 0.91868521551001736114, 0.52318914236745273083, 0.48238216481348950015, 0.40948925196172158980, 0.58415062302299280983, 0.06538055602943981803, 0.25726658711221050968, 0.75926281075830803768, 0.67281629341392945598, 1.31354865282771871016, -0.58622323589282976375, 0.10764209526086343516, -0.33018532702174457905, -0.23557439006845659613, 0.49762400579391397049, 0.43436146567378464134, 0.13246403285289096163, 1.33247546353195112090, 0.44977525465443568553, 0.73610854453687035726, 0.03328647783589916420, 1.04140658531356811167, -0.01612341362650820953, 0.11608876668199388793, 0.56613228435798246263, 0.05674125592601308998, -0.41420995067181770599, 0.77426087211157978984, 0.43661651131188439212, -0.22064815735002082930, -0.41922906983310770368, 0.25514496899572725042, -0.21367522264229085738, -0.14102092463460297322, -0.71222937787688489486, -0.70787954024958121835, 0.58870125820363949920, 1.22828843906329954194, 0.35102477298869710909, -0.09180164278387992205, 0.62562677755085993603, 0.01806743807675906255, -0.29224737674476014559, -0.37540776200320447042, 0.79640174936821683804, 0.80425227881894401083, -0.71385338121456354266, -0.54683163376401577871, 0.12740386756735036511, 0.20759702014260644765, 0.10256644134797097967, -0.12500551056084752144, -0.26889233244286137037, 0.04449151540131074389, -0.12900092314437977548, -0.54533078236499377134, 0.07435738117510856671, -0.61290136891792690310, -0.60205762115783134636, 0.39735274567025113956, 0.27989036858888333148, 0.52998574351956428963, 0.51441295837980915984, 0.86950394295991562110, 0.08297244566620466666, -0.29036892557822091554, 0.45814403593241914692, 0.06274781349866656588, -0.48471874474516241094, 0.24641590607968083537, 0.17898241621312177552, -1.07495012353866581378, 
-1.33463618906537528908, -0.41777975409193568446, 0.16467239341351053161, -0.66097319544744892461, 0.27828337388116430873, -1.10818044524459913269, 0.02871880860622423592, 0.28571975251297043741, 0.01432903230543458584, -0.33465221345662921060, 0.73638958876769966544, -0.25098137928995334356, -0.29629091483561420528, -0.46550032489633297317, -0.45840182771513704463, -0.33119359160565875477, 0.23869913807384340876, 0.26653716154331474897, 0.07449651298415335687, -0.02239559229003232343, 0.69325352297411890756, -0.81363131548324396647, -0.16779575156052117801, 0.15559230493812578611, -0.19228251287001596470, 0.30501502585685741353, 0.37938041396414290407, 0.37327155320047444853, -0.85445698050885643404, -0.43980317988083666991, -0.00868287165299591601, -1.57693336372284198532, 0.35655773632598380951, 0.50147731527123817763, -0.06435643107406122110, -0.78579965073210900073, -0.40658458983811868181, 0.57613911966039155210, 0.19874184487696988644, -0.01264674339278221149, 0.42094516293904232773, -0.20918962708945693096, -0.65157559375046081307, -1.26164113099699393139, -0.51992614020226157745, -0.12077571699657591031, -0.03107976655886074413, 0.22099016483827338408, -0.41831209537846553159, -0.73457549605684047123, -0.56814988129312016252, 0.22462265269172904114, -0.29728550759657229596, -0.96958951314352226358, -0.54182175446712788069, -1.34708222959571433108, -0.97079523463843631070, -0.58217350909456799801, -0.89610029617934638946, -0.07918387837225365766, -0.71709177263984180684, -1.15176978883291858402, -0.41971226599020672321, 0.20224057186903388050, 0.14464060937799716466, -0.24515240266265997393, -0.73189757024143464736, -0.40968336424402368445, -0.22005174786686998445, -0.33248587101627002927, -0.11249851083580103550, -0.72688020316567536483, -1.06299105490163836585, -0.57420803957781074711, -0.58997503731242106895, -0.03017299546285223677, -1.29758894869024699048, -0.67962765297621130500, -0.48652025867176240670, -0.84425796878772740861, 
-1.24612201119191556842, -1.10414785673067727956, -0.16812727904381019606, -0.28259008668392648911, -0.25687049461306243892, 0.22936039400719732129, -1.39373961095019671674, -0.63248832458859904904, -1.26130156161527096081, -0.04951846038531115823, -0.35986232960532715808, -0.54167806603284862632, -1.51558123134903599549, -0.50841324841302448778, -0.91347799005850682796, -0.19170389023447920174, -1.23208444325913735184, -0.35972895760563194933, -0.65181989062776035659, -0.74703638831235374962, -0.73764641995166713162, 0.05829062045130717351, -0.86842589040103446152, -1.00970429989819110439, -0.14631679267384278820, -1.37890946264223357431, -0.61718500260795816637, -0.46748452512607785447, -0.65018647205005786205, -1.49396116909693144059, 0.11911987240866217519, -0.75089284438697279089, -0.90404765294252542596, -1.03837190131528611658, -0.46492710790665037202, -0.73079407144126240503, -1.12835843663925272651, -0.26908412882376808728, -1.13866370257720905812, -0.07705231242622778698, -1.03222125020857191657, -0.66844823093787053558, -1.45547153747040169947, 0.16058415961523120341, -1.29397706778379584236, -0.43320281893123679939, -0.84037966187004486951, 0.63094573072151050042, -0.74755111262020090823, -0.70328849082361000189, -0.35866874218814664976, -0.63534342337598892492, -0.08637688381666297488, -0.60957174170076555697, -0.53145169396959979213, -0.49830110357566120838, -0.15164303253471878019, 0.11591515165350108063, -1.50655995923401575531, -0.58885407145073964674, -0.60754250330733294838, -0.54253926714200551018, -0.73495950289590294968, -0.20191437931665118244, -1.14737030718619736902, -0.87467841252617328784, -1.19671132167625682285, -0.88527383340645648957, -0.81924232920269512981, -0.51786710311461026102, 0.29365018843318224651, -0.52492730695787959139, -0.49372397917029198222, -1.12286563981230003151, -0.64819551676870346135, -0.86449357624342559880, -0.62287437121622624137, -0.32053458984275406962, -0.69171439739527218471, -1.13328680689938376958, 
-0.46491069014217917266, 0.02384682147538186303, -0.45227471489277543126, 0.41013835403750686925, -1.05884266098215706897, -0.88394265265068727722, -0.08544106871351758725, -1.27905942465029376365, 0.02181850551945013761, -0.27034677968531450887, -0.30237401666920638021, -0.21786919614472288753, -0.43471688899738636369, -0.13431841981893410942, -0.49787781999213998052, -0.85939748756583544953, 0.26254084910777270023, -0.35125258848647955556, -0.57400603286709117601, -1.25394975896733162912, -1.12695534433263455654, -0.02038668966944001681, -0.58225813057622655133, 0.06046180785818311421, -0.31443298748576159474, -0.68904984283757597652, -0.30263153024336186814, 0.18301174922393576017, 0.59065985679161525645, -1.21451828715343568277, -0.94898770477958660496, 0.80360755204466538526, 0.54719359036982284650, -0.10811032133433104430, -0.35794862723660836323, -0.55101499593035740965, -0.85945292165797715356, 0.12264599331672115801, -0.34263630615890688924, -0.70207941668302042970, -0.52663320445246286639, 0.48044785769120301744, 0.67877483915393899139, 0.37964076595953594184, -0.99498379480000387343, 0.06826272599467642088, -0.78251811748459598750, -0.56600582227183560136, -0.13496981987158701566, -0.62585640858687163757, -1.38425225342467639322, -0.10947707371136566890, -0.19166029382630336841, -1.08699321150281802417, -0.61606379575504066626, 0.34411126442513895185, 0.01199486691759543699, 0.65783386500470719671, -0.34592391856648818660, 0.41071327022567660769, -0.30662632831484248896, -0.03133610294753032266, -1.25417134197607760271, -0.89148063814124245408, 0.19314213709832558918, 1.00211226212221560239, -0.05964936098329513131, -0.04206515320877264597, 0.44230715237667578288, -0.30932952198550844880, -0.63272713052526863287, 0.32923836150626106800, -0.18445299475602366934, 0.08398245301581493127, -0.30690032172584186254, -0.06069937924380600786, -0.15990420958382189909, 0.60138043265343066324, 0.09684213265731218823, 0.08265089447861780070, 0.20133933823406621744, 
-0.06609305296836159860, -0.03333704550268735967, 0.56191569531056961839, 1.02177447442722280080, 0.58310476458608240247, -0.31280720368278963628, 0.78553065025672219601, -0.12766737789542473291, -0.32115483358742502595, 0.81689887911010639066, -0.40068981418144078432, 0.64753029371258796765, -0.29785888727658221820, 0.10954859395315809378, -0.03552580398416925467, 0.73347859245946067652, 0.10692663603557045482, 0.76912771379178956899, -0.27710453990873096242, -0.70565954772792949257, -0.02299178472810253782, -0.33360484668973178213, 0.58955073259092261040, -0.06663817651171272116, 0.50489841069354157721, -0.27878293915897200961, -0.50150219095497838850, 0.94773757001654845844, -0.21965655913268120059, -0.09192398398563308981, 0.23578689335862321186, -0.60372475970468264528, -0.06719396330879884305, 0.92357896561708741068, 0.17637140370448731175, 0.21830702090194911702, 1.04375828210253240869, 0.90488511731467347055, 0.85078588905980412349, 1.15198731356439165729, 0.39061150756887502711, 0.78501446460563883978, 0.68830072758619365025, -0.33774688667924901431, 1.13155465436158864101, 0.26121579042667647519, 0.50456329446879166767, -0.61719193943757499898, 1.35984384161013305281, 0.97349625564216291096, 1.01720074254204506659, 0.62155184490221304650, 0.68614314878066151593, 0.54554868755981111850, -0.02928828553174006588, 0.38714918666180453410, -0.27201248431566171782, -0.22873756401948064365, 0.68771752407267827678, 0.12062954827458016727, -0.66091617003197411506, 0.00642956728510013509, 0.48043962405985496389, 0.28302447579752365581, 0.63773883854687529738, 1.26629137250266965431, 0.91425455346685169378, -0.54225353329572834049, 0.08033876053017863228, 1.25411528408185235506, 0.55839458575588496458, 1.43771057452597506021, 0.39822888454703053718, 0.91767593267319791384, 0.10720108738578715268, 0.65148948193115530092, 1.33237112105828225950, 0.55358283724255985536, 0.15317684866007896805, 1.05964156312976376384, 0.62914219259724213362, -0.07141885716422158303, 
0.46089726399160263481, -0.71495783535525980756, 0.78919103597411921491, 1.62838099085323162463, 0.76650199704599397688, -0.22246454233306400816, 0.48163747552950247899, 1.05328127047872088795, 0.01325057079395075554, 0.05610608598194027907, 0.97450754031513753262, 0.31347033344653163489, 0.53506168399436981886, 0.39407747923823421843, -0.01907731912921262207, 0.41952512633115113250, 1.16235323181025762551, 1.21713547773632657645, 0.50251930895070417993, -0.13090054161646869080, 1.57532925055400574443, 1.57147694120493808256, 0.94852403839313548239, -0.49846675542138618731, 0.82642493159215490994, 0.48417555276741047754, 0.91603111432040462692, -0.34195100288045388037, 1.62234403720065500920, 1.52325533058728579405, 0.81493966718099064561, 0.30119165613939730131, 0.66928539915397033955, 0.63391717262716018588, 0.12632760333564402311, -0.08206329822976987654, 0.41470443904485310149, 0.27250316250356576564, 0.71912811634924023174, 1.67980881083821120825, 0.89244581197581673848, 0.38222204850079249949, 0.38362803468023576325, -0.47508884674150919647, 0.95915048820220749626, 1.04870961622276492875, 0.52164194659818541311, 0.63375265542190506451, 1.35563908424536450070, 1.09349400441811828166, 0.00178684794744898845, -0.23842942541365330644, 0.40214018900435882786, 1.24229316496768715794, 0.16709297628539243608, 1.55725100441283803576, 0.54360905418339411366, -0.53998341415931272458, 0.28954104538810404579, 0.31189683709821969249, 1.07258152093591130338, 0.00048099284755986904, 0.22581570002344347747, 1.13489878613351202752, 0.95218375612693917365, 0.51244124416363545649, 0.58536062774153008181, 0.78444124532228853841, 0.71778164420748125085, 0.79572398118853837623, 0.28257582709139084098, 1.29540991741921418523, 0.01367376284873855763, 0.76272261193192636419, 1.46651600917420155135, 1.16412834489143612871, 1.18646304249849610102, 0.60826080457589659645, 0.74602678564782509696, -0.08265554066727420768, 0.80782317334880482385, 0.99955078498264748887, 
1.23984837784650858694, 1.39366292459884544286, 0.09207659484197960875, 0.14905012545122947598, -0.00512143679836096766, 1.31218276067172534560, 0.67240086341865135111, 0.68093044385638967775, 0.31738976247920025475, 0.27427809502334310476, 1.24079546962610742789, 0.37550092598552298195, 0.45624901790513056055, 0.45606668630333224712, 0.55103850493331174576, 0.25660670828397402765, 0.56609588806401378580, -0.06185526806813457235, 0.57352236554259206081, 0.16655704178042457508, 0.43184350227725881721, 0.79470821360162546831, 0.25090558568633436476, -0.13202630223183481339, 0.50486547900442957371, 0.23043323581600802519, 0.40750354123823012698, 0.27469031141976779642, -0.00094078806570174756, -0.07479648303682956412, 0.51268339554202269071, 0.29129273177258241390, 1.22180172650963081438, 1.42380286424869950679, 0.46872830418084754367, 0.47576103443192041542, -0.83263693920003745141, 0.45339148516507044029, 0.64893870533292585812, 0.10590050162655395560, 0.80756890720165708242, -0.24388570511137186436, 0.00366169720765419093, 0.96103333158949966197, -0.52538710539459221316, 0.61794261405587569413, 0.76561708835045427790, 0.32776543596695273397, -0.48963734290417848527, -0.62427872309178389365, 0.49878576523866702264, -0.09682472033289302171, 0.74160022844627859762, 1.02456540913252291958, 0.62746717898538018066, 0.36710751054917423186, 0.59224336707006264291, 1.52129131528252359296, 0.38831472934140742748, -0.26385428800894417112, 0.57380015628165526032, 0.68243493285244716251, 0.53932893433572759445, 0.09705281496844928024, -0.84053609619691360688, 0.41050528903058425279, -1.05506346688554564217, 0.05580767421431988284, 0.97197264320707477125, 0.70578089504171803714, 1.14646274384304347471, 0.15196780582447877439, 1.05968523827508187551, 0.22370305507448634552, -0.54705821232869433768, 0.60328701864039491198, 0.90263334892858781977, -0.73986548241509897395, 0.18428539621367381884, 0.36950257715402728964, -0.11552890280969553560, 0.46476875760101676871, 
-0.12714874846106771766, 0.61384363536869368527, -0.33666355308531009660, -0.23162828474755617347, -0.33928929066635299394, 0.90312344043039971542, -1.00349903122936523481, -0.52979562289732040625, 0.64924894064512594927, 0.29914794498663155320, 0.49560009878918293014, -0.75690282191751689922, 0.63561271694888132888, -0.10226733006363800116, -0.11784751124016790147, -0.06996762532048876604, -1.00957240496821643028, -0.54855913638107289820, 0.18160240309539277259, 0.23638616289395486536, 0.50992609024817037167, 1.00250201545518979884, 0.02542877767042589499, -0.06679490093382663141, -0.22973006348902141882, 0.13040610520150747176, -1.07661329291892693405, -0.38720183235438754288, -0.55839026304955607927, 0.44006561680102929124, -0.70828990435483807353, -0.05896388715292247285, -0.75497426464476702357, 0.55667473346386431299, 0.24472254869839288327, 0.30811256079273585673, 0.00000197236482604723, -0.28098041307065763084, -0.42764414926565019748, 0.07558178236232498959, -0.14117442098115193239, -0.14070093300206934495, -0.68988920837257938778, -1.01851779395660702043, 0.33782144492909321754, -0.75508815515244154248, -0.05486187355914592945, 0.08262600840677369884, -0.97297655786871972694, -0.49959519695492804470, -0.61606104849254994527, -0.32920718096137868702, -1.08105714960566423599, 0.01142090851418148256, -0.09542540138828087271, -0.04763697750941281450, -0.38026049607131434671, -1.87915884230737662008, -0.75502151899814395897, -0.07307157918894086057, -0.94383785204089509779, -0.20717085547088015529, -1.09655300612635997481, -0.51612179216765674550, 0.61592404266270783797, 0.56248427493269936850, 0.32989712549199884384, -0.67958103398442193388, -1.30372091165572889793, -0.59464804678391436354, -1.69021414739191788712, -0.85584902931426087846, -0.91136445384181552498, -0.39112498051143829159, -0.47515845193337535868, -0.39574300907052090137, -0.43819469965457363836, -0.96647125671659472879, -0.21989999453963643417, -0.28246634884997717796, 
-0.49831342023392410923, -0.19393387296702602196, -0.22890821463362159838, -1.22501295994570025272, 0.01345235638101005549, -0.64853745184623712294, -1.28907715151683399668, -1.65002711659909540032, -0.40958104827608171616, -1.05474119111329844856, 0.01722940175614107172, 0.48068620245888271647, -0.84223776729967814170, -0.48403188292036536389, 0.38129664463682577669, -0.72327520291630797988, -0.28220399086946296174, -0.76492520623800586144, -1.78730352805980619024, -1.13474504156034505797, -1.43048835588672007546, -0.28221543504172991179, -0.95385846256577910829, -0.92421343588049464923, -1.30514221289106790991, -1.20418570365081434304, -0.25184141227736817070, -0.28491934472555041635, -1.00462399572016325422, -1.11667095327432130958, -0.01350202673328670944, -0.99207146909789623024, -0.59456821748690469320, -0.24142768360518546134, -0.41347303218147146708, -0.73349995002750223438, -0.69259294387530723203, -0.89593719806828964192, -1.64957966739294015213, -0.37413550446177823439, 0.18169656363663699850, -0.29142682121543322937, -1.08944173636962049478, -0.97512859848313848676, -0.94978907853243743631, -1.12820949875102050086, 0.07380393328816214904, -0.73848839243490616013, -0.09721025639975888755, -0.52652271543630535522, 0.03248029634876592464, -0.87824955293436424153, -0.49169608713645518439, -0.71388053275217822069, -0.32921277144855937147, -0.93826341148163616701, -0.95531415391593577091, -0.94389656669843668979, -0.89672534953564353444, -0.12416448128804119522, -1.58987184315069751683, -0.37342250067582744011, -1.35850962185880907995, 0.14585925866775939497, -0.41462993184253482681, 0.11855978449703930000, -0.33972820160481559393, -0.74252487383270460164, -0.38525200287894567630, -0.36579857197678888880, -0.85165449626776235093, 0.24877031943338034914, -0.93915881018526592428, 0.73188311923960869265, -1.27344978756978144396, 0.52210707483959239994, -1.05908169067071211700, 0.30174715808697716302, -0.65603735793404083232, -0.23399557245197843969, 
-0.94989508689135226938, -0.42602435583809428188, -0.81602189014856918448, -1.58655552378472775032, -0.05530626006327366007, -0.83761272695863298043, -0.03666486301449428664, 0.37048692207549516198, -0.59960563670078270704, -1.82368635285892044529, -0.47619743330154601368, -0.78877897419823217451, -0.72313903224706255202, -0.65689178674228121402, 0.39276457371600348445, -0.67660514112998826342, 0.06077333155915864982, 0.03434277297763105308, -0.36962259335726715959, -0.14244709585351394976, -1.18131891396471733202, -0.61596868986906894161, -0.30663010082855440830, -0.19686255987910017407, 0.23884189970881342102, -0.69623821080794112692, -0.92835983070313843335, -0.66382164376447794218, 0.07095814545661577322, 0.15146280866471378834, 0.08144289896561801578, -0.91392496268612499755, -0.32033588270097601569, -0.58145092427620859166, 0.24957420588119494376, -1.82380288045068117420, -0.07750740586904930618, -0.55687129106069632734, -0.69688160400208909451, -0.33769632936226823272, 0.69217262839731952262, -1.05784499824666755607, -0.70880091017140123277, -0.55457909309685260446, -0.87554316225302675214, -0.64160680930246827280, 1.00483851238578658638, -0.08229692970874302738, -0.68460794925798251764, -1.05626376551366285561, -0.31002107885510760132, -0.06383111998911439322, -0.19504032868600740258, -0.21157799616748129701, -0.47433032032624167229, -0.48953310695065227476, -0.09208275751910452445, -0.50530043691318282661, -0.81873076413158452080, 0.12343075475096243476, -0.03607436786784401495, 0.11055089696335781979, -0.77233082513969952831, -0.25793670501722448796, -0.05855458533866045312, -0.24252691612471949378, -1.14042120263819124482, -0.52717983093701092923, -0.65183145191114966188, -1.11616820815782680221, 0.76246660632305207628, -0.96867792953436071546, -1.54696539636091245917, -0.51073321486204359410, 0.50853787519145365881, 0.45324167178429586311, -0.53324800633904934077, -0.07018280744710427010, 0.49801804471214539127, -0.96290644113068135646, 
0.04326208446970858379, 0.15170693981411628726, -1.72378748490882349742, 0.27758238114435318522, -0.10812517275789169935, 0.70467977289127237128, 0.43001867719442304905, 0.55810451763491319976, 0.03473463868145259398, -1.20466651966684756836, -0.52621026238567758515, -0.41330921494040517850, -0.18480398708459933044, 0.57599605444612822502, 0.45177980649122473356, 0.87970568980822672067, 0.01591402855104453312, 0.24417758276118234351, -0.00797905488325316437, -0.14964534431297882300, -0.07978071288220574264, -0.46048248515536621683, 0.13266940601980201087, 0.67534990290186180939, 0.40343029950325048905, -0.34128315978997247893, 0.01367929418145961973, 0.33977721655374359955, -0.13800976230042910631, -0.07499106772382824193, 0.93655052504775448963, 0.18867941427696238721, 0.62136228973850171631, 1.17156569459184045634, -0.11042977739462614406, -0.23514044308736736788, 0.17847007965549699371, 0.80564831118028923385, 0.46521312498358913246, -0.13563028361698178936, 1.05479197323770201855, 0.66953997067834369883, 0.61894681047085708947, -0.07694332551318933122, 0.01141656220977857461, -0.28492216661152736101, 0.15583592108799326770, 0.32118339039020177283, 0.28462770473423126827, 0.70488432267856904190, -1.13465530869559350080, 0.87639083540584761245, 0.75091481495341771968, 0.43880113814211674228, 0.04920557228459673804, -0.03969356694298381560, 0.43077225178344236145, -0.18566331165069371867, 0.56362184287367556035, 0.22300651095471235585, 0.12369199611610418033, -0.55761821996417992864, 0.36639089410500647048, 1.07840451232046841135, 0.63403326582211005924, 0.97253991682721818712, 0.42707779378515592361, 0.43198628273395783816, 0.11654438960002677650, 0.37845037836301131939, 0.43123491844708461418, 0.11227097113167194831, 0.36186131950886524233, 1.40866417117738063070, -0.25951395997535564586, 1.16182568922902151876, 0.08011431456262069206, 1.06059081548725742117, 0.30776003734953888413, 1.43015578986521041394, 1.62465314081505374233, -0.61061676079074744816, 
0.71363876767599943030, 0.60573964386053702480, 0.04501264264660331804, 0.67258651933508406540, 0.25498372610181085918, 0.83199582691765594511, 0.00489944780886319986, 0.28790234553619159508, 0.41040737167903729166, 0.30807225918375424900, 0.98433965796534772519, -0.16627231716854828925, -0.01709653686559187147, 0.36758620853736684309, 0.20386455641375417214, 0.49082907878153925196, 1.10871074417584103422, 1.32436528465590619419, 0.82568597636396257045, 0.40321049160437416603, 0.72101281230588831761, 0.65823810010404459891, 0.33562087969121057185, 0.74348781998374757762, 0.14730937863433984925, 0.42918147767376546575, 0.01030627058720734457, 0.58337088113365276332, 0.33647594839339250994, 0.82704347978958048238, 1.01128099786961556106, 0.26846003209905699993, 0.59276079192040886934, 1.43127465523472219289, 0.13761738199376499781, 0.84912676911295803528, 0.53386175840433192086, 0.30069907013969443543, 1.05280716994287448784, 0.75533588968334719471, 0.00992450694737112560, 1.90975241735985323821, 0.57712831748943671517, 0.38916893930521212486, 0.38479937659197793742, 1.04484374836825377741, 1.08653615107294165831, 0.96492658140021947411, 0.62882004594186313717, 1.34911972407532188001, 0.48609090908949903476, 0.28258976799153573412, 0.61639651950399798608, 1.25416752402087583462, 0.44975430296887247694, 0.78614850689741611056, 0.76177513383858719287, 1.37912588068389618812, 0.73580787825345161934, 0.47423507758341754137, 1.65895417088199748079, -0.16807947511673104746, -0.23899548676142823300, 0.78412675480287574725, 1.51031134394580868197, 0.47550821546559290942, -0.66274178768636171633, 0.76611539330760869415, 1.08013466200487928681, 0.86052186266252506108, 0.17932103478224814541, 0.86952824793515159918, -0.04946550480718925336, -0.03559415580552616198, 1.02718362082774250510, 0.42358236756494405117, 0.61558074747475888255, -0.01507516121431662359, 0.80743845514452039502, 0.43885993903706654873, 1.79982025208062346167, 0.98631813238239507413, 0.35200123188477289160, 
0.77572279345978034648, -0.81424577512997364792, 1.22888165269400473356, 0.76007138080354352816, 1.12716338233682589198, 0.55809763361545672300, 0.16527002699172177458, 1.17419400675826279823, 0.09298151086308259483, 0.87192582716210820237, 0.99922591802864024757, 0.15868098447366191239, 1.29212142689289244402, 0.90880265730764109300, 0.45245119805135280977, 0.35022420335567899796, 0.98514642917703532099, -0.65228199156064858855, 0.64602419404499200617, 0.33796985040007954115, 0.52006289035073771654, 0.50328007743885005976, 0.74721579426532702684, -0.07257496612794289703, 0.26644341769301815859, 0.86535745983384992908, 0.26093936045179333405, 0.78591846917599250677, 1.04670562594607163831, 0.54408885202322265773, 1.04374355446341948728, 0.01448053160876916490, -0.12336512160892837420, 1.33829835282016840203, 0.41014621960765584863, 0.07151320763395407898, 0.06315403377643458160, 0.87760739253150088501, -0.40270703817933400925, 1.62541997882980115975, 0.23105202957141268705, 0.82945029843189144181, 0.31530198928856595364, -0.93439884537516615381, 0.53245710630627096638, -0.13220836652263567856, -0.14039005674538701518, 0.33761448528877013864, -0.12885675302935967190, -0.05486578699551059035, -0.10059034515546089184, 0.49094767560002661178, 0.97932966151955558320, -0.35377620014297556095, 0.94764204403496032914, -0.15175211143681649473, 0.48641378456256734175, -0.21488562819823497918, 0.26557592210426511636, 0.71911080516017067410, -0.93573320027645268127, 0.33673570907152816645, 0.33838099682336042484, 0.25839845671671385352, 0.46392199132560685282, -0.06170274407260842309, 0.45125496781321339235, 0.03798276460498456464, 0.83889434597459433540, 0.13598356975982406336, 0.00709358946511109978, 0.91445532971103604680, 0.02117724551404381872, 1.24458098478484435745, 0.01412004334099348224, 0.12577054954001415377, 0.15478161151970201703, 0.87996170162769549172, -0.11770775487446533125, 0.17275279560723083572, 0.20572037557985500822, 0.71209880093604649431, 
0.02378765514589539776, -0.48555167812791322213, 0.43780758955623610973, 0.90342315139553897030, -0.79323400177754810336, -0.22834180754702432248, -0.39713355325044263910, 0.02061566332444270122, -0.37384799399886209370, -0.41930019978679000658, 0.05839876839480294402, 0.20985490209849821941, 0.21689884992701180533, 0.57998493012774454680, -0.57637189663675081341, -0.24937955312889742432, 0.84127957181620438565, 0.02935603413575149645, 0.12844868676260381979, 0.65936858139843446125, 0.59293261881326275731, 0.25345294733045298230, -0.23345801856603873303, -0.37325906607648334790, -0.51520941799471953182, 0.01286200581951449373, -0.26117478904946134222, 0.28021983223729518553, -0.28453579786852789546, -0.24832102937366512529, -0.21698654885667184589, -0.54059938554171138581, -0.45955683104497602187, -0.29534814566928646595, -0.47637660079027094318, 0.90275079388758028287, -0.27106109697840180539, -0.18380375345273347598, 0.21608324604203818509, -1.03067538748592002484, -1.28985131264206964730, 0.24035751185528542173, 0.07455855047660278623, -0.01993161422876671418, -0.47736316518498655981, 0.09717824399787458600, -1.02776954768996620082, -0.82512460484267169392, 0.30415742658326183623, -0.08054891079553022637, 0.21506022653570414094, 0.95117748387746359207, -0.90900583688350788947, -0.64122606625969535976, -0.82727981334471878405, -0.12816819328427173086, -1.21502755829493436224, -1.86853369328560425799, -0.07558089092910375006, -0.04798156726846242170, -0.03697257963756905852, -0.45381818327456824314, -0.05803337753596721216, 0.22601664350690570116, -0.30628227407516245284, 0.12148091646101727425, -0.16426715192936880428, 0.16666531715063476193, -0.35313673947063295433, -0.46773944439302844733, 0.00202025193480370735, -0.83857713770514608065, -0.89257678558882602715, 0.20326647381167811668, -0.16731350370167019648, -1.17462334077388041997, -0.00192568738810283691, -0.85650923452381799716, 0.22781698179847181818, 0.30410661681344092200, 0.00819386393686055703, 
-1.49539383971242401117, -0.46023062362172850737, -0.48414054356937058499, -0.05859578892365663316, -1.03301482484101159187, -0.55398645807113455763, -0.12737274450414787230, -0.59714963535922982452, -0.15378570656571599606, -0.36256605605045499896, -0.95466929931069732795, -0.83297067490645249066, 0.12056196880028446916, -1.42427084899784639482, -0.16362690890571318647, -0.24102879830628443081, -0.31944184555979615592, -1.04515093578857110046, -0.41642582269955685792, -0.08738115485798197968, 0.34483286738239304903, -0.68649453678271388224, -1.35802914298238430746, -0.20795471933336845405, -0.79037930050703564255, 0.01956299142109785283, -0.48382951791467149194, -0.92137070916707930479, -0.65053555158860398855, -0.70103859922000899552, -0.78431024565570961471, -0.58152929527230268203, -0.16873178100282448799, -0.66611560319328422253, -0.20057331163541969321, -0.65263203960670945758, -0.39697678844769512807, 0.15916130134325412460, -0.83085329918012595929, -0.51713130054995992690, -0.79530474755227398287, -0.05852715218530835095, -0.24234245254383013357, -1.02612580102125083847, -0.30162026568873445687, -0.28006314043562424709, -0.19651626569293123570, 0.39711132419236550017, -0.67261008516829401405, -0.27380875613023281101, -0.56152524599762210222, -0.36687050062712178722, 0.12214069654505865792, -1.38462057189789433664, -1.30222182044560819136, -0.62404462769936375199, 0.21925620835662440467, -0.65351763582521482654, -1.27903785327817987039, 0.12282396367900916268, 0.26252047888837382050, 0.19187615740778485307, 0.07441101636377811523, -0.32996650280539435718, -1.01648828065795671094, 0.27083059940367726348, -0.79306138679206439335, -1.46104108579794078615, -0.36066383273351604322, -0.14672606466813326032, -0.70955026086608630198, -1.23224622066775446605, -0.31784206698791683809, -0.94298241282541428454, -1.35663862820239455864, -0.27592123870227369675, -1.41398933881977439952, 0.16202038336831758869, -0.99754046185378197897, -1.87524272493903176873, 
-0.28889527573184980991, -0.41019363291498289747, -1.40792397210534359075, -0.85941614424177603659, -0.51317626221622680838, -0.40671209830126192619, -0.51831695355261053759, -1.16739702636813968972, -1.52887868576663965570, -0.25565591535825732405, -0.54466302423718016001, -0.38488550854554642866, -0.44562320270996169658, -0.26245515722232493072, -0.49823845632144531104, -0.31510013551152615818, 0.30378194926921031627, -0.22389433756825088961, -0.36305783661933055395, 0.28422101220634676810, -0.98028104743639987717, 0.83052848530922684933, -0.88037649685074104156, -0.53702217370343052760, -2.31798834295878020129, 0.32305546664285966774, -0.61105064537945330549, -1.10081014507531804725, -0.88355487218340389433, -0.84937484568863097500, -0.47115269080145788516, -0.65592623556110918326, -1.04677659646064569188, -0.43731618303209740528, -0.20109869618667891578, -1.23600712709541049605, -0.00517788784688949510, -0.31994648487537946657, -1.18205157956707163081, -0.58853090946501851466, -1.39252374219917074427, -0.25301883597005003779, -0.48462439608794211088, -1.33299380610334461394, 0.19234775151522492953, -1.32672597936276370767, -0.87847617781314912833, -0.91441416817617582335, 0.32294331833573330925, 0.15226402908241498668, -0.53296820594977156738, -0.63420992498709094498, 0.24677487565345440634, -1.56538935196946793482, -0.12204722778163040742, 0.27857609735299460052, 0.09307957780846093376, -0.34858001279214290147, -0.88617027225337130503, -0.96611707279318337349, 0.04326732740884914330, -0.34798742696688406140, -1.21446584957774650348, -0.03054976404243353594, -0.71631322227113769507, -0.46396961714097473273, -0.17194456753649167702, 0.32175070937236627255, 0.20625173274091851416, 0.11860886336807469066, 0.78827973043268884101, 0.25840317487069053293, -0.37708405940148492785, -0.24108341504779765008, -0.25053711525467514676, 0.35156775921069066682, -0.14900049379036059416, 0.69012884849533884246, -0.50322561899931317342, -1.28840587938485495734, 
0.34969651199160806110, -0.05757674458791917083, 0.13802702178745770989, 0.72535260411071977860, -0.23645310959143381324, -0.72931288363219537452, -0.56686083922175689587, -0.41494948958014543638, -0.60123435254093238189, 0.15228234937320705988, 0.55127828129683698055, -0.80486636641862974173, 0.26813173605066670246, 0.04386317506322691406, -0.37670189687568639281, -0.35490196817744290581, -0.74143900121055306141, -0.05321219481705496968, -0.50321707396384851574, -0.91207261758764557324, 0.43787711451407890229, 0.49622349084778061279, -0.44032240243907155852, -0.37634818764526267731, -0.03964799084847860783, -0.23380336844063226431, 0.33474394982695609979, -0.51919128174846918000, -0.24908440454621466609, 0.17933657086140214476, 0.81345418242613665161, 0.56496710044017506291, -0.74551203320167203081, 0.25806413907239239558, 0.05625777419302370463, 0.08822205467649174571, -1.06230775953603706618, 0.25869941493341158667, -0.47274872444331927923, 0.66255465839633809200, -0.38243476906679840788, -0.37066865741184684691, 0.22154107363772707062, 0.26315721272599607028, -0.49123629140254798653, 0.07351702553482492020, -0.01146649336196853275, 0.21700010794127849723, 0.47401521063368312614, 0.89311177020357168654, 0.07953308498025868367, -0.90331509112497365699, -0.31152132935978987804, 0.37243815529088497573, -0.28289499213999502736, 0.37469828622346751379, 0.14817974747163292770, 0.20437834933217560729, 0.00531102956850060259, 0.18931191713613770844, 0.09881490084665642271, -0.69935290016500539778, 0.84085473948595423899, 1.02128636298967112594, -0.60408367003135632345, 0.21159853022015828738, 1.08109505786021919960, 0.34252045732304492454, 0.41879371462891556988, 0.02294822870683188576, 1.62521892881429708133, -0.43545499638636708806, 0.25319156676216342250, 1.09981831590957646050, 0.10455178598353145891, 0.84305670439883229061, 0.32442748048359343338, 1.03138096788507738211, 0.10150472227551410320, 0.10086361411095529084, 0.93475604888389629554, 0.03562126638742663109, 
1.13523186092234240618, 0.49888263724270259791, -0.50903159320404411936, -0.64403606566861015814, 0.13121607034699123151, 0.84400409540417542509, 0.41898506149208941673, -0.48499582489532627738, 1.12283488790735952456, 0.19123839442101228658, 0.82335948611366382988, -0.36643255272108588017, 0.53364551028012885414, 0.45040874613087966694, 1.03203888689248990396, 0.62764239305754587761, 0.99269936518173762519, -0.25616930375563184974, 0.55937251732855219899, -0.16533257982539756314, 0.19014572735390972147, 0.88870991890885575604, -0.37247525275100051845, 1.14229778849182084244, 0.14516576486334242801, 0.63993453744662454064, 0.16666046026374936595, 0.79841389781111415935, 0.59011361532491335424, 0.24229411748464740883, 0.60308757664594092596, 0.94536348956654658870, -0.09151479912804894035, 0.05115964808768513894, 0.07549507082254997004, 0.23924249711497064252, 0.23580669512278595867, 0.11175406822094513526, 0.39212787300322138329, 0.26248448874330421177, 0.35198520059636662083, 0.12864073127174008304, 0.01764971863331066260, 0.33206000921842226958, 0.26994419989376672309, 1.36588212366423089605, 1.02209422584547304780, 0.44314633908345246738, 0.70749000774926140700, 0.68734426395839465229, 0.36376495865646368832, 0.48833361163365346380, 0.57576692294019915508, 0.44814907123732067307, -0.31746551946133561017, 0.26967089337041427743, 0.20739754543659805197, 1.13990932213584517108, 0.67287890422156571013, 0.70383424830680996198, 0.33804740133489935561, 0.41271340359150282540, 0.50070346811109067708, 0.71949192256658023314, 0.79054510433256330870, 0.27478839314136188632, 0.80362508820724198877, 1.00249843792954163035, 0.72412050476898448537, 0.86504889587694067110, 0.52469921360704363522, 0.08955475113184496738, 0.69297924197419880077, 0.86636474607677471660, 1.46916645379341925803, -0.30645807098780641908, 0.10836385419480337733, 0.03169527638655100787, -0.00244593436680329290, 1.29095835941052072826, 0.77806676303842015052, 0.04665214445279530775, 
0.93834807524217944685, 0.20394051966935589082, 1.29608496888845903250, 0.50656796676010773517, 1.20548931477196941131, 1.21195116984346684674, 0.30208212886326557545, 0.86355166472043976000, 0.48170411391450146388, -0.18606278757113003941, 0.73118233798771137266, 0.24082112613499284670, 1.39740785556947999524, 0.14823157042600365596, 1.10374947451610561089, 0.67141332927537233921, 0.42896651459951817831, 0.11585867557291368302, 0.02025196982308774540, 0.87949983088201522108, 0.70722515400354080128, -0.20938793736938265955, 0.92721605355475689159, 0.74233882407211926324, 0.65772508458572453982, 0.23801744049853934548, -0.06552051698432403981, 0.48214578227044124770, 0.61780375643297391619, 0.92490262780595755565, 0.03983626673991025324, 0.45618511195444022555, 1.09168723925500144034, 0.11694088061931323663, 0.48990300130920061417, 1.15759314421506820381, 1.19256185829849026270, 0.88067457574187535396, 0.56526514196434618498, 0.09871458637333468955, 0.42667472937270994882, -1.14239965083009087365, 0.65015268450762420116, 0.22159227614514509508, 0.82114077204793445297, -0.08914757906013109912, 0.80454904643102409523, 0.19532780902355337260, 1.00021273341871674845, 0.25185440368758438279, -0.40682085230824821531, 1.40562534640960978827, 1.10826615208296530568, 0.83883930213873303394, 1.03431041393520439797, 0.55571883727503657013, -0.65308479789433471208, -0.33622646710646331147, 0.84415917149898889171, 0.09643757256811769096, -0.07541293080151706407, -1.07290905313022655321, -0.33661759697177462414, -0.29123950769414819195, 0.73591664128035283454, -0.46072530910093945078, -0.34073993790852791230, 0.41425507394483740420, 1.02900958585784496613, 0.26234424608285017433, 0.41211419243271690815, 0.41212909865620939165, -0.94962338405640323025, 0.99300708254185576784, 0.05133483566556823474, -0.27641188138071348668, 0.12076965043542456368, -0.71027082055777723824, -0.17008717674826184796, 0.22506173231091711195, 0.19985767062765538715, 1.17527470989170756965, 
0.39654977464903051754, 0.48356107451591201496, -0.06607842080481310387, 0.09965419835180820696, -0.28670631563197113145, 0.73701493699806586157, -0.27730805575693240339, 0.87992804415498726556, 0.84166855232984705282, 0.30432859440612203272, 0.16827061374703256025, 0.87975958857416058922, -0.22929672353222066428, 0.28345541586034989079, 0.02489144816407777572, -0.18775620013395211139, 0.54992498566390224379, 0.55572000899556439624, -0.17824436200059323721, 0.29105047728690153219, 0.03402197709562670286, 0.13380911274415691059, -0.53526837759219625390, -0.21510094082688926975, 0.06717086956184459279, 0.55824457415600592380, -0.12019482904848685645, 0.15034725016989014534, -0.44357512515051933377, -0.21119268540890215324, -0.32402799132275117433, -0.17887897790532275577, 0.05494158411650425233, 0.00320956963590604896, 0.28879724444747301160, -0.78005328575492915988, -0.04742502830407227532, 0.39617374897024310032, 0.36527337195289077965, 0.14588733467161385526, -0.28665282290408511390, 0.30363897230356629020, 0.76460386513654454710, -0.81368102542418940804, -1.05551943007042869915, 0.09219118911536607064, -0.10075265477477876597, -0.49179015100809569949, 0.41974325335623718924, 0.26175581773169531719, -0.05376893920893044410, 0.22792777533156105552, -0.81412859381531332836, 0.31684369243579268982, 0.41632482437946183307, 0.08754396855479915085, -0.25978643697908798682, -0.30082077821408703677, -0.27416711138731719721, -1.78362598438496289255, -0.42477078257004541317, 0.00637065571055872581, -0.14571606253762264815, -0.45543314552439717602, -0.37877538227685392247, -0.09791070595032150270, -0.71431314014714997196, -1.05624756249894269367, -1.34046659380042343379, -0.04033521951705290598, -0.74943575607775614333, -0.20352936236396507175, -0.27489718564271797829, -1.10372538793186869555, -0.56021284305556817706, -1.42461902276996799088, -0.35214474830182068699, -1.00783153957391191646, 0.20157322080873690817, -0.58576257597069170124, -0.37360387280661777609, 
-0.63432515325522753624, -1.01837910684413146534, -0.35962038163294862692, -1.11918458002543363961, -0.04458972227554364176, -1.24396556109800049406, -1.10790914647211291566, -0.14572239056462454876, -0.16197454039813338755, -0.34868822285896788893, 0.27162211152782872459, 0.20715257262398423244, 0.33933519706696019247, -0.27628986578656439255, -0.09287513491886739692, -0.49679970174431287155, -1.18975646230093867750, -0.88516693221417197535, -1.03141661767107550851, 0.37217509467067433349, -1.37148824762082122142, 0.18793179525859465828, -0.71820423438085301271, -0.76852167211303457073, 0.32706468142289502055, -1.36836230015656612480, 0.02922063338038782820, -0.65813875584104664096, -0.12442077705411563882, -0.34305938230134658262, -0.23333178082270461529, -0.37306429676836350140, -0.04092468387457665058, 0.31255411461301796372, -1.60405862616270944443, 0.34222929873389662525, 0.67231143733789611172, -0.70438477621972872544, -0.34566221403666380629, -0.09570519956572126619, -0.07450025564053441851, -1.14526089871265779330, -1.42131572003562967055, -0.68045062232256492418, 0.08944469765436158237, -0.45130944243283344086, -1.21328164049858422402, -0.69153158593547703337, 0.35325314004155072833, -0.48004608210528976198, -0.78652647290389232637, 0.15651564816914298461, -0.97397573768256395788, -0.35781061022609844668, -1.46498138383472964463, -1.05726014031562631779, -1.03458112299587923211, -1.22280762206606730125, -0.67537298712195137362, 0.40305984166817721004, -0.89357688471113116968, -0.97234099327967504767, -1.36258121112159558130, 0.18603678492171449310, -1.06681725474310962198, -1.53258362946917348069, -1.03799366368870460420, -1.50017974636031659053, -1.22333163760278762489, 0.06179211366646586434, -0.67814269555239092835, -0.57257295371181937682, -0.91726877349082025503, -0.88412025857653631089, -0.62615655017233684543, -0.92900538937718568633, -0.48541311804440628874, -0.80780092572289907693, -0.74348165468614058060, 0.05803919598879703212, 
-1.37539314991356409479, -0.24455137833156298521, 0.62625425397179645959, -0.87935040201533531956, -0.02252054383442969598, -0.17898484168207162703, -0.30958045609644962992, -0.14397896033233720248, -0.24737574472234397094, -0.83706194764721009172, -0.30351018372377597254, -0.98084929244325569364, -0.81818255675163797935, -0.90614028050478434562, -1.10139089786435784291, -0.13500574278023552699, -0.80703528485501674350, -0.77946696177283836260, -1.11934900331902298376, 0.02010114358001080515, -1.04486559601635420336, -1.47004059843049517120, 0.09807008414678597408, -1.59881903722587415118, 0.67955528684140809048, -0.53483316154329751946, -0.89929703369981384142, -0.94911264412567952764, -0.83382776270525327256, -0.92339907898829742816, -1.00561866214440343015, -0.04431590214284497620, -0.95277431744654661472, -0.56490568262500950869, -0.25304061885872830562, -0.87102249165792156038, -0.55005028955563650683, -0.64013603572956301324, -0.95232917950393636275, -1.05289227803016838259, -0.58977989545389963855, -0.69565371946179754836, -1.12917484400572964987, -0.77019394741535907034, -1.12603775435224107149, -0.55907100054499903052, -0.76291182660757805856, 0.17676209023581990198, -0.01977188148449232630, 0.62449920819601301147, -0.09864615604017235029, -0.03504845453481320039, -0.76991823129240555268, -0.90725094196725730722, -0.52748087549770328319, -0.85583815902586701796, 0.16204231854666634183, -0.39724774584478084183, -0.57186689367800835893, -0.87775873577605811882, -0.22019771735449625294, -0.62338201717764663012, -0.51698371425561839843, -0.60948082553653282201, 0.68063745743221693019, 0.69527271207357110860, -0.24478918226165147232, 0.71709176034992538895, 0.67885024243637770436, -0.40967895182345165361, -0.97654156791675106319, -0.36340912655531776299, -0.76994441709271632668, -1.11242007073366955261, -0.44551855551581848180, -0.24807257703208984712, 0.09780316993329635222, 0.28596618049002930917, -1.08974379144683286569, -0.15680815365969380526, 
0.16880051724564826277, -0.44769187053919601293, -0.31330443753300546428, -0.60211234523471990343, -0.66387271046415108344, -0.84182416229598588941, -0.49063857854392523006, -0.81743679449546802207, 0.70367175352337119065, 0.37782797062068723681, -0.07122916772005344344, 0.63736333929857647007, -1.01897126249321545721, -0.08896529951917886425, -0.26339622511603966171, -0.70457944040251607731, -0.20940259942737043941, -0.30375628262243126176, 0.56514865466982988629, 0.28232018011344484076, -0.04816300885540757137, -0.43971625194565722161, 0.71991859028890248240, 0.20275151304452071477, 0.74126468261480560340, 0.39475658129424906484, 0.31508341605738754421, -0.78636000759201918342, 0.83660021199207879050, -0.10754551945269591462, -0.06002974185331036439, 0.14117021505777804791, -0.04274000610469185640, -0.07364857673125785320, -0.13961340707077943279, -0.21583287560766067914, -0.24903809470744767141, -0.73014090345506632573, 0.17166927301545956142, -0.86898463071833953464, 0.43921485274729277215, 0.06841773020309892261, -0.41665437677725214449, 0.40817525859246106545, -0.39250824694225083800, 0.91239196308732961604, 0.23915201319627382714, -0.18449948597086349156, 0.25848721397091256602, 0.14114415684927178463, 0.66148420873412805410, -0.10023438389445323005, 0.12247404462545387338, 0.19572022911443037740, -0.30023939311569830535, 0.34217159880489189661, 0.39243529299145485378, -0.10676013223822111708, -0.54421854577405148756, -0.68757205329242243597, 0.44277820230724296291, -0.58852800870793631205, 0.33137224751337124307, -1.47060426021692181031, -0.29720603118010591182, -0.32281216352301767003, 0.18678932342762014085, 0.88741442234637024633, 0.04612629218569361256, -0.15326759886268606636, 0.12411758927497441862, 0.41413227152574472179, -0.23094493524693449071, 0.40312162991418054592, 0.02527896996043285660, -0.17114465894903635457, 0.43254455314991324677, -0.20279119949175761217, 0.38120891962296016287, 0.47251274544755378360, 0.60045706233030959886, 
0.47210385889387657121, 0.22390956834195380187, 0.62360484853951014816, 0.30516032691645933461, 0.47014843785041515734, 0.63012544038922313483, 0.31594441363733322126, -0.41683074116598789471, 1.26079638029829599333, 0.42535157573735621339, 0.13347793372683305901, 0.62664828007571982127, -0.48487834415599545546, 0.02908969232912850256, -0.23566151359453391123, 0.36704483933373532878, 1.48913540743809158506, -0.29650851464975824801, 0.52381292021975445206, 0.49199656746766529114, 0.00925933492662506241, -0.88828343195507586394, 0.09927216749639838911, 0.42195267711922224141, 0.48043922770029767744, -0.18091940454152027895, 0.35802427275482018310, 0.06348978386319875655, 0.42015538627664361648, 0.06080090497266554816, 0.68561065054623249182, 0.51477820622770120718, 0.13701862246486157249, 1.05018596737236569183, 1.35650923707074699820, 0.65789892509302316803, 1.29441095023460595037, 0.05583016922617173305, 0.08652508209104631254, -0.42140517514673303534, 0.34786964129985775696, 0.68857266129729999271, 0.27628453733613445031, 0.53298374289959993266, 1.17715130292177061477, 1.21392916948041174052, 0.60516355612181316204, 0.00594305539096273705, 0.56910215661030383316, 0.85995564949010117051, 1.57148701818964786625, 0.07067870557924027519, 0.10478822089842088072, 0.67758754031034507115, 1.03463930915615098804, 0.46259515925193706032, 1.04357640788339622873, 0.15804205155316963793, 1.17333552335239010844, 0.32216447541563736356, 1.55482102005805700173, 0.43660631638378621933, 0.36537396190936022311, 0.76547056535247748332, 0.65333890160335816866, 1.23196275188733794792, 1.48449146318310298831, 1.32718933888230106177, 0.25710269382632122692, -0.20239994509206249429, 0.58881103814617874459, 0.27945212095685845188, 0.45744758200501645096, 0.60177335865411529792, 0.56904338167445389640, 0.82740854033376320942, 0.39503281258946820742, 0.05641165853020468912, 0.44602992946019459630, 0.39266782303456304604, 1.05728068323327417666, 0.75311803011758704507, 0.66295810157689427644, 
0.61393295874379727461, 1.82314818550734925040, 0.78020249608108160899, 0.32705411426178560008, 1.18972418005913005423, 0.12206221860094734755, 1.08157983398197465874, 0.98503049136625653315, -0.20581179301690077921, 1.37467958580911009392, 0.23954437559728075646, 0.97015646510322151030, 0.88256466434257474507, 0.18451405889764105339, 0.37470246509026189363, 0.10236053473103551292, 0.18857237319300035328, 1.39242497554786970326, 0.34366513529933984117, 1.14540959562590050069, 1.03538371053131994515, 0.41899197828594691995, -0.08700388459246077844, -0.42595902510490568638, -0.08872084496678134258, 1.06339940851660186283, 0.99282346458758963248, 0.27313562355923326486, 0.20074654629991695032, 1.26238399239057930146, 0.70819101459772038076, 0.12808991481619563801, 1.20150636004724109540, 0.31096039344027737172, 0.54822558596788062424, -0.28553348766997477259, 0.43943797897860309432, 0.35298431774765826052, 0.56168723788704677524, 0.69054170143284443473, 1.32191199000613002212, 0.17045061835865987776, 1.08451327200174363341, 0.13699893585198247292, 0.89041909814809694357, -0.03217641200928822443, 1.15405298388665378262, 0.83049386613731468465, 1.28265010749756891428, 0.37311555503958682145, 0.39561099793580895012, 0.79834756261196648808, 0.52547386604089929563, 0.57845049325359076953, 1.27662158641604372988, 0.46308279461689372258, 0.91362150726508328802, 1.77746217314766097317, 0.07946539609826769324, 0.40852170035336793363, 0.72221074215964131682, 0.44363245612808255292, 0.66565087327216798663, 0.91689121267818707306, 0.19627456375391938437, -0.50009118869507807048, 0.43873757383322009584, 0.26352632214148180578, 0.72057978228862218373, 0.55682384706536813823, 0.16034036945921240069, 1.27492659048820766721, 0.08951552576629778768, 0.70346030790888902651, 0.69237948532621662601, 0.22745618421734686843, -0.00929141140843875446, -0.09208557254575083784, 0.66930850862262492029, -0.32338979697071340569, 0.81298571606961977931, 0.36029119394553121847, 
-0.01390812877280495075, 1.51394404480890165665, -0.27960639551938698544, 0.31840963265703531304, 0.93634580310330139952, 0.01186428651009180379, 0.26194802432604735731, 1.06676702693735081517, -0.06871467352952254082, 0.70395789998197333404, 0.15709508230453017585, -0.04422817873895174001, 0.04863746058904216962, -0.81385565633573553335, -0.56665427616937136257, -0.10338051488648389298, -0.29045619881251505401, -0.95413158333271519407, 0.45740218647590158429, -0.59042494714438631398, -0.08650437235024874272, -0.01999292646159464559, 0.21493825972990032724, 0.08005146213910688258, 0.49541520461585109025, 0.30795638135567060534, 0.18080378046089801147, 0.64455244330570604649, -0.21422390043207273358, -0.09158477036924229930, -0.28291236360795690707, -0.51569160628441723659, -0.50876942886347475969, -0.25949752026029843099, -0.68159527787070450788, -0.49324675493975495000, -0.32829181954718128500, -0.64970106113975267803, 1.27101609108677271465, -0.67095471447684262856, -0.77097968775861491775, 0.23075830071182265590, -0.14475472604462152892, -1.19748509916709444667, 0.46985715638112324211, 0.31429718934252848239, 0.15650599804572670748, -0.27383976700790413350, -0.72748362485000461763, 0.00922146531161965183, -0.64497187103838160560, -0.06501102186795156002, 0.41003777934870661026, -0.67285509578310498391, -0.46046918650052670596, 0.20836853325286736061, 0.44125410094283468165, -0.26545758743829200865, -1.27015398725786710088, 0.02098012999196843809, 0.49598964857338084800, 0.72045304653337827183, -0.79462947736439604185, -0.01195357199722901775, -0.74469296366616088978, -0.37456682064831242629, -0.07015470077748779676, 0.00128179893152863267, -0.57137509129217556314, -0.35971620661556963361, -0.91880199772353809973, -0.81892012783492496020, -0.48387913747624045069, -0.41004258227987194774, 0.14806563191557939319, 0.11198147604656022436, -0.53903525752391945325, -0.25627211371007896368, -0.56392113920449582487, -0.94327256823567529054, -0.03852763713116055810, 
-0.52281773493698591171, -0.81688357704619063426, -0.27379720486400743740, -0.12543298653575277180, -0.85374170525789794262, 0.73804439779066033367, 0.17740211975481312567, -0.55233381559691696605, -0.50670746523802523420, 0.14777782161748953538, 0.31934752953568185108, -0.59869075222917311763, -1.95237520902476080487, -0.54172666092645005431, -0.58295375695149642681, -0.43702709700270014892, -0.62011414630339289911, -0.55941353458510223451, 0.10049472772340767301, 0.41253058281021282028, -0.84054165657697321734, 0.20217791294070375230, 0.23926403020173597769, -0.56539057057152353192, -0.09039897662312967874, 0.63842582069973441428, -0.77193740845356528801, -0.20836303098123540312, -1.25353158722216373455, 0.30468108464984233752, -0.58029690726803839773, -0.09491790885065631844, 0.46622869984705622715, -0.03341346536636530251, -0.13295273332149099010, -0.70443508110925090460, -0.41674921622420058043, -1.09993948443814071325, -0.29869551420075723946, -0.11541497896918634369, 0.39125187155407337158, -1.07183372517032537985, -0.70806979522448143527, -1.26213509083624408902, -1.01629253822374088223, -0.42676808911622715614, -0.43933042642304004355, 0.07734481186366914951, -1.06028621046638771297, -0.61152791554962326881, 0.09712029610815697822, 0.08578016512470798549, -1.92725159512051669175, -1.34020840449938205552, -0.67872109226913990287, -1.10112722555928677082, -0.50298718278944887228, 0.06948856992044150083, -1.00796869436586278290, -0.41355447176456405778, -1.02910675439697096856, -1.35691447502734785502, 0.31472601142674039298, 0.05340604681242266949, -1.20577817629285699574, 0.13845189960605308599, -0.22363853024173008244, -0.48602406924071622774, 0.23800569426159345898, 0.04352238113961470756, -0.60309578103138483662, -1.64019700186444761236, 0.54821292855282188761, -0.67965442679793519609, 0.55859902185850440759, -0.40279816368564691498, -0.15987066085486267841, -0.73462352342451375797, -1.02948766189166640572, 0.67975726484913767678, 
-0.64239155921676938110, -0.64135190850809631335, -2.08330406537385393051, -0.67955917587524161672, -0.38265101928961564592, -0.78546501691669945444, 0.05683480744395219553, -0.40500528108451822051, -0.73733121585866201464, -1.59767398456508757931, -1.23918658976696982599, -0.10408274618061236216, -1.89385269557048729894, -0.03220365286357163015, -0.53686786151652687860, -0.62298983302215926727, -0.00539883654937600710, -0.18627445673671627624, -0.55231729687408293028, -0.60369866732770072204, -1.75145214532333159951, -0.37246522976157797613, -0.03532385238183133325, -0.82487428699918674546, -1.45131916186192211349, -0.29415251029121952664, -0.35229080597388040541, -0.88961670417165872315, -1.24161026942514163274, -0.75418483828400983970, -1.44689574640382412341, -0.67685188139538998708, -0.11017580738198612389, 0.38408137589249868871, -0.93683826584540597082, -0.23111053398240216650, -1.36225635180247994782, -0.06565370606701925738, -0.92941356963181176454, -0.36519151970683205732, -0.53594266141073254595, -0.07060528837489810083, -0.18960783108972789623, -0.79215626563262331317, -0.45848985524916785472, -1.41105221655465351560, 0.26142480260117773039, -0.65365639445028067556, -0.19953229800443189612, -1.04171212293487691625, -1.24864650715144565041, -0.56622294308908582838, -1.33435050316525716241, -1.01217532000348864329, 0.28512282101317776295, -0.93248586969622859755, 0.91501886287460942881, -0.82483064292539975959, -0.97445100598562106597, -0.23039279537400858544, -1.15758432365698715749, -0.01399181265754834147, -0.87893787202050088769, -0.76732529355591316822, -1.13810505382119986351, -1.29975205056650056079, -0.73091835081110589911, -0.30792088443628407024, -0.51544366391096629876, -0.46524921415381187417, -0.32941422904421996387, -0.79740594948699816680, 0.49960859247748223844, -0.33629618581476922179, -0.16847248244911192105, -0.39204184562809973880, -0.44248605466898860428, -1.29488165968751411938, -0.29773221458928150751, -0.83416948918946909330, 
0.88146437261706722666, -0.37535875776168009521, 0.53207300175529248509, 0.38151481280545002095, 0.60435224350662120063, -0.83050243598551609647, -0.90841620366861186575, -0.07640890037637113053, -0.64760208098319727021, 1.04565147289446835899, -0.31591888195799527894, -0.40956593833390897430, -0.06282154996028335714, -0.46526512139339515350, 0.09410657337959671409, -0.79739765555173180989, 0.08116984837066093528, -0.29972590111691105319, -0.30066385735906631105, -0.24001334944262048277, -0.36146682771750465735, -0.00137771963931862529, -1.01079792578301619344, -0.53687193643200736837, -0.71249958568524673908, -0.15295368044493981574, 0.26934274001835067924, -0.31529941483094969801, 0.04080576897532278702, -0.31489125926294225799, 0.07908316802486370367, -0.67082520415535173974, -0.66692527476994101221, -0.53286786273650244006, -0.03045556918238066790, -1.01255351479069544141, 0.24844532255839574253, -1.27078805187023147205, 0.15205226859128812000, -1.13366611320660770623, -0.70489234626668717532, 0.50326419767004038075, -0.84932136469474983631, -0.47824335363366193841, -0.40886899757732281246, 0.04128260647015145890, -0.40893631206876002171, 0.17483990866653884022, -0.38019156581547142171, -0.02045846782592515567, -0.36471043679539605353, 0.07576455821916869282];
+
+
+
+
+
+  #Parameters
+  var pi = 3.14159265359;
+  var fc1 = 1000;
+  var fc2 = 2000;
+  var Fs = 8000;
+  var N = 101;
+
+  #Parameters for findPeaks. distance should be constant.
+  #var distance = Fs * 0.6;
+  var distance = 4800;
+
+
+
+  # Step 1: FIR Bandpass Filter
+  var wc1 = 2 * pi * fc1 / Fs; #wc should vary from 0 to pi
+  var lpf1 = lowPassFIRFilter(wc1, N); #ideal low -pass filter
+  var lpf1_w = lpf1 * hamming(N);
+
+  var wc2 = 2 * pi * fc2 / Fs;
+  var lpf2 = lowPassFIRFilter(wc2, N);
+  var lpf2_w = lpf2 * hamming(N);
+
+  # var bpf = lpf2 - lpf;
+  var bpf_w = sub(lpf2_w,lpf1_w);
+  var FIRfilterResponseForBpf = FIRFilterResponse(input_signal, bpf_w);
+
+  # Step 2: Artifact Removal (R-peak detection)
+  var max_signal = max(FIRfilterResponseForBpf);
+
+  var height = 0.3 * max_signal;
+
+  var r_peaks = find_peaks(FIRfilterResponseForBpf, height, distance);
+
+  var len_r_peaks = len(r_peaks);
+  var last_peaks_index = sub(len_r_peaks, [1]);
+  var peaks_count = getSingleElemAtIndx(r_peaks, last_peaks_index);
+
+  #### The direct form below causes an error, so it has to be changed from
+  #rr_intervals = np.diff(peaks) / fs
+  #avg_hr = 60 / np.mean(rr_intervals)
+  #### to
+  #diff_mean = np.mean(np.diff(peaks))
+  #avg_hr_fs = 60 * Fs;
+  #avg_hr = avg_hr_fs/diff_mean;
+
+  var diff_val = diff(r_peaks, peaks_count);
+  var peaks_count_minus_one = sub(peaks_count, 1);
+  var diff_mean = mean(diff_val, peaks_count_minus_one);
+
+  var avg_hr = (60 * Fs) / diff_mean;
+
+  print(avg_hr);
+
+
+
+}
+
diff --git a/mlir/test/Examples/DspExample/dsp_bitwiseand_op.py b/mlir/test/Examples/DspExample/dsp_bitwiseand_op.py
new file mode 100644
index 000000000000..ea513bdb7a9f
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_bitwiseand_op.py
@@ -0,0 +1,20 @@
+# RUN: dsp1 %s -emit=mlir 2>&1 | FileCheck %s
+# Checks the -emit=mlir output for a bitwiseand call on two 5-element literals.
+def main() {
+  var a = [0,1,2,9,1000];
+  var b = [2,2,2,15,100];
+  var c = bitwiseand(a, b);
+  # c = [0,0,2,9,96]
+  print(c);
+}
+# ninja && ./bin/dsp1 ../mlir/test/Examples/DspExample/dsp_bitwiseand_op.py --emit=mlir
+
+# Expected -emit=mlir output for main (SSA names are captured, not hard-coded):
+# CHECK-LABEL: dsp.func @main() {
+# CHECK-NEXT:    %[[VAL_0:.*]] = dsp.constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 9.000000e+00, 1.000000e+03]> : tensor<5xf64>
+# CHECK-NEXT:    %[[VAL_1:.*]] = dsp.constant dense<[2.000000e+00, 2.000000e+00, 2.000000e+00, 1.500000e+01, 1.000000e+02]> : tensor<5xf64>
+# CHECK-NEXT:    %[[VAL_2:.*]] = dsp.bitwiseand %[[VAL_0]], %[[VAL_1]] : (tensor<5xf64>, tensor<5xf64>) -> tensor<*xf64>
+# CHECK-NEXT:    dsp.print %[[VAL_2]] : tensor<*xf64>
+# CHECK-NEXT:    dsp.return
+# CHECK-NEXT:  }
+# CHECK-NEXT: }
diff --git a/mlir/test/Examples/DspExample/dsp_dtmf.py b/mlir/test/Examples/DspExample/dsp_dtmf.py
new file mode 100644
index 000000000000..74c60304d021
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_dtmf.py
@@ -0,0 +1,7 @@
+def main() {
+  var digit = [1];  # first argument to generateDtmf
+  var duration = [[0.5]];  # NOTE(review): nested one level deeper than digit/fs — confirm this is intended
+  var fs = [20];  # presumably the sampling frequency — TODO confirm
+  var result = generateDtmf(digit, duration, fs);
+  print(result);
+}
diff --git a/mlir/test/Examples/DspExample/dsp_fft.py b/mlir/test/Examples/DspExample/dsp_fft.py
new file mode 100644
index 000000000000..f49483bdd26f
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_fft.py
@@ -0,0 +1,8 @@
+def main() {
+  var a = generateDtmf(7, 0.5, 16384);  # signal fed to both FFT calls below
+  var b = fft1dreal(a);  # presumably the real part of the 1-D FFT — TODO confirm
+  var c = fft1dimg(a);  # presumably the imaginary part of the 1-D FFT — TODO confirm
+  print(b);
+  print(c);
+}
+  
\ No newline at end of file
diff --git a/mlir/test/Examples/DspExample/dsp_fftcombine.py b/mlir/test/Examples/DspExample/dsp_fftcombine.py
new file mode 100644
index 000000000000..fb345815128c
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_fftcombine.py
@@ -0,0 +1,6 @@
+def main() {
+  var real = [0.334089, 0.423880, 1.207107, -1.423880, -0.748303, -1.423880, 1.207107, 0.423880];  # 8 values passed as the first (real) operand
+  var imag = [0.000000, 0.000000, -2.000000, 0.000000, 0.000000, -0.000000, 2.000000, 0.000000];  # 8 values passed as the second (imag) operand
+  var result = fftCombine(real, imag);  # presumably pairs real[i] with imag[i] — TODO confirm output layout
+  print(result);
+}
\ No newline at end of file
diff --git a/mlir/test/Examples/DspExample/dsp_identify_speaker.py b/mlir/test/Examples/DspExample/dsp_identify_speaker.py
new file mode 100644
index 000000000000..d29a7db0621d
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_identify_speaker.py
@@ -0,0 +1,34 @@
+def main() {
+  #generate_voice_signature
+  # Three 7-element reference vectors, one per known person.
+  var person1 = [2.0, -1.0, 3.2, -2.4, 1.5, 1.0, 3.0];
+  var person2 = [-2.1, -4.0, 1.0, -1.4, -2.5, 0.0, 0.1];
+  var person3 = [-1.0, 1.0, 1.0, -1.0, 5.0, -1.0, -2.0];
+  # Vector to identify; exactly one of the three assignments below is active.
+  #var signature = [2.0, -1.0, 3.2,-2.4, 1.5, 1.0, 3.0]; # person 1
+  #var signature = [-2.1, -4.0, 1.0, -1.4, -2.5, 0.0, 0.1]; # person 2
+  var signature = [-1.0, 1.0, 1.0, -1.0, 5.0, -1.0, -2.0]; # person 3
+  # The maximum of each correlation is kept as that person's score.
+  var max1 = max(correlate(person1, signature));
+  var max2 = max(correlate(person2, signature));
+  var max3 = max(correlate(person3, signature));
+  # One slot per person; filled in by the setSingleElemAtIndx calls below.
+  var total_maxes = [0, 0, 0];
+
+  #var temp2 = setElemAtIndx(total_maxes, 0, max1); # does not work
+  var temp2 = setSingleElemAtIndx(total_maxes, 0, max1); # works
+  var temp3 = setSingleElemAtIndx(total_maxes, 1, max2); # works
+  var temp4 = setSingleElemAtIndx(total_maxes, 2, max3); # works
+  # Position of the largest entry of total_maxes.
+  var max_index = argmax(total_maxes);
+  # Value stored at that position.
+  var max_value = getSingleElemAtIndx(total_maxes, max_index);
+  # NOTE(review): temp2..temp4 are printed although only total_maxes is read afterwards — presumably to keep the set calls alive; confirm.
+  print(max_index);
+  print(temp2);
+  print(max_value);
+  print(temp3);
+  print(total_maxes);
+  print(temp4);
+}
+ 
diff --git a/mlir/test/Examples/DspExample/dsp_medfilt.py b/mlir/test/Examples/DspExample/dsp_medfilt.py
new file mode 100644
index 000000000000..e19f039d4b8d
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_medfilt.py
@@ -0,0 +1,18 @@
+def main() {
+  var a = [0.0, 10.0, 340.0, 30.0, 40.0, 110.0, 60.0, 250.0];  # 8-element input
+  var b = medianFilter(a);  # window size is not specified here — presumably fixed by the builtin; TODO confirm
+  print(b);
+}
+
+# emit=mlir
+# -----------
+# module {
+#   dsp.func @main() {
+#     %0 = dsp.constant dense<[0.000000e+00, 1.000000e+01, 3.400000e+02, 3.000000e+01, 4.000000e+01, 1.100000e+02, 6.000000e+01, 2.500000e+02]> : tensor<8xf64>
+#     %1 = "dsp.medianFilter"(%0) : (tensor<8xf64>) -> tensor<*xf64>
+#     dsp.print %1 : tensor<*xf64>
+#     dsp.return
+#   }
+# }
+
+# emit=mlir-affine
diff --git a/mlir/test/Examples/DspExample/dsp_modulo_test.toy b/mlir/test/Examples/DspExample/dsp_modulo_test.toy
new file mode 100644
index 000000000000..dcdb20bbaeae
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_modulo_test.toy
@@ -0,0 +1,16 @@
+def main() {
+  var a = [10,20,30];  # first operand of modulo
+  var b = [40,50,60];  # second operand of modulo; same length as `a`
+  var c = modulo(a, b);  # presumably element-wise a mod b — TODO confirm
+  print(c);
+}
+
+#module {
+#  dsp.func @main() {
+#    %0 = dsp.constant dense<[1.000000e+01, 2.000000e+01, 3.000000e+01]> : tensor<3xf64>
+#    %1 = dsp.constant dense<[4.000000e+01, 5.000000e+01, 6.000000e+01]> : tensor<3xf64>
+#    %2 = "dsp.modulo"(%0, %1) : (tensor<3xf64>, tensor<3xf64>) -> tensor<*xf64>
+#    dsp.print %2 : tensor<*xf64>
+#    dsp.return
+#  }
+#}
\ No newline at end of file
diff --git a/mlir/test/Examples/DspExample/dsp_neg_input.py b/mlir/test/Examples/DspExample/dsp_neg_input.py
new file mode 100644
index 000000000000..cbb9fdee0a42
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_neg_input.py
@@ -0,0 +1,26 @@
+# RUN: toyc-ch2 %s -emit=mlir 2>&1 | FileCheck %s
+# NOTE(review): the output is piped to FileCheck but this file has no CHECK directives — confirm the intended tool and add checks.
+def main() {
+  # var a = [10,20,30];
+  # var b = [40,50,60];
+  # var a = [[10,20],[30,40]];
+  # var b = [[40,50],[60,70]];
+
+  # var a = [[[10,20],[30,40]] , [[10,20],[30,40]]];
+  # var b = [[[40,50],[60,70]] , [[0,0],[10,20]]];
+  var a = [[[10,-20],[30.9,0]] ];  # active input: 1x2x2 literal with one negative and one fractional entry
+  var b = [[[40,50],[-60,70]] ];  # active input: same nesting as `a`, with one negative entry
+  var c = sub(a, b);
+  print(c);
+}
+# /home/local/ASUAD/apkhedka/ForLLVM/build/bin/dsp1 /home/local/ASUAD/apkhedka/ForLLVM/mlir/test/Examples/DspExample/dsp_sub_op.py -emit=mlir
+# NOTE(review): the command above names dsp_sub_op.py, and the module below shows 3-element 1-D constants (the commented-out inputs), not the active 1x2x2 literals — regenerate.
+# module {
+#   dsp.func @main() {
+#     %0 = dsp.constant dense<[1.000000e+01, 2.000000e+01, 3.000000e+01]> : tensor<3xf64>
+#     %1 = dsp.constant dense<[4.000000e+01, 5.000000e+01, 6.000000e+01]> : tensor<3xf64>
+#     %2 = "dsp.sub"(%0, %1) : (tensor<3xf64>, tensor<3xf64>) -> tensor<*xf64>
+#     dsp.print %2 : tensor<*xf64>
+#     dsp.return
+#   }
+# }
diff --git a/mlir/test/Examples/DspExample/dsp_pow_op.py b/mlir/test/Examples/DspExample/dsp_pow_op.py
new file mode 100644
index 000000000000..ff9b156ba492
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_pow_op.py
@@ -0,0 +1,18 @@
+# RUN: dsp1 %s -emit=mlir 2>&1 | FileCheck %s
+# Checks the -emit=mlir output for `a^b` with a 2-element literal and a scalar exponent.
+def main() {
+  var a = [4,20];
+  var b = 4;
+  #var c = pow(a, b);
+  var c = a^b;
+  print(c);
+}
+# /home/local/ASUAD/apkhedka/ForLLVM/build/bin/dsp1 /home/local/ASUAD/apkhedka/ForLLVM/mlir/test/Examples/DspExample/dsp_pow_op.py -emit=mlir
+# NOTE(review): the op name and scalar tensor type are left unconstrained below — tighten once the exact printed form is confirmed.
+# CHECK-LABEL: dsp.func @main() {
+# CHECK-NEXT:       %[[VAL_0:.*]] = dsp.constant dense<[4.000000e+00, 2.000000e+01]> : tensor<2xf64>
+# CHECK-NEXT:       %[[VAL_1:.*]] = dsp.constant dense<4.000000e+00> : tensor<{{.*}}f64>
+# CHECK-NEXT:       %[[VAL_2:.*]] = {{.*}}%[[VAL_0]], %[[VAL_1]]{{.*}} -> tensor<*xf64>
+# CHECK-NEXT:       dsp.print %[[VAL_2]] : tensor<*xf64>
+# CHECK-NEXT:       dsp.return
+# CHECK-NEXT:       }
diff --git a/mlir/test/Examples/DspExample/dsp_shiftRight_op.py b/mlir/test/Examples/DspExample/dsp_shiftRight_op.py
new file mode 100644
index 000000000000..420e75564691
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_shiftRight_op.py
@@ -0,0 +1,8 @@
+# RUN: toyc-ch2 %s -emit=mlir 2>&1 | FileCheck %s
+# NOTE(review): no CHECK directives follow, so FileCheck has nothing to match — confirm the intended tool and add checks.
+def main() {
+  var a = [50,50,50,50];  # first operand of shiftRight
+  var b = [2,3,4,5];  # presumably per-element shift amounts — TODO confirm
+  var c = shiftRight(a, b);
+  print(c);
+}
diff --git a/mlir/test/Examples/DspExample/dsp_signal_smoothing.py b/mlir/test/Examples/DspExample/dsp_signal_smoothing.py
new file mode 100644
index 000000000000..fcee96119ba3
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_signal_smoothing.py
@@ -0,0 +1,7 @@
+def main() {
+  var a = [0.0, 10.0, 340.0, 30.0, 40.0, 110.0, 60.0, 250.0];  # 8-element input
+  var b = slidingWindowAvg(a);  # first stage; window size is not given here — TODO confirm the builtin's default
+  var c = medianFilter(b);  # second stage runs on the averaged output, not on `a`
+  print(c);
+}
+
diff --git a/mlir/test/Examples/DspExample/dsp_space_communication.py b/mlir/test/Examples/DspExample/dsp_space_communication.py
new file mode 100644
index 000000000000..2cb2227d3733
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_space_communication.py
@@ -0,0 +1,10 @@
+def main() {
+        var d = "HELLO FROM SPACE";  # string fed to space_modulate
+        # print(d);
+        var a = space_modulate(d);
+        var noise = sin(a);  # the added term is computed from the modulated signal itself
+        var noisy_signal = a+noise;
+        var b = space_demodulate(noisy_signal);
+        var e = space_err_correction(d);  # NOTE(review): takes the original `d`, so `b` is never used — presumably `b` was intended; confirm
+        print(e);
+}
diff --git a/mlir/test/Examples/DspExample/dsp_string.py b/mlir/test/Examples/DspExample/dsp_string.py
new file mode 100644
index 000000000000..ead95cf07e73
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_string.py
@@ -0,0 +1,10 @@
+def main() {
+        var a = [[[10,20],[30,0]] ];  # 1x2x2 numeric literal
+        var b = [[[40,50],[60,70]] ];  # same nesting as `a`
+        var c = sub(a, b);
+        print(c);
+        # The remaining statements print strings: one through a variable, one as a direct literal.
+        var d = "HELLO FROM SPACE";
+        print(d);
+        print("abd");
+}
diff --git a/mlir/test/Examples/DspExample/dsp_voice_signature.py b/mlir/test/Examples/DspExample/dsp_voice_signature.py
new file mode 100644
index 000000000000..8e3565150555
--- /dev/null
+++ b/mlir/test/Examples/DspExample/dsp_voice_signature.py
@@ -0,0 +1,8 @@
+def main() {  # Calls generateVoiceSignature with four single-value literals and prints the result.
+  var f1 = [697];
+  var f2 = [[1209]];  # NOTE(review): nested one level deeper than f1 - confirm the extra dimension is intended
+  var duration = [0.5];
+  var fs = [20];
+  var result = generateVoiceSignature(f1, f2, duration, fs);
+  print(result);
+}
diff --git a/mlir/test/Examples/DspExample/fftfreq.py b/mlir/test/Examples/DspExample/fftfreq.py
new file mode 100644
index 000000000000..103c7d24fdcd
--- /dev/null
+++ b/mlir/test/Examples/DspExample/fftfreq.py
@@ -0,0 +1,6 @@
+def main() {  # Calls fftfreq with two scalar constants and prints the result.
+    var d = 8;
+    var N = 5;
+    var frequencies = fftfreq(N, d);  # argument order is (N, d)
+    print(frequencies);
+}
\ No newline at end of file
diff --git a/mlir/test/Examples/DspExample/full_dtmf.py b/mlir/test/Examples/DspExample/full_dtmf.py
new file mode 100644
index 000000000000..269801a1b7bb
--- /dev/null
+++ b/mlir/test/Examples/DspExample/full_dtmf.py
@@ -0,0 +1,38 @@
+def main() {  # Pipeline: generateDtmf -> fftReal/fftImag -> magnitude -> fftfreq -> findDominantPeaks -> recoverDtmfDigit.
+    var digit = 0; # digit whose dtmf tone is to be calculated
+    var duration = 0.0625; # duration of the dtmf signal
+    var fs = 8192; # sampling frequency
+    var d = 1/fs; # 1/fs; not referenced again below (fftfreq is given the literal 0.000122 instead)
+    var N = fs * duration; # 8192 * 0.0625 = 512; not referenced again below (fftfreq is given the literal 512)
+    var dtmf_tone = generateDtmf(digit, duration, fs); # generate the dtmf signal
+    # print(dtmf_tone);
+    var fft_real = fftReal(dtmf_tone); # take fft real
+    var fft_imag = fftImag(dtmf_tone); # take fft imag
+    # print(fft_real);
+    # print(fft_imag);
+    var squared_fft_real = square(fft_real);
+    var squared_fft_imag = square(fft_imag);
+    # print(squared_fft_real);
+    # print(squared_fft_imag);
+    var sum = squared_fft_real + squared_fft_imag;
+    # print(sum);
+    var magnitudes = sqrt(sum); # sqrt(re^2 + im^2)
+    # print(magnitudes);
+    var frequencies = fftfreq(512, 0.000122); # literals mirror N and d above; keep them in sync if fs or duration change
+    # print(frequencies);
+    var peaks = findDominantPeaks(frequencies, magnitudes);
+    print(peaks);
+    var freqPairs = [ # ten rows of two frequencies; row order matches the standard DTMF pairs for digits 0-9
+    [941, 1336],
+    [697, 1209],
+    [697, 1336],
+    [697, 1477],
+    [770, 1209],
+    [770, 1336],
+    [770, 1477],
+    [852, 1209],
+    [852, 1336],
+    [852, 1477]];
+    var recovered_digit = recoverDtmfDigit(peaks, freqPairs);
+    print(recovered_digit);
+}
diff --git a/mlir/test/Examples/DspExample/speakeridentification.py b/mlir/test/Examples/DspExample/speakeridentification.py
new file mode 100644
index 000000000000..9cf45c8b2011
--- /dev/null
+++ b/mlir/test/Examples/DspExample/speakeridentification.py
@@ -0,0 +1,11 @@
+def main() {  # Calls identifySpeaker with a 10-value literal and a 3x10 table, then prints the result.
+    # var voice_signature = generateVoiceSignature(150, 250, 5, 10);
+    # print(voice_signature);
+    var speaker_list = [
+        [1,2,3,4,5,6,7,8,9,10],
+        [11,12,13,14,15,16,17,18,19,20],
+        [21,22,23,24,25,26,27,28,29,30]
+    ];
+    var speaker = identifySpeaker([1,2,3,4,5,6,7,8,9,10], speaker_list);  # the query literal is identical to the first row of speaker_list
+    print(speaker);
+}
diff --git a/mlir/test/Examples/DspExample/zeroCross/zeroCross.mlir b/mlir/test/Examples/DspExample/zeroCross/zeroCross.mlir
new file mode 100644
index 000000000000..6a5b0e8d0a4e
--- /dev/null
+++ b/mlir/test/Examples/DspExample/zeroCross/zeroCross.mlir
@@ -0,0 +1,46 @@
+// Counts sign changes between adjacent samples of a 3-element f64 buffer and prints the count.
+func.func @main() {
+  %alloc = memref.alloc() : memref<3xf64>
+  %alloc_1 = memref.alloc() : memref<f64>
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %cst = arith.constant 1.000000e+01 : f64
+  affine.store %cst, %alloc[%c0] : memref<3xf64>
+  %cst_1 = arith.constant -1.000000e+01 : f64
+  affine.store %cst_1, %alloc[%c1] : memref<3xf64>
+  %cst_2 = arith.constant 1.000000e+01 : f64
+  affine.store %cst_2, %alloc[%c2] : memref<3xf64>
+  %lb = arith.constant 1 : index
+  %ub = arith.constant 3 : index
+  %step = arith.constant 1 : index
+  %total_0 = arith.constant 0.0 : f64
+  %zero = arith.constant 0.0 : f64
+  %one = arith.constant 1.0 : f64
+  %total = scf.for %arg0 = %lb to %ub step %step
+    iter_args(%total_iter = %total_0) -> (f64) {
+    %prev_idx = arith.subi %arg0, %step : index
+    %1 = memref.load %alloc[%prev_idx] : memref<3xf64>
+    // Test the sign on the f64 value itself: truncating to i64 first would turn values in (-1, 0) into 0.
+    %sign_1 = arith.cmpf olt, %1, %zero : f64
+    %2 = memref.load %alloc[%arg0] : memref<3xf64>
+    // Same float comparison for the current sample.
+    %sign_2 = arith.cmpf olt, %2, %zero : f64
+    %cond = arith.cmpi "eq", %sign_1, %sign_2 : i1
+    %total_next = scf.if %cond -> (f64) {
+      scf.yield %total_iter : f64
+    } else {
+      %new_total = arith.addf %total_iter, %one : f64
+      scf.yield %new_total : f64
+    }
+    scf.yield %total_next : f64
+  }
+  affine.store %total, %alloc_1[] : memref<f64>
+  // Print the value held by the buffer.
+  // dsp.print %alloc : memref<3xf64>
+  // Print the number of crosses through x=0
+  dsp.print %alloc_1 : memref<f64>
+  memref.dealloc %alloc : memref<3xf64>
+  memref.dealloc %alloc_1 : memref<f64>
+  return
+}
\ No newline at end of file
diff --git a/mlir/test/Examples/DspExample/zeroCross/zeroCross10.py b/mlir/test/Examples/DspExample/zeroCross/zeroCross10.py
index 4c3848d0a300..24e0b57d45a4 100644
--- a/mlir/test/Examples/DspExample/zeroCross/zeroCross10.py
+++ b/mlir/test/Examples/DspExample/zeroCross/zeroCross10.py
@@ -1,33 +1,8 @@
 # RUN: /bin/dsp1 %s -emit=mlir 2>&1 | FileCheck %s
 
 # User defined generic function that operates on unknown shaped arguments
-
-# def func1( x , y){
-#     var z = x + y;
-#     return z;
-# }
-
 def main() {
-  var a = [10,20,30];
+  var a = [10,-20,30,-10,40,50,60,-100,-20,-30,10]; # 6 sign changes between neighbouring elements, so the expected count is 6
   var g = zeroCrossCount(a);
-
-  
   print(g);
 }
-
-# CHECK-LABEL: toy.func @multiply_transpose(
-# CHECK-SAME:                               [[VAL_0:%.*]]: tensor<*xf64>, [[VAL_1:%.*]]: tensor<*xf64>) -> tensor<*xf64>
-# CHECK:         [[VAL_2:%.*]] = toy.transpose([[VAL_0]] : tensor<*xf64>) to tensor<*xf64>
-# CHECK-NEXT:    [[VAL_3:%.*]] = toy.transpose([[VAL_1]] : tensor<*xf64>) to tensor<*xf64>
-# CHECK-NEXT:    [[VAL_4:%.*]] = toy.mul [[VAL_2]], [[VAL_3]] :  tensor<*xf64>
-# CHECK-NEXT:    toy.return [[VAL_4]] : tensor<*xf64>
-
-# CHECK-LABEL: toy.func @main()
-# CHECK-NEXT:    [[VAL_5:%.*]] = toy.constant dense<{{\[\[}}1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf64>
-# CHECK-NEXT:    [[VAL_6:%.*]] = toy.reshape([[VAL_5]] : tensor<2x3xf64>) to tensor<2x3xf64>
-# CHECK-NEXT:    [[VAL_7:%.*]] = toy.constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00]> : tensor<6xf64>
-# CHECK-NEXT:    [[VAL_8:%.*]] = toy.reshape([[VAL_7]] : tensor<6xf64>) to tensor<2x3xf64>
-# CHECK-NEXT:    [[VAL_9:%.*]] = toy.generic_call @multiply_transpose([[VAL_6]], [[VAL_8]]) : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64>
-# CHECK-NEXT:    [[VAL_10:%.*]] = toy.generic_call @multiply_transpose([[VAL_8]], [[VAL_6]]) : (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64>
-# CHECK-NEXT:    toy.print [[VAL_10]] : tensor<*xf64>
-# CHECK-NEXT:    toy.return
diff --git a/mlir/test/Examples/Toy/Ch2/scalar.toy b/mlir/test/Examples/Toy/Ch2/scalar.toy
index b109898fb6d3..a4f4211a5faf 100644
--- a/mlir/test/Examples/Toy/Ch2/scalar.toy
+++ b/mlir/test/Examples/Toy/Ch2/scalar.toy
@@ -2,6 +2,10 @@
 
 def main() {
   var a<2, 2> = 5.5;
+  var b = 6;  # NOTE(review): these lines add output ahead of print(a); any FileCheck expectations in this file are not updated by this hunk - verify
+  var c = 8;
+  var d = b+c;  # scalar + scalar
+  print(d);
   print(a);
 }
 
diff --git a/mlir/test/conv2d/conv-affine.mlir b/mlir/test/conv2d/conv-affine.mlir
new file mode 100644
index 000000000000..142af67d4c46
--- /dev/null
+++ b/mlir/test/conv2d/conv-affine.mlir
@@ -0,0 +1,149 @@
+// Hand-written 3x3 sliding-window sum over a 4x4 input; taps that fall outside the input contribute 0.
+module {
+    func.func @main() {
+        %input = memref.alloc() : memref<4x4xf64>
+        %kernel = memref.alloc() : memref<3x3xf64>
+        %output = memref.alloc() : memref<4x4xf64>
+
+        %c0 = arith.constant 0 : index
+        %c1 = arith.constant 1 : index
+        %c2 = arith.constant 2 : index
+        %c3 = arith.constant 3 : index
+        %c4 = arith.constant 4 : index
+
+        %cst0 = arith.constant 0.000000e+00 : f64
+        %cst1 = arith.constant 1.000000e+00 : f64
+        %cst2 = arith.constant 2.000000e+00 : f64
+        %cst3 = arith.constant 3.000000e+00 : f64
+        %cst4 = arith.constant 4.000000e+00 : f64
+        %cst5 = arith.constant 5.000000e+00 : f64
+        %cst6 = arith.constant 6.000000e+00 : f64
+        %cst7 = arith.constant 7.000000e+00 : f64
+        %cst8 = arith.constant 8.000000e+00 : f64
+        %cstn1float = arith.constant -1.000000e+00 : f64
+        %cstn1int = arith.constant -1 : i64
+
+        // input
+        affine.store %cst1, %input[%c0, %c0] : memref<4x4xf64>
+        affine.store %cst2, %input[%c0, %c1] : memref<4x4xf64>
+        affine.store %cst3, %input[%c0, %c2] : memref<4x4xf64>
+        affine.store %cst4, %input[%c0, %c3] : memref<4x4xf64>
+
+        affine.store %cst2, %input[%c1, %c0] : memref<4x4xf64>
+        affine.store %cst3, %input[%c1, %c1] : memref<4x4xf64>
+        affine.store %cst4, %input[%c1, %c2] : memref<4x4xf64>
+        affine.store %cst6, %input[%c1, %c3] : memref<4x4xf64>
+
+        affine.store %cst4, %input[%c2, %c0] : memref<4x4xf64>
+        affine.store %cst3, %input[%c2, %c1] : memref<4x4xf64>
+        affine.store %cst2, %input[%c2, %c2] : memref<4x4xf64>
+        affine.store %cst1, %input[%c2, %c3] : memref<4x4xf64>
+
+        affine.store %cst6, %input[%c3, %c0] : memref<4x4xf64>
+        affine.store %cst8, %input[%c3, %c1] : memref<4x4xf64>
+        affine.store %cst4, %input[%c3, %c2] : memref<4x4xf64>
+        affine.store %cst7, %input[%c3, %c3] : memref<4x4xf64>
+
+        // dsp.print %input : memref<4x4xf64>
+
+        // kernel
+        affine.store %cst1, %kernel[%c0, %c0] : memref<3x3xf64>
+        affine.store %cst0, %kernel[%c0, %c1] : memref<3x3xf64>
+        affine.store %cstn1float, %kernel[%c0, %c2] : memref<3x3xf64>
+
+        affine.store %cst1, %kernel[%c1, %c0] : memref<3x3xf64>
+        affine.store %cst0, %kernel[%c1, %c1] : memref<3x3xf64>
+        affine.store %cstn1float, %kernel[%c1, %c2] : memref<3x3xf64>
+
+        affine.store %cst1, %kernel[%c2, %c0] : memref<3x3xf64>
+        affine.store %cst0, %kernel[%c2, %c1] : memref<3x3xf64>
+        affine.store %cstn1float, %kernel[%c2, %c2] : memref<3x3xf64>
+
+        // delta: kernel half-width (3 / 2 -> 1). The float-derived %ub/%lb are not used; the index bounds below are hard-coded.
+        %delta_ub = arith.divf %cst3, %cst2 : f64
+        %delta_lb = arith.mulf %delta_ub, %cstn1float : f64
+
+        %ub = arith.fptosi %delta_ub : f64 to i64
+        %lb = arith.fptosi %delta_lb : f64 to i64
+
+        // %delta_dim_ub = arith.index_cast %ub : i64 to index
+        // %delta_dim_lb = arith.index_cast %lb : i64 to index
+        %delta_dim_lb = arith.constant -1 : index
+        %delta_dim_ub = arith.constant 1 : index
+        %delta_dim_ub_excl = arith.constant 2 : index // scf.for upper bounds are exclusive: delta + 1 is needed so kx, ky reach +1
+    // for debug
+        %i = memref.alloc() : memref<1xi64>
+        %d = memref.alloc() : memref<1xf64>
+        memref.store %cstn1float, %d[%c0] : memref<1xf64>
+        memref.store %cstn1int, %i[%c0] : memref<1xi64>
+        dsp.print %d : memref<1xf64>
+        dsp.print %i : memref<1xi64>
+
+        // x, y iteration over every output element; kx, ky walk the kernel offsets -1..1
+        scf.for %x = %c0 to %c4 step %c1 {
+            scf.for %y = %c0 to %c4 step %c1 {
+                %mat_sum = scf.for %kx = %delta_dim_lb to %delta_dim_ub_excl step %c1 iter_args(%outer_sum = %cst0) -> ( f64 ) {
+                    %ele_sum = scf.for %ky = %delta_dim_lb to %delta_dim_ub_excl step %c1 iter_args(%inner_sum = %outer_sum) -> ( f64 ) {
+                        %img_x = arith.addi %x, %kx: index
+                        %img_y = arith.addi %y, %ky: index
+
+                        %test = arith.index_cast %kx : index to i64
+                        memref.store %test, %i[%c0] : memref<1xi64>
+                        // dsp.print %i : memref<1xi64>
+
+                        // sge : predicate 5
+                        %cond_x_lb = "arith.cmpi"(%img_x, %c0) {predicate=5: i64} : (index, index) -> i1
+                        %cond_y_lb = "arith.cmpi"(%img_y, %c0) {predicate=5: i64} : (index, index) -> i1
+                        // slt
+                        %cond_x_ub = "arith.cmpi"(%img_x, %c4) {predicate=2: i64} : (index, index) -> i1
+                        %cond_y_ub = "arith.cmpi"(%img_y, %c4) {predicate=2: i64} : (index, index) -> i1
+
+                        %img_sum_ = scf.if %cond_x_lb -> (f64) {
+                            %sum__ = scf.if %cond_y_lb -> (f64) {
+                                %sum_ = scf.if %cond_x_ub -> (f64) {
+                                    %sum = scf.if %cond_y_ub -> (f64) {
+                                        // load from input
+                                        %input_val = memref.load %input[%img_x, %img_y] : memref<4x4xf64>
+
+                                        // load from kernel
+                                        %ker_x = arith.addi %kx, %delta_dim_ub : index
+                                        %ker_y = arith.addi %ky, %delta_dim_ub : index
+                                        %kernel_val = memref.load %kernel[%ker_x, %ker_y] : memref<3x3xf64>
+
+                                        %img_prod = arith.mulf %input_val, %kernel_val : f64
+                                        scf.yield %img_prod : f64
+                                    } else {
+                                        scf.yield %cst0 : f64
+                                    }
+                                    scf.yield %sum : f64
+                                } else {
+                                    scf.yield %cst0 : f64
+                                }
+                                scf.yield %sum_ : f64
+                            } else {
+                                scf.yield %cst0 : f64
+                            }
+                            scf.yield %sum__ : f64
+                        }else{
+                            scf.yield %cst0 : f64
+                        }
+
+                        %IMGSUM = arith.addf %inner_sum, %img_sum_ : f64
+                        scf.yield %IMGSUM : f64
+                    }
+
+                    scf.yield %ele_sum : f64
+                }
+                memref.store %mat_sum, %output[%x, %y] : memref<4x4xf64>
+            }
+        }
+        // dsp.print %output : memref<4x4xf64>
+
+        memref.dealloc %input : memref<4x4xf64>
+        memref.dealloc %kernel : memref<3x3xf64>
+        memref.dealloc %output : memref<4x4xf64>
+        memref.dealloc %i : memref<1xi64>
+        memref.dealloc %d : memref<1xf64>
+        return
+    }
+}
diff --git a/mlir/test/conv2d/conv-index.mlir b/mlir/test/conv2d/conv-index.mlir
new file mode 100644
index 000000000000..46c8ab486040
--- /dev/null
+++ b/mlir/test/conv2d/conv-index.mlir
@@ -0,0 +1,68 @@
+// Setup-only sketch: fills %input and %kernel and derives delta; there is no convolution loop and %output is never written.
+module {
+    func.func @main() {
+        %input = memref.alloc() : memref<4x4xf32>
+        %kernel = memref.alloc() : memref<3x3xf32>
+        %output = memref.alloc() : memref<4x4xf32>
+
+        %c0 = index.constant 0 
+        %c1 = index.constant 1
+        %c2 = index.constant 2
+        %c3 = index.constant 3
+
+        %cst0 = arith.constant 0.000000e+00 : f32
+        %cst1 = arith.constant 1.000000e+00 : f32
+        %cst2 = arith.constant 2.000000e+00 : f32
+        %cst3 = arith.constant 3.000000e+00 : f32
+        %cst4 = arith.constant 4.000000e+00 : f32
+        %cst5 = arith.constant 5.000000e+00 : f32
+        %cst6 = arith.constant 6.000000e+00 : f32
+        %cst7 = arith.constant 7.000000e+00 : f32
+        %cst8 = arith.constant 8.000000e+00 : f32
+        %cstn1 = arith.constant -1.000000e+00 : f32
+
+        // input
+        affine.store %cst1, %input[%c0, %c0] : memref<4x4xf32>
+        affine.store %cst2, %input[%c0, %c1] : memref<4x4xf32>
+        affine.store %cst3, %input[%c0, %c2] : memref<4x4xf32>
+        affine.store %cst4, %input[%c0, %c3] : memref<4x4xf32>
+        
+        affine.store %cst2, %input[%c1, %c0] : memref<4x4xf32>
+        affine.store %cst3, %input[%c1, %c1] : memref<4x4xf32>
+        affine.store %cst4, %input[%c1, %c2] : memref<4x4xf32>
+        affine.store %cst6, %input[%c1, %c3] : memref<4x4xf32>
+
+        affine.store %cst4, %input[%c2, %c0] : memref<4x4xf32>
+        affine.store %cst3, %input[%c2, %c1] : memref<4x4xf32>
+        affine.store %cst2, %input[%c2, %c2] : memref<4x4xf32>
+        affine.store %cst1, %input[%c2, %c3] : memref<4x4xf32>
+
+        affine.store %cst6, %input[%c3, %c0] : memref<4x4xf32>
+        affine.store %cst8, %input[%c3, %c1] : memref<4x4xf32>
+        affine.store %cst4, %input[%c3, %c2] : memref<4x4xf32>
+        affine.store %cst7, %input[%c3, %c3] : memref<4x4xf32>
+
+        // kernel
+        affine.store %cst1, %kernel[%c0, %c0] : memref<3x3xf32>
+        affine.store %cst0, %kernel[%c0, %c1] : memref<3x3xf32>
+        affine.store %cstn1, %kernel[%c0, %c2] : memref<3x3xf32>
+
+        affine.store %cst1, %kernel[%c1, %c0] : memref<3x3xf32>
+        affine.store %cst0, %kernel[%c1, %c1] : memref<3x3xf32>
+        affine.store %cstn1, %kernel[%c1, %c2] : memref<3x3xf32>
+
+        affine.store %cst1, %kernel[%c2, %c0] : memref<3x3xf32>
+        affine.store %cst0, %kernel[%c2, %c1] : memref<3x3xf32>
+        affine.store %cstn1, %kernel[%c2, %c2] : memref<3x3xf32>
+
+        // delta: 3.0 / 2.0 converted to an integer and then to index; %delta_dim is not used afterwards
+        %delta = arith.divf %cst3, %cst2 : f32
+        %delta_i = arith.fptoui %delta : f32 to i32    
+        %delta_dim = arith.index_cast %delta_i : i32 to index
+
+        memref.dealloc %input : memref<4x4xf32>
+        memref.dealloc %kernel : memref<3x3xf32>
+        memref.dealloc %output : memref<4x4xf32>
+        return
+    }
+}
diff --git a/mlir/test/conv2d/conv-llvm.ll b/mlir/test/conv2d/conv-llvm.ll
new file mode 100644
index 000000000000..45d413b4ce44
--- /dev/null
+++ b/mlir/test/conv2d/conv-llvm.ll
@@ -0,0 +1,230 @@
+llvm.func @free(!llvm.ptr)
+llvm.func @malloc(i64) -> !llvm.ptr
+llvm.func @main() {
+    %0 = llvm.mlir.constant(4 : index) : i64
+    %1 = llvm.mlir.constant(4 : index) : i64
+    %2 = llvm.mlir.constant(1 : index) : i64
+    %3 = llvm.mlir.constant(16 : index) : i64
+    %4 = llvm.mlir.zero : !llvm.ptr
+    %5 = llvm.getelementptr %4[%3] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    %6 = llvm.ptrtoint %5 : !llvm.ptr to i64
+    %7 = llvm.call @malloc(%6) : (i64) -> !llvm.ptr
+    %8 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+    %9 = llvm.insertvalue %7, %8[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %10 = llvm.insertvalue %7, %9[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %11 = llvm.mlir.constant(0 : index) : i64
+    %12 = llvm.insertvalue %11, %10[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %13 = llvm.insertvalue %0, %12[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %14 = llvm.insertvalue %1, %13[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %15 = llvm.insertvalue %1, %14[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %16 = llvm.insertvalue %2, %15[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %17 = llvm.mlir.constant(3 : index) : i64
+    %18 = llvm.mlir.constant(3 : index) : i64
+    %19 = llvm.mlir.constant(1 : index) : i64
+    %20 = llvm.mlir.constant(9 : index) : i64
+    %21 = llvm.mlir.zero : !llvm.ptr
+    %22 = llvm.getelementptr %21[%20] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    %23 = llvm.ptrtoint %22 : !llvm.ptr to i64
+    %24 = llvm.call @malloc(%23) : (i64) -> !llvm.ptr
+    %25 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+    %26 = llvm.insertvalue %24, %25[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %27 = llvm.insertvalue %24, %26[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %28 = llvm.mlir.constant(0 : index) : i64
+    %29 = llvm.insertvalue %28, %27[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %30 = llvm.insertvalue %17, %29[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %31 = llvm.insertvalue %18, %30[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %32 = llvm.insertvalue %18, %31[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %33 = llvm.insertvalue %19, %32[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %34 = llvm.mlir.constant(4 : index) : i64
+    %35 = llvm.mlir.constant(4 : index) : i64
+    %36 = llvm.mlir.constant(1 : index) : i64
+    %37 = llvm.mlir.constant(16 : index) : i64
+    %38 = llvm.mlir.zero : !llvm.ptr
+    %39 = llvm.getelementptr %38[%37] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    %40 = llvm.ptrtoint %39 : !llvm.ptr to i64
+    %41 = llvm.call @malloc(%40) : (i64) -> !llvm.ptr
+    %42 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+    %43 = llvm.insertvalue %41, %42[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %45 = llvm.mlir.constant(0 : index) : i64
+    %46 = llvm.insertvalue %45, %44[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %47 = llvm.insertvalue %34, %46[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %48 = llvm.insertvalue %35, %47[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %49 = llvm.insertvalue %35, %48[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %50 = llvm.insertvalue %36, %49[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %51 = llvm.mlir.constant(0 : i64) : i64
+    %52 = llvm.mlir.constant(1 : i64) : i64
+    %53 = llvm.mlir.constant(2 : i64) : i64
+    %54 = llvm.mlir.constant(3 : i64) : i64
+    %55 = llvm.mlir.constant(0.000000e+00 : f32) : f32
+    %56 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+    %57 = llvm.mlir.constant(2.000000e+00 : f32) : f32
+    %58 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+    %59 = llvm.mlir.constant(4.000000e+00 : f32) : f32
+    %60 = llvm.mlir.constant(5.000000e+00 : f32) : f32
+    %61 = llvm.mlir.constant(6.000000e+00 : f32) : f32
+    %62 = llvm.mlir.constant(7.000000e+00 : f32) : f32
+    %63 = llvm.mlir.constant(8.000000e+00 : f32) : f32
+    %64 = llvm.mlir.constant(-1.000000e+00 : f32) : f32
+    %65 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %66 = llvm.mlir.constant(4 : index) : i64
+    %67 = llvm.mul %51, %66 : i64
+    %68 = llvm.add %67, %51 : i64
+    %69 = llvm.getelementptr %65[%68] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %56, %69 : f32, !llvm.ptr
+    %70 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %71 = llvm.mlir.constant(4 : index) : i64
+    %72 = llvm.mul %51, %71 : i64
+    %73 = llvm.add %72, %52 : i64
+    %74 = llvm.getelementptr %70[%73] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %57, %74 : f32, !llvm.ptr
+    %75 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %76 = llvm.mlir.constant(4 : index) : i64
+    %77 = llvm.mul %51, %76 : i64
+    %78 = llvm.add %77, %53 : i64
+    %79 = llvm.getelementptr %75[%78] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %58, %79 : f32, !llvm.ptr
+    %80 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %81 = llvm.mlir.constant(4 : index) : i64
+    %82 = llvm.mul %51, %81 : i64
+    %83 = llvm.add %82, %54 : i64
+    %84 = llvm.getelementptr %80[%83] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %59, %84 : f32, !llvm.ptr
+    %85 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %86 = llvm.mlir.constant(4 : index) : i64
+    %87 = llvm.mul %52, %86 : i64
+    %88 = llvm.add %87, %51 : i64
+    %89 = llvm.getelementptr %85[%88] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %57, %89 : f32, !llvm.ptr
+    %90 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %91 = llvm.mlir.constant(4 : index) : i64
+    %92 = llvm.mul %52, %91 : i64
+    %93 = llvm.add %92, %52 : i64
+    %94 = llvm.getelementptr %90[%93] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %58, %94 : f32, !llvm.ptr
+    %95 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %96 = llvm.mlir.constant(4 : index) : i64
+    %97 = llvm.mul %52, %96 : i64
+    %98 = llvm.add %97, %53 : i64
+    %99 = llvm.getelementptr %95[%98] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %59, %99 : f32, !llvm.ptr
+    %100 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %101 = llvm.mlir.constant(4 : index) : i64
+    %102 = llvm.mul %52, %101 : i64
+    %103 = llvm.add %102, %54 : i64
+    %104 = llvm.getelementptr %100[%103] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %61, %104 : f32, !llvm.ptr
+    %105 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %106 = llvm.mlir.constant(4 : index) : i64
+    %107 = llvm.mul %53, %106 : i64
+    %108 = llvm.add %107, %51 : i64
+    %109 = llvm.getelementptr %105[%108] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %59, %109 : f32, !llvm.ptr
+    %110 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %111 = llvm.mlir.constant(4 : index) : i64
+    %112 = llvm.mul %53, %111 : i64
+    %113 = llvm.add %112, %52 : i64
+    %114 = llvm.getelementptr %110[%113] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %58, %114 : f32, !llvm.ptr
+    %115 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %116 = llvm.mlir.constant(4 : index) : i64
+    %117 = llvm.mul %53, %116 : i64
+    %118 = llvm.add %117, %53 : i64
+    %119 = llvm.getelementptr %115[%118] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %57, %119 : f32, !llvm.ptr
+    %120 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %121 = llvm.mlir.constant(4 : index) : i64
+    %122 = llvm.mul %53, %121 : i64
+    %123 = llvm.add %122, %54 : i64
+    %124 = llvm.getelementptr %120[%123] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %56, %124 : f32, !llvm.ptr
+    %125 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %126 = llvm.mlir.constant(4 : index) : i64
+    %127 = llvm.mul %54, %126 : i64
+    %128 = llvm.add %127, %51 : i64
+    %129 = llvm.getelementptr %125[%128] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %61, %129 : f32, !llvm.ptr
+    %130 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %131 = llvm.mlir.constant(4 : index) : i64
+    %132 = llvm.mul %54, %131 : i64
+    %133 = llvm.add %132, %52 : i64
+    %134 = llvm.getelementptr %130[%133] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %63, %134 : f32, !llvm.ptr
+    %135 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %136 = llvm.mlir.constant(4 : index) : i64
+    %137 = llvm.mul %54, %136 : i64
+    %138 = llvm.add %137, %53 : i64
+    %139 = llvm.getelementptr %135[%138] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %59, %139 : f32, !llvm.ptr
+    %140 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %141 = llvm.mlir.constant(4 : index) : i64
+    %142 = llvm.mul %54, %141 : i64
+    %143 = llvm.add %142, %54 : i64
+    %144 = llvm.getelementptr %140[%143] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %62, %144 : f32, !llvm.ptr
+    %145 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %146 = llvm.mlir.constant(3 : index) : i64
+    %147 = llvm.mul %51, %146 : i64
+    %148 = llvm.add %147, %51 : i64
+    %149 = llvm.getelementptr %145[%148] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %56, %149 : f32, !llvm.ptr
+    %150 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %151 = llvm.mlir.constant(3 : index) : i64
+    %152 = llvm.mul %51, %151 : i64
+    %153 = llvm.add %152, %52 : i64
+    %154 = llvm.getelementptr %150[%153] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %55, %154 : f32, !llvm.ptr
+    %155 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %156 = llvm.mlir.constant(3 : index) : i64
+    %157 = llvm.mul %51, %156 : i64
+    %158 = llvm.add %157, %53 : i64
+    %159 = llvm.getelementptr %155[%158] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %64, %159 : f32, !llvm.ptr
+    %160 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %161 = llvm.mlir.constant(3 : index) : i64
+    %162 = llvm.mul %52, %161 : i64
+    %163 = llvm.add %162, %51 : i64
+    %164 = llvm.getelementptr %160[%163] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %56, %164 : f32, !llvm.ptr
+    %165 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %166 = llvm.mlir.constant(3 : index) : i64
+    %167 = llvm.mul %52, %166 : i64
+    %168 = llvm.add %167, %52 : i64
+    %169 = llvm.getelementptr %165[%168] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %55, %169 : f32, !llvm.ptr
+    %170 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %171 = llvm.mlir.constant(3 : index) : i64
+    %172 = llvm.mul %52, %171 : i64
+    %173 = llvm.add %172, %53 : i64
+    %174 = llvm.getelementptr %170[%173] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %64, %174 : f32, !llvm.ptr
+    %175 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %176 = llvm.mlir.constant(3 : index) : i64
+    %177 = llvm.mul %53, %176 : i64
+    %178 = llvm.add %177, %51 : i64
+    %179 = llvm.getelementptr %175[%178] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %56, %179 : f32, !llvm.ptr
+    %180 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %181 = llvm.mlir.constant(3 : index) : i64
+    %182 = llvm.mul %53, %181 : i64
+    %183 = llvm.add %182, %52 : i64
+    %184 = llvm.getelementptr %180[%183] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %55, %184 : f32, !llvm.ptr
+    %185 = llvm.extractvalue %33[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    %186 = llvm.mlir.constant(3 : index) : i64
+    %187 = llvm.mul %53, %186 : i64
+    %188 = llvm.add %187, %53 : i64
+    %189 = llvm.getelementptr %185[%188] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+    llvm.store %64, %189 : f32, !llvm.ptr
+    %190 = llvm.mlir.constant(1.500000e+00 : f32) : f32
+    %191 = llvm.fptoui %190 : f32 to i32
+    %192 = llvm.sext %191 : i32 to i64
+    %193 = llvm.extractvalue %16[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    llvm.call @free(%193) : (!llvm.ptr) -> ()
+    %194 = llvm.extractvalue %33[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    llvm.call @free(%194) : (!llvm.ptr) -> ()
+    %195 = llvm.extractvalue %50[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> 
+    llvm.call @free(%195) : (!llvm.ptr) -> ()
+    llvm.return
+}
+
diff --git a/mlir/test/conv2d/conv.c b/mlir/test/conv2d/conv.c
new file mode 100644
index 000000000000..f2c34db4ce71
--- /dev/null
+++ b/mlir/test/conv2d/conv.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define INPUTSIZE 4 // side length of the square input and output matrices
+#define KERNELSIZE 3 // side length of the square kernel
+// Example input; main() copies it element by element into its heap-allocated input matrix.
+double example_in[INPUTSIZE][INPUTSIZE] = {
+    {1,2,3,4},
+    {2,3,4,6},
+    {4,3,2,1},
+    {6,8,4,7}
+};
+// Example kernel; every row is {1, 0, -1}. main() copies it into its heap-allocated kernel matrix.
+double example_kernel[KERNELSIZE][KERNELSIZE] = {
+    {1,0,-1},
+    {1,0,-1},
+    {1,0,-1}
+};
+
+// Same-size 2D filter: output[x][y] = sum over (kx, ky) of input[x+kx][y+ky] * kernel[kx+delta][ky+delta].
+// The kernel is applied without flipping; taps that fall outside the INPUTSIZE x INPUTSIZE input are skipped.
+// output must provide INPUTSIZE x INPUTSIZE doubles. Assumes KERNELSIZE is odd (even sizes would index past the kernel).
+void conv2d(double **input, double** kernel, double** output) {
+    int delta = KERNELSIZE / 2;
+
+    for(int x=0; x<INPUTSIZE; ++x) {
+        for(int y=0; y<INPUTSIZE; ++y) {
+            // Accumulate in double: all matrices are double, so going through float would drop precision.
+            double sum = 0.0;
+            for(int kx=-delta; kx<=delta; ++kx) {
+                for(int ky=-delta; ky<=delta; ++ky) {
+                    int imgX = x+kx, imgY = y+ky;
+                    if(imgX>=0 && imgY>=0 && imgX<INPUTSIZE && imgY<INPUTSIZE) {
+                        sum += input[imgX][imgY] * kernel[kx+delta][ky+delta];
+                    }
+                }
+            }
+            output[x][y] = sum;
+        }
+    }
+}
+
+int main() {
+    // Build heap-backed row tables so the static examples fit conv2d's double** interface.
+    double ** input = (double**) malloc( INPUTSIZE * sizeof(double*) );
+    double ** kernel = (double**) malloc( KERNELSIZE * sizeof(double*) );
+    double ** output = (double**) malloc( INPUTSIZE * sizeof(double*) );
+    if(!input || !kernel || !output) { // exit instead of dereferencing NULL; nothing is freed on this path
+        fprintf(stderr, "conv: out of memory\n");
+        return 1;
+    }
+
+    // Allocate every input/output row and fill the input from example_in as we go.
+    for(int x=0; x<INPUTSIZE; ++x) {
+        input[x] = (double*) malloc( INPUTSIZE * sizeof(double) );
+        output[x] = (double*) malloc( INPUTSIZE * sizeof(double) );
+        if(!input[x] || !output[x]) {
+            fprintf(stderr, "conv: out of memory\n");
+            return 1;
+        }
+        for(int y=0; y<INPUTSIZE; ++y) {
+            input[x][y] = example_in[x][y];
+        }
+    }
+
+    // Same for the kernel rows, filled from example_kernel.
+    for(int x=0; x<KERNELSIZE; ++x) {
+        kernel[x] = (double*) malloc( KERNELSIZE * sizeof(double) );
+        if(!kernel[x]) {
+            fprintf(stderr, "conv: out of memory\n");
+            return 1;
+        }
+        for(int y=0; y<KERNELSIZE; ++y) {
+            kernel[x][y] = example_kernel[x][y];
+        }
+    }
+
+    conv2d(input, kernel, output);
+
+    printf("Output:\n");
+    for(int x=0; x<INPUTSIZE; ++x) {
+        for(int y=0; y<INPUTSIZE; ++y) {
+            printf("%f ", output[x][y]);
+        }
+        printf("\n");
+    }
+
+    // Release the rows first, then the row tables that point at them.
+    for(int x=0; x<INPUTSIZE; ++x) { free(input[x]); free(output[x]); }
+    for(int x=0; x<KERNELSIZE; ++x) { free(kernel[x]); }
+    free(input);
+    free(output);
+    free(kernel);
+    return 0;
+}
diff --git a/mlir/test/conv2d/matmul.mlir b/mlir/test/conv2d/matmul.mlir
new file mode 100644
index 000000000000..925c148c901e
--- /dev/null
+++ b/mlir/test/conv2d/matmul.mlir
@@ -0,0 +1,20 @@
+// Accumulating 128x128 f32 matrix product: C[i, j] = A[i, k] * B[k, j] + C[i, j] for every k.
+// %C is loaded and stored in place, so its incoming contents contribute to the result.
+func.func @matmul(%A: memref<128x128xf32>, %B: memref<128x128xf32>, %C: memref<128x128xf32>) {
+  affine.for %row = 0 to 128 {
+    affine.for %col = 0 to 128 {
+      // The innermost loop walks the dimension shared by %A's columns and %B's rows.
+      affine.for %red = 0 to 128 {
+        %lhs = affine.load %A[%row, %red] : memref<128x128xf32>
+        %rhs = affine.load %B[%red, %col] : memref<128x128xf32>
+        %acc = affine.load %C[%row, %col] : memref<128x128xf32>
+        %prod = arith.mulf %lhs, %rhs : f32
+        // Product first, accumulator second: the operand order is kept as written.
+        %next = arith.addf %prod, %acc : f32
+        affine.store %next, %C[%row, %col] : memref<128x128xf32>
+      }
+    }
+  }
+  return
+}
+
diff --git a/mlir/test/conv2d/scftest.mlir b/mlir/test/conv2d/scftest.mlir
new file mode 100644
index 000000000000..fbd25d1772ed
--- /dev/null
+++ b/mlir/test/conv2d/scftest.mlir
@@ -0,0 +1,19 @@
+module {
+  // Adds up the elements of %buffer at indices %lb, %lb + %step, ... below %ub and returns the total.
+  // NOTE(review): nothing here keeps the indices inside the 1024-element buffer; confirm callers do.
+  func.func @main(%buffer: memref<1024xf32>, %lb: index,
+                  %ub: index, %step: index) -> (f32) {
+    // The accumulator starts at zero.
+    %zero = arith.constant 0.0 : f32
+    // iter_args threads the running total through the loop as a region argument.
+    %total = scf.for %idx = %lb to %ub step %step
+        iter_args(%running = %zero) -> (f32) {
+      %elem = memref.load %buffer[%idx] : memref<1024xf32>
+      %updated = arith.addf %running, %elem : f32
+      // The yielded value becomes %running on the next iteration, or %total
+      // once the final iteration has run.
+      scf.yield %updated : f32
+    }
+    return %total : f32
+  }
+}
diff --git a/mlir/test/conv2d/tosa.conv2d.mlir b/mlir/test/conv2d/tosa.conv2d.mlir
new file mode 100644
index 000000000000..32e34934030d
--- /dev/null
+++ b/mlir/test/conv2d/tosa.conv2d.mlir
@@ -0,0 +1,14 @@
+module {
+    // Entry point: calls @foo on the constants 10 and 122 and returns its result.
+    func.func @main() -> i32 {
+        %lhs = arith.constant 10 : i32
+        %rhs = arith.constant 122 : i32
+        %sum = call @foo(%lhs, %rhs) : (i32, i32) -> i32
+        return %sum : i32
+    }
+    // Returns the i32 sum of its two arguments.
+    func.func @foo(%arg0: i32, %arg1: i32) -> i32 {
+        %result = arith.addi %arg0, %arg1 : i32
+        return %result : i32
+    }
+}
diff --git a/mlir/test/mlir-opt/example1.mlir b/mlir/test/mlir-opt/example1.mlir
new file mode 100644
index 000000000000..a0e2a810794c
--- /dev/null
+++ b/mlir/test/mlir-opt/example1.mlir
@@ -0,0 +1,21 @@
+// One identity map per operand: the input and the output are both indexed by m.
+#accesses = [
+    affine_map<(m) -> (m)>,
+    affine_map<(m) -> (m)>
+]
+// Attributes handed to linalg.generic below: the two maps above and one parallel iterator.
+#attrs = {
+    indexing_maps = #accesses,
+    iterator_types = ["parallel"]
+}
+// Runs the "mk_compute" op (written in generic form) on each element pair of %a and %b; results are yielded to %b.
+func.func @example(%a: memref<?xf32, strided<[1]>>, %b: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
+    linalg.generic #attrs
+        ins(%a: memref<?xf32, strided<[1]>>)
+        outs(%b: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
+    ^bb0(%in: f32, %out: vector<4xf32>):
+        %computed = "mk_compute"(%in, %out) : (f32, vector<4xf32>) -> (vector<4xf32>)
+        linalg.yield %computed : vector<4xf32>
+    }
+    return
+}