From dceb47ba0d571fe0bd4a62b90118325317df11be Mon Sep 17 00:00:00 2001 From: Florence Monna Date: Tue, 20 Aug 2019 14:53:02 -0500 Subject: [PATCH 1/6] [wip] minor corrections on index page. --- doc/index.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index 2c642ec3..f2029e8e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -21,8 +21,8 @@ blocks*, used to develop explicit memory and data management policies. The goals of AML are: * **composability**: application developers and performance experts should be - able to pick and choose the building blocks to use depending on their specific - needs. + able to pick and choose which building blocks to use depending on their + specific needs. * **flexibility**: users should be able to customize, replace, or change the configuration of each building block as much as possible. @@ -36,7 +36,7 @@ AML currently implements the following abstractions: * :doc:`Area `, a set of addressable physical memories, * :doc:`Layout `, a description of data structure organization, * :doc:`Tiling `, a description of data blocking (decomposition) -* :doc:`DMA `, an engine to asynchronously move data structures between areas, +* :doc:`DMA `, an engine to asynchronously move data structures between areas. Each of these abstractions has several implementations. For instance, areas may refer to the usual DRAM or its subset, to GPU memory, or to non-volatile memory. @@ -76,7 +76,7 @@ Installation Workflow ~~~~~~~~ -Include the AML header: +Include AML header: .. code-block:: c @@ -93,7 +93,7 @@ Check the AML version: return 1; } -Initialize and clean up the library: +Initialize and cleanup AML: .. code-block:: c @@ -106,8 +106,8 @@ Initialize and clean up the library: Link your program with *-laml*. -Check the above building-blocks-specific pages for further examples and -information on the library features. +See the above pages on specific building blocks for further examples and +information on library features. Support ------- From 035a86f32a8e0fe5ea8169449f2707572e6a231c Mon Sep 17 00:00:00 2001 From: Florence Monna Date: Tue, 27 Aug 2019 16:15:25 -0500 Subject: [PATCH 2/6] [wip] work on AML documentation - worked on getting the abstract documentation out of the headers and into text documentation. - started writing some examples. - minors spelling and syntax corrections. --- doc/pages/area_linux_api.rst | 86 +++++++++++++++++++++++++++++++++++- doc/pages/areas.rst | 80 +++++++++++++++++++++++++++++++++ doc/pages/layout.rst | 76 +++++++++++++++++++++++++++++++ doc/pages/tilings.rst | 26 +++++++++++ include/aml/area/linux.h | 44 +++++++++--------- 5 files changed, 288 insertions(+), 24 deletions(-) diff --git a/doc/pages/area_linux_api.rst b/doc/pages/area_linux_api.rst index 8f8a61f0..2ab5d2d1 100644 --- a/doc/pages/area_linux_api.rst +++ b/doc/pages/area_linux_api.rst @@ -1,4 +1,86 @@ -Area Linux Implementation API -================================= +Area Linux Implementation +========================= + +This is the Linux implementation of AML areas. + +This building block relies on the libnuma implementation and linux mmap / munmap +to provide mmap/munmap on the NUMA host processor memory. +New areas may be created to allocate a specific subset of memories. +This building block also include a static declaration of a default initialized +area that can be used out of the box with the abstract area API. 
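+
+For instance, the default area can be used directly through the generic area
+API. The sketch below is only an illustration: it assumes the generic calls
+take the area, a size, and an options pointer (NULL here), as described in the
+area API page.
+
+.. code-block:: c
+
+    #include <aml.h>
+    #include <aml/area/linux.h>
+
+    // Request one page from the static default linux area, use it, release it.
+    size_t size = 4096;
+    void* buf = aml_area_mmap(&aml_area_linux, size, NULL);
+    // ... work on buf ...
+    aml_area_munmap(&aml_area_linux, buf, size);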
+
+Example
+-------
+
+Using the built-in features of linux areas:
+We allocate data accessible by several processes at the same address, spread
+across all CPU memories (using the linux interleave policy).
+
+.. code-block:: c
+
+    #include <aml.h>
+    #include <aml/area/linux.h>
+
+    struct aml_area* area;
+    aml_area_linux_create(&area, AML_AREA_LINUX_MMAP_FLAG_SHARED, NULL,
+                          AML_AREA_LINUX_BINDING_FLAG_INTERLEAVE);
+
+    // When work is done with this area, free the resources associated with it.
+    aml_area_linux_destroy(&area);
+
+Integrating a new feature in a new area implementation built on linux features:
+You need an area feature that is not integrated in AML, but you still want to
+work with the AML abstractions around areas.
+You can extend the linux area features and provide a custom implementation of
+the mmap and munmap functions, with additional fields.
+
+.. code-block:: c
+
+    #include <aml.h>
+    #include <aml/area/linux.h>
+
+    // definition of the data field used by the generic area
+    struct aml_area_data {
+        // uses features of linux areas
+        struct aml_area_linux_data linux_data;
+        // implements additional features
+        void* my_data;
+    };
+
+    // create your own data with custom linux settings
+    struct aml_area_data my_area_data = {
+        .linux_data = {
+            .nodeset = NULL,
+            .binding_flags = AML_AREA_LINUX_BINDING_FLAG_INTERLEAVE,
+            .mmap_flags = AML_AREA_LINUX_MMAP_FLAG_SHARED,
+        },
+        .my_data = whatever_floats_your_boat,
+    };
+
+    // implement mmap using linux area features plus custom features
+    void* my_mmap(const struct aml_area_data* data, void* ptr, size_t size){
+        void* program_data = aml_area_linux_mmap(&data->linux_data, ptr, size);
+        aml_area_linux_mbind(&data->linux_data, program_data, size);
+        // additional work we want to do on top of the linux area work
+        whatever_shark(data->my_data, program_data, size);
+        return program_data;
+    }
+    // same for munmap
+    int my_munmap(const struct aml_area_data* data, void* ptr, size_t size);
+
+    // build your custom area
+    struct aml_area_ops my_area_ops = {
+        .mmap = my_mmap,
+        .munmap = my_munmap,
+    };
+
+    struct aml_area my_area = {
+        .ops = &my_area_ops,
+        .data = &my_area_data,
+    };
+
+    void* program_data = aml_area_mmap(&my_area, size, NULL);
+
+And now you can call the generic API on your area.
+
+Area Linux API
+==============
 
 .. doxygengroup:: aml_area_linux
diff --git a/doc/pages/areas.rst b/doc/pages/areas.rst
index 8447fc04..b4321318 100644
--- a/doc/pages/areas.rst
+++ b/doc/pages/areas.rst
@@ -1,10 +1,90 @@
 Areas: Addressable Physical Memories
 ====================================
 
+AML areas represent places where data can belong.
+In shared memory systems, locality is a major concern for performance.
+Being able to query memory from specific places is of major interest to achieve
+this goal.
+AML areas provide mmap / munmap low level functions to query memory from
+specific places materialized as areas.
+Available area implementations dictate the way such places can be arranged and
+with which properties.
+
+.. image:: img/area.png
+   :width: 700px
+"Illustration of areas on a complex system."
+
+An AML area is an implementation of memory operations for several types of
+devices through a consistent abstraction.
+This abstraction is meant to be implemented for several kinds of devices, i.e.,
+the same function calls allocate memory on different kinds of devices depending
+on the area implementation provided.
+
+With the high level API, you can:
+
+* Use an area to allocate space for your data
+* Release the data in this area
+
+Example
+-------
+
+Let's look at how these operations can be done in a C program.
+
+.. codeblock:: c
+    #include <aml.h>
+    #include <aml/area/linux.h>
+
+    int main(){
+        size_t s = 4096; // size of the data, one page as an example
+        void* data = aml_area_mmap(&aml_area_linux, s, NULL);
+        do_work(data);
+        aml_area_munmap(&aml_area_linux, data, s);
+    }
+
+We start by importing the AML interface, as well as the area implementation we
+want to use.
+
+We then proceed to allocate space for the data of size s using the default area
+from the AML Linux implementation.
+The data will only be visible to this process and bound to CPU memory with the
+default linux allocation policy.
+
+Finally, when the work on the data is done, we free it.
+
+
+Area API
+--------
+
+It is important to note that the functions provided through the Area API are
+low-level functions and are not optimized for performance as allocators are.
+
 .. doxygengroup:: aml_area
 
+
 Implementations
 ---------------
 
+Aware users may create or modify implementations by assembling appropriate
+operations in an aml_area_ops structure.
+
+The linux implementation is go to for using simple areas on NUMA CPUs with
+linux operating system.
+
+There is ongoing work on hwloc, CUDA and OpenCL areas.
+
+Let's look at an example of a dynamic creation of a linux area identical to the
+static default aml_area_linux:
+
+.. codeblock:: c
+    #include <aml.h>
+    #include <aml/area/linux.h>
+
+    int main(){
+        struct aml_area* area;
+        aml_area_linux_create(&area, AML_AREA_LINUX_MMAP_FLAG_PRIVATE, NULL,
+                              AML_AREA_LINUX_BINDING_FLAG_DEFAULT);
+        do_work(area);
+        aml_area_linux_destroy(&area);
+    }
 
 .. toctree::
diff --git a/doc/pages/layout.rst b/doc/pages/layout.rst
index 329fc87e..3ffc158d 100644
--- a/doc/pages/layout.rst
+++ b/doc/pages/layout.rst
@@ -1,6 +1,82 @@
 Layout: Description of Data Organization
 ========================================
 
+A layout describes how contiguous elements of a flat memory address space are
+organized into a multidimensional array of fixed-size elements.
+The abstraction provides functions to build layouts, access elements, reshape a
+layout, or subset a layout.
+
+A layout is characterized by:
+
+* A pointer to the data it describes.
+* A set of dimensions on which the data spans.
+* A stride between elements of a dimension.
+* A pitch indicating the space between contiguous elements of a dimension.
+
+The figure below describes a 2D layout with a sub-layout (obtained with the
+aml_layout_slice() operation).
+The sub-layout has a stride of 1 element along the second dimension.
+The slice has an offset of 1 element along the same dimension, and its pitch is
+the pitch of the original layout.
+Calling aml_layout_deref() on this sub-layout with appropriate coordinates will
+return a pointer to the element noted (coord_x, coord_y).
+
+.. image:: img/layout.png
+   :width: 400px
+"2D layout with a 2D slice."
+
+Access to specific elements of a layout can be done with the aml_layout_deref()
+function.
+Access to an element is always done relative to the dimension order set at
+creation time.
+However, internally, the library will store dimensions from the last dimension
+to the first dimension, such that elements along the first dimension are
+contiguous in memory.
+This order is defined with the value AML_LAYOUT_ORDER_FORTRAN.
+Therefore, AML provides access to elements without the overhead of the user
+order choice through functions suffixed with "native".
+
+The layout abstraction also provides a function to reshape data with a
+different set of dimensions.
+A reshaped layout will access the same data but with different coordinates, as
+pictured in the figure below.
+
+.. image:: img/reshape.png
+   :width: 700px
+"2D layout turned into a 3D layout."
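+
+As a rough illustration, the sketch below builds a small 2D layout on top of an
+existing buffer and reads one element back. Only aml_layout_deref() and
+AML_LAYOUT_ORDER_FORTRAN come from the description above; the header, the
+constructor aml_layout_dense_create() and its argument order, and the destroy
+call are assumptions made for this example, so check the dense layout
+implementation page for the authoritative API.
+
+.. code-block:: c
+
+    #include <aml.h>
+    #include <aml/layout/dense.h> // assumed header for the dense layout
+
+    double a[6] = {0, 1, 2, 3, 4, 5};
+    size_t dims[2] = {2, 3}; // 2 x 3 elements of type double
+    struct aml_layout *layout;
+
+    // Assumed constructor arguments: output layout, data pointer, dimension
+    // order, element size, number of dimensions, dimensions, stride and pitch
+    // (NULL meaning the defaults).
+    aml_layout_dense_create(&layout, a, AML_LAYOUT_ORDER_FORTRAN,
+                            sizeof(double), 2, dims, NULL, NULL);
+
+    // Coordinates are interpreted in the order chosen at creation time.
+    size_t coords[2] = {1, 2};
+    double *element = aml_layout_deref(layout, coords);
+
+    aml_layout_destroy(&layout); // assumed destructor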
+
+Example
+-------
+
+Let's look at a problem where layouts can be quite useful: matrix
+multiplication, with DGEMM.
+Let's say you want to multiple matrix A (size [m, k]) with matrix B
+(size [k, n]) to get matrix C (size [m, n]).
+
+The first step is implementing an efficient micro-kernel.
+The micro-kernel update a block of C of size [mr, nr] noted C_r using a block of
+A of size [mr, kb] noted A_r, and a block of B of size [kb, nr] noted B_r.
+A_r is stored in column major order while C_r and B_r are stored in row major
+order.
+
+The medium kernel works using blocks of intermediate size.
+The medium kernel updates a block of C of size [kb, n] noted C_b using a block
+of A of size [mb, kb] noted A_b, and a block of B of size [kb, n] noted B_b.
+A_b is stored as mb/mr consecutive blocks of size [mr, kb] (A_r) in column major
+order while C_b is stored as (mb/mr)*(n/nr) blocks of size [mr, nr] (C_r) in row
+major order and B_b is stored as n/nr blocks of size [kb, nr] (B_r) in row major
+order.
+
+The large kernel uses matrices of any size.
+Let's say we consider the matrices already transformed.
+The original matrices are C of size [m, n], A of size [m, k] and B of size
+[k, n].
+The layout used here are: C is stored as m/mb blocks of C_b, A is stored as
+(k/kb) * (m/mb) blocks of A_b and B is stored as k/kb blocks of B_b.
+
+
+High level API
+--------------
+
 .. doxygengroup:: aml_layout
 
 Implementations
diff --git a/doc/pages/tilings.rst b/doc/pages/tilings.rst
index 0558aa6a..653c86c0 100644
--- a/doc/pages/tilings.rst
+++ b/doc/pages/tilings.rst
@@ -1,11 +1,37 @@
 Tilings: Decomposing Data
 ====================================
 
+Tiling is a representation of a data structure as a collection of blocks
+(tiles).
+
+An AML tiling structure can be defined as a multi-dimensional grid of data,
+like a matrix, a stencil, etc.
+Tilings are used in AML as a description of a macro data structure that will be
+used by a library to do its own work.
+AML can exploit this structure to perform optimized data movement operations.
+
+You can think of a tiling as a 1D or 2D contiguous array.
+The tiles in the structure can be of custom size, and AML provides iterators to
+easily access tile elements.
+
+The 1D tiling is a regular linear tiling with uniform tile sizes.
+The 2D tiling is a two-dimensional Cartesian tiling with uniform tile sizes
+that can be stored in two different orders, row-major and column-major.
+
+With the tiling API, you can create and destroy a tiling, and perform
+operations on it: create and destroy an iterator, compute tile indexes, and
+query tile sizes and tiling dimensions.
+
+Tiling High Level API
+---------------------
+
 .. doxygengroup:: aml_tiling
 
 Implementations
 ---------------
 
+There are so far two implementations of AML tilings, one in 1D and one in 2D:
+
 .. toctree::
 
    tiling_resize_api
diff --git a/include/aml/area/linux.h b/include/aml/area/linux.h
index 6d01cfb0..b518ebae 100644
--- a/include/aml/area/linux.h
+++ b/include/aml/area/linux.h
@@ -92,17 +92,17 @@ struct aml_area_linux_mmap_options {
 /**
  * \brief Linux area creation.
  *
- * Allocates and initializes struct aml_area implemented by aml_area_linux
+ * Allocate and initialize a struct aml_area implemented by aml_area_linux
  * operations.
- * @param[out] area pointer to an uninitialized struct aml_area pointer to
+ * @param[out] area: pointer to an uninitialized struct aml_area pointer to
 * receive the new area.
- * @param[in] nodemask list of memory nodes to use.
Defaults to all allowed + * @param[in] nodemask: list of memory nodes to use. Defaults to all allowed * memory nodes if NULL. - * @param[in] policy: The memory allocation policy to use when binding to + * @param[in] policy: the memory allocation policy to use when binding to * nodeset. - * @return On success, returns 0 and fills "area" with a pointer to the new + * @return on success, returns 0 and fills "area" with a pointer to the new * aml_area. - * @return On failure, fills "area" with NULL and returns one of AML error + * @return on failure, fills "area" with NULL and returns one of AML error * codes: * - AML_ENOMEM if there wasn't enough memory available. * - AML_EINVAL if input flags were invalid. @@ -117,21 +117,21 @@ int aml_area_linux_create(struct aml_area **area, /** * \brief Linux area destruction. * - * Destroys (finalizes and frees resources) struct aml_area created by + * Destroy (finalizes and frees resources) a struct aml_area created by * aml_area_linux_create(). * - * @param area address of an initialized struct aml_area pointer, which will be + * @param[inout] area: address of an initialized struct aml_area pointer, which will be * reset to NULL on return from this call. **/ void aml_area_linux_destroy(struct aml_area **area); /** - * Binds memory of size "size" pointed to by "ptr" using the binding provided + * Bind memory of size "size" pointed to by "ptr" using the binding provided * in "bind". If the mbind() call was not successfull, i.e., AML_FAILURE is * returned, then "errno" should be inspected for further error information. - * @param bind: The requested binding. "mmap_flags" is actually unused. - * @param ptr: The memory to bind. - * @param size: The size of the memory pointed to by "ptr". + * @param[in] bind: the requested binding. "mmap_flags" is actually unused. + * @param[in] ptr: the memory to bind. + * @param[in] size: the size of the memory pointed to by "ptr". * @return 0 if successful; an error code otherwise. **/ int @@ -140,12 +140,12 @@ aml_area_linux_mbind(struct aml_area_linux_data *bind, size_t size); /** - * Checks whether the binding of a pointer obtained with + * Check whether the binding of a pointer obtained with * aml_area_linux_mmap() followed by aml_area_linux_mbind() matches the area * settings. - * @param area_data: The expected binding settings. - * @param ptr: The supposedly bound memory. - * @param size: The memory size. + * @param[in] area_data: the expected binding settings. + * @param[in] ptr: the supposedly bound memory. + * @param[in] size: the memory size. * @return 1 if the mapped memory binding in "ptr" matches the "area_data" * binding settings, else 0. **/ @@ -161,10 +161,10 @@ aml_area_linux_check_binding(struct aml_area_linux_data *area_data, * "mmap_flags" of "area_data". * This function does not perform binding, unlike what is done in areas created * using aml_area_linux_create(). - * @param area_data: The structure containing "mmap_flags" for the mmap() call. + * @param[in] area_data: The structure containing "mmap_flags" for the mmap() call. * "nodemask" and "bind_flags" fields are ignored. - * @param size: The size to allocate. - * @param opts: See "aml_area_linux_mmap_options". + * @param[in] size: The size to allocate. + * @param[in] opts: See "aml_area_linux_mmap_options". * @return a valid memory pointer, or NULL on failure. * On failure, "errno" should be checked for further information. **/ @@ -177,9 +177,9 @@ aml_area_linux_mmap(const struct aml_area_data *area_data, * \brief munmap hook for AML area. 
 *
 * Unmaps memory mapped with aml_area_linux_mmap().
- * @param area_data: unused
- * @param ptr: The virtual memory to unmap.
- * @param size: The size of the virtual memory to unmap.
+ * @param[in] area_data: unused
+ * @param[inout] ptr: The virtual memory to unmap.
+ * @param[in] size: The size of the virtual memory to unmap.
 * @return AML_SUCCESS on success, else AML_FAILURE.
 * On failure, "errno" should be checked for further information.
 **/

From 79e7cf6bd10e9e939e496a6a4811939c1313b3e5 Mon Sep 17 00:00:00 2001
From: Florence Monna
Date: Wed, 4 Sep 2019 10:21:41 -0500
Subject: [PATCH 3/6] [wip] working on a layout example based on DGEMM.

---
 doc/pages/layout.rst | 72 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/doc/pages/layout.rst b/doc/pages/layout.rst
index 3ffc158d..af648f2f 100644
--- a/doc/pages/layout.rst
+++ b/doc/pages/layout.rst
@@ -47,32 +47,74 @@ pictured in the figure below.
 Example
 -------
 
-Let's look at a problem where layouts can be quite useful: matrix
-multiplication, with DGEMM.
-Let's say you want to multiple matrix A (size [m, k]) with matrix B
+Let's look at a problem where layouts can be quite useful: DGEMM in multiple
+levels.
+Let's say you want to multiply matrix A (size [m, k]) with matrix B
 (size [k, n]) to get matrix C (size [m, n]).
 
-The first step is implementing an efficient micro-kernel.
-The micro-kernel update a block of C of size [mr, nr] noted C_r using a block of
+The naive matrix multiplication algorithm should look something like:
+
+.. code:: c
+    for (i = 0; i < m; i++){
+        for (j = 0; j < n; j++){
+            cij = C[i*n + j];
+            for (l = 0; l < k; l++)
+                cij += A[i*k + l] * B[l*n + j];
+            C[i*n + j] = cij;
+        }
+    }
+
+Unfortunately this naive algorithm has poor data locality, and thus poor
+performance on machines with a deep memory hierarchy.
+
+We can instead run the three nested loops over blocks of the matrices.
+With several levels of memory, we want to leverage the power of using blocks of
+different sizes.
+Let's take an algorithm with three levels of granularity.
+
+
+The first level is focused on fitting our blocks in the smallest cache.
+We compute a block of C of size [mr, nr] noted C_r using a block of
 A of size [mr, kb] noted A_r, and a block of B of size [kb, nr] noted B_r.
 A_r is stored in column major order while C_r and B_r are stored in row major
-order.
-
-The medium kernel works using blocks of intermediate size.
-The medium kernel updates a block of C of size [kb, n] noted C_b using a block
+order, allowing us to read A_r row by row, and go with B_r and C_r column by
+column.
+
+.. code:: c
+    for (i = 0; i < m_r; i++){
+        for (j = 0; j < n_r; j++){
+            for (l = 0; l < k_b; l++)
+                C_r[i][j] += A_r[i][l] * B_r[l][j];
+        }
+    }
+
+These are our smallest blocks.
+The implementation at this level simply does the multiplication at a
+granularity where it is fast enough.
+B_r blocks need to be transposed before they can be accessed column by column.
+
+The second level is used when the matrices are so big that you need a second
+level of blocking.
+We then use blocks of intermediate size.
+We compute a block of C of size [mb, n] noted C_b using a block
 of A of size [mb, kb] noted A_b, and a block of B of size [kb, n] noted B_b.
-A_b is stored as mb/mr consecutive blocks of size [mr, kb] (A_r) in column major
-order while C_b is stored as (mb/mr)*(n/nr) blocks of size [mr, nr] (C_r) in row
-major order and B_b is stored as n/nr blocks of size [kb, nr] (B_r) in row major
-order.
+To be efficient, A_b is stored as mb/mr consecutive blocks of size [mr, kb] +(A_r) in column major order while C_b is stored as (mb/mr)*(n/nr) blocks of +size [mr, nr] (C_r) in row major order and B_b is stored as n/nr blocks of size +[kb, nr] (B_r) in row major order. + +This means we need to have Ab laid out as a 3-dimensional array mr x kb x (mb/mr), +B as nr x kb x (n/nr), C with 4 dimensions as nr x mr x (mb/mr) x (n/nr). -The large kernel uses matrices of any size. -Let's say we consider the matrices already transformed. +The last level uses the actual matrices, of any size. The original matrices are C of size [m, n], A of size [m, k] and B of size [k, n]. The layout used here are: C is stored as m/mb blocks of C_b, A is stored as (k/kb) * (m/mb) blocks of A_b and B is stored as k/kb blocks of B_b. +This means we need to rework A to be laid out in 5 dimensions as +mr x kb x mb/mr x m/mb x k/kb, +B in 4 dimensions as nr x kb x n/nr x k/kb, +C in 5 dimensions as nr x mr x mb/mr x n/nr x m/mb High level API -------------- From 90a1dc1851209b003b1f750e00bbfd8ed125bfae Mon Sep 17 00:00:00 2001 From: Florence Monna Date: Mon, 10 May 2021 16:01:04 -0500 Subject: [PATCH 4/6] Corrections to match the current master version of documentation. Deleted double paragraphs. --- doc/pages/area_linux_api.rst | 11 ++++-- doc/pages/areas.rst | 8 ++-- doc/pages/layout.rst | 22 +++++++---- doc/pages/tilings.rst | 26 ++++--------- include/aml.h | 73 ------------------------------------ include/aml/area/linux.h | 13 ------- 6 files changed, 32 insertions(+), 121 deletions(-) diff --git a/doc/pages/area_linux_api.rst b/doc/pages/area_linux_api.rst index 2ab5d2d1..da3c3767 100644 --- a/doc/pages/area_linux_api.rst +++ b/doc/pages/area_linux_api.rst @@ -3,11 +3,14 @@ Area Linux Implementation This is the Linux implementation of AML areas. -This building block relies on the libnuma implementation and linux mmap / munmap -to provide mmap/munmap on the NUMA host processor memory. +This building block relies on the libnuma implementation and the Linux +mmap() / munmap() to provide mmap() / munmap() on NUMA host processor memory. New areas may be created to allocate a specific subset of memories. -This building block also include a static declaration of a default initialized -area that can be used out of the box with the abstract area API. +This building block also includes a static declaration of a default initialized +area that can be used out-of-the-box with the abstract area API. + +.. codeblock:: c + #include - * @endcode * @{ **/ From 3e0d2fcf48d07336e0db5680d605af1221dcebea Mon Sep 17 00:00:00 2001 From: Florence Monna Date: Thu, 13 May 2021 14:47:04 -0500 Subject: [PATCH 5/6] Move documentation paragraphs to documentation pages for areas. --- doc/pages/area_cuda_api.rst | 14 ++++++++++++++ doc/pages/area_opencl_api.rst | 11 +++++++++++ doc/pages/area_ze_api.rst | 12 ++++++++++++ include/aml/area/cuda.h | 15 --------------- include/aml/area/opencl.h | 12 ------------ include/aml/area/ze.h | 12 ------------ 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/doc/pages/area_cuda_api.rst b/doc/pages/area_cuda_api.rst index 78742f74..2c4c4511 100644 --- a/doc/pages/area_cuda_api.rst +++ b/doc/pages/area_cuda_api.rst @@ -1,4 +1,18 @@ Area Cuda Implementation API ================================= +Cuda Implementation of Areas. + +.. codeblock:: c + #include + +Cuda implementation of AML areas. +This building block relies on Cuda implementation of +malloc/free to provide mmap/munmap on device memory. 
+Additional documentation of cuda runtime API can be found here: +https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html + +AML cuda areas may be created to allocate current or specific cuda devices. +Also allocations can be private to a single device or shared across devices. +Finally allocations can be backed by host memory allocation. .. doxygengroup:: aml_area_cuda diff --git a/doc/pages/area_opencl_api.rst b/doc/pages/area_opencl_api.rst index 0cf2cdce..df6105cb 100644 --- a/doc/pages/area_opencl_api.rst +++ b/doc/pages/area_opencl_api.rst @@ -1,4 +1,15 @@ Area OpenCL Implementation API ================================= +OpenCL Implementation of Areas. + +.. codeblock:: c + #include + +OpenCL implementation of AML areas. +This building block relies on OpenCL implementation of +device memory allocation to provide mmap/munmap on device memory. +Additional documentation of OpenCL memory model can be found here: +https://www.khronos.org/registry/OpenCL/specs/2.2/html/OpenCL_API.html#_memory_model + .. doxygengroup:: aml_area_opencl diff --git a/doc/pages/area_ze_api.rst b/doc/pages/area_ze_api.rst index 014c8814..04038a26 100644 --- a/doc/pages/area_ze_api.rst +++ b/doc/pages/area_ze_api.rst @@ -1,4 +1,16 @@ Area Level Zero Implementation API ================================== +Implementation of Areas with Level Zero API. + +.. codeblock:: c + #include + +Implementation of Areas with Level Zero API. +This building block relies on Ze implementation of +host and device memory mapping to provide mmap/munmap on device memory. +Additional documentation of Ze memory model can be found here: + +https://spec.oneapi.com/level-zero/latest/core/api.html#memory + .. doxygengroup:: aml_area_ze diff --git a/include/aml/area/cuda.h b/include/aml/area/cuda.h index 88a74596..6338fb58 100644 --- a/include/aml/area/cuda.h +++ b/include/aml/area/cuda.h @@ -17,21 +17,6 @@ extern "C" { /** * @defgroup aml_area_cuda "AML Cuda Areas" - * @brief Cuda Implementation of Areas. - * @code - * #include - * @endcode - * - * Cuda implementation of AML areas. - * This building block relies on Cuda implementation of - * malloc/free to provide mmap/munmap on device memory. - * Additional documentation of cuda runtime API can be found here: - * @see https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html - * - * AML cuda areas may be created to allocate current or specific cuda devices. - * Also allocations can be private to a single device or shared across devices. - * Finally allocations can be backed by host memory allocation. - * * @{ **/ diff --git a/include/aml/area/opencl.h b/include/aml/area/opencl.h index 6664dbeb..40872cb7 100644 --- a/include/aml/area/opencl.h +++ b/include/aml/area/opencl.h @@ -19,18 +19,6 @@ extern "C" { /** * @defgroup aml_area_opencl "AML OpenCL Areas" - * @brief OpenCL Implementation of Areas. - * @code - * #include - * @endcode - * - * OpenCL implementation of AML areas. - * This building block relies on OpenCL implementation of - * device memory allocation to provide mmap/munmap on device memory. - * Additional documentation of OpenCL memory model can be found here: - * @see - *https://www.khronos.org/registry/OpenCL/specs/2.2/html/OpenCL_API.html#_memory_model - * * @{ **/ diff --git a/include/aml/area/ze.h b/include/aml/area/ze.h index ce2cfe22..dd816b11 100644 --- a/include/aml/area/ze.h +++ b/include/aml/area/ze.h @@ -20,18 +20,6 @@ extern "C" { /** * @defgroup aml_area_ze "AML Level Zero Areas" - * @brief Implementation of Areas with Level Zero API. 
- * @code
- * #include <aml/area/ze.h>
- * @endcode
- *
- * Implementation of Areas with Level Zero API.
- * This building block relies on Ze implementation of
- * host and device memory mapping to provide mmap/munmap on device memory.
- * Additional documentation of Ze memory model can be found here:
- * @see
- * https://spec.oneapi.com/level-zero/latest/core/api.html#memory
- *
  * @{
  **/

From 5c155595250b4db3d8b9eff4deee874b57592be6 Mon Sep 17 00:00:00 2001
From: Florence Monna
Date: Fri, 21 May 2021 10:57:34 -0500
Subject: [PATCH 6/6] cosmetic changes for documentation of areas

---
 doc/pages/areas.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/pages/areas.rst b/doc/pages/areas.rst
index 78a0e2d7..63a4c718 100644
--- a/doc/pages/areas.rst
+++ b/doc/pages/areas.rst
@@ -10,7 +10,7 @@ specific places materialized as areas.
 Available area implementations dictate the way such places can be arranged and
 their properties.
 
-.. image:: img/area.png
+.. image:: ../img/area.png
    :width: 700px
 "Illustration of areas on a complex system."
 
@@ -30,7 +30,7 @@ Example
 
 Let's look at how these operations can be done in a C program.
 
-.. codeblock:: c
+.. code-block:: c
     #include <aml.h>
     #include <aml/area/linux.h>
 
@@ -66,7 +66,7 @@ Implementations
 
 Aware users may create or modify implementations by assembling appropriate
 operations in an aml_area_ops structure.
 
-The linux implementation is go to for using simple areas on NUMA CPUs with
+The linux implementation is the go-to for using simple areas on NUMA CPUs with
 linux operating system.
 
 There is ongoing work on hwloc, CUDA and OpenCL areas.
 
 Let's look at an example of a dynamic creation of a linux area identical to the
 static default aml_area_linux:
 
-.. codeblock:: c
+.. code-block:: c
     #include <aml.h>
     #include <aml/area/linux.h>