Skip to content
2 changes: 1 addition & 1 deletion parsec/arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ int parsec_arena_allocate_device_private(parsec_data_copy_t *copy,
assert(0 == (((ptrdiff_t)chunk->data) % arena->alignment));
assert((arena->elem_size + (ptrdiff_t)chunk->data) <= (size + (ptrdiff_t)chunk));

data->nb_elts = count * arena->elem_size;
data->span = count * arena->elem_size;

copy->flags = PARSEC_DATA_FLAG_ARENA |
PARSEC_DATA_FLAG_PARSEC_OWNED |
Expand Down
6 changes: 3 additions & 3 deletions parsec/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static void parsec_data_construct(parsec_data_t* obj )
obj->owner_device = -1;
obj->preferred_device = -1;
obj->key = 0;
obj->nb_elts = 0;
obj->span = 0;
for( uint32_t i = 0; i < parsec_nb_devices;
obj->device_copies[i] = NULL, i++ );
obj->dc = NULL;
Expand Down Expand Up @@ -509,7 +509,7 @@ parsec_data_create( parsec_data_t **holder,
data->owner_device = 0;
data->key = key;
data->dc = desc;
data->nb_elts = size;
data->span = size;
parsec_data_copy_attach(data, data_copy, 0);

if( !parsec_atomic_cas_ptr(holder, NULL, data) ) {
Expand Down Expand Up @@ -546,7 +546,7 @@ parsec_data_create_with_type( parsec_data_collection_t *desc,
clone->owner_device = 0;
clone->key = key;
clone->dc = desc;
clone->nb_elts = size;
clone->span = size;
parsec_data_copy_attach(clone, data_copy, 0);

return clone;
Expand Down
2 changes: 1 addition & 1 deletion parsec/data_dist/matrix/broadcast.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ static parsec_data_t* data_of(parsec_data_collection_t *desc, ...)
data->owner_device = 0;
data->key = k;
data->dc = (parsec_data_collection_t*)desc;
data->nb_elts = 1;
data->span = 1;
parsec_data_copy_t* data_copy = (parsec_data_copy_t*)PARSEC_OBJ_NEW(parsec_data_copy_t);
parsec_data_copy_attach(data, data_copy, 0);
data_copy->device_private = NULL;
Expand Down
2 changes: 1 addition & 1 deletion parsec/data_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct parsec_data_s {
* which device this data should be modified RW when there
* are multiple choices. -1 means no preference. */
struct parsec_data_collection_s* dc;
size_t nb_elts; /* size in bytes of the memory layout */
size_t span; /* size in bytes of the memory layout */
struct parsec_data_copy_s *device_copies[]; /* this array allocated according to the number of devices
* (parsec_nb_devices). It points to the most recent
* version of the data.
Expand Down
10 changes: 4 additions & 6 deletions parsec/interfaces/dtd/insert_function.c
Original file line number Diff line number Diff line change
Expand Up @@ -2280,20 +2280,18 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) || defined(PARSEC_HAVE_DEV_LEVEL_ZERO_SUPPORT)
parsec_dtd_task_t *dtd_task = (parsec_dtd_task_t *)this_task;
parsec_dtd_task_class_t *dtd_tc = (parsec_dtd_task_class_t*)this_task->task_class;
parsec_gpu_task_t *gpu_task = (parsec_gpu_task_t *) calloc(1, sizeof(parsec_gpu_task_t));
PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);
gpu_task->release_device_task = free; /* by default free the device task */
parsec_gpu_task_t *gpu_task = (parsec_gpu_task_t*)PARSEC_OBJ_NEW(parsec_gpu_dsl_task_t);
gpu_task->ec = (parsec_task_t *) this_task;
gpu_task->submit = dtd_tc->gpu_func_ptr;
gpu_task->task_type = 0;
gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */
gpu_task->pushout = 0;
gpu_task->nb_flows = dtd_tc->super.nb_flows; /* inherit the flows from the task class */
for(int i = 0; i < dtd_tc->super.nb_flows; i++) {
parsec_dtd_flow_info_t *flow = FLOW_OF(dtd_task, i);
if(flow->op_type & PARSEC_PUSHOUT)
gpu_task->pushout |= 1<<i;
gpu_task->flow[i] = dtd_tc->super.in[i];
gpu_task->flow_nb_elts[i] = this_task->data[i].data_in->original->nb_elts;
gpu_task->flow_info[i].flow = dtd_tc->super.in[i];
gpu_task->flow_info[i].flow_span = this_task->data[i].data_in->original->span;
}

parsec_device_module_t *device = this_task->selected_device;
Expand Down
51 changes: 21 additions & 30 deletions parsec/interfaces/ptg/ptg-compiler/jdf2c.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2009-2024 The University of Tennessee and The University
* Copyright (c) 2009-2025 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
Expand Down Expand Up @@ -6809,66 +6809,56 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf,
" assert(NULL != dev);\n"
" assert(PARSEC_DEV_IS_GPU(dev->type));\n"
"\n"
" gpu_task = (parsec_gpu_task_t*)calloc(1, sizeof(parsec_gpu_task_t));\n"
" PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t);\n"
" gpu_task->release_device_task = free; /* by default free the device task */\n"
/* Terminate the emitted statement with a newline, like the surrounding literals, so the generated code keeps one statement per line. */
" gpu_task = (parsec_gpu_task_t*)PARSEC_OBJ_NEW(parsec_gpu_dsl_task_t);\n"
" gpu_task->ec = (parsec_task_t*)this_task;\n"
" gpu_task->submit = &%s_kernel_submit_%s_%s;\n"
" gpu_task->task_type = 0;\n"
" gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */\n",
" gpu_task->task_type = PARSEC_GPU_TASK_TYPE_KERNEL;\n",
dev_lower, jdf_basename, f->fname);

/* Set up stage in/out callbacks */
jdf_find_property(body->properties, "stage_in", &stage_in_property);
jdf_find_property(body->properties, "stage_out", &stage_out_property);

if(stage_in_property == NULL) {
coutput(" gpu_task->stage_in = parsec_default_gpu_stage_in;\n");
}else{
coutput(" gpu_task->stage_in = %s;\n", dump_expr((void**)stage_in_property->expr, &info));
}
coutput(" gpu_task->stage_in = %s;\n", (NULL == stage_in_property) ? "parsec_default_gpu_stage_in"
: dump_expr((void **)stage_in_property->expr, &info));

if(stage_out_property == NULL) {
coutput(" gpu_task->stage_out = parsec_default_gpu_stage_out;\n");
}else{
coutput(" gpu_task->stage_out = %s;\n", dump_expr((void**)stage_out_property->expr, &info));
}
jdf_find_property(body->properties, "stage_out", &stage_out_property);
coutput(" gpu_task->stage_out = %s;\n", (NULL == stage_out_property) ? "parsec_default_gpu_stage_out"
: dump_expr((void **)stage_out_property->expr, &info));

/* Dump the dataflow */
coutput(" gpu_task->pushout = 0;\n");
for(fl = f->dataflow, di = 0; fl != NULL; fl = fl->next, di++) {
coutput(" gpu_task->flow[%d] = &%s;\n",
coutput(" gpu_task->flow_info[%d].flow = &%s;\n",
di, JDF_OBJECT_ONAME( fl ));

sprintf(sa->ptr, "%s.dc", fl->varname);
jdf_find_property(body->properties, sa->ptr, &desc_property);
if(desc_property == NULL){
coutput(" gpu_task->flow_dc[%d] = NULL;\n", di);
if(desc_property == NULL) {
coutput(" gpu_task->flow_info[%d].flow_dc = NULL;\n", di);
}else{
coutput(" gpu_task->flow_dc[%d] = (parsec_data_collection_t *)%s;\n", di,
coutput(" gpu_task->flow_info[%d].flow_dc = (parsec_data_collection_t *)%s;\n", di,
dump_expr((void**)desc_property->expr, &info));
}

sprintf(sa->ptr, "%s.size", fl->varname);
jdf_find_property(body->properties, sa->ptr, &size_property);

if(fl->flow_flags & JDF_FLOW_TYPE_CTL) {
if(size_property != NULL){
if(size_property != NULL) {
fprintf(stderr, "Error: specifying GPU buffer size for CTL flow %s at line %d\n",
fl->varname, JDF_OBJECT_LINENO(fl));
exit(-1);
}
coutput(" gpu_task->flow_nb_elts[%d] = 0;\n", di);
}else{
coutput(" gpu_task->flow_info[%d].flow_span = 0;\n", di);
} else {
coutput(" // A shortcut to check if the flow exists\n");
coutput(" if (gpu_task->ec->data[%d].data_in != NULL) {\n", di);
if(size_property == NULL){
coutput(" gpu_task->flow_nb_elts[%d] = gpu_task->ec->data[%d].data_in->original->nb_elts;\n", di, di);
}else{
coutput(" gpu_task->flow_nb_elts[%d] = %s;\n",
di, dump_expr((void**)size_property->expr, &info));
coutput(" gpu_task->flow_info[%d].flow_span = gpu_task->ec->data[%d].data_in->original->span;\n", di, di);
} else {
coutput(" gpu_task->flow_info[%d].flow_span = %s;\n",
di, dump_expr((void **)size_property->expr, &info));
if( (stage_in_property == NULL) || ( stage_out_property == NULL )){
coutput(" assert(gpu_task->ec->data[%d].data_in->original->nb_elts <= %s);\n",
coutput(" assert(gpu_task->ec->data[%d].data_in->original->span <= %s);\n",
di, dump_expr((void**)size_property->expr, &info));
}

Expand Down Expand Up @@ -6936,6 +6926,7 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf,
}
}
string_arena_free(info.sa);
coutput(" gpu_task->nb_flows = %d; /* inherit the flows from the task_class */\n", di);

coutput("\n"
" return dev->kernel_scheduler(dev, es, gpu_task);\n"
Expand Down
Loading
Loading