/*
|
/*
|
Runtime library for supporting Coarray Fortran
|
Runtime library for supporting Coarray Fortran
|
|
|
Copyright (C) 2009-2012 University of Houston.
|
Copyright (C) 2009-2012 University of Houston.
|
|
|
This program is free software; you can redistribute it and/or modify it
|
This program is free software; you can redistribute it and/or modify it
|
under the terms of version 2 of the GNU General Public License as
|
under the terms of version 2 of the GNU General Public License as
|
published by the Free Software Foundation.
|
published by the Free Software Foundation.
|
|
|
This program is distributed in the hope that it would be useful, but
|
This program is distributed in the hope that it would be useful, but
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
Further, this software is distributed without any warranty that it is
|
Further, this software is distributed without any warranty that it is
|
free of the rightful claim of any third person regarding infringement
|
free of the rightful claim of any third person regarding infringement
|
or the like. Any license provided herein, whether implied or
|
or the like. Any license provided herein, whether implied or
|
otherwise, applies only to this software file. Patent licenses, if
|
otherwise, applies only to this software file. Patent licenses, if
|
any, provided herein do not apply to combinations of this program with
|
any, provided herein do not apply to combinations of this program with
|
other software, or any other product whatsoever.
|
other software, or any other product whatsoever.
|
|
|
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
with this program; if not, write the Free Software Foundation, Inc., 59
|
with this program; if not, write the Free Software Foundation, Inc., 59
|
Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
|
|
Contact information:
|
Contact information:
|
http://www.cs.uh.edu/~hpctools
|
http://www.cs.uh.edu/~hpctools
|
*/
|
*/
|
|
|
#include <stdlib.h>
|
#include <stdlib.h>
|
#include <stdio.h>
|
#include <stdio.h>
|
#include <math.h>
|
#include <math.h>
|
#include <string.h>
|
#include <string.h>
|
#include <assert.h>
|
#include <assert.h>
|
|
|
#include "dopevec.h"
|
#include "dopevec.h"
|
|
|
#if defined(ARMCI)
|
#if defined(ARMCI)
|
#include "armci_comm_layer.h"
|
#include "armci_comm_layer.h"
|
#elif defined(GASNET)
|
#elif defined(GASNET)
|
#include "gasnet_comm_layer.h"
|
#include "gasnet_comm_layer.h"
|
#endif
|
#endif
|
|
|
#include "caf_rtl.h"
|
#include "caf_rtl.h"
|
#include "trace.h"
|
#include "trace.h"
|
|
|
const int DEBUG = 1;
|
const int DEBUG = 1;
|
|
|
/* initialized in comm_init() */
|
/* initialized in comm_init() */
|
unsigned long _this_image;
|
unsigned long _this_image;
|
unsigned long _num_images;
|
unsigned long _num_images;
|
|
|
/* common_slot is a node in the shared memory link-list that keeps track
|
/* common_slot is a node in the shared memory link-list that keeps track
|
* of available memory that can used for both allocatable coarrays and
|
* of available memory that can used for both allocatable coarrays and
|
* asymmetric data. It is the only handle to access the link-list.*/
|
* asymmetric data. It is the only handle to access the link-list.*/
|
struct shared_memory_slot *common_slot;
|
struct shared_memory_slot *common_slot;
|
|
|
void caf_init_()
|
void caf_init_()
|
{
|
{
|
LIBCAF_TRACE_INIT();
|
LIBCAF_TRACE_INIT();
|
|
|
common_slot = (struct shared_memory_slot *) malloc (
|
common_slot = (struct shared_memory_slot *) malloc (
|
sizeof(struct shared_memory_slot));
|
sizeof(struct shared_memory_slot));
|
START_TIMER();
|
START_TIMER();
|
comm_init(common_slot); /* common slot is initialized in comm_init */
|
comm_init(common_slot); /* common slot is initialized in comm_init */
|
STOP_TIMER(INIT);
|
STOP_TIMER(INIT);
|
|
|
_this_image = comm_get_proc_id() + 1;
|
_this_image = comm_get_proc_id() + 1;
|
_num_images = comm_get_num_procs();
|
_num_images = comm_get_num_procs();
|
|
|
LIBCAF_TRACE( LIBCAF_LOG_INIT, "caf_rtl.c:caf_init_->initialized,"
|
LIBCAF_TRACE( LIBCAF_LOG_INIT, "caf_rtl.c:caf_init_->initialized,"
|
" num_images = %lu", _num_images);
|
" num_images = %lu", _num_images);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_init ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_init ");
|
}
|
}
|
|
|
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
* SHARED MEMORY MANAGEMENT FUNCTIONS
|
* SHARED MEMORY MANAGEMENT FUNCTIONS
|
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
* Note: The term 'shared memory' is used in the PGAS sense, i.e. the
|
* Note: The term 'shared memory' is used in the PGAS sense, i.e. the
|
* memory may not be physically shared. It can however be directly
|
* memory may not be physically shared. It can however be directly
|
* accessed from any other image. This is done by the pinning/registering
|
* accessed from any other image. This is done by the pinning/registering
|
* memory by underlying communication layer (GASNet/ARMCI).
|
* memory by underlying communication layer (GASNet/ARMCI).
|
*
|
*
|
* During comm_init GASNet/ARMCI creates a big chunk of shared memory.
|
* During comm_init GASNet/ARMCI creates a big chunk of shared memory.
|
* Static coarrays are allocated memory from this chunk. The remaining
|
* Static coarrays are allocated memory from this chunk. The remaining
|
* memory is left for allocatable coarrays and pointers in coarrays of
|
* memory is left for allocatable coarrays and pointers in coarrays of
|
* derived datatype (henceforth referred as asymmetric data).
|
* derived datatype (henceforth referred as asymmetric data).
|
* It returns the starting address and size of this remaining memory chunk
|
* It returns the starting address and size of this remaining memory chunk
|
* by populating the structure common_slot(explained later).
|
* by populating the structure common_slot(explained later).
|
*
|
*
|
* Normal fortan allocation calls are intercepted to check whether they
|
* Normal fortan allocation calls are intercepted to check whether they
|
* are for coarrays or asymmetric data. If yes, the following functions
|
* are for coarrays or asymmetric data. If yes, the following functions
|
* are called which are defined below.
|
* are called which are defined below.
|
* void* coarray_allocatable_allocate_(unsigned long var_size);
|
* void* coarray_allocatable_allocate_(unsigned long var_size);
|
* void* coarray_asymmetric_allocate_(unsigned long var_size);
|
* void* coarray_asymmetric_allocate_(unsigned long var_size);
|
* Since allocatable coarrays must have symmetric address, a seperate heap
|
* Since allocatable coarrays must have symmetric address, a seperate heap
|
* must be created for asymmetric data. To avoid wasting memory space by
|
* must be created for asymmetric data. To avoid wasting memory space by
|
* statically reserving it, we use the top of heap for allocatable
|
* statically reserving it, we use the top of heap for allocatable
|
* coarrays (which grows downward) and the bottom of heap for asymmetric
|
* coarrays (which grows downward) and the bottom of heap for asymmetric
|
* data(which grows up). A link-list of struct shared_memory_slot is
|
* data(which grows up). A link-list of struct shared_memory_slot is
|
* used to manage allocation and deallocation.
|
* used to manage allocation and deallocation.
|
*
|
*
|
* common_slot is an empty slot which always lies in between allocatable
|
* common_slot is an empty slot which always lies in between allocatable
|
* heap and asymmetric heap, and used by both to reserve memory.
|
* heap and asymmetric heap, and used by both to reserve memory.
|
* _________
|
* _________
|
* | Alloc |
|
* | Alloc |
|
* | heap |
|
* | heap |
|
* =========
|
* =========
|
* | Common|
|
* | Common|
|
* | slot |
|
* | slot |
|
* =========
|
* =========
|
* | asymm |
|
* | asymm |
|
* | heap |
|
* | heap |
|
* |_______|
|
* |_______|
|
* Allocatable heap comsumes common_slot from top, while assymetric heap
|
* Allocatable heap comsumes common_slot from top, while assymetric heap
|
* consumes from bottom. Each allocation address and size is stored in
|
* consumes from bottom. Each allocation address and size is stored in
|
* a sperate slot (node in the list). Each slot has a full-empty bit(feb).
|
* a sperate slot (node in the list). Each slot has a full-empty bit(feb).
|
* During deallocation (using function coarray_deallocate_) the feb is set
|
* During deallocation (using function coarray_deallocate_) the feb is set
|
* to 0 (empty). If any neighboring slot is empty, they are merged. Hence,
|
* to 0 (empty). If any neighboring slot is empty, they are merged. Hence,
|
* when a slot bordering common-slot is deallocated, the common-slot
|
* when a slot bordering common-slot is deallocated, the common-slot
|
* grows.
|
* grows.
|
*
|
*
|
* If there is no more space left in common slot, empty slots are used
|
* If there is no more space left in common slot, empty slots are used
|
* from above for allocable coarrays or from below for asymmetric data.
|
* from above for allocable coarrays or from below for asymmetric data.
|
*
|
*
|
* During exit, the function coarray_free_all_shared_memory_slots()
|
* During exit, the function coarray_free_all_shared_memory_slots()
|
* is used to free all nodes in the shared memory list.
|
* is used to free all nodes in the shared memory list.
|
*/
|
*/
|
|
|
/* Static function used to find empty memory slots while reserving
|
/* Static function used to find empty memory slots while reserving
|
* memory for allocatable coarrays */
|
* memory for allocatable coarrays */
|
static struct shared_memory_slot* find_empty_shared_memory_slot_above
|
static struct shared_memory_slot* find_empty_shared_memory_slot_above
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
{
|
{
|
while (slot)
|
while (slot)
|
{
|
{
|
if(slot->feb==0 && slot->size>=var_size)
|
if(slot->feb==0 && slot->size>=var_size)
|
return slot;
|
return slot;
|
slot = slot->prev;
|
slot = slot->prev;
|
}
|
}
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/* Static function used to find empty memory slots while reserving
|
/* Static function used to find empty memory slots while reserving
|
* memory for assymetric coarrays */
|
* memory for assymetric coarrays */
|
static struct shared_memory_slot* find_empty_shared_memory_slot_below
|
static struct shared_memory_slot* find_empty_shared_memory_slot_below
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
{
|
{
|
while (slot)
|
while (slot)
|
{
|
{
|
if(slot->feb==0 && slot->size>=var_size)
|
if(slot->feb==0 && slot->size>=var_size)
|
return slot;
|
return slot;
|
slot = slot->next;
|
slot = slot->next;
|
}
|
}
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/* Static function used to reserve top part of an empty memory slot
|
/* Static function used to reserve top part of an empty memory slot
|
* for allocatable coarrays. Returns the memory address allocated */
|
* for allocatable coarrays. Returns the memory address allocated */
|
static void* split_empty_shared_memory_slot_from_top
|
static void* split_empty_shared_memory_slot_from_top
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
{
|
{
|
struct shared_memory_slot *new_empty_slot;
|
struct shared_memory_slot *new_empty_slot;
|
new_empty_slot = (struct shared_memory_slot *) malloc
|
new_empty_slot = (struct shared_memory_slot *) malloc
|
(sizeof(struct shared_memory_slot));
|
(sizeof(struct shared_memory_slot));
|
new_empty_slot->addr = slot->addr + var_size;
|
new_empty_slot->addr = slot->addr + var_size;
|
new_empty_slot->size = slot->size - var_size;
|
new_empty_slot->size = slot->size - var_size;
|
new_empty_slot->feb = 0;
|
new_empty_slot->feb = 0;
|
new_empty_slot->next = slot->next;
|
new_empty_slot->next = slot->next;
|
new_empty_slot->prev = slot;
|
new_empty_slot->prev = slot;
|
slot->size = var_size;
|
slot->size = var_size;
|
slot->feb = 1;
|
slot->feb = 1;
|
slot->next = new_empty_slot;
|
slot->next = new_empty_slot;
|
if(common_slot == slot)
|
if(common_slot == slot)
|
common_slot = new_empty_slot;
|
common_slot = new_empty_slot;
|
return slot->addr;
|
return slot->addr;
|
}
|
}
|
|
|
/* Static function used to reserve bottom part of an empty memory slot
|
/* Static function used to reserve bottom part of an empty memory slot
|
* for asymmetric data. Returns the memory address allocated*/
|
* for asymmetric data. Returns the memory address allocated*/
|
static void* split_empty_shared_memory_slot_from_bottom
|
static void* split_empty_shared_memory_slot_from_bottom
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
(struct shared_memory_slot *slot, unsigned long var_size)
|
{
|
{
|
struct shared_memory_slot *new_full_slot;
|
struct shared_memory_slot *new_full_slot;
|
new_full_slot = (struct shared_memory_slot *) malloc
|
new_full_slot = (struct shared_memory_slot *) malloc
|
(sizeof(struct shared_memory_slot));
|
(sizeof(struct shared_memory_slot));
|
new_full_slot->addr = slot->addr + slot->size - var_size;
|
new_full_slot->addr = slot->addr + slot->size - var_size;
|
new_full_slot->size = var_size;
|
new_full_slot->size = var_size;
|
new_full_slot->feb = 1;
|
new_full_slot->feb = 1;
|
new_full_slot->next = slot->next;
|
new_full_slot->next = slot->next;
|
new_full_slot->prev = slot;
|
new_full_slot->prev = slot;
|
slot->size = slot->size - var_size;
|
slot->size = slot->size - var_size;
|
slot->next = new_full_slot;
|
slot->next = new_full_slot;
|
return new_full_slot->addr;
|
return new_full_slot->addr;
|
}
|
}
|
|
|
/* Memory allocation function for allocatable coarrays. It is invoked
|
/* Memory allocation function for allocatable coarrays. It is invoked
|
* from fortran allocation function _ALLOCATE in
|
* from fortran allocation function _ALLOCATE in
|
* osprey/libf/fort/allocation.c
|
* osprey/libf/fort/allocation.c
|
* It finds empty slot from the shared memory list (common_slot & above)
|
* It finds empty slot from the shared memory list (common_slot & above)
|
* and then splits the slot from top
|
* and then splits the slot from top
|
* Note: there is barrier as it is a collective operation*/
|
* Note: there is barrier as it is a collective operation*/
|
void* coarray_allocatable_allocate_(unsigned long var_size)
|
void* coarray_allocatable_allocate_(unsigned long var_size)
|
{
|
{
|
struct shared_memory_slot *empty_slot;
|
struct shared_memory_slot *empty_slot;
|
empty_slot = find_empty_shared_memory_slot_above(common_slot,
|
empty_slot = find_empty_shared_memory_slot_above(common_slot,
|
var_size);
|
var_size);
|
if(empty_slot == 0)
|
if(empty_slot == 0)
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"No More Shared Memory Space available for allocatable coarray.\n"
|
"No More Shared Memory Space available for allocatable coarray.\n"
|
"Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
|
"Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
|
"for more space");
|
"for more space");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_coarray_allocate"
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_coarray_allocate"
|
"-> Found empty slot %p. About to split it from top."
|
"-> Found empty slot %p. About to split it from top."
|
,empty_slot->addr);
|
,empty_slot->addr);
|
|
|
comm_barrier_all(); // implicit barrier in case of allocatable.
|
comm_barrier_all(); // implicit barrier in case of allocatable.
|
if ( empty_slot!=common_slot && empty_slot->size == var_size )
|
if ( empty_slot!=common_slot && empty_slot->size == var_size )
|
{
|
{
|
empty_slot->feb=1;
|
empty_slot->feb=1;
|
return empty_slot->addr;
|
return empty_slot->addr;
|
}
|
}
|
return split_empty_shared_memory_slot_from_top(empty_slot, var_size);
|
return split_empty_shared_memory_slot_from_top(empty_slot, var_size);
|
}
|
}
|
|
|
/* Memory allocation function for asymmetric data. It is invoked
|
/* Memory allocation function for asymmetric data. It is invoked
|
* from fortran allocation function _ALLOCATE in
|
* from fortran allocation function _ALLOCATE in
|
* osprey/libf/fort/allocation.c
|
* osprey/libf/fort/allocation.c
|
* It finds empty slot from the shared memory list (common_slot & below)
|
* It finds empty slot from the shared memory list (common_slot & below)
|
* and then splits the slot from bottom */
|
* and then splits the slot from bottom */
|
void* coarray_asymmetric_allocate_(unsigned long var_size)
|
void* coarray_asymmetric_allocate_(unsigned long var_size)
|
{
|
{
|
struct shared_memory_slot *empty_slot;
|
struct shared_memory_slot *empty_slot;
|
empty_slot = find_empty_shared_memory_slot_below(common_slot,
|
empty_slot = find_empty_shared_memory_slot_below(common_slot,
|
var_size);
|
var_size);
|
if(empty_slot == 0)
|
if(empty_slot == 0)
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"No More Shared Memory Space available for asymmetric data.\n"
|
"No More Shared Memory Space available for asymmetric data.\n"
|
"Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
|
"Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
|
"for more space");
|
"for more space");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_asymmetric_allocate"
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_asymmetric_allocate"
|
"-> Found empty slot %p. About to split it from bottom. "
|
"-> Found empty slot %p. About to split it from bottom. "
|
, empty_slot->addr);
|
, empty_slot->addr);
|
|
|
if ( empty_slot!=common_slot && empty_slot->size == var_size )
|
if ( empty_slot!=common_slot && empty_slot->size == var_size )
|
{
|
{
|
empty_slot->feb=1;
|
empty_slot->feb=1;
|
return empty_slot->addr;
|
return empty_slot->addr;
|
}
|
}
|
return split_empty_shared_memory_slot_from_bottom(empty_slot,
|
return split_empty_shared_memory_slot_from_bottom(empty_slot,
|
var_size);
|
var_size);
|
}
|
}
|
|
|
/* Static function called from coarray_deallocate.
|
/* Static function called from coarray_deallocate.
|
* It finds the slot with the address (passed in param) by searching
|
* It finds the slot with the address (passed in param) by searching
|
* the shared memory link-list starting from the slot(passed in param)
|
* the shared memory link-list starting from the slot(passed in param)
|
* and above it. Used for finding slots containing allocatable coarrays*/
|
* and above it. Used for finding slots containing allocatable coarrays*/
|
static struct shared_memory_slot* find_shared_memory_slot_above
|
static struct shared_memory_slot* find_shared_memory_slot_above
|
(struct shared_memory_slot *slot, void *address)
|
(struct shared_memory_slot *slot, void *address)
|
{
|
{
|
while (slot)
|
while (slot)
|
{
|
{
|
if(slot->feb==1 && slot->addr==address)
|
if(slot->feb==1 && slot->addr==address)
|
return slot;
|
return slot;
|
slot=slot->prev;
|
slot=slot->prev;
|
}
|
}
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/* Static function called from coarray_deallocate.
|
/* Static function called from coarray_deallocate.
|
* It finds the slot with the address (passed in param) by searching
|
* It finds the slot with the address (passed in param) by searching
|
* the shared memory link-list starting from the slot(passed in param)
|
* the shared memory link-list starting from the slot(passed in param)
|
* and below it. Used for finding slots containing asymmetric data*/
|
* and below it. Used for finding slots containing asymmetric data*/
|
static struct shared_memory_slot* find_shared_memory_slot_below
|
static struct shared_memory_slot* find_shared_memory_slot_below
|
(struct shared_memory_slot *slot, void *address)
|
(struct shared_memory_slot *slot, void *address)
|
{
|
{
|
while (slot)
|
while (slot)
|
{
|
{
|
if(slot->feb==1 && slot->addr==address)
|
if(slot->feb==1 && slot->addr==address)
|
return slot;
|
return slot;
|
slot=slot->next;
|
slot=slot->next;
|
}
|
}
|
return 0;
|
return 0;
|
}
|
}
|
|
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
* Merge slot with the slot above & below it. If any of these slots is the
|
* Merge slot with the slot above & below it. If any of these slots is the
|
* common-slot, the common-slot points to the merged slot */
|
* common-slot, the common-slot points to the merged slot */
|
static void join_3_shared_memory_slots(struct shared_memory_slot *slot)
|
static void join_3_shared_memory_slots(struct shared_memory_slot *slot)
|
{
|
{
|
slot->prev->size = slot->prev->size + slot->size + slot->next->size;
|
slot->prev->size = slot->prev->size + slot->size + slot->next->size;
|
slot->prev->next = slot->next->next;
|
slot->prev->next = slot->next->next;
|
if(slot->next->next)
|
if(slot->next->next)
|
slot->next->next->prev = slot->prev;
|
slot->next->next->prev = slot->prev;
|
if(common_slot == slot || common_slot == slot->next)
|
if(common_slot == slot || common_slot == slot->next)
|
common_slot=slot->prev;
|
common_slot=slot->prev;
|
comm_free(slot->next);
|
comm_free(slot->next);
|
comm_free(slot);
|
comm_free(slot);
|
}
|
}
|
|
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
* Merge slot with the slot above it. If any of these slots is the
|
* Merge slot with the slot above it. If any of these slots is the
|
* common-slot, the common-slot points to the merged slot */
|
* common-slot, the common-slot points to the merged slot */
|
static void join_with_prev_shared_memory_slot
|
static void join_with_prev_shared_memory_slot
|
(struct shared_memory_slot *slot)
|
(struct shared_memory_slot *slot)
|
{
|
{
|
slot->prev->size += slot->size;
|
slot->prev->size += slot->size;
|
slot->prev->next = slot->next;
|
slot->prev->next = slot->next;
|
if(slot->next)
|
if(slot->next)
|
slot->next->prev = slot->prev;
|
slot->next->prev = slot->prev;
|
if(common_slot == slot)
|
if(common_slot == slot)
|
common_slot = slot->prev;
|
common_slot = slot->prev;
|
comm_free(slot);
|
comm_free(slot);
|
}
|
}
|
|
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
/* Static function called from empty_shared_memory_slot (used in dealloc).
|
* Merge slot with the slot below it. If any of these slots is the
|
* Merge slot with the slot below it. If any of these slots is the
|
* common-slot, the common-slot points to the merged slot */
|
* common-slot, the common-slot points to the merged slot */
|
static void join_with_next_shared_memory_slot
|
static void join_with_next_shared_memory_slot
|
(struct shared_memory_slot *slot)
|
(struct shared_memory_slot *slot)
|
{
|
{
|
struct shared_memory_slot *tmp;
|
struct shared_memory_slot *tmp;
|
tmp = slot->next;
|
tmp = slot->next;
|
slot->size += slot->next->size;
|
slot->size += slot->next->size;
|
if(slot->next->next)
|
if(slot->next->next)
|
slot->next->next->prev = slot;
|
slot->next->next->prev = slot;
|
slot->next = slot->next->next;
|
slot->next = slot->next->next;
|
if(common_slot == tmp)
|
if(common_slot == tmp)
|
common_slot = slot;
|
common_slot = slot;
|
comm_free(tmp);
|
comm_free(tmp);
|
}
|
}
|
|
|
/* Static function called from coarray_deallocate.
|
/* Static function called from coarray_deallocate.
|
* Empties the slot passed in parameter:
|
* Empties the slot passed in parameter:
|
* 1) set full-empty-bit to 0
|
* 1) set full-empty-bit to 0
|
* 2) merge the slot with neighboring empty slots (if found) */
|
* 2) merge the slot with neighboring empty slots (if found) */
|
static void empty_shared_memory_slot(struct shared_memory_slot *slot)
|
static void empty_shared_memory_slot(struct shared_memory_slot *slot)
|
{
|
{
|
slot->feb=0;
|
slot->feb=0;
|
if(slot->prev && (slot->prev->feb==0) && slot->next
|
if(slot->prev && (slot->prev->feb==0) && slot->next
|
&& (slot->next->feb==0) )
|
&& (slot->next->feb==0) )
|
join_3_shared_memory_slots(slot);
|
join_3_shared_memory_slots(slot);
|
else if (slot->prev && (slot->prev->feb==0))
|
else if (slot->prev && (slot->prev->feb==0))
|
join_with_prev_shared_memory_slot(slot);
|
join_with_prev_shared_memory_slot(slot);
|
else if (slot->next && (slot->next->feb==0))
|
else if (slot->next && (slot->next->feb==0))
|
join_with_next_shared_memory_slot(slot);
|
join_with_next_shared_memory_slot(slot);
|
}
|
}
|
|
|
/* Memory deallocation function for allocatable coarrays and asymmetric
|
/* Memory deallocation function for allocatable coarrays and asymmetric
|
* data. It is invoked from fortran allocation function _DEALLOCATE in
|
* data. It is invoked from fortran allocation function _DEALLOCATE in
|
* osprey/libf/fort/allocation.c
|
* osprey/libf/fort/allocation.c
|
* It finds the slot from the shared memory list, set full-empty-bit to 0,
|
* It finds the slot from the shared memory list, set full-empty-bit to 0,
|
* and then merge the slot with neighboring empty slots (if found)
|
* and then merge the slot with neighboring empty slots (if found)
|
* Note: there is implicit barrier for allocatable coarrays*/
|
* Note: there is implicit barrier for allocatable coarrays*/
|
void coarray_deallocate_(void *var_address)
|
void coarray_deallocate_(void *var_address)
|
{
|
{
|
struct shared_memory_slot *slot;
|
struct shared_memory_slot *slot;
|
slot = find_shared_memory_slot_above(common_slot, var_address);
|
slot = find_shared_memory_slot_above(common_slot, var_address);
|
if (slot)
|
if (slot)
|
comm_barrier_all(); //implicit barrier for allocatable
|
comm_barrier_all(); //implicit barrier for allocatable
|
else
|
else
|
slot = find_shared_memory_slot_below(common_slot, var_address);
|
slot = find_shared_memory_slot_below(common_slot, var_address);
|
if (slot == 0)
|
if (slot == 0)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_NOTICE,
|
LIBCAF_TRACE(LIBCAF_LOG_NOTICE,
|
"caf_rtl.c:coarray_deallocate_->Address%p not coarray."
|
"caf_rtl.c:coarray_deallocate_->Address%p not coarray."
|
,var_address);
|
,var_address);
|
return;
|
return;
|
}
|
}
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,
|
LIBCAF_TRACE(LIBCAF_LOG_MEMORY,
|
"caf_rtl.c:coarray_deallocate_->before deallocating %p.", var_address);
|
"caf_rtl.c:coarray_deallocate_->before deallocating %p.", var_address);
|
empty_shared_memory_slot(slot);
|
empty_shared_memory_slot(slot);
|
|
|
}
|
}
|
|
|
/* Static function called from coarray_free_all_shared_memory_slots()
|
/* Static function called from coarray_free_all_shared_memory_slots()
|
* during exit from program.
|
* during exit from program.
|
* It recursively frees slots in the shared memory link-list starting
|
* It recursively frees slots in the shared memory link-list starting
|
* from slot passed in param and all slots above(previous) it. */
|
* from slot passed in param and all slots above(previous) it. */
|
static void free_prev_slots_recursively( struct shared_memory_slot *slot )
|
static void free_prev_slots_recursively( struct shared_memory_slot *slot )
|
{
|
{
|
if(slot)
|
if(slot)
|
{
|
{
|
free_prev_slots_recursively(slot->prev);
|
free_prev_slots_recursively(slot->prev);
|
comm_free(slot);
|
comm_free(slot);
|
}
|
}
|
}
|
}
|
|
|
/* Static function called from coarray_free_all_shared_memory_slots()
|
/* Static function called from coarray_free_all_shared_memory_slots()
|
* during exit from program.
|
* during exit from program.
|
* It recursively frees slots in the shared memory link-list starting
|
* It recursively frees slots in the shared memory link-list starting
|
* from slot passed in param and all slots below(next) it. */
|
* from slot passed in param and all slots below(next) it. */
|
static void free_next_slots_recursively( struct shared_memory_slot *slot )
|
static void free_next_slots_recursively( struct shared_memory_slot *slot )
|
{
|
{
|
if(slot)
|
if(slot)
|
{
|
{
|
free_next_slots_recursively(slot->next);
|
free_next_slots_recursively(slot->next);
|
comm_free(slot);
|
comm_free(slot);
|
}
|
}
|
}
|
}
|
|
|
/* Function to delete the shared memory link-list.
|
/* Function to delete the shared memory link-list.
|
* Called during exit from comm_exit in armci_comm_layer.c or
|
* Called during exit from comm_exit in armci_comm_layer.c or
|
* gasnet_comm_layer.c.
|
* gasnet_comm_layer.c.
|
*/
|
*/
|
void coarray_free_all_shared_memory_slots()
|
void coarray_free_all_shared_memory_slots()
|
{
|
{
|
free_prev_slots_recursively(common_slot->prev);
|
free_prev_slots_recursively(common_slot->prev);
|
free_next_slots_recursively(common_slot);
|
free_next_slots_recursively(common_slot);
|
}
|
}
|
|
|
/* end shared memory management functions*/
|
/* end shared memory management functions*/
|
|
|
void caf_exit_(int status)
|
void caf_exit_(int status)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:caf_exit_->Exiting with error code %d",status);
|
"caf_rtl.c:caf_exit_->Exiting with error code %d",status);
|
comm_exit(status);
|
comm_exit(status);
|
}
|
}
|
|
|
void caf_finalize_()
|
void caf_finalize_()
|
{
|
{
|
LIBCAF_TRACE( LIBCAF_LOG_TIME_SUMMARY, "Accumulated Time:");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME_SUMMARY, "Accumulated Time:");
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:caf_finalize_->Before call to comm_finalize");
|
"caf_rtl.c:caf_finalize_->Before call to comm_finalize");
|
comm_finalize();
|
comm_finalize();
|
}
|
}
|
|
|
void acquire_lcb_(unsigned long buf_size, void **ptr)
|
void acquire_lcb_(unsigned long buf_size, void **ptr)
|
{
|
{
|
*ptr = comm_malloc(buf_size);
|
*ptr = comm_malloc(buf_size);
|
LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:acquire_lcb->"
|
LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:acquire_lcb->"
|
" acquired lcb %p of size %lu", *ptr, buf_size);
|
" acquired lcb %p of size %lu", *ptr, buf_size);
|
}
|
}
|
|
|
void release_lcb_(void **ptr)
|
void release_lcb_(void **ptr)
|
{
|
{
|
comm_free_lcb(*ptr);
|
comm_free_lcb(*ptr);
|
LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:release_lcb_->"
|
LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:release_lcb_->"
|
"freed lcb %p", *ptr);
|
"freed lcb %p", *ptr);
|
}
|
}
|
|
|
void sync_all_()
|
void sync_all_()
|
{
|
{
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_all_->"
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_all_->"
|
"before call to comm_barrier_all");
|
"before call to comm_barrier_all");
|
START_TIMER();
|
START_TIMER();
|
comm_barrier_all();
|
comm_barrier_all();
|
STOP_TIMER(SYNC);
|
STOP_TIMER(SYNC);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_all ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_all ");
|
|
|
}
|
}
|
|
|
/*************CRITICAL SUPPORT **************/
|
/*************CRITICAL SUPPORT **************/
|
|
|
void caf_critical_()
|
void caf_critical_()
|
{
|
{
|
comm_critical();
|
comm_critical();
|
}
|
}
|
|
|
void caf_end_critical_()
|
void caf_end_critical_()
|
{
|
{
|
comm_end_critical();
|
comm_end_critical();
|
}
|
}
|
|
|
|
|
|
|
/*************END CRITICAL SUPPORT **************/
|
/*************END CRITICAL SUPPORT **************/
|
|
|
|
|
void sync_memory_()
|
void sync_memory_()
|
{
|
{
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_memory->"
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_memory->"
|
"in sync memory");
|
"in sync memory");
|
}
|
}
|
|
|
void sync_images_( int *imageList, int imageCount)
|
void sync_images_( int *imageList, int imageCount)
|
{
|
{
|
int i;
|
int i;
|
for ( i=0; i<imageCount ; i++)
|
for ( i=0; i<imageCount ; i++)
|
{
|
{
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER,"caf_rtl.c:sync_images_->Before"
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER,"caf_rtl.c:sync_images_->Before"
|
" call to comm_syncimages for sync with img%d",imageList[i]);
|
" call to comm_syncimages for sync with img%d",imageList[i]);
|
imageList[i]--;
|
imageList[i]--;
|
}
|
}
|
START_TIMER();
|
START_TIMER();
|
comm_sync_images(imageList,imageCount);
|
comm_sync_images(imageList,imageCount);
|
STOP_TIMER(SYNC);
|
STOP_TIMER(SYNC);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_images ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_images ");
|
}
|
}
|
|
|
void sync_images_all_()
|
void sync_images_all_()
|
{
|
{
|
int i;
|
int i;
|
int imageCount=_num_images;
|
int imageCount=_num_images;
|
int *imageList;
|
int *imageList;
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_images_all_->"
|
LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_images_all_->"
|
"before call to comm_sync_images for sync with all images");
|
"before call to comm_sync_images for sync with all images");
|
imageList = (int *)comm_malloc(_num_images*sizeof(int));
|
imageList = (int *)comm_malloc(_num_images*sizeof(int));
|
for (i=0; i<imageCount ; i++)
|
for (i=0; i<imageCount ; i++)
|
imageList[i]=i;
|
imageList[i]=i;
|
START_TIMER();
|
START_TIMER();
|
comm_sync_images(imageList,imageCount);
|
comm_sync_images(imageList,imageCount);
|
STOP_TIMER(SYNC);
|
STOP_TIMER(SYNC);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_image_all ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_image_all ");
|
|
|
comm_free(imageList);
|
comm_free(imageList);
|
}
|
}
|
|
|
int image_index_(DopeVectorType *diminfo, DopeVectorType *sub)
|
int image_index_(DopeVectorType *diminfo, DopeVectorType *sub)
|
{
|
{
|
if ( diminfo == NULL || sub == NULL )
|
if ( diminfo == NULL || sub == NULL )
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:image_index_-> image_index failed for "
|
"caf_rtl.c:image_index_-> image_index failed for "
|
"&diminfo=%p, &codim=%p", diminfo,sub);
|
"&diminfo=%p, &codim=%p", diminfo,sub);
|
}
|
}
|
|
|
int i;
|
int i;
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int image = 0;
|
int image = 0;
|
int lb_codim, ub_codim;
|
int lb_codim, ub_codim;
|
int *codim = (int *)sub->base_addr.a.ptr;
|
int *codim = (int *)sub->base_addr.a.ptr;
|
int str_m = 1;
|
int str_m = 1;
|
|
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:image_index_->rank: %d, corank %d", rank, corank);
|
"caf_rtl.c:image_index_->rank: %d, corank %d", rank, corank);
|
if (sub->dimension[0].extent != corank)
|
if (sub->dimension[0].extent != corank)
|
return 0;
|
return 0;
|
|
|
for (i = 0; i < corank; i++) {
|
for (i = 0; i < corank; i++) {
|
int extent;
|
int extent;
|
str_m = diminfo->dimension[rank+i].stride_mult;
|
str_m = diminfo->dimension[rank+i].stride_mult;
|
if (i == (corank-1))
|
if (i == (corank-1))
|
extent = (_num_images-1) / str_m + 1;
|
extent = (_num_images-1) / str_m + 1;
|
else
|
else
|
extent = diminfo->dimension[rank+i].extent;
|
extent = diminfo->dimension[rank+i].extent;
|
lb_codim = diminfo->dimension[rank+i].low_bound;
|
lb_codim = diminfo->dimension[rank+i].low_bound;
|
ub_codim = diminfo->dimension[rank+i].low_bound +
|
ub_codim = diminfo->dimension[rank+i].low_bound +
|
extent - 1;
|
extent - 1;
|
if (codim[i]>=lb_codim && (ub_codim==0 || codim[i]<=ub_codim)) {
|
if (codim[i]>=lb_codim && (ub_codim==0 || codim[i]<=ub_codim)) {
|
image += str_m * (codim[i] - lb_codim);
|
image += str_m * (codim[i] - lb_codim);
|
}
|
}
|
else{
|
else{
|
return 0;
|
return 0;
|
}
|
}
|
}
|
}
|
|
|
if( _num_images > image )
|
if( _num_images > image )
|
return image+1;
|
return image+1;
|
else
|
else
|
return 0;
|
return 0;
|
}
|
}
|
|
|
int this_image3_(DopeVectorType *diminfo, int* sub)
|
int this_image3_(DopeVectorType *diminfo, int* sub)
|
{
|
{
|
int img = _this_image-1;
|
int img = _this_image-1;
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int dim = *sub;
|
int dim = *sub;
|
int str_m = 1;
|
int str_m = 1;
|
int lb_codim=0;
|
int lb_codim=0;
|
int ub_codim=0;
|
int ub_codim=0;
|
int extent,i;
|
int extent,i;
|
|
|
if ( diminfo == NULL )
|
if ( diminfo == NULL )
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:this_image3_ ->this_image failed for &diminfo=%p",
|
"caf_rtl.c:this_image3_ ->this_image failed for &diminfo=%p",
|
diminfo);
|
diminfo);
|
}
|
}
|
if(dim < 1 || dim > corank)
|
if(dim < 1 || dim > corank)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:this_image3_->this_image failed as %d dim"
|
"caf_rtl.c:this_image3_->this_image failed as %d dim"
|
" is not present", dim);
|
" is not present", dim);
|
}
|
}
|
|
|
lb_codim = diminfo->dimension[rank+dim-1].low_bound;
|
lb_codim = diminfo->dimension[rank+dim-1].low_bound;
|
str_m = diminfo->dimension[rank+dim-1].stride_mult;
|
str_m = diminfo->dimension[rank+dim-1].stride_mult;
|
if (dim == corank)
|
if (dim == corank)
|
extent = (_num_images-1) / str_m + 1;
|
extent = (_num_images-1) / str_m + 1;
|
else
|
else
|
extent = diminfo->dimension[rank+dim-1].extent;
|
extent = diminfo->dimension[rank+dim-1].extent;
|
ub_codim = lb_codim + extent - 1;
|
ub_codim = lb_codim + extent - 1;
|
if(ub_codim > 0){
|
if(ub_codim > 0){
|
return (((img/str_m)%extent)+lb_codim);
|
return (((img/str_m)%extent)+lb_codim);
|
}
|
}
|
else{
|
else{
|
return ((img/str_m)+lb_codim);
|
return ((img/str_m)+lb_codim);
|
}
|
}
|
}
|
}
|
|
|
void this_image2_(DopeVectorType *ret, DopeVectorType *diminfo)
|
void this_image2_(DopeVectorType *ret, DopeVectorType *diminfo)
|
{
|
{
|
int i;
|
int i;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int *ret_int;
|
int *ret_int;
|
if ( diminfo == NULL || ret==NULL)
|
if ( diminfo == NULL || ret==NULL)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:this_image2_ ->this_image failed for "
|
"caf_rtl.c:this_image2_ ->this_image failed for "
|
"&diminfo:%p and &ret:%p",diminfo, ret);
|
"&diminfo:%p and &ret:%p",diminfo, ret);
|
}
|
}
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].stride_mult = 1;
|
ret->dimension[0].stride_mult = 1;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
for (i=1; i<=corank; i++)
|
for (i=1; i<=corank; i++)
|
{
|
{
|
ret_int[i-1] = this_image3_(diminfo, &i);
|
ret_int[i-1] = this_image3_(diminfo, &i);
|
}
|
}
|
}
|
}
|
|
|
int lcobound2_(DopeVectorType *diminfo, int *sub)
|
int lcobound2_(DopeVectorType *diminfo, int *sub)
|
{
|
{
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int dim=*sub;
|
int dim=*sub;
|
if ( diminfo == NULL )
|
if ( diminfo == NULL )
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:lcobound2 ->lcobound failed for &diminfo:%p",
|
"caf_rtl.c:lcobound2 ->lcobound failed for &diminfo:%p",
|
diminfo);
|
diminfo);
|
}
|
}
|
if(dim < 1 || dim > corank)
|
if(dim < 1 || dim > corank)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:lcobound2 ->lcobound failed as dim %d not present",
|
"caf_rtl.c:lcobound2 ->lcobound failed as dim %d not present",
|
dim);
|
dim);
|
}
|
}
|
return diminfo->dimension[rank+dim-1].low_bound;
|
return diminfo->dimension[rank+dim-1].low_bound;
|
}
|
}
|
|
|
void lcobound_(DopeVectorType *ret, DopeVectorType *diminfo)
|
void lcobound_(DopeVectorType *ret, DopeVectorType *diminfo)
|
{
|
{
|
int i;
|
int i;
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int *ret_int;
|
int *ret_int;
|
if ( diminfo == NULL || ret==NULL)
|
if ( diminfo == NULL || ret==NULL)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:lcobound ->lcobound failed for diminfo:%p and ret:%p",
|
"caf_rtl.c:lcobound ->lcobound failed for diminfo:%p and ret:%p",
|
diminfo, ret);
|
diminfo, ret);
|
}
|
}
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].stride_mult = 1;
|
ret->dimension[0].stride_mult = 1;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
for (i=0; i<corank; i++)
|
for (i=0; i<corank; i++)
|
{
|
{
|
ret_int[i] = diminfo->dimension[rank+i].low_bound;
|
ret_int[i] = diminfo->dimension[rank+i].low_bound;
|
}
|
}
|
}
|
}
|
|
|
int ucobound2_(DopeVectorType *diminfo, int *sub)
|
int ucobound2_(DopeVectorType *diminfo, int *sub)
|
{
|
{
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int dim=*sub;
|
int dim=*sub;
|
int extent;
|
int extent;
|
if ( diminfo == NULL )
|
if ( diminfo == NULL )
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:ucobound2 ->ucobound failed for &diminfo:%p",diminfo);
|
"caf_rtl.c:ucobound2 ->ucobound failed for &diminfo:%p",diminfo);
|
}
|
}
|
if(dim < 1 || dim > corank)
|
if(dim < 1 || dim > corank)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:ucobound2 ->ucobound failed as dim %d not present",dim);
|
"caf_rtl.c:ucobound2 ->ucobound failed as dim %d not present",dim);
|
}
|
}
|
|
|
if (dim == corank)
|
if (dim == corank)
|
extent = (_num_images-1) /
|
extent = (_num_images-1) /
|
diminfo->dimension[rank+dim-1].stride_mult + 1;
|
diminfo->dimension[rank+dim-1].stride_mult + 1;
|
else
|
else
|
extent = diminfo->dimension[rank+dim-1].extent;
|
extent = diminfo->dimension[rank+dim-1].extent;
|
|
|
return (diminfo->dimension[rank+dim-1].low_bound +
|
return (diminfo->dimension[rank+dim-1].low_bound +
|
extent - 1);
|
extent - 1);
|
}
|
}
|
|
|
void ucobound_(DopeVectorType *ret, DopeVectorType *diminfo)
|
void ucobound_(DopeVectorType *ret, DopeVectorType *diminfo)
|
{
|
{
|
int i;
|
int i;
|
int rank = diminfo->n_dim;
|
int rank = diminfo->n_dim;
|
int corank = diminfo->n_codim;
|
int corank = diminfo->n_codim;
|
int *ret_int;
|
int *ret_int;
|
int extent;
|
int extent;
|
if ( diminfo == NULL || ret==NULL)
|
if ( diminfo == NULL || ret==NULL)
|
{
|
{
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:ucobound ->ucobound failed for diminfo:%p and ret:%p",
|
"caf_rtl.c:ucobound ->ucobound failed for diminfo:%p and ret:%p",
|
diminfo, ret);
|
diminfo, ret);
|
}
|
}
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].low_bound = 1;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].extent = corank;
|
ret->dimension[0].stride_mult = 1;
|
ret->dimension[0].stride_mult = 1;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
ret_int = (int*)ret->base_addr.a.ptr;
|
for (i=0; i<corank; i++)
|
for (i=0; i<corank; i++)
|
{
|
{
|
if (i == (corank-1))
|
if (i == (corank-1))
|
extent = (_num_images-1) /
|
extent = (_num_images-1) /
|
diminfo->dimension[rank+i].stride_mult + 1;
|
diminfo->dimension[rank+i].stride_mult + 1;
|
else
|
else
|
extent = diminfo->dimension[rank+i].extent;
|
extent = diminfo->dimension[rank+i].extent;
|
|
|
ret_int[i] = diminfo->dimension[rank+i].low_bound +
|
ret_int[i] = diminfo->dimension[rank+i].low_bound +
|
extent - 1;
|
extent - 1;
|
}
|
}
|
}
|
}
|
|
|
void coarray_read_full_str_(void * src, void *dest, unsigned int src_ndim,
|
void coarray_read_full_str_(void * src, void *dest, unsigned int src_ndim,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_strides,
|
unsigned long *src_strides,
|
unsigned int dest_ndim, unsigned long *dest_str_mults,
|
unsigned int dest_ndim, unsigned long *dest_str_mults,
|
unsigned long *dest_extents, unsigned long *dest_strides,
|
unsigned long *dest_extents, unsigned long *dest_strides,
|
unsigned long img)
|
unsigned long img)
|
{
|
{
|
int i, is_contig = 1;
|
int i, is_contig = 1;
|
|
|
for (i = 1; i < src_ndim; i++) {
|
for (i = 1; i < src_ndim; i++) {
|
if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
|
if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
|
|
if (is_contig) {
|
if (is_contig) {
|
for (i = 1; i < dest_ndim; i++) {
|
for (i = 1; i < dest_ndim; i++) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
if (src_strides || dest_strides)
|
if (src_strides || dest_strides)
|
is_contig = 0;
|
is_contig = 0;
|
|
|
if (is_contig) {
|
if (is_contig) {
|
unsigned long xfer_size = src_str_mults[0]*src_extents[0];
|
unsigned long xfer_size = src_str_mults[0]*src_extents[0];
|
for (i = 1; i < src_ndim; i++) {
|
for (i = 1; i < src_ndim; i++) {
|
xfer_size *= src_extents[i];
|
xfer_size *= src_extents[i];
|
}
|
}
|
if (DEBUG) {
|
if (DEBUG) {
|
unsigned long dest_xfer_size = dest_str_mults[0]*dest_extents[0];
|
unsigned long dest_xfer_size = dest_str_mults[0]*dest_extents[0];
|
for (i = 1; i < dest_ndim; i++) {
|
for (i = 1; i < dest_ndim; i++) {
|
dest_xfer_size *= dest_extents[i];
|
dest_xfer_size *= dest_extents[i];
|
}
|
}
|
if (dest_xfer_size != xfer_size) {
|
if (dest_xfer_size != xfer_size) {
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:coarray_read_full_str->dest and src xfer_size must"
|
"caf_rtl.c:coarray_read_full_str->dest and src xfer_size must"
|
" be same. xfer_size=%d, dest_xfer_size=%d",
|
" be same. xfer_size=%d, dest_xfer_size=%d",
|
xfer_size, dest_xfer_size);
|
xfer_size, dest_xfer_size);
|
}
|
}
|
}
|
}
|
coarray_read_(src, dest, xfer_size, img);
|
coarray_read_(src, dest, xfer_size, img);
|
return;
|
return;
|
/* not reached */
|
/* not reached */
|
}
|
}
|
|
|
START_TIMER();
|
START_TIMER();
|
if (src_strides != NULL || dest_strides != NULL) {
|
if (src_strides != NULL || dest_strides != NULL) {
|
comm_read_full_str2(src, dest, src_ndim, src_str_mults, src_extents,
|
comm_read_full_str2(src, dest, src_ndim, src_str_mults, src_extents,
|
src_strides,
|
src_strides,
|
dest_ndim, dest_str_mults, dest_extents,
|
dest_ndim, dest_str_mults, dest_extents,
|
dest_strides, img-1);
|
dest_strides, img-1);
|
} else {
|
} else {
|
comm_read_full_str(src, dest, src_ndim, src_str_mults, src_extents,
|
comm_read_full_str(src, dest, src_ndim, src_str_mults, src_extents,
|
dest_ndim, dest_str_mults, dest_extents, img-1);
|
dest_ndim, dest_str_mults, dest_extents, img-1);
|
}
|
}
|
STOP_TIMER(READ);
|
STOP_TIMER(READ);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_read_full_str->Finished read(strided) "
|
"caf_rtl.c:coarray_read_full_str->Finished read(strided) "
|
"from %p on Img %lu to %p using dim %d ",
|
"from %p on Img %lu to %p using dim %d ",
|
src, img, dest, src_ndim);
|
src, img, dest, src_ndim);
|
}
|
}
|
|
|
|
|
void coarray_read_src_str_(void * src, void *dest, unsigned int ndim,
|
void coarray_read_src_str_(void * src, void *dest, unsigned int ndim,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_strides,
|
unsigned long *src_strides,
|
unsigned long img)
|
unsigned long img)
|
{
|
{
|
int i, is_contig = 1;
|
int i, is_contig = 1;
|
|
|
/* runtime check if it is contiguous transfer */
|
/* runtime check if it is contiguous transfer */
|
for (i = 1; i < ndim; i++) {
|
for (i = 1; i < ndim; i++) {
|
if (src_str_mults[i] != (src_strides[i-1]*src_extents[i-1])) {
|
if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
|
|
if (src_strides)
|
if (src_strides)
|
is_contig = 0;
|
is_contig = 0;
|
|
|
if (is_contig) {
|
if (is_contig) {
|
unsigned long xfer_size = src_str_mults[0]*src_extents[0];
|
unsigned long xfer_size = src_str_mults[0]*src_extents[0];
|
for (i = 1; i < ndim; i++) {
|
for (i = 1; i < ndim; i++) {
|
xfer_size *= src_extents[i];
|
xfer_size *= src_extents[i];
|
}
|
}
|
coarray_read_(src, dest, xfer_size, img);
|
coarray_read_(src, dest, xfer_size, img);
|
return;
|
return;
|
/* not reached */
|
/* not reached */
|
}
|
}
|
|
|
START_TIMER();
|
START_TIMER();
|
if (src_strides != NULL) {
|
if (src_strides != NULL) {
|
comm_read_src_str2(src, dest, ndim, src_str_mults, src_extents,
|
comm_read_src_str2(src, dest, ndim, src_str_mults, src_extents,
|
src_strides, img-1);
|
src_strides, img-1);
|
} else {
|
} else {
|
comm_read_src_str(src, dest, ndim, src_str_mults, src_extents, img-1);
|
comm_read_src_str(src, dest, ndim, src_str_mults, src_extents, img-1);
|
}
|
}
|
STOP_TIMER(READ);
|
STOP_TIMER(READ);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_read_src_str->Finished read(strided) "
|
"caf_rtl.c:coarray_read_src_str->Finished read(strided) "
|
"from %p on Img %lu to %p using dim %d ",
|
"from %p on Img %lu to %p using dim %d ",
|
src, img, dest, ndim);
|
src, img, dest, ndim);
|
}
|
}
|
|
|
void coarray_read_(void * src, void * dest, unsigned long xfer_size,
|
void coarray_read_(void * src, void * dest, unsigned long xfer_size,
|
unsigned long img)
|
unsigned long img)
|
{
|
{
|
START_TIMER();
|
START_TIMER();
|
comm_read(src, dest, xfer_size, img-1);//reads from src on img to dest
|
comm_read(src, dest, xfer_size, img-1);//reads from src on img to dest
|
STOP_TIMER(READ);
|
STOP_TIMER(READ);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_read->Finished read from %p on Img %lu to %p"
|
"caf_rtl.c:coarray_read->Finished read from %p on Img %lu to %p"
|
" data of size %lu ", src, img, dest, xfer_size);
|
" data of size %lu ", src, img, dest, xfer_size);
|
}
|
}
|
|
|
void coarray_write_dest_str_(void * dest, void *src, unsigned int ndim,
|
void coarray_write_dest_str_(void * dest, void *src, unsigned int ndim,
|
unsigned long *dest_str_mults, unsigned long *dest_extents,
|
unsigned long *dest_str_mults, unsigned long *dest_extents,
|
unsigned long *dest_strides,
|
unsigned long *dest_strides,
|
unsigned long img)
|
unsigned long img)
|
{
|
{
|
int i, is_contig = 1;
|
int i, is_contig = 1;
|
|
|
/* runtime check if it is contiguous transfer */
|
/* runtime check if it is contiguous transfer */
|
for (i = 1; i < ndim; i++) {
|
for (i = 1; i < ndim; i++) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
|
|
if (dest_strides)
|
if (dest_strides)
|
is_contig = 0;
|
is_contig = 0;
|
|
|
if (is_contig) {
|
if (is_contig) {
|
unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
|
unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
|
for (i = 1; i < ndim; i++) {
|
for (i = 1; i < ndim; i++) {
|
xfer_size *= dest_extents[i];
|
xfer_size *= dest_extents[i];
|
}
|
}
|
coarray_write_(dest, src, xfer_size, img);
|
coarray_write_(dest, src, xfer_size, img);
|
return;
|
return;
|
/* not reached */
|
/* not reached */
|
}
|
}
|
|
|
START_TIMER();
|
START_TIMER();
|
if (dest_strides != NULL) {
|
if (dest_strides != NULL) {
|
comm_write_dest_str2(dest, src, ndim,dest_str_mults,dest_extents,
|
comm_write_dest_str2(dest, src, ndim,dest_str_mults,dest_extents,
|
dest_strides, img-1);
|
dest_strides, img-1);
|
} else {
|
} else {
|
comm_write_dest_str(dest, src, ndim,dest_str_mults,dest_extents,
|
comm_write_dest_str(dest, src, ndim,dest_str_mults,dest_extents,
|
img-1);
|
img-1);
|
}
|
}
|
STOP_TIMER(WRITE);
|
STOP_TIMER(WRITE);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_write_dest_str->Finished write(strided) to %p"
|
"caf_rtl.c:coarray_write_dest_str->Finished write(strided) to %p"
|
" on Img %lu from %p using dim %d ", dest, img, src, ndim);
|
" on Img %lu from %p using dim %d ", dest, img, src, ndim);
|
}
|
}
|
|
|
void coarray_write_(void * dest, void * src, unsigned long xfer_size, unsigned long img)
|
void coarray_write_(void * dest, void * src, unsigned long xfer_size, unsigned long img)
|
{
|
{
|
START_TIMER();
|
START_TIMER();
|
comm_write(dest, src, xfer_size, img-1);//write to dest in img
|
comm_write(dest, src, xfer_size, img-1);//write to dest in img
|
STOP_TIMER(WRITE);
|
STOP_TIMER(WRITE);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_write->Wrote to %p on Img %lu from %p data of"
|
"caf_rtl.c:coarray_write->Wrote to %p on Img %lu from %p data of"
|
" size %lu ", dest, img, src, xfer_size);
|
" size %lu ", dest, img, src, xfer_size);
|
}
|
}
|
|
|
void coarray_write_full_str_(void * dest, void *src,
|
void coarray_write_full_str_(void * dest, void *src,
|
unsigned int dest_ndim,
|
unsigned int dest_ndim,
|
unsigned long *dest_str_mults, unsigned long *dest_extents,
|
unsigned long *dest_str_mults, unsigned long *dest_extents,
|
unsigned long *dest_strides,
|
unsigned long *dest_strides,
|
unsigned int src_ndim,
|
unsigned int src_ndim,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_str_mults, unsigned long *src_extents,
|
unsigned long *src_strides,
|
unsigned long *src_strides,
|
unsigned long img)
|
unsigned long img)
|
{
|
{
|
int i, is_contig = 1;
|
int i, is_contig = 1;
|
|
|
for (i = 1; i < dest_ndim; i++) {
|
for (i = 1; i < dest_ndim; i++) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
|
|
if (is_contig) {
|
if (is_contig) {
|
for (i = 1; i < src_ndim; i++) {
|
for (i = 1; i < src_ndim; i++) {
|
if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
|
if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
|
is_contig = 0;
|
is_contig = 0;
|
break;
|
break;
|
}
|
}
|
}
|
}
|
}
|
}
|
|
|
if (src_strides || dest_strides)
|
if (src_strides || dest_strides)
|
is_contig = 0;
|
is_contig = 0;
|
|
|
if (is_contig) {
|
if (is_contig) {
|
unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
|
unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
|
for (i = 1; i < dest_ndim; i++) {
|
for (i = 1; i < dest_ndim; i++) {
|
xfer_size *= dest_extents[i];
|
xfer_size *= dest_extents[i];
|
}
|
}
|
if (DEBUG) {
|
if (DEBUG) {
|
unsigned long src_xfer_size = src_str_mults[0]*src_extents[0];
|
unsigned long src_xfer_size = src_str_mults[0]*src_extents[0];
|
for (i = 1; i < src_ndim; i++) {
|
for (i = 1; i < src_ndim; i++) {
|
src_xfer_size *= src_extents[i];
|
src_xfer_size *= src_extents[i];
|
}
|
}
|
if (src_xfer_size != xfer_size) {
|
if (src_xfer_size != xfer_size) {
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
LIBCAF_TRACE(LIBCAF_LOG_FATAL,
|
"caf_rtl.c:coarray_write_full_str->dest and src xfer_size must"
|
"caf_rtl.c:coarray_write_full_str->dest and src xfer_size must"
|
" be same. xfer_size=%d, src_xfer_size=%d",
|
" be same. xfer_size=%d, src_xfer_size=%d",
|
xfer_size, src_xfer_size);
|
xfer_size, src_xfer_size);
|
}
|
}
|
}
|
}
|
coarray_write_(dest, src, xfer_size, img);
|
coarray_write_(dest, src, xfer_size, img);
|
return;
|
return;
|
/* not reached */
|
/* not reached */
|
}
|
}
|
|
|
START_TIMER();
|
START_TIMER();
|
if (src_strides != NULL || dest_strides != NULL) {
|
if (src_strides != NULL || dest_strides != NULL) {
|
comm_write_full_str2(dest, src, dest_ndim, dest_str_mults, dest_extents,
|
comm_write_full_str2(dest, src, dest_ndim, dest_str_mults, dest_extents,
|
dest_strides,
|
dest_strides,
|
src_ndim, src_str_mults, src_extents,
|
src_ndim, src_str_mults, src_extents,
|
src_strides, img-1);
|
src_strides, img-1);
|
} else {
|
} else {
|
comm_write_full_str(dest, src, dest_ndim, dest_str_mults, dest_extents,
|
comm_write_full_str(dest, src, dest_ndim, dest_str_mults, dest_extents,
|
src_ndim, src_str_mults, src_extents, img-1);
|
src_ndim, src_str_mults, src_extents, img-1);
|
}
|
}
|
STOP_TIMER(WRITE);
|
STOP_TIMER(WRITE);
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
|
LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
|
|
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
|
"caf_rtl.c:coarray_write_full_str->Finished write(strided) to %p"
|
"caf_rtl.c:coarray_write_full_str->Finished write(strided) to %p"
|
" on Img %lu from %p using dim %d ", dest, img, src, dest_ndim);
|
" on Img %lu from %p using dim %d ", dest, img, src, dest_ndim);
|
}
|
}
|
|
|
|
|
/* COLLECTIVES */
|
/* COLLECTIVES */
|
|
|
/* supplemental functions for collective subroutines
|
/* supplemental functions for collective subroutines
|
* Borrowed from Joon's code */
|
* Borrowed from Joon's code */
|
|
|
static int my_pow2(int exp)
|
static int my_pow2(int exp)
|
{
|
{
|
int result=1;
|
int result=1;
|
result <<= exp ;
|
result <<= exp ;
|
return result ;
|
return result ;
|
}
|
}
|
|
|
static int is_even()
|
static int is_even()
|
{
|
{
|
return (_num_images % 2) ? 0:1 ;
|
return (_num_images % 2) ? 0:1 ;
|
}
|
}
|
|
|
static double mylog2(double exp)
|
static double mylog2(double exp)
|
{
|
{
|
return log10(exp)/log10(2);
|
return log10(exp)/log10(2);
|
}
|
}
|
|
|
static double myceillog2(double exp)
|
static double myceillog2(double exp)
|
{
|
{
|
return ceil(log10(exp)/log10(2));
|
return ceil(log10(exp)/log10(2));
|
}
|
}
|
|
|
/* Add the value in buf to dope vector dst_dv */
|
/* Add the value in buf to dope vector dst_dv */
|
static void dope_add( void *buf, DopeVectorType *dst_dv,
|
static void dope_add( void *buf, DopeVectorType *dst_dv,
|
int total_bytes )
|
int total_bytes )
|
{
|
{
|
int el_type = dst_dv->type_lens.type;
|
int el_type = dst_dv->type_lens.type;
|
void *dst_ptr = dst_dv->base_addr.a.ptr;
|
void *dst_ptr = dst_dv->base_addr.a.ptr;
|
int i;
|
int i;
|
unsigned int el_len;
|
unsigned int el_len;
|
el_len = dst_dv->base_addr.a.el_len >>3; // convert bits to bytes
|
el_len = dst_dv->base_addr.a.el_len >>3; // convert bits to bytes
|
|
|
switch (el_type)
|
switch (el_type)
|
{
|
{
|
case DVTYPE_INTEGER:
|
case DVTYPE_INTEGER:
|
{
|
{
|
for(i=0; i< total_bytes/el_len;i++)
|
for(i=0; i< total_bytes/el_len;i++)
|
{
|
{
|
*((int*)dst_ptr + i) += *((int*)buf + i);
|
*((int*)dst_ptr + i) += *((int*)buf + i);
|
}
|
}
|
break;
|
break;
|
}
|
}
|
case DVTYPE_REAL:
|
case DVTYPE_REAL:
|
{
|
{
|
for(i=0; i< total_bytes/el_len;i++)
|
for(i=0; i< total_bytes/el_len;i++)
|
{
|
{
|
*((float*)dst_ptr + i) += *((float*)buf + i);
|
*((float*)dst_ptr + i) += *((float*)buf + i);
|
}
|
}
|
break;
|
break;
|
}
|
}
|
default :
|
default :
|
{ break; }
|
{ break; }
|
}
|
}
|
}
|
}
|
|
|
/* COSUM (modified Joon's armci_comaxval function) */
|
/* COSUM (modified Joon's armci_comaxval function) */
|
/* Accumulates the value of src_dv on all images and stores it into sum_dv
|
/* Accumulates the value of src_dv on all images and stores it into sum_dv
|
* of root */
|
* of root */
|
void comm_cosum(DopeVectorType *src_dv, DopeVectorType *sum_dv,int root)
|
void comm_cosum(DopeVectorType *src_dv, DopeVectorType *sum_dv,int root)
|
{
|
{
|
int i,iter;
|
int i,iter;
|
int total_iter = (int) myceillog2(_num_images) ;
|
int total_iter = (int) myceillog2(_num_images) ;
|
unsigned int el_len;
|
unsigned int el_len;
|
unsigned int target;
|
unsigned int target;
|
void *local_buf;
|
void *local_buf;
|
int total_bytes =1;
|
int total_bytes =1;
|
|
|
// initialization
|
// initialization
|
el_len = src_dv->base_addr.a.el_len >>3; // convert bits to bytes
|
el_len = src_dv->base_addr.a.el_len >>3; // convert bits to bytes
|
for(i=0; i<src_dv->n_dim ; i++)
|
for(i=0; i<src_dv->n_dim ; i++)
|
total_bytes *= src_dv->dimension[i].extent;
|
total_bytes *= src_dv->dimension[i].extent;
|
local_buf = malloc(total_bytes);
|
local_buf = malloc(total_bytes);
|
memset(local_buf, 0 , total_bytes);
|
memset(local_buf, 0 , total_bytes);
|
total_bytes *=el_len;
|
total_bytes *=el_len;
|
// copy content of dopevector from src to sum locally
|
// copy content of dopevector from src to sum locally
|
memcpy(sum_dv->base_addr.a.ptr, src_dv->base_addr.a.ptr, total_bytes);
|
memcpy(sum_dv->base_addr.a.ptr, src_dv->base_addr.a.ptr, total_bytes);
|
|
|
// swap processed ID between 0 and root (non zero process ID)
|
// swap processed ID between 0 and root (non zero process ID)
|
int vPID = (_this_image == root ) ?
|
int vPID = (_this_image == root ) ?
|
0 : (_this_image == 0) ? root : _this_image;
|
0 : (_this_image == 0) ? root : _this_image;
|
|
|
// do reduction
|
// do reduction
|
for(iter=0; iter<total_iter; iter++)
|
for(iter=0; iter<total_iter; iter++)
|
{
|
{
|
if( (vPID % my_pow2(iter+1)) == 0 )
|
if( (vPID % my_pow2(iter+1)) == 0 )
|
{
|
{
|
if( (vPID + my_pow2(iter)) < _num_images)
|
if( (vPID + my_pow2(iter)) < _num_images)
|
{
|
{
|
// compute target process IDs for data transfer
|
// compute target process IDs for data transfer
|
target = vPID + my_pow2(iter);
|
target = vPID + my_pow2(iter);
|
|
|
//swap back for process Id 0 and root process(non-zero)
|
//swap back for process Id 0 and root process(non-zero)
|
if(target == root) target=0;
|
if(target == root) target=0;
|
|
|
comm_read(src_dv->base_addr.a.ptr, local_buf, total_bytes, target);
|
comm_read(src_dv->base_addr.a.ptr, local_buf, total_bytes, target);
|
|
|
dope_add(local_buf, sum_dv, total_bytes);
|
dope_add(local_buf, sum_dv, total_bytes);
|
}
|
}
|
}
|
}
|
comm_barrier_all();
|
comm_barrier_all();
|
}
|
}
|
|
|
free(local_buf);
|
free(local_buf);
|
//Broadcast for all to all
|
//Broadcast for all to all
|
}
|
}
|
|
|