Subversion Repositories Open64

[/] [branches/] [OpenUH/] [osprey/] [libcaf/] [caf_rtl.c] - Blame information for rev 3892

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3802 dreachem
/*
2
 Runtime library for supporting Coarray Fortran
3
 
4 3879 dreachem
 Copyright (C) 2009-2012 University of Houston.
5 3802 dreachem
 
6
 This program is free software; you can redistribute it and/or modify it
7
 under the terms of version 2 of the GNU General Public License as
8
 published by the Free Software Foundation.
9
 
10
 This program is distributed in the hope that it would be useful, but
11
 WITHOUT ANY WARRANTY; without even the implied warranty of
12
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13
 
14
 Further, this software is distributed without any warranty that it is
15
 free of the rightful claim of any third person regarding infringement
16
 or the like.  Any license provided herein, whether implied or
17
 otherwise, applies only to this software file.  Patent licenses, if
18
 any, provided herein do not apply to combinations of this program with
19
 other software, or any other product whatsoever.
20
 
21
 You should have received a copy of the GNU General Public License along
22
 with this program; if not, write the Free Software Foundation, Inc., 59
23
 Temple Place - Suite 330, Boston MA 02111-1307, USA.
24
 
25
 Contact information:
26
 http://www.cs.uh.edu/~hpctools
27
*/
28
 
29
#include <stdlib.h>
30
#include <stdio.h>
31 3811 dreachem
#include <math.h>
32
#include <string.h>
33 3802 dreachem
#include <assert.h>
34
 
35
#include "dopevec.h"
36
 
37
#if defined(ARMCI)
38
#include "armci_comm_layer.h"
39
#elif defined(GASNET)
40
#include "gasnet_comm_layer.h"
41
#endif
42
 
43
#include "caf_rtl.h"
44
#include "trace.h"
45
 
46
const int DEBUG = 1;
47
 
48
/* initialized in comm_init() */
49
unsigned long _this_image;
50
unsigned long _num_images;
51
 
52
/* common_slot is a node in the shared memory link-list that keeps track
53
 * of available memory that can used for both allocatable coarrays and
54
 * asymmetric data. It is the only handle to access the link-list.*/
55
struct shared_memory_slot *common_slot;
56
 
57
void caf_init_()
58
{
59
    LIBCAF_TRACE_INIT();
60
 
61
    common_slot = (struct shared_memory_slot *) malloc (
62
                            sizeof(struct shared_memory_slot));
63
    START_TIMER();
64
    comm_init(common_slot); /* common slot is initialized in comm_init */
65
    STOP_TIMER(INIT);
66
 
67
    _this_image = comm_get_proc_id() + 1;
68
    _num_images = comm_get_num_procs();
69
 
70
    LIBCAF_TRACE( LIBCAF_LOG_INIT, "caf_rtl.c:caf_init_->initialized,"
71
            " num_images = %lu", _num_images);
72
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_init ");
73
}
74
 
75
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
76
 * SHARED MEMORY MANAGEMENT FUNCTIONS
77
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
78
 * Note: The term 'shared memory' is used in the PGAS sense, i.e. the
79
 * memory may not be physically shared. It can however be directly
80
 * accessed from any other image. This is done by the pinning/registering
81
 * memory by underlying communication layer (GASNet/ARMCI).
82
 *
83
 * During comm_init GASNet/ARMCI creates a big chunk of shared memory.
84
 * Static coarrays are allocated memory from this chunk. The remaining
85
 * memory is left for allocatable coarrays and pointers in coarrays of
86
 * derived datatype (henceforth referred as asymmetric data).
87
 * It returns the starting address and size of this remaining memory chunk
88
 * by populating the structure common_slot(explained later).
89
 *
90
 * Normal fortan allocation calls are intercepted to check whether they
91
 * are for coarrays or asymmetric data. If yes, the following functions
92
 * are called which are defined below.
93
 *  void* coarray_allocatable_allocate_(unsigned long var_size);
94
 *  void* coarray_asymmetric_allocate_(unsigned long var_size);
95
 * Since allocatable coarrays must have symmetric address, a seperate heap
96
 * must be created for asymmetric data. To avoid wasting memory space by
97
 * statically reserving it, we use the top of heap for allocatable
98
 * coarrays (which grows downward) and the bottom of heap for asymmetric
99
 * data(which grows up). A link-list of struct shared_memory_slot is
100
 * used to manage allocation and deallocation.
101
 *
102
 * common_slot is an empty slot which always lies in between allocatable
103
 * heap and asymmetric heap, and used by both to reserve memory.
104
 *                          _________
105
 *                          | Alloc |
106
 *                          | heap  |
107
 *                          =========
108
 *                          | Common|
109
 *                          |  slot |
110
 *                          =========
111
 *                          | asymm |
112
 *                          | heap  |
113
 *                          |_______|
114
 * Allocatable heap comsumes common_slot from top, while assymetric heap
115
 * consumes from bottom. Each allocation address and size is stored in
116
 * a sperate slot (node in the list). Each slot has a full-empty bit(feb).
117
 * During deallocation (using function coarray_deallocate_) the feb is set
118
 * to 0 (empty). If any neighboring slot is empty, they are merged. Hence,
119
 * when a slot bordering common-slot is deallocated, the common-slot
120
 * grows.
121
 *
122
 * If there is no more space left in common slot, empty slots are used
123
 * from above for allocable coarrays or from below for asymmetric data.
124
 *
125
 * During exit, the function coarray_free_all_shared_memory_slots()
126
 * is used to free all nodes in the shared memory list.
127
 */
128
 
129
/* Static function used to find empty memory slots while reserving
130
 * memory for allocatable coarrays */
131
static struct shared_memory_slot* find_empty_shared_memory_slot_above
132
             (struct shared_memory_slot *slot, unsigned long var_size)
133
{
134
    while (slot)
135
    {
136
        if(slot->feb==0 && slot->size>=var_size)
137
            return slot;
138
        slot = slot->prev;
139
    }
140
    return 0;
141
}
142
 
143
/* Static function used to find empty memory slots while reserving
144
 * memory for assymetric coarrays */
145
static struct shared_memory_slot* find_empty_shared_memory_slot_below
146
             (struct shared_memory_slot *slot, unsigned long var_size)
147
{
148
    while (slot)
149
    {
150
        if(slot->feb==0 && slot->size>=var_size)
151
            return slot;
152
        slot = slot->next;
153
    }
154
    return 0;
155
}
156
 
157
/* Static function used to reserve top part of an empty memory slot
158
 * for allocatable coarrays. Returns the memory address allocated */
159
static void* split_empty_shared_memory_slot_from_top
160
    (struct shared_memory_slot *slot, unsigned long var_size)
161
{
162
    struct shared_memory_slot *new_empty_slot;
163
    new_empty_slot = (struct shared_memory_slot *) malloc
164
                            (sizeof(struct shared_memory_slot));
165
    new_empty_slot->addr = slot->addr + var_size;
166
    new_empty_slot->size = slot->size - var_size;
167
    new_empty_slot->feb = 0;
168
    new_empty_slot->next = slot->next;
169
    new_empty_slot->prev = slot;
170
    slot->size = var_size;
171
    slot->feb = 1;
172
    slot->next = new_empty_slot;
173
    if(common_slot == slot)
174
        common_slot = new_empty_slot;
175
    return slot->addr;
176
}
177
 
178
/* Static function used to reserve bottom part of an empty memory slot
179
 * for asymmetric data. Returns the memory address allocated*/
180
static void* split_empty_shared_memory_slot_from_bottom
181
    (struct shared_memory_slot *slot, unsigned long var_size)
182
{
183
    struct shared_memory_slot *new_full_slot;
184
    new_full_slot = (struct shared_memory_slot *) malloc
185
                        (sizeof(struct shared_memory_slot));
186
    new_full_slot->addr = slot->addr + slot->size - var_size;
187
    new_full_slot->size = var_size;
188
    new_full_slot->feb = 1;
189
    new_full_slot->next = slot->next;
190
    new_full_slot->prev = slot;
191
    slot->size = slot->size - var_size;
192
    slot->next = new_full_slot;
193
    return new_full_slot->addr;
194
}
195
 
196
/* Memory allocation function for allocatable coarrays. It is invoked
197
 * from fortran allocation function _ALLOCATE in
198
 * osprey/libf/fort/allocation.c
199
 * It finds empty slot from the shared memory list (common_slot & above)
200
 * and then splits the slot from top
201
 * Note: there is barrier as it is a collective operation*/
202
void* coarray_allocatable_allocate_(unsigned long var_size)
203
{
204
    struct shared_memory_slot *empty_slot;
205
    empty_slot = find_empty_shared_memory_slot_above(common_slot,
206
                                                      var_size);
207
    if(empty_slot == 0)
208
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
209
        "No More Shared Memory Space available for allocatable coarray.\n"
210 3805 dreachem
        "Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
211 3802 dreachem
        "for more space");
212
 
213
    LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_coarray_allocate"
214
        "-> Found empty slot %p. About to split it from top."
215
        ,empty_slot->addr);
216
 
217
    comm_barrier_all(); // implicit barrier in case of allocatable.
218
    if ( empty_slot!=common_slot && empty_slot->size == var_size )
219
    {
220
        empty_slot->feb=1;
221
        return empty_slot->addr;
222
    }
223
    return split_empty_shared_memory_slot_from_top(empty_slot, var_size);
224
}
225
 
226
/* Memory allocation function for asymmetric data. It is invoked
227
 * from fortran allocation function _ALLOCATE in
228
 * osprey/libf/fort/allocation.c
229
 * It finds empty slot from the shared memory list (common_slot & below)
230
 * and then splits the slot from bottom */
231
void* coarray_asymmetric_allocate_(unsigned long var_size)
232
{
233
    struct shared_memory_slot *empty_slot;
234
    empty_slot = find_empty_shared_memory_slot_below(common_slot,
235
                                                        var_size);
236
    if(empty_slot == 0)
237
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
238
        "No More Shared Memory Space available for asymmetric data.\n"
239 3805 dreachem
        "Set env variable UHCAF_IMAGE_HEAP_SIZE or cafrun option "
240 3802 dreachem
        "for more space");
241
 
242
    LIBCAF_TRACE(LIBCAF_LOG_MEMORY,"caf_rtl.c:coarray_asymmetric_allocate"
243
        "-> Found empty slot %p. About to split it from bottom. "
244
        , empty_slot->addr);
245
 
246
    if ( empty_slot!=common_slot && empty_slot->size == var_size )
247
    {
248
        empty_slot->feb=1;
249
        return empty_slot->addr;
250
    }
251
    return split_empty_shared_memory_slot_from_bottom(empty_slot,
252
                                                        var_size);
253
}
254
 
255
/* Static function called from coarray_deallocate.
256
 * It finds the slot with the address (passed in param) by searching
257
 * the shared memory link-list starting from the slot(passed in param)
258
 * and above it. Used for finding slots containing allocatable coarrays*/
259
static struct shared_memory_slot* find_shared_memory_slot_above
260
                    (struct shared_memory_slot *slot, void *address)
261
{
262
    while (slot)
263
    {
264
        if(slot->feb==1 && slot->addr==address)
265
            return slot;
266
        slot=slot->prev;
267
    }
268
    return 0;
269
}
270
 
271
/* Static function called from coarray_deallocate.
272
 * It finds the slot with the address (passed in param) by searching
273
 * the shared memory link-list starting from the slot(passed in param)
274
 * and below it. Used for finding slots containing asymmetric data*/
275
static struct shared_memory_slot* find_shared_memory_slot_below
276
                    (struct shared_memory_slot *slot, void *address)
277
{
278
    while (slot)
279
    {
280
        if(slot->feb==1 && slot->addr==address)
281
            return slot;
282
        slot=slot->next;
283
    }
284
    return 0;
285
}
286
 
287
/* Static function called from empty_shared_memory_slot (used in dealloc).
288
 * Merge slot with the slot above & below it. If any of these slots is the
289
 * common-slot, the common-slot points to the merged slot */
290
static void join_3_shared_memory_slots(struct shared_memory_slot *slot)
291
{
292
    slot->prev->size = slot->prev->size + slot->size + slot->next->size;
293
    slot->prev->next = slot->next->next;
294
    if(slot->next->next)
295
        slot->next->next->prev = slot->prev;
296
    if(common_slot == slot || common_slot == slot->next)
297
        common_slot=slot->prev;
298
    comm_free(slot->next);
299
    comm_free(slot);
300
}
301
 
302
/* Static function called from empty_shared_memory_slot (used in dealloc).
303
 * Merge slot with the slot above it. If any of these slots is the
304
 * common-slot, the common-slot points to the merged slot */
305
static void join_with_prev_shared_memory_slot
306
                (struct shared_memory_slot *slot)
307
{
308
    slot->prev->size += slot->size;
309
    slot->prev->next = slot->next;
310
    if(slot->next)
311
        slot->next->prev = slot->prev;
312
    if(common_slot == slot)
313
        common_slot = slot->prev;
314
    comm_free(slot);
315
}
316
 
317
/* Static function called from empty_shared_memory_slot (used in dealloc).
318
 * Merge slot with the slot below it. If any of these slots is the
319
 * common-slot, the common-slot points to the merged slot */
320
static void join_with_next_shared_memory_slot
321
                (struct shared_memory_slot *slot)
322
{
323
    struct shared_memory_slot *tmp;
324
    tmp = slot->next;
325
    slot->size += slot->next->size;
326
    if(slot->next->next)
327
        slot->next->next->prev = slot;
328
    slot->next = slot->next->next;
329
    if(common_slot == tmp)
330
        common_slot = slot;
331
    comm_free(tmp);
332
}
333
 
334
/* Static function called from coarray_deallocate.
335
 * Empties the slot passed in parameter:
336
 * 1) set full-empty-bit to 0
337
 * 2) merge the slot with neighboring empty slots (if found) */
338
static void empty_shared_memory_slot(struct shared_memory_slot *slot)
339
{
340
   slot->feb=0;
341
   if(slot->prev && (slot->prev->feb==0) && slot->next
342
                                           && (slot->next->feb==0) )
343
       join_3_shared_memory_slots(slot);
344
   else if (slot->prev && (slot->prev->feb==0))
345
       join_with_prev_shared_memory_slot(slot);
346
   else if (slot->next && (slot->next->feb==0))
347
       join_with_next_shared_memory_slot(slot);
348
}
349
 
350
/* Memory deallocation function for allocatable coarrays and asymmetric
351
 * data. It is invoked from fortran allocation function _DEALLOCATE in
352
 * osprey/libf/fort/allocation.c
353
 * It finds the slot from the shared memory list, set full-empty-bit to 0,
354
 * and then merge the slot with neighboring empty slots (if found)
355
 * Note: there is implicit barrier for allocatable coarrays*/
356
void coarray_deallocate_(void *var_address)
357
{
358
    struct shared_memory_slot *slot;
359
    slot = find_shared_memory_slot_above(common_slot, var_address);
360
    if (slot)
361
        comm_barrier_all(); //implicit barrier for allocatable
362
    else
363
        slot = find_shared_memory_slot_below(common_slot, var_address);
364
    if (slot == 0)
365
    {
366
        LIBCAF_TRACE(LIBCAF_LOG_NOTICE,
367
            "caf_rtl.c:coarray_deallocate_->Address%p not coarray."
368
            ,var_address);
369
        return;
370
    }
371
    LIBCAF_TRACE(LIBCAF_LOG_MEMORY,
372
            "caf_rtl.c:coarray_deallocate_->before deallocating %p.", var_address);
373
    empty_shared_memory_slot(slot);
374
 
375
}
376
 
377
/* Static function called from coarray_free_all_shared_memory_slots()
378
 * during exit from program.
379
 * It recursively frees slots in the shared memory link-list starting
380
 * from slot passed in param and all slots above(previous) it. */
381
static void free_prev_slots_recursively( struct shared_memory_slot *slot )
382
{
383
    if(slot)
384
    {
385
        free_prev_slots_recursively(slot->prev);
386
        comm_free(slot);
387
    }
388
}
389
 
390
/* Static function called from coarray_free_all_shared_memory_slots()
391
 * during exit from program.
392
 * It recursively frees slots in the shared memory link-list starting
393
 * from slot passed in param and all slots below(next) it. */
394
static void free_next_slots_recursively( struct shared_memory_slot *slot )
395
{
396
    if(slot)
397
    {
398
        free_next_slots_recursively(slot->next);
399
        comm_free(slot);
400
    }
401
}
402
 
403
/* Function to delete the shared memory link-list.
404
 * Called during exit from comm_exit in armci_comm_layer.c or
405
 * gasnet_comm_layer.c.
406
 */
407
void coarray_free_all_shared_memory_slots()
408
{
409
    free_prev_slots_recursively(common_slot->prev);
410
    free_next_slots_recursively(common_slot);
411
}
412
 
413
/* end shared memory management functions*/
414
 
415
void caf_exit_(int status)
416
{
417
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
418
            "caf_rtl.c:caf_exit_->Exiting with error code %d",status);
419
    comm_exit(status);
420
}
421
 
422
void caf_finalize_()
423
{
424
    LIBCAF_TRACE( LIBCAF_LOG_TIME_SUMMARY, "Accumulated Time:");
425
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
426
            "caf_rtl.c:caf_finalize_->Before call to comm_finalize");
427
    comm_finalize();
428
}
429
 
430
void acquire_lcb_(unsigned long buf_size, void **ptr)
431
{
432
    *ptr = comm_malloc(buf_size);
433
    LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:acquire_lcb->"
434
            " acquired lcb %p of size %lu", *ptr, buf_size);
435
}
436
 
437
void release_lcb_(void **ptr)
438
{
439 3811 dreachem
    comm_free_lcb(*ptr);
440 3802 dreachem
    LIBCAF_TRACE( LIBCAF_LOG_DEBUG, "caf_rtl.c:release_lcb_->"
441
            "freed lcb %p", *ptr);
442
}
443
 
444
void sync_all_()
445
{
446
   LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_all_->"
447
           "before call to comm_barrier_all");
448
   START_TIMER();
449
   comm_barrier_all();
450
   STOP_TIMER(SYNC);
451
   LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_all ");
452
 
453
}
454
 
455 3879 dreachem
/*************CRITICAL SUPPORT **************/
456
 
457
void caf_critical_()
458
{
459
 comm_critical();
460
}
461
 
462
void caf_end_critical_()
463
{
464
  comm_end_critical();
465
}
466
 
467
 
468
 
469
/*************END CRITICAL SUPPORT **************/
470
 
471
 
472 3802 dreachem
void sync_memory_()
473
{
474
   LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_memory->"
475
           "in sync memory");
476
}
477
 
478
void sync_images_( int *imageList, int imageCount)
479
{
480
    int i;
481
    for ( i=0; i<imageCount ; i++)
482
    {
483
        LIBCAF_TRACE( LIBCAF_LOG_BARRIER,"caf_rtl.c:sync_images_->Before"
484
        " call to comm_syncimages for sync with img%d",imageList[i]);
485
        imageList[i]--;
486
    }
487
    START_TIMER();
488
    comm_sync_images(imageList,imageCount);
489
    STOP_TIMER(SYNC);
490
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_images ");
491
}
492
 
493
void sync_images_all_()
494
{
495
    int i;
496
    int imageCount=_num_images;
497
    int *imageList;
498
    LIBCAF_TRACE( LIBCAF_LOG_BARRIER, "caf_rtl.c:sync_images_all_->"
499
        "before call to comm_sync_images for sync with all images");
500
    imageList = (int *)comm_malloc(_num_images*sizeof(int));
501
    for (i=0; i<imageCount ; i++)
502
        imageList[i]=i;
503
    START_TIMER();
504
    comm_sync_images(imageList,imageCount);
505
    STOP_TIMER(SYNC);
506
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_sync_image_all ");
507
 
508
    comm_free(imageList);
509
}
510
 
511
int image_index_(DopeVectorType *diminfo, DopeVectorType *sub)
512
{
513
    if ( diminfo == NULL || sub == NULL )
514
    {
515
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
516
           "caf_rtl.c:image_index_-> image_index failed for "
517
           "&diminfo=%p, &codim=%p", diminfo,sub);
518
    }
519
 
520
    int i;
521
    int rank = diminfo->n_dim;
522
    int corank = diminfo->n_codim;
523
    int image = 0;
524
    int lb_codim, ub_codim;
525
    int *codim = (int *)sub->base_addr.a.ptr;
526
    int str_m = 1;
527
 
528
 
529
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
530
        "caf_rtl.c:image_index_->rank: %d, corank %d", rank, corank);
531
    if (sub->dimension[0].extent != corank)
532
        return 0;
533
 
534
    for (i = 0; i < corank; i++) {
535
        int extent;
536
        str_m = diminfo->dimension[rank+i].stride_mult;
537
        if (i == (corank-1))
538
            extent = (_num_images-1) / str_m + 1;
539
        else
540
            extent = diminfo->dimension[rank+i].extent;
541
        lb_codim = diminfo->dimension[rank+i].low_bound;
542
        ub_codim = diminfo->dimension[rank+i].low_bound +
543
                    extent - 1;
544
        if (codim[i]>=lb_codim && (ub_codim==0 || codim[i]<=ub_codim)) {
545
            image += str_m * (codim[i] - lb_codim);
546
        }
547
        else{
548
            return 0;
549
        }
550
    }
551
 
552
    if( _num_images > image )
553
        return image+1;
554
    else
555
        return 0;
556
}
557
 
558
int this_image3_(DopeVectorType *diminfo, int* sub)
559
{
560
    int img = _this_image-1;
561
    int rank = diminfo->n_dim;
562
    int corank = diminfo->n_codim;
563
    int dim = *sub;
564
    int str_m = 1;
565
    int lb_codim=0;
566
    int ub_codim=0;
567
    int extent,i;
568
 
569
    if ( diminfo == NULL )
570
    {
571
       LIBCAF_TRACE(LIBCAF_LOG_FATAL,
572
       "caf_rtl.c:this_image3_ ->this_image failed for &diminfo=%p",
573
       diminfo);
574
    }
575
    if(dim < 1 || dim > corank)
576
    {
577
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
578
           "caf_rtl.c:this_image3_->this_image failed as %d dim"
579
           " is not present", dim);
580
    }
581
 
582
    lb_codim = diminfo->dimension[rank+dim-1].low_bound;
583
    str_m = diminfo->dimension[rank+dim-1].stride_mult;
584
    if (dim == corank)
585
      extent = (_num_images-1) / str_m + 1;
586
    else
587
      extent = diminfo->dimension[rank+dim-1].extent;
588
    ub_codim = lb_codim + extent - 1;
589
    if(ub_codim > 0){
590
        return (((img/str_m)%extent)+lb_codim);
591
    }
592
    else{
593
        return ((img/str_m)+lb_codim);
594
    }
595
}
596
 
597
void this_image2_(DopeVectorType *ret, DopeVectorType *diminfo)
598
{
599
    int i;
600
    int corank = diminfo->n_codim;
601
    int *ret_int;
602
    if ( diminfo == NULL || ret==NULL)
603
    {
604
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
605
           "caf_rtl.c:this_image2_ ->this_image failed for "
606
           "&diminfo:%p and &ret:%p",diminfo, ret);
607
    }
608
    ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
609
    ret->dimension[0].low_bound = 1;
610
    ret->dimension[0].extent = corank;
611
    ret->dimension[0].stride_mult = 1;
612
    ret_int = (int*)ret->base_addr.a.ptr;
613
    for (i=1; i<=corank; i++)
614
    {
615
        ret_int[i-1] = this_image3_(diminfo, &i);
616
    }
617
}
618
 
619
int lcobound2_(DopeVectorType *diminfo, int *sub)
620
{
621
    int rank   = diminfo->n_dim;
622
    int corank = diminfo->n_codim;
623
    int dim=*sub;
624
    if ( diminfo == NULL )
625
    {
626
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
627
           "caf_rtl.c:lcobound2 ->lcobound failed for &diminfo:%p",
628
           diminfo);
629
    }
630
    if(dim < 1 || dim > corank)
631
    {
632
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
633
           "caf_rtl.c:lcobound2 ->lcobound failed as dim %d not present",
634
            dim);
635
    }
636
    return diminfo->dimension[rank+dim-1].low_bound;
637
}
638
 
639
void lcobound_(DopeVectorType *ret, DopeVectorType *diminfo)
640
{
641
    int i;
642
    int rank   = diminfo->n_dim;
643
    int corank = diminfo->n_codim;
644
    int *ret_int;
645
    if ( diminfo == NULL || ret==NULL)
646
    {
647
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
648
       "caf_rtl.c:lcobound ->lcobound failed for diminfo:%p and ret:%p",
649
        diminfo, ret);
650
    }
651
    ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
652
    ret->dimension[0].low_bound = 1;
653
    ret->dimension[0].extent = corank;
654
    ret->dimension[0].stride_mult = 1;
655
    ret_int = (int*)ret->base_addr.a.ptr;
656
    for (i=0; i<corank; i++)
657
    {
658
        ret_int[i] = diminfo->dimension[rank+i].low_bound;
659
    }
660
}
661
 
662
int ucobound2_(DopeVectorType *diminfo, int *sub)
663
{
664
    int rank   = diminfo->n_dim;
665
    int corank = diminfo->n_codim;
666
    int dim=*sub;
667
    int extent;
668
    if ( diminfo == NULL )
669
    {
670
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
671
       "caf_rtl.c:ucobound2 ->ucobound failed for &diminfo:%p",diminfo);
672
    }
673
    if(dim < 1 || dim > corank)
674
    {
675
        LIBCAF_TRACE(LIBCAF_LOG_FATAL,
676
       "caf_rtl.c:ucobound2 ->ucobound failed as dim %d not present",dim);
677
    }
678
 
679
    if (dim == corank)
680
      extent = (_num_images-1) /
681
          diminfo->dimension[rank+dim-1].stride_mult + 1;
682
    else
683
      extent = diminfo->dimension[rank+dim-1].extent;
684
 
685
    return (diminfo->dimension[rank+dim-1].low_bound +
686
                extent - 1);
687
}
688
 
689
void ucobound_(DopeVectorType *ret, DopeVectorType *diminfo)
690
{
691
    int i;
692
    int rank   = diminfo->n_dim;
693
    int corank = diminfo->n_codim;
694
    int *ret_int;
695
    int extent;
696
    if ( diminfo == NULL || ret==NULL)
697
    {
698
      LIBCAF_TRACE(LIBCAF_LOG_FATAL,
699
      "caf_rtl.c:ucobound ->ucobound failed for diminfo:%p and ret:%p",
700
       diminfo, ret);
701
    }
702
    ret->base_addr.a.ptr = comm_malloc(sizeof(int)*corank);
703
    ret->dimension[0].low_bound = 1;
704
    ret->dimension[0].extent = corank;
705
    ret->dimension[0].stride_mult = 1;
706
    ret_int = (int*)ret->base_addr.a.ptr;
707
    for (i=0; i<corank; i++)
708
    {
709
      if (i == (corank-1))
710
        extent = (_num_images-1) /
711
            diminfo->dimension[rank+i].stride_mult + 1;
712
      else
713
        extent = diminfo->dimension[rank+i].extent;
714
 
715
      ret_int[i] = diminfo->dimension[rank+i].low_bound +
716
                    extent - 1;
717
    }
718
}
719
 
720
void coarray_read_full_str_(void * src, void *dest, unsigned int src_ndim,
721 3879 dreachem
        unsigned long *src_str_mults, unsigned long *src_extents,
722
        unsigned long *src_strides,
723
        unsigned int dest_ndim, unsigned long *dest_str_mults,
724
        unsigned long *dest_extents, unsigned long *dest_strides,
725
        unsigned long img)
726 3802 dreachem
{
727
    int i, is_contig = 1;
728
 
729
    for (i = 1; i < src_ndim; i++) {
730 3879 dreachem
      if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
731 3802 dreachem
        is_contig = 0;
732
        break;
733
      }
734
    }
735
 
736
    if (is_contig) {
737
      for (i = 1; i < dest_ndim; i++) {
738 3879 dreachem
        if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
739 3802 dreachem
          is_contig = 0;
740
          break;
741
        }
742
      }
743
    }
744
 
745 3879 dreachem
    if (src_strides || dest_strides)
746
      is_contig = 0;
747
 
748 3802 dreachem
    if (is_contig) {
749 3879 dreachem
      unsigned long xfer_size = src_str_mults[0]*src_extents[0];
750 3802 dreachem
      for (i = 1; i < src_ndim; i++) {
751
        xfer_size *= src_extents[i];
752
      }
753
      if (DEBUG) {
754 3879 dreachem
        unsigned long dest_xfer_size = dest_str_mults[0]*dest_extents[0];
755 3802 dreachem
        for (i = 1; i < dest_ndim; i++) {
756
          dest_xfer_size *= dest_extents[i];
757
        }
758
        if (dest_xfer_size != xfer_size) {
759
          LIBCAF_TRACE(LIBCAF_LOG_FATAL,
760
              "caf_rtl.c:coarray_read_full_str->dest and src xfer_size must"
761
              " be same. xfer_size=%d, dest_xfer_size=%d",
762
              xfer_size, dest_xfer_size);
763
        }
764
      }
765
      coarray_read_(src, dest, xfer_size, img);
766
      return;
767
      /* not reached */
768
    }
769
 
770
   START_TIMER();
771 3879 dreachem
   if (src_strides != NULL || dest_strides != NULL) {
772
     comm_read_full_str2(src, dest, src_ndim, src_str_mults, src_extents,
773
                     src_strides,
774
                     dest_ndim, dest_str_mults, dest_extents,
775
                     dest_strides,  img-1);
776
   } else {
777
     comm_read_full_str(src, dest, src_ndim, src_str_mults, src_extents,
778
                     dest_ndim, dest_str_mults, dest_extents, img-1);
779
   }
780 3802 dreachem
   STOP_TIMER(READ);
781
   LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
782
 
783
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
784
            "caf_rtl.c:coarray_read_full_str->Finished read(strided) "
785
            "from %p on Img %lu to %p using dim %d ",
786
            src, img, dest, src_ndim);
787
}
788
 
789
 
790
void coarray_read_src_str_(void * src, void *dest, unsigned int ndim,
791 3879 dreachem
        unsigned long *src_str_mults, unsigned long *src_extents,
792
        unsigned long *src_strides,
793 3802 dreachem
        unsigned long img)
794
{
795
    int i, is_contig = 1;
796 3879 dreachem
 
797
    /* runtime check if it is contiguous transfer */
798 3802 dreachem
    for (i = 1; i < ndim; i++) {
799 3892 dreachem
      if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
800 3802 dreachem
        is_contig = 0;
801
        break;
802
      }
803
    }
804
 
805 3879 dreachem
    if (src_strides)
806
      is_contig = 0;
807
 
808 3802 dreachem
    if (is_contig) {
809 3879 dreachem
      unsigned long xfer_size = src_str_mults[0]*src_extents[0];
810 3802 dreachem
      for (i = 1; i < ndim; i++) {
811
        xfer_size *= src_extents[i];
812
      }
813
      coarray_read_(src, dest, xfer_size, img);
814
      return;
815
      /* not reached */
816
    }
817
 
818
   START_TIMER();
819 3879 dreachem
   if (src_strides != NULL) {
820
     comm_read_src_str2(src, dest, ndim, src_str_mults, src_extents,
821
                       src_strides, img-1);
822
   } else {
823
     comm_read_src_str(src, dest, ndim, src_str_mults, src_extents, img-1);
824
   }
825 3802 dreachem
   STOP_TIMER(READ);
826
   LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read_strided ");
827
 
828
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
829
            "caf_rtl.c:coarray_read_src_str->Finished read(strided) "
830
            "from %p on Img %lu to %p using dim %d ",
831
            src, img, dest, ndim);
832
}
833
 
834
void coarray_read_(void * src, void * dest, unsigned long xfer_size,
835
        unsigned long img)
836
{
837
    START_TIMER();
838
    comm_read(src, dest, xfer_size, img-1);//reads from src on img to dest
839
    STOP_TIMER(READ);
840
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_read ");
841
 
842
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
843
        "caf_rtl.c:coarray_read->Finished read from %p on Img %lu to %p"
844
        " data of size %lu ", src, img, dest, xfer_size);
845
}
846
 
847
void coarray_write_dest_str_(void * dest, void *src, unsigned int ndim,
848 3879 dreachem
        unsigned long *dest_str_mults, unsigned long *dest_extents,
849
        unsigned long *dest_strides,
850 3802 dreachem
        unsigned long img)
851
{
852
    int i, is_contig = 1;
853 3879 dreachem
 
854
    /* runtime check if it is contiguous transfer */
855 3802 dreachem
    for (i = 1; i < ndim; i++) {
856 3879 dreachem
      if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
857 3802 dreachem
        is_contig = 0;
858
        break;
859
      }
860
    }
861 3879 dreachem
 
862
    if (dest_strides)
863
      is_contig = 0;
864
 
865 3802 dreachem
    if (is_contig) {
866 3879 dreachem
      unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
867 3802 dreachem
      for (i = 1; i < ndim; i++) {
868
        xfer_size *= dest_extents[i];
869
      }
870
      coarray_write_(dest, src, xfer_size, img);
871
      return;
872
      /* not reached */
873
    }
874
 
875
    START_TIMER();
876 3879 dreachem
    if (dest_strides != NULL) {
877
      comm_write_dest_str2(dest, src, ndim,dest_str_mults,dest_extents,
878
                          dest_strides, img-1);
879
    } else  {
880
      comm_write_dest_str(dest, src, ndim,dest_str_mults,dest_extents,
881
                          img-1);
882
    }
883 3802 dreachem
    STOP_TIMER(WRITE);
884
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
885
 
886
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
887
        "caf_rtl.c:coarray_write_dest_str->Finished write(strided) to %p"
888
        " on Img %lu from %p using dim %d ", dest, img, src, ndim);
889
}
890
 
891
void coarray_write_(void * dest, void * src, unsigned long xfer_size, unsigned long img)
892
{
893
    START_TIMER();
894
    comm_write(dest, src, xfer_size, img-1);//write to dest in img
895
    STOP_TIMER(WRITE);
896
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write ");
897
 
898
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
899
        "caf_rtl.c:coarray_write->Wrote to %p on Img %lu from %p data of"
900
        " size %lu ", dest, img, src, xfer_size);
901
}
902
 
903
void coarray_write_full_str_(void * dest, void *src,
904
        unsigned int dest_ndim,
905 3879 dreachem
        unsigned long *dest_str_mults, unsigned long *dest_extents,
906
        unsigned long *dest_strides,
907 3802 dreachem
        unsigned int src_ndim,
908 3879 dreachem
        unsigned long *src_str_mults, unsigned long *src_extents,
909
        unsigned long *src_strides,
910 3802 dreachem
        unsigned long img)
911
{
912
    int i, is_contig = 1;
913
 
914
    for (i = 1; i < dest_ndim; i++) {
915 3879 dreachem
      if (dest_str_mults[i] != (dest_str_mults[i-1]*dest_extents[i-1])) {
916 3802 dreachem
        is_contig = 0;
917
        break;
918
      }
919
    }
920
 
921
    if (is_contig) {
922
      for (i = 1; i < src_ndim; i++) {
923 3879 dreachem
        if (src_str_mults[i] != (src_str_mults[i-1]*src_extents[i-1])) {
924 3802 dreachem
          is_contig = 0;
925
          break;
926
        }
927
      }
928
    }
929
 
930 3879 dreachem
    if (src_strides || dest_strides)
931
      is_contig = 0;
932
 
933 3802 dreachem
    if (is_contig) {
934 3879 dreachem
      unsigned long xfer_size = dest_str_mults[0]*dest_extents[0];
935 3802 dreachem
      for (i = 1; i < dest_ndim; i++) {
936
        xfer_size *= dest_extents[i];
937
      }
938
      if (DEBUG) {
939 3879 dreachem
        unsigned long src_xfer_size = src_str_mults[0]*src_extents[0];
940 3802 dreachem
        for (i = 1; i < src_ndim; i++) {
941
          src_xfer_size *= src_extents[i];
942
        }
943
        if (src_xfer_size != xfer_size) {
944
          LIBCAF_TRACE(LIBCAF_LOG_FATAL,
945
              "caf_rtl.c:coarray_write_full_str->dest and src xfer_size must"
946
              " be same. xfer_size=%d, src_xfer_size=%d",
947
              xfer_size, src_xfer_size);
948
        }
949
      }
950
      coarray_write_(dest, src, xfer_size, img);
951
      return;
952
      /* not reached */
953
    }
954
 
955
    START_TIMER();
956 3879 dreachem
    if (src_strides != NULL || dest_strides != NULL) {
957
      comm_write_full_str2(dest, src, dest_ndim, dest_str_mults, dest_extents,
958
                           dest_strides,
959
                           src_ndim, src_str_mults, src_extents,
960
                           src_strides, img-1);
961
    } else {
962
      comm_write_full_str(dest, src, dest_ndim, dest_str_mults, dest_extents,
963
          src_ndim, src_str_mults, src_extents, img-1);
964
    }
965 3802 dreachem
    STOP_TIMER(WRITE);
966
    LIBCAF_TRACE( LIBCAF_LOG_TIME, "comm_write_strided ");
967
 
968
    LIBCAF_TRACE(LIBCAF_LOG_DEBUG,
969
        "caf_rtl.c:coarray_write_full_str->Finished write(strided) to %p"
970
        " on Img %lu from %p using dim %d ", dest, img, src, dest_ndim);
971
}
972 3811 dreachem
 
973
 
974
/* COLLECTIVES */
975
 
976
/* supplemental functions for collective subroutines
977
 * Borrowed from Joon's code */
978
 
979
static int my_pow2(int exp)
980
{
981
    int result=1;
982
    result <<= exp ;
983
    return result ;
984
}
985
 
986
static int is_even()
987
{
988
    return (_num_images % 2) ? 0:1 ;
989
}
990
 
991
static double mylog2(double exp)
992
{
993
    return log10(exp)/log10(2);
994
}
995
 
996
static double myceillog2(double exp)
997
{
998
    return ceil(log10(exp)/log10(2));
999
}
1000
 
1001
/* Add the value in buf to dope vector dst_dv */
1002
static void dope_add( void *buf, DopeVectorType *dst_dv,
1003
                        int total_bytes )
1004
{
1005
    int el_type  = dst_dv->type_lens.type;
1006
    void *dst_ptr = dst_dv->base_addr.a.ptr;
1007
    int i;
1008
    unsigned int el_len;
1009
    el_len = dst_dv->base_addr.a.el_len >>3; // convert bits to bytes
1010
 
1011
    switch (el_type)
1012
    {
1013
        case  DVTYPE_INTEGER:
1014
        {
1015
            for(i=0; i< total_bytes/el_len;i++)
1016
            {
1017
                *((int*)dst_ptr + i) += *((int*)buf + i);
1018
            }
1019
            break;
1020
        }
1021
        case  DVTYPE_REAL:
1022
        {
1023
            for(i=0; i< total_bytes/el_len;i++)
1024
            {
1025
                *((float*)dst_ptr + i) += *((float*)buf + i);
1026
            }
1027
            break;
1028
        }
1029
        default :
1030
        { break; }
1031
    }
1032
}
1033
 
1034
/* COSUM  (modified Joon's armci_comaxval function) */
1035
/* Accumulates the value of src_dv on all images and stores it into sum_dv
1036
 * of root */
1037
void comm_cosum(DopeVectorType *src_dv, DopeVectorType *sum_dv,int root)
1038
{
1039
    int i,iter;
1040
    int total_iter = (int) myceillog2(_num_images) ;
1041
    unsigned int el_len;
1042
    unsigned int target;
1043
    void *local_buf;
1044
    int total_bytes =1;
1045
 
1046
    // initialization
1047
    el_len = src_dv->base_addr.a.el_len >>3; // convert bits to bytes
1048
    for(i=0; i<src_dv->n_dim ; i++)
1049
      total_bytes *= src_dv->dimension[i].extent;
1050
    local_buf = malloc(total_bytes);
1051
    memset(local_buf, 0 , total_bytes);
1052
    total_bytes *=el_len;
1053
    // copy content of dopevector from src to sum locally
1054
    memcpy(sum_dv->base_addr.a.ptr, src_dv->base_addr.a.ptr, total_bytes);
1055
 
1056
    // swap processed ID between 0 and root (non zero process ID)
1057
    int vPID = (_this_image == root ) ?
1058
 
1059
 
1060
    // do reduction
1061
    for(iter=0; iter<total_iter; iter++)
1062
    {
1063
      if( (vPID % my_pow2(iter+1)) == 0 )
1064
      {
1065
        if( (vPID + my_pow2(iter)) < _num_images)
1066
        {
1067
          // compute target process IDs for data transfer
1068
          target = vPID + my_pow2(iter);
1069
 
1070
          //swap back for process Id 0 and root process(non-zero)
1071
          if(target == root) target=0;
1072
 
1073
          comm_read(src_dv->base_addr.a.ptr, local_buf, total_bytes, target);
1074
 
1075
          dope_add(local_buf, sum_dv, total_bytes);
1076
        }
1077
      }
1078
      comm_barrier_all();
1079
    }
1080
 
1081
    free(local_buf);
1082
    //Broadcast for all to all
1083
}