> cat main.cpp

#include <stdio.h>

/* Defined in routine.cpp, where it returns void; the declaration must agree
   with that definition even though the return type is not part of the
   mangled name and a mismatch would still link. */
void testit(void);

/* Runs the OpenACC test routine, then reports completion. */
int
main()
{
    testit();
    printf("Done\n");
    return 0;
}

> cat routine.cpp

/* one-D input/output arrays

* routine seq

*/

#include <stdio.h>

#include "openacc.h"

extern float doit( float* a, int i );

#pragma acc routine(doit) seq

void

testit()

{

float a0[1000], b0[1000];

int i, n = 1000;

for( i = 0; i < n; ++i ) a0

*= (float)i;*

for( i = 0; i < n; ++i ) b0

for( i = 0; i < n; ++i ) b0

*= -1;*

#pragma acc parallel copy( b0, a0 )

{

#pragma acc loop

for( i = 0; i < n; ++i ) b0#pragma acc parallel copy( b0, a0 )

{

#pragma acc loop

for( i = 0; i < n; ++i ) b0

*= doit( a0, i );*

}

for( i = 0; i < n; ++i ) if (b0}

for( i = 0; i < n; ++i ) if (b0

*!= (float(i)*float(i))) printf("BAD\n");*

}

> cat doit.cpp

/* one-D input/output arrays

* routine seq

*/

#include <stdio.h>

#include "openacc.h"

/*
 * Returns the square of element i of a.
 * The caller must make sure i is a valid index into a.
 * The routine directive below asks the compiler to also build a sequential
 * device version so the function can be called from an OpenACC compute region.
 */
#pragma acc routine seq
float doit( float* a, int i )
{
    return a[i]*a[i];
}

> cat doit.cpp

/* one-D input/output arrays

* routine seq

*/

#include <stdio.h>

#include "openacc.h"

/*
 * Returns the square of element i of a.
 * The caller must make sure i is a valid index into a.
 * The routine directive below asks the compiler to also build a sequential
 * device version so the function can be called from an OpenACC compute region.
 */
#pragma acc routine seq
float doit( float* a, int i )
{
    return a[i]*a[i];
}

> pgc++ -c -ta=tesla -fPIC -Minfo=acc routine.cpp doit.cpp

routine.cpp:

testit():

21, Generating copy(b0[:],a0[:]) [if not already present]

Generating Tesla code

23, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */

doit.cpp:

doit(float *, int):

10, Generating acc routine seq

Generating Tesla code

> pgc++ -ta=tesla -shared -o r.so routine.o doit.o

> g++ main.cpp r.so

> export LD_LIBRARY_PATH=`pwd`

> ./a.out

Done

> pgc++ -c -ta=tesla -fPIC -Minfo=acc routine.cpp doit.cpp

routine.cpp:

testit():

21, Generating copy(b0[:],a0[:]) [if not already present]

Generating Tesla code

23, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */

doit.cpp:

doit(float *, int):

10, Generating acc routine seq

Generating Tesla code

> pgc++ -ta=tesla -shared -o r.so routine.o doit.o

> g++ main.cpp r.so

> export LD_LIBRARY_PATH=`pwd`

> ./a.out

Done