|
| View previous topic :: View next topic |
| Author |
Message |
erkant
Joined: 23 Nov 2011 Posts: 1
|
Posted: Sun Apr 28, 2013 5:05 am Post subject: Can someone test my code, while I have CC 1.2? |
|
|
I have the following code for Mandelbrot set calculation. After some tweaking, I managed to remove some of the errors regarding the function call. Now I have a few errors about double precision operations. Because my GPU has compute capability 1.2, I cannot use double precision; as far as I know, support for it was added in compute capability 1.3. Therefore, if someone who has a GPU with CC 1.3 or greater could compile my code on their computer and tell me the result, I would be glad. Below you can find the error message that I get when running my program, along with the messages from -Minfo, how I try to compile, and my code.
How I compile:
| Quote: | | pgcc -acc -Minfo=accel -ta=nvidia,cc12 -lm -o mandopenacc mandopenacc.c |
Messages:
| Quote: | mandelbrot:
34, Generating present_or_copyout(m[0:400][0:600])
Generating NVIDIA code
Double precision operations disable compute capability 1.0 kernel
35, Loop is parallelizable
36, Loop is parallelizable
Accelerator kernel generated
35, #pragma acc loop gang /* blockIdx.y */
36, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
39, Double precision operations disable compute capability 1.2 kernel
40, Double precision operations disable compute capability 1.2 kernel
41, Double precision operations disable compute capability 1.2 kernel
45, Loop carried scalar dependence for 'newRe' at line 47
Scalar last value needed after loop for 'newRe' at line 66
Loop carried scalar dependence for 'newIm' at line 48
Scalar last value needed after loop for 'newIm' at line 66
Inner sequential loop scheduled on accelerator
47, Double precision operations disable compute capability 1.2 kernel
48, Double precision operations disable compute capability 1.2 kernel
50, Double precision operations disable compute capability 1.2 kernel
51, Double precision operations disable compute capability 1.2 kernel
53, Double precision operations disable compute capability 1.2 kernel
66, Double precision operations disable compute capability 1.2 kernel
67, Double precision operations disable compute capability 1.2 kernel |
Error that I get after running the program:
| Quote: | etairi@Progex:~/Projects/mandelbrot$ ./mandopenacc 20000 > out1.ppm
call to cuModuleLoadData returned error 200: Invalid image |
Code:
| Code: | // mandopenacc.c
// to compile: pgcc -acc -Minfo=accel -ta=nvidia,cc12 -o mandopenacc mandopenacc.c
// usage: ./mandopenacc <no_of_iterations> > output.ppm
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <openacc.h>
/* One image pixel: red, green and blue components, each held in an int. */
typedef struct {
    int r; /* red */
    int g; /* green */
    int b; /* blue */
} rgb;
/*
 * Render the Mandelbrot set into the pixel grid m.
 *
 * niterations: maximum number of z = z*z + p steps tried for each pixel.
 * m:           image rows; m[y][x] is written for every 0 <= y < 400 and
 *              0 <= x < 600, so the caller must provide at least that much
 *              storage.
 *
 * A pixel whose orbit is still inside the radius-2 circle after niterations
 * steps is set to black. Every other pixel gets b = 255 and r = g = a
 * brightness derived from the normalized iteration count, clamped to 0..255.
 */
void mandelbrot(int niterations, rgb **m)
{
    int w = 600, h = 400, x, y, i;
    // each iteration calculates: newz = oldz*oldz + p,
    // where p is the current pixel, and oldz starts at the origin
    double pr, pi; // real and imaginary part of the pixel p
    double newRe, newIm, oldRe, oldIm; // real and imaginary parts of new and old z
    double zoom = 1, moveX = -0.5, moveY = 0; // you can change these to zoom and change position

    // loop through every pixel
    #pragma acc kernels loop private(y,x,i,newRe,newIm,oldRe,oldIm)
    for(y = 0; y < h; y++) {
        for(x = 0; x < w; x++) {
            // calculate the initial real and imaginary part of z,
            // based on the pixel location and zoom and position values
            pr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX;
            pi = (y - h / 2) / (0.5 * zoom * h) + moveY;
            newRe = newIm = oldRe = oldIm = 0; // these should start at 0,0

            // i will represent the number of iterations
            // start the iteration process
            for(i = 0; i < niterations; i++) {
                // remember value of previous iteration
                oldRe = newRe;
                oldIm = newIm;
                // the actual iteration, the real and imaginary part are calculated
                newRe = oldRe * oldRe - oldIm * oldIm + pr;
                newIm = 2 * oldRe * oldIm + pi;
                // if the point is outside the circle with radius 2: stop
                if((newRe * newRe + newIm * newIm) > 4) break;
            }

            if(i == niterations) {
                // never escaped: paint the pixel black
                m[y][x].r = 0;
                m[y][x].g = 0;
                m[y][x].b = 0;
            } else {
                // normalized iteration count method for proper coloring
                double z = sqrt(newRe * newRe + newIm * newIm);
                double shade = 256. * log(1.75 + i - log(log(z))) / log((double)niterations);
                int brightness;

                // Clamp before converting to int. The raw value reaches 256
                // or more for pixels that escape on the last iteration(s),
                // is infinite when niterations is 1 (log(1) == 0) and is NaN
                // for a negative niterations; main() writes each channel as
                // a single byte, so such values would wrap or be undefined.
                // The clamp is written inline so that no extra function call
                // is introduced inside the accelerator region.
                if(shade > 255) {
                    brightness = 255;
                } else if(shade > 0) {
                    brightness = (int)shade;
                } else {
                    brightness = 0; // negative or NaN (every comparison with NaN is false)
                }

                m[y][x].r = brightness;
                m[y][x].g = brightness;
                m[y][x].b = 255;
            }
        }
    }
}
/*
 * Entry point: renders the Mandelbrot set with mandelbrot() and writes it to
 * stdout as a binary PPM (P6) image. Diagnostics and the elapsed time are
 * written to stderr.
 *
 * argv[1] is the maximum iteration count and must be a positive integer.
 * Returns 0 on success and 1 on a usage, argument or allocation error.
 */
int main(int argc, char *argv[])
{
    /* Must stay in sync with the 600x400 size hard-coded in mandelbrot(). */
    enum { WIDTH = 600, HEIGHT = 400 };
    int niterations, i, j;
    long parsed;
    char *parse_end;
    rgb **m;
    clock_t begin, end;
    double time_spent;

    if(argc != 2) {
        /* stdout is normally redirected into the image file, so the usage
           text has to go to stderr to be seen. */
        fprintf(stderr, "Usage: %s <no_of_iterations> > output.ppm\n", argv[0]);
        return 1;
    }

    /* strtol instead of atoi so that non-numeric input can be rejected. */
    parsed = strtol(argv[1], &parse_end, 10);
    niterations = (int)parsed;
    /* The round trip through int rejects values that do not fit in an int. */
    if(parse_end == argv[1] || *parse_end != '\0' || parsed < 1 || (long)niterations != parsed) {
        fprintf(stderr, "%s: invalid iteration count '%s'\n", argv[0], argv[1]);
        return 1;
    }

    m = malloc(HEIGHT * sizeof *m);
    if(m == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return 1;
    }
    for(i = 0; i < HEIGHT; i++) {
        m[i] = malloc(WIDTH * sizeof *m[i]);
        if(m[i] == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            /* release the rows allocated so far */
            while(i-- > 0) {
                free(m[i]);
            }
            free(m);
            return 1;
        }
    }

    /* The timed interval covers both the computation and the image output. */
    begin = clock();
    mandelbrot(niterations, m);

    printf("P6\n# AUTHOR: Erkan Tairi\n");
    printf("%d %d\n255\n", WIDTH, HEIGHT);
    for(i = 0; i < HEIGHT; i++) {
        for(j = 0; j < WIDTH; j++) {
            /* one byte per channel; unsigned char avoids the
               implementation-defined conversion of values above 127 to char */
            fputc((unsigned char)m[i][j].r, stdout);
            fputc((unsigned char)m[i][j].g, stdout);
            fputc((unsigned char)m[i][j].b, stdout);
        }
    }

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);

    for(i = 0; i < HEIGHT; i++) {
        free(m[i]);
    }
    free(m);
    return 0;
}
|
|
| Back to top |
|
 |
mkcolg
Joined: 30 Jun 2004 Posts: 4996 Location: The Portland Group Inc.
|
Posted: Mon Apr 29, 2013 3:13 pm Post subject: |
|
|
Hi erkant,
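Alternatively, you can compile in single precision; the version below selects it with the SINGLE macro (see the -DSINGLE and -Mfcon flags on the compile line).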
| Code: | % cat mandlebrot.c
// mandopenacc.c
// to compile: pgcc -acc -Minfo=accel -ta=nvidia,cc12 -o mandopenacc mandopenacc.c
// usage: ./mandopenacc <no_of_iterations> > output.ppm
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#ifdef _OPENACC
#include <openacc.h>
#endif
#ifdef SINGLE
#define REAL float
#define LOG logf
#define SQRT sqrtf
#else
#define REAL double
#define LOG log
#define SQRT sqrt
#endif
/* Colour of a single pixel, stored as three int components. */
typedef struct {
    int r; /* red component */
    int g; /* green component */
    int b; /* blue component */
} rgb;
/*
 * Render the Mandelbrot set into the pixel grid m.
 *
 * The arithmetic uses the file's REAL type and the matching LOG/SQRT macros,
 * so the precision follows the SINGLE compile-time switch.
 *
 * niterations: maximum number of z = z*z + p steps tried for each pixel.
 * m:           image rows; m[y][x] is written for every 0 <= y < 400 and
 *              0 <= x < 600, so the caller must provide at least that much
 *              storage.
 *
 * A pixel whose orbit is still inside the radius-2 circle after niterations
 * steps is set to black. Every other pixel gets b = 255 and r = g = a
 * brightness derived from the normalized iteration count, clamped to 0..255.
 */
void mandelbrot(int niterations, rgb **m)
{
    int w = 600, h = 400, x, y, i;
    // each iteration calculates: newz = oldz*oldz + p,
    // where p is the current pixel, and oldz starts at the origin
    REAL pr, pi; // real and imaginary part of the pixel p
    REAL newRe, newIm, oldRe, oldIm; // real and imaginary parts of new and old z
    REAL zoom = 1, moveX = -0.5, moveY = 0; // you can change these to zoom and change position

    // loop through every pixel
    #pragma acc kernels loop private(newRe,newIm,oldRe,oldIm)
    for(y = 0; y < h; y++) {
        for(x = 0; x < w; x++) {
            // calculate the initial real and imaginary part of z,
            // based on the pixel location and zoom and position values
            pr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX;
            pi = (y - h / 2) / (0.5 * zoom * h) + moveY;
            newRe = newIm = oldRe = oldIm = 0; // these should start at 0,0

            // i will represent the number of iterations
            // start the iteration process
            for(i = 0; i < niterations; i++) {
                // remember value of previous iteration
                oldRe = newRe;
                oldIm = newIm;
                // the actual iteration, the real and imaginary part are calculated
                newRe = oldRe * oldRe - oldIm * oldIm + pr;
                newIm = 2 * oldRe * oldIm + pi;
                // if the point is outside the circle with radius 2: stop
                if((newRe * newRe + newIm * newIm) > 4) break;
            }

            if(i == niterations) {
                // never escaped: paint the pixel black
                m[y][x].r = 0;
                m[y][x].g = 0;
                m[y][x].b = 0;
            } else {
                // normalized iteration count method for proper coloring
                REAL z = SQRT(newRe * newRe + newIm * newIm);
                REAL shade = 256. * LOG(1.75 + i - LOG(LOG(z))) / LOG((REAL)niterations);
                int brightness;

                // Clamp before converting to int. The raw value reaches 256
                // or more for pixels that escape on the last iteration(s),
                // is infinite when niterations is 1 (log of 1 is 0) and is
                // NaN for a negative niterations; main() writes each channel
                // as a single byte, so such values would wrap or be
                // undefined. Integer literals are used in the comparisons so
                // that no double constant is introduced in SINGLE builds,
                // and the clamp is inline so that no extra function call
                // appears inside the accelerator region.
                if(shade > 255) {
                    brightness = 255;
                } else if(shade > 0) {
                    brightness = (int)shade;
                } else {
                    brightness = 0; // negative or NaN (every comparison with NaN is false)
                }

                m[y][x].r = brightness;
                m[y][x].g = brightness;
                m[y][x].b = 255;
            }
        }
    }
}
/*
 * Entry point: renders the Mandelbrot set with mandelbrot() and writes it to
 * stdout as a binary PPM (P6) image. Diagnostics and the elapsed time are
 * written to stderr.
 *
 * argv[1] is the maximum iteration count and must be a positive integer.
 * Returns 0 on success and 1 on a usage, argument or allocation error.
 */
int main(int argc, char *argv[])
{
    /* Must stay in sync with the 600x400 size hard-coded in mandelbrot(). */
    enum { WIDTH = 600, HEIGHT = 400 };
    int niterations, i, j;
    long parsed;
    char *parse_end;
    rgb **m;
    clock_t begin, end;
    /* Host-side timing is kept in double; it does not need to follow the
       SINGLE precision switch used for the pixel arithmetic. */
    double time_spent;

    if(argc != 2) {
        /* stdout is normally redirected into the image file, so the usage
           text has to go to stderr to be seen. */
        fprintf(stderr, "Usage: %s <no_of_iterations> > output.ppm\n", argv[0]);
        return 1;
    }

    /* strtol instead of atoi so that non-numeric input can be rejected. */
    parsed = strtol(argv[1], &parse_end, 10);
    niterations = (int)parsed;
    /* The round trip through int rejects values that do not fit in an int. */
    if(parse_end == argv[1] || *parse_end != '\0' || parsed < 1 || (long)niterations != parsed) {
        fprintf(stderr, "%s: invalid iteration count '%s'\n", argv[0], argv[1]);
        return 1;
    }

    m = malloc(HEIGHT * sizeof *m);
    if(m == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return 1;
    }
    for(i = 0; i < HEIGHT; i++) {
        m[i] = malloc(WIDTH * sizeof *m[i]);
        if(m[i] == NULL) {
            fprintf(stderr, "%s: out of memory\n", argv[0]);
            /* release the rows allocated so far */
            while(i-- > 0) {
                free(m[i]);
            }
            free(m);
            return 1;
        }
    }

    /* The timed interval covers both the computation and the image output. */
    begin = clock();
    mandelbrot(niterations, m);

    printf("P6\n# AUTHOR: Erkan Tairi\n");
    printf("%d %d\n255\n", WIDTH, HEIGHT);
    for(i = 0; i < HEIGHT; i++) {
        for(j = 0; j < WIDTH; j++) {
            /* one byte per channel; unsigned char avoids the
               implementation-defined conversion of values above 127 to char */
            fputc((unsigned char)m[i][j].r, stdout);
            fputc((unsigned char)m[i][j].g, stdout);
            fputc((unsigned char)m[i][j].b, stdout);
        }
    }

    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);

    for(i = 0; i < HEIGHT; i++) {
        free(m[i]);
    }
    free(m);
    return 0;
}
% pgcc mandlebrot.c -acc -Minfo=accel -V13.5 -ta=nvidia,4.2,cc12 -DSINGLE -Mfcon
mandelbrot:
38, Generating present_or_copyout(m[0:400][0:600])
Generating NVIDIA code
Generating compute capability 1.2 binary
39, Loop is parallelizable
40, Loop is parallelizable
Accelerator kernel generated
39, #pragma acc loop gang /* blockIdx.y */
40, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
48, Loop carried scalar dependence for 'newRe' at line 50
Scalar last value needed after loop for 'newRe' at line 68
Loop carried scalar dependence for 'newIm' at line 51
Scalar last value needed after loop for 'newIm' at line 68
Inner sequential loop scheduled on accelerator
57, Accelerator restriction: induction variable live-out from loop: i
% a.out 1024 > output.ppm
Elapsed time: 0.43 seconds.
|
- Mat |
|
| Back to top |
|
 |
|
|
You cannot post new topics in this forum You cannot reply to topics in this forum You cannot edit your posts in this forum You cannot delete your posts in this forum You cannot vote in polls in this forum
|
Powered by phpBB © 2001, 2002 phpBB Group
|