PGI User Forum
 SearchSearch   MemberlistMemberlist     RegisterRegister   ProfileProfile    Log inLog in 

CUDA-x86.

how to solve this compile error
Goto page 1, 2  Next
 
Post new topic   Reply to topic    PGI User Forum Forum Index -> Programming and Compiling
View previous topic :: View next topic  
Author Message
addison827



Joined: 02 Nov 2010
Posts: 12

PostPosted: Thu Feb 13, 2014 12:23 am    Post subject: how to solve this compile error Reply with quote

The following is my cuda code.


! Linerize Reynolds Equation by Newton's Method
! Air Foil Journal Bearing
! Domain partitioning method with OpenMP is used to parallize SOR method
! iterative methods for simple Reynolds eq.
module sor_rb_mod
use cudafor
contains
!-------------------------- SOR for GPU --------------------------------
attributes(device) subroutine sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,pddev)
implicit none
real(kind=8),shared :: pdev(0:nx,0:ny),hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6,h2,h3
real(kind=8), value:: omega,delta,dx,d2x,dy,d2y,dx2,dy2
integer, value :: i,j,nx, ny,ip,im,jp,jm,np
ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
jp=j+1; jm=j-1
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
h2=hdev(i,j)**2; h3=hdev(i,j)**3
b1=(hdev(ip,j)-hdev(im,j))/d2x ! dh/dx
b2=(hdev(i,jp)-hdev(i,jm))/d2y ! dh/dy
b3=(pdev(ip,j)-pdev(im,j))/d2x ! dp/dx
b4=(pdev(i,jp)-pdev(i,jm))/d2y ! dp/dy
b5=(pdev(ip,j)+pdev(im,j)-2.d0*pdev(i,j))/dx2 ! dp2/dx2
b6=(pdev(i,jp)+pdev(i,jm)-2.d0*pdev(i,j))/dy2 ! dp2/dy2
a1=(pdev(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*pdev(i,j)*b1)/d2x-(6.0d0*hdev(i,j))/d2x
a2=(pdev(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*pdev(i,j)*b1)/d2x+(6.0d0*hdev(i,j))/d2x
a3=(pdev(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*pdev(i,j)*b2)/d2y
a4=(pdev(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*pdev(i,j)*b2)/d2y
a5=(-2.0d0*pdev(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*pdev(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=pdev(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*pdev(i,j)*(b1*b3+b2*b4)-6.0d0*hdev(i,j)*b3-6.0d0*pdev(i,j)*b1
delta=-(a1*pddev(ip,j)+a2*pddev(im,j)+a3*pddev(i,jp)+a4*pddev(i,jm)+a6)/a5-pddev(i,j)
pddev(i,j)=pddev(i,j)+omega*delta
end subroutine sor_kernel
!-------------------------- Red block --------------------------------
attributes(global) subroutine red_kernel(nx,ny,pdev,hdev,dx,dy,omega,pddev)
implicit none
real(kind=8),shared :: pdev(0:nx,0:ny), hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8), value:: dx,dy,omega
integer, value :: nx, ny
integer :: i, j
i = (blockidx%x-1) * 16 + threadidx%x
j = (blockidx%y-1) * 16 + threadidx%y
if (mod((i+j),2)==0) then
call sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,pddev)
end if
end subroutine red_kernel
!-------------------------- Black block -------------------------------
attributes(global) subroutine black_kernel(nx,ny,pdev,hdev,dx,dy,omega,pddev)
implicit none
real(kind=8),shared :: pdev(0:nx,0:ny), hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8), value:: dx,dy,omega
integer, value :: nx, ny
integer :: i, j
i = (blockidx%x-1) * 16 + threadidx%x
j = (blockidx%y-1) * 16 + threadidx%y
if (mod((i+j),2)==1) then
call sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,pddev)
end if
end subroutine black_kernel
!-------------------------- Sor GPU ------------------------------------
subroutine sor2(p,eps)
use parameters
implicit none
real(kind=8), device, allocatable, dimension(:,:) :: pdev,hdev,pddev
type(dim3) :: dimGrid, dimBlock
integer :: i,j,r,iter,t3,t4,tr
real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta,tim
integer::iter1,iter2,nt,thread,i1,j1,k
dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
do j=0,ny
h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u) ! ambient pressure, kg/m**2

!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
iter=0
allocate( pdev(0:nx,0:ny),hdev(0:nx,0:ny),pddev(0:nx,0:ny))
hdev=h
pdev=p
dimGrid = dim3( nx/16, ny/16, 1 )
dimBlock = dim3( 16, 16, 1 )
!------------------------ GPU --------------------------------------
call system_clock(t3,tr)
do while(p_norm > err2) ! outer-loop stopping criteria (pressure)
iter2=iter2+1
de=0.0d0
pddev=0.0d0
norm=1.0d0
p_old=p
pdev=p
iter1=0
do while (norm> err1 .and. iter1<5000) ! inner-loop stopping criteria (delta)
de_old=de
call red_kernel<<<dimGrid,dimBlock>>>(nx,ny,pdev,hdev,dx,dy,omega,pddev)
r = cudathreadsynchronize()
call black_kernel<<<dimGrid,dimBlock>>>(nx,ny,pdev,hdev,dx,dy,omega,pddev)
r = cudathreadsynchronize()
de=pddev
iter1=iter1+1
norm=sqrt(dx*dy*sum((de-de_old)**2))
end do
iter_in=iter_in+iter1
p=p+de
p(0,:)=p(nx,:)
p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do
r = cudathreadsynchronize()
!---------------------- End GPU ------------------------------------
p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b

deallocate( pdev, hdev,pddev)
end subroutine sor2
end module sor_rb_mod

!------------------------------------------------------------
module parameters
real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6
real(kind=8):: rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta
real(kind=8):: h2,h3,dx,d2x,dy,d2y,dx2,dy2,tt1,tt2
real(kind=8):: pa
integer::nx,ny,ip,im,jp,jm,np,iter_in
end module parameters

!----------------Main Program-------------
program airbrg
use parameters
use sor_rb_mod
use cudafor
implicit none
integer::i,j,iter,t1,t2,tr
real(kind=8)::load,phi,eps,brg_num,pave,cpu1,cpu2,air_brg_num
real(kind=8),allocatable::p(:,:)
integer:: istat, num, numdevices,devnum
type(cudadeviceprop) :: prop
write (*,*) '--------------------- GPU Information ------------------------'
istat = cudaGetDeviceCount(numdevices)
istat = cudasetDevice(devnum)
do num = 0, numdevices-1
istat = cudaGetDeviceProperties(prop, num)
call printDeviceProperties(prop, num)
end do
pi=4.0d0*atan(1.0d0)
write(*,*) 'enter nx (e.g. 512)'
read(*,*) nx
write(*,*) 'enter omega (e.g. 1.95)'
read(*,*) omega
ny=nint(nx/pi)
if (mod(ny,2)/=0) then; ny=ny+1; end if
vis=179.0d-7 ! air viscosity (ns/m^2)
c=40.0d-6 ! clearance (m)
diam=50.0d-3 ! bearing diameter
b=50.0d-3 ! bearing width (m)
err1=1.0d-5 ! allowable errors (Inner iteration)
err2=1.0d-4 ! allowable errors (Outer iteration)
pa=101347.0d0 ! N/m^2
rpm=3.0d4 ! rpm
u=rpm*diam*pi/60.d0 ! bearing velocity
!omega=1.95

allocate(p(0:nx,0:ny))
p=pa

eps=0.8d0
write(*,*)'-----------------CPU Computing----------------'
call cpu_time(cpu1); call system_clock(t1,tr)
call sor(p,eps)
call cpu_time(cpu2); call system_clock(t2,tr)
tt1=cpu2-cpu1
call load_brg(p,load,phi)
pave=load/(diam*b)
air_brg_num=(diam/2.0d0/c)**2*eps*vis*rpm/pa
write(*,*)'air_brg_num=', air_brg_num

p=p/pa

!write(*,*)'brg_number=',brg_num
write(*,*)'Pmax=',maxval(p)
write(*,*)'iter_sor=',iter_in
write(*,*)'CPU_time=',cpu2-cpu1


open(1,file='p_cpu.dat')
do i=0,nx
write(1,'(1000(1x,f10.4))') (p(i,j), j=0,ny)
end do
close(1)
!---------------------GPU-----------------------------------
write(*,*)'-----------------GPU Computing----------------'
p=pa
call cpu_time(cpu1); call system_clock(t1,tr)
call sor2(p,eps)
call cpu_time(cpu2); call system_clock(t2,tr)
tt2=cpu2-cpu1
call load_brg(p,load,phi)
pave=load/(diam*b)
air_brg_num=(diam/2.0d0/c)**2*eps*vis*rpm/pa
write(*,*)'air_brg_num=', air_brg_num

p=p/pa

write(*,*)'Pmax=',maxval(p)
write(*,*)'iter_sor=',iter_in
write(*,*)'CPU_time=',cpu2-cpu1
write(*,*)'Speedup=',tt1/tt2
open(1,file='p_gpu.dat')
do i=0,nx
write(1,'(1000(1x,f10.4))') (p(i,j), j=0,ny)
end do
close(1)
stop;end program airbrg

!-------------------------------------
subroutine sor(p,eps)
use parameters
implicit none

real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta
integer::i,j,iter1,iter2,nt,thread,i1,j1,k
dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
do j=0,ny
h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u) ! ambient pressure, kg/m**2
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
do while(p_norm > err2) ! outer-loop stopping criteria (pressure)

iter2=iter2+1
de=0.0d0
norm=1.0d0
p_old=p
iter1=0

do while (norm> err1 .and. iter1<5000) ! inner-loop stopping criteria (delta)
de_old=de
do i1=1,2
do j1=1,2
do i=i1,nx,2; ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
do j=j1,ny-1,2; jp=j+1; jm=j-1
h2=h(i,j)**2; h3=h(i,j)**3
b1=(h(ip,j)-h(im,j))/d2x ! dh/dx
b2=(h(i,jp)-h(i,jm))/d2y ! dh/dy
b3=(p(ip,j)-p(im,j))/d2x ! dp/dx
b4=(p(i,jp)-p(i,jm))/d2y ! dp/dy
b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2 ! dp2/dx2
b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2 ! dp2/dy2
a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
de(i,j)=de(i,j)+omega*delta
end do
end do
end do
end do

iter1=iter1+1
norm=sqrt(dx*dy*sum((de-de_old)**2))
end do
iter_in=iter_in+iter1
p=p+de
p(0,:)=p(nx,:)
p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do

p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b

return;end subroutine sor

!---------------------------
!-------------------------------------
subroutine sor1(p,eps)
use parameters
implicit none

real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta
integer::i,j,iter1,iter2,nt,thread,i1,j1,k
dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
do j=0,ny
h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u) ! ambient pressure, kg/m**2
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
do while(p_norm > err2) ! outer-loop stopping criteria (pressure)

iter2=iter2+1
de=0.0d0
norm=1.0d0
p_old=p
iter1=0

do while (norm> err1 .and. iter1<5000) ! inner-loop stopping criteria (delta)
de_old=de
do i=1,nx-1; ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
do j=1,ny-1; jp=j+1; jm=j-1
if (mod((i+j),2)==0) then
h2=h(i,j)**2; h3=h(i,j)**3
b1=(h(ip,j)-h(im,j))/d2x ! dh/dx
b2=(h(i,jp)-h(i,jm))/d2y ! dh/dy
b3=(p(ip,j)-p(im,j))/d2x ! dp/dx
b4=(p(i,jp)-p(i,jm))/d2y ! dp/dy
b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2 ! dp2/dx2
b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2 ! dp2/dy2
a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
de(i,j)=de(i,j)+omega*delta
end if
end do
end do
do i=1,nx-1; ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
do j=1,ny-1; jp=j+1; jm=j-1
if (mod((i+j),2)==1) then
h2=h(i,j)**2; h3=h(i,j)**3
b1=(h(ip,j)-h(im,j))/d2x ! dh/dx
b2=(h(i,jp)-h(i,jm))/d2y ! dh/dy
b3=(p(ip,j)-p(im,j))/d2x ! dp/dx
b4=(p(i,jp)-p(i,jm))/d2y ! dp/dy
b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2 ! dp2/dx2
b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2 ! dp2/dy2
a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
de(i,j)=de(i,j)+omega*delta
end if
end do
end do

iter1=iter1+1
norm=sqrt(dx*dy*sum((de-de_old)**2))
end do
iter_in=iter_in+iter1
p=p+de
p(0,:)=p(nx,:)
p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do

p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b

return;end subroutine sor1

!---------------------------
subroutine load_brg(p,load,phi) ! get the horizontal/vertical forces (fx,fy)
use parameters
implicit none
real(kind=8), intent(in):: p(0:nx,0:ny)
real(kind=8), intent(out):: load,phi
real(kind=8):: theta,px(0:nx,0:ny),py(0:nx,0:ny),fx,fy
integer:: i,j

do j=0,ny
do i=0,nx
theta=real(i)*360.0d0/real(nx)
px(i,j)=p(i,j)*sin(pi/180.0d0*theta)
py(i,j)=-p(i,j)*cos(pi/180.0d0*theta)
end do
end do

call simps(px,fx)
call simps(py,fy)

load=sqrt(fx**2+fy**2)
phi=atan(fx/fy)*180.0d0/pi

return; end subroutine load_brg
!----------------------
subroutine simps(pn,fn) ! Simpson's integration formula
use parameters
implicit none
real(kind=8), intent(in):: pn(0:nx,0:ny)
real(kind=8), intent(out):: fn
real(kind=8):: f1,f2,s1(0:ny),sx,sy
integer:: i,j

sx=dx/3.0d0
sy=dy/3.0d0

do j=0,ny
f1=0.0d0;f2=0.0d0

do i=1,nx-1,2; f1=f1+pn(i,j); end do
do i=2,nx-1,2; f2=f2+pn(i,j); end do

s1(j)=sx*(pn(0,j)+pn(nx,j)+4.0d0*f1+2.0d0*f2)
end do

f1=0.0d0
f2=0.0d0

do j=1,ny-1,2; f1=f1+s1(j); end do
do j=2,ny-1,2; f2=f2+s1(j); end do

fn=sy*(s1(0)+s1(ny)+4.0d0*f1+2.0d0*f2)

return; end subroutine simps
!-----------------------------------------------
subroutine printDeviceProperties(prop, num)
use cudafor
integer:: np
type(cudadeviceprop) :: prop
integer num
ilen = verify(prop%name, ' ', .true.)
write (*,900) "Device Number: " ,num
write (*,901) "Device Name: " ,prop%name(1:ilen)
write (*,904) "Compute Capability Revision: ",prop%major,prop%minor

write (*,900) "multiProcessorCount: ",prop%multiProcessorCount
if (prop%major<2) then
write (*,905) "Total Cores: ",prop%multiProcessorCount, 8
else
write (*,905) "Total Cores: ",prop%multiProcessorCount, 32
end if
write (*,902) "Total Global Memory: ",real(prop%totalGlobalMem)/1e9," Gbytes"
write (*,900) "maxThreadsPerBlock: " ,prop%maxThreadsPerBlock
write (*,903) "maxThreadsDim: " ,prop%maxThreadsDim
write (*,903) "maxGridSize: " ,prop%maxGridSize
write (*,902) "ClockRate: " ,real(prop%clockRate)/1e6," GHz"
900 format (a,i0)
901 format (a,a)
902 format (a,f5.3,a)
903 format (a,2(i0,1x,'x',1x),i0)
904 format (a,i0,'.',i0)
905 format (a,i0,'x',i0)

return
end





I compiled it by using "pgfortran -ta=nvidia -Minfo=accel -fast hsbgpu1.cuf". However, an error was shown as following:


"pgfortran7fheBbv25QgJ8M.obj : error LNK2019: unresolved external symbol _SOR_RB_
MOD_SOR_KERNEL@0 referenced in function _SOR_RB_MOD_RED_KERNEL@44
abrg_gpudp1.exe : fatal error LNK1120: 1 unresolved externals"


How can I solve this problem? Thanks a lot.
Back to top
View user's profile
mkcolg



Joined: 30 Jun 2004
Posts: 6134
Location: The Portland Group Inc.

PostPosted: Thu Feb 13, 2014 9:03 am    Post subject: Reply with quote

Hi addison827,

Quote:
_SOR_RB_ MOD_SOR_KERNEL@0 referenced in function _SOR_RB_MOD_RED_KERNEL@44
Given the symbol names, I'm assuming you are using 32-bit Windows. The "@" followed by a number represents the size in bytes of the arguments. Hence, the linker is complaining the call to "sor_kernel", without arguments, is missing. Though given there is no such call and that "device" routines should get inlined, it's unclear why this symbol is here.

Can you post the source to the "parameter" module so I can try to recreate the issue? Also, which version of the compiler are you using?

Thanks,
Mat
Back to top
View user's profile
addison827



Joined: 02 Nov 2010
Posts: 12

PostPosted: Sun Feb 16, 2014 8:16 pm    Post subject: Reply with quote

Hi Mat,
Thanks for your reply. The PGI compiler I used is "PGI Workstation 11.1", and it's 32-bit. You can download the parameter module via this link: https://docs.google.com/file/d/0B3GZ-geiDlBteElIWF9aam1kT3M/edit

Thanks again.
Back to top
View user's profile
mecej4



Joined: 19 Jun 2011
Posts: 55

PostPosted: Mon Feb 17, 2014 12:12 pm    Post subject: Reply with quote

addison827 wrote:
Hi Mat,
Thanks for your reply. The PGI compiler I used is "PGI Workstation 11.1", and it's 32-bit. You can download the parameter module via this link: https://docs.google.com/file/d/0B3GZ-geiDlBteElIWF9aam1kT3M/edit

Thanks again.

With a small change, your code compiled and ran fine with 11.8 32-bit under Windows 8.1 on a laptop with a GeForce GT 555M GPU. The change is to move up the source for the PARAMETERS module to a location prior to the first instance of a USE statement for the module.

In general, the source of a module should be processed by the compiler before the compiler attempts to process other sources that use that module.
Back to top
View user's profile
addison827



Joined: 02 Nov 2010
Posts: 12

PostPosted: Mon Feb 17, 2014 7:15 pm    Post subject: Reply with quote

Thanks for your reply, I already move the parameter mod. to the top of my code. However, I think the main problem about the error "unresolved external symbol" is still there. I have to solve this problem.
Back to top
View user's profile
Display posts from previous:   
Post new topic   Reply to topic    PGI User Forum Forum Index -> Programming and Compiling All times are GMT - 7 Hours
Goto page 1, 2  Next
Page 1 of 2

 
Jump to:  
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum


Powered by phpBB © phpBB Group