PGI User Forum
 SearchSearch   MemberlistMemberlist     RegisterRegister   ProfileProfile    Log inLog in 

Free OpenACC Webinar

Openmp compile by using PGI Fortran and Intel Fortran
Goto page 1, 2  Next
 
Post new topic   Reply to topic    PGI User Forum Forum Index -> Programming and Compiling
View previous topic :: View next topic  
Author Message
Chia-WenChan52986



Joined: 25 Feb 2014
Posts: 11

PostPosted: Tue Mar 18, 2014 11:28 pm    Post subject: Openmp compile by using PGI Fortran and Intel Fortran Reply with quote

I used the Intel and PGI Fortran to compile the same code as following. However, the results calculated by using the PGI Fortran are wrong. Is anyone can't help me to find out the problem? Or it's the limitation of PGI Fortan? Thanks a lot.

Code:

!------------------------------------------------------------
module parameters
  real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6
  real(kind=8):: rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta
  real(kind=8):: h2,h3,dx,d2x,dy,d2y,dx2,dy2,lr,br
  real(kind=8):: pa,prr
  integer:: x1,x2,x3,x4,x5,x6,x7,x8,y1,y2    ! recess coord.
  integer:: n_x,n_y,n_x1,n_x2                  ! recess coord. parameters
  real(kind=8)::alpha,dxr,dyr                   ! recess coord. parameters
  integer::nx,ny,ip,im,jp,jm,iter_in
end module parameters
!------------------------------------------------------------
Program pso_abrg      ! Particle Swarm Algorithm
implicit none
!A program for hydrodynmic lubrication recessed slider bearing optimization by using  PSO
! Nv: No. of variables
! Np: Population size (number of particles)
! v: pseudovelocity
! c1: Cognitive trust parameter
! c2: Social trust parameter
! w0: Inertia
! wd:Inertia Reduction parameter
! k: Bound on velocity fraction
! vd: Velocity reduction parameter
! d: Dynamic inertia/velocity reduction delay (function evaluations)
! r1,r2: Random factors in the [0,1] interval
! f: Function value
! ik: iteration times
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
integer::i,j,Nv,Np,d,ik,ikmax,it,ic,n,icmax,tpso1,tpso2,tpsor,thn
real(kind=8)::c1,c2,w0,wd,k,vd,g_f,g_f_old
real(kind=8), allocatable::x_UB(:),x_LB(:),v_x_max(:),g_x(:),g_x_new(:),r1(:),r2(:)
real(kind=8), allocatable::v_x(:,:),x(:,:),x_best(:,:),f(:),f_best(:),p_x(:,:)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!$ integer:: omp_get_num_threads
!$ integer:: omp_get_num_procs
!$ thn=omp_get_num_procs()
!$ write(*,*) "The number of available processors/threads in the system: ",thn
thn=1    ! when OpenMP is not used
!$ write(*,*) "Enter the number of threads"
!$ read(*,*) thn
!$ call omp_set_num_threads(thn)             ! set the number of threads
!write(*,*)'Np=  (e.g. 100)'
!read(*,*)Np
!write(*,*)'icmax=  (e.g. 20)'
!read(*,*)icmax
Np=60
icmax=60
Nv=3

c1=1.0d0  !2.0d0
c2=1.0d0  !2.0d0
w0=0.5d0
wd=0.05d0
k=0.2d0    !*******************
vd=0.05d0
d=5
ikmax=20 !****************************

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
allocate(x_UB(1:Nv),x_LB(1:Nv),g_x(1:Nv),g_x_new(1:Nv),v_x(1:Nv,1:Np),v_x_max(1:Nv))
allocate(x(1:Nv,1:Np),x_best(1:Nv,1:Np),p_x(1:Nv,1:Np))
allocate(f(1:Np),f_best(1:Np),r1(1:Np),r2(1:Np))
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


g_f=0.0d0

x_UB(1)=60.0d-6
x_LB(1)=40.0d-6
x_UB(2)=30.0d-3
x_LB(2)=10.0d-3
x_UB(3)=60.0d-3
x_LB(3)=40.0d-3
do i=1,Nv
   v_x_max(i)=k*(x_UB(i)-x_LB(i))
end do
ik=0
it=0
ic=0
do i=1,5
call random_number(x)
call random_number(v_x)
end do
g_f=1000000
do i=1,Nv
    x(i,:)=x(i,:)*(x_UB(i)-x_LB(i))+x_LB(i)
end do

do i=1,Nv
     v_x(i,:)=v_x(i,:)*v_x_max(i)
end do
call obfun(Nv,Np,x,f,thn)
f_best=f
x_best=x
p_x=x_best

do j=1,Np
     if (f_best(j)<=g_f) then
         g_x(:)=x_best(:,j)
         g_f=f_best(j)
     end if
end do

open(1,file='track_x.dat')
open(2,file='d_f.dat')
call system_clock(tpso1,tpsor)
do n=1,ikmax
     g_f_old=g_f
     write(2,'(1000(1x,f20.4))') g_x,-g_f
     ik=ik+1
     do i=1,Nv
          call random_number(r1)
          call random_number(r2)
          v_x(i,:)=w0*v_x(i,:)+c1*r1(:)*(p_x(i,:)-x(i,:))+c2*r2(:)*(g_x(i)-x(i,:))
     end do
     do i=1,Nv
         do j=1,Np
              if (v_x(i,j)>v_x_max(i)) then
                  v_x(i,j)=v_x_max(i)
              end if   
              x(i,j)=x(i,j)+v_x(i,j)
               if (x(i,j)<=x_LB(i)) then; x(i,j)=x_LB(i); end if
               if (x(i,j)>=x_UB(i)) then; x(i,j)=x_UB(i); end if
         end do
     end do
     write(1,'(1000(1x,f10.4))') x
     call obfun(Nv,Np,x,f,thn)
     do j=1,Np
          if (f(j)<=f_best(j)) then
              f_best(j)=f(j)
              x_best(:,j)=x(:,j)
              p_x(:,j)=x_best(:,j)
          end if
     end do
     do j=1,Np
          if (f_best(j)<=g_f) then
              g_f=f_best(j)
              g_x_new(:)=x_best(:,j)
          end if
     end do
     if (abs((g_f-g_f_old)/g_f)>1d-5) then
         it=0
     end if
     it=it+1
     g_x=g_x_new
         !write(*,*) 'g_x: ',g_x
     if (it==d) then
         w0=w0*(1.0d0-wd)
         do i=1,Nv
             v_x_max(i)=v_x_max(i)*(1.0d0-vd)
         end do
         it=0
         ic=ic+1
     end if
     if (ic==icmax) exit
     write(*,*) 'ik/g_x/g_f: ',  ik, g_x, g_f
end do
call system_clock(tpso2,tpsor)
close(1)
close(2)
write(*,*)'elapsed_time (s)=',real(tpso2-tpso1)/real(tpsor)
write(*,*)'g_x, g_f=',g_x, g_f
write(*,*)'ik=',ik

stop
end program pso_abrg

!------------------------------------------------------
subroutine obfun(Nv,Np,x,f,thn)
use parameters
implicit none
integer,intent(in)::Nv,Np,thn
real(kind=8), intent(in)::x(1:Nv,1:Np)
real(kind=8), intent(out)::f(1:Np)
integer:: psoi
real(kind=8)::op,xv(Nv,1)

!$omp parallel do
    do psoi=1,Np
        xv(:,1)=x(:,psoi)
        call airbear(Nv,xv,op)
        f(psoi)=-op
        write(*,*) 'pi,x,f:  ', psoi,x(:,psoi),f(psoi)
    end do
!$omp end parallel do     
return
end subroutine obfun
!------------------------------------------------------
subroutine airbear(Nv,xv,op)
use parameters
implicit none
integer,intent(in)::Nv
real(kind=8),intent(in)::xv(Nv,1)
real(kind=8), intent(out)::op
real(kind=8),allocatable::p(:,:)
real(kind=8)::eps,load,phi
        c=xv(1,1)
        lr=xv(2,1)
        diam=xv(3,1)
        call constant
        call recess_coord                   ! define the recess coordinates
        allocate(p(0:nx,0:ny))
        p=pa
        eps=0.7d0
        call sor(p,eps)
        call load_brg(p,load,phi)
        !pave=load/(diam*b)
        op=load
end subroutine airbear
!------------------------------------------------------
subroutine constant
use parameters
implicit none

pi=4.0d0*atan(1.0d0)
nx=256
ny=nint(nx/pi)
if (mod(ny,2)/=0) then; ny=ny+1; end if

vis=179.0d-7                    ! air viscosity (ns/m^2)
!c=50.0d-6                       ! clearance (m)
!diam=50.0d-3                 ! bearing diameter
b=50.0d-3                       ! bearing  width (m)
err1=1.0d-5                      ! allowable errors (Inner iteration)
err2=1.0d-4                     ! allowable errors (Outer iteration)
pa=101347.0d0                ! atmosphere pressure (N/m^2)
rpm=6.0d4                     ! rotation speed (rpm)
u=rpm*diam*pi/60.d0  ! bearing velocity
omega=1.6                      ! relaxation no.
alpha=50.0d0                 ! bearing orientation angle (0<alpha<90, deg)
dxr=pi*diam/real(nx)    ! x-grid size
dyr=b/real(ny)               ! y-grid size
!lr=10d-3                          ! recess length (m)
br=40.0d-3                     ! recess width (m)
prr=4.5                           ! recess pressure (atm)
return;end subroutine constant
!------------------------------------------------------
subroutine sor(p,eps)
use parameters
implicit none

real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta
integer::i,j,iter1,iter2,nt,thread,i1,j1,k

dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
   do j=0,ny
      h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
   end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u)   ! ambient pressure, kg/m**2
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
do while(p_norm > err2 .and. iter2<200)       ! outer-loop stopping criteria (pressure)
   iter2=iter2+1
   de=0.0d0
   norm=1.0d0
   p_old=p
   iter1=0
   do i=1, nx
        do j=1,ny
            if (j>=y1 .and. j<=y2) then
                if (x1>x2) then
                    if (i>=x1) p(i,j)=0
                        if (i<=x2) p(i,j)=0
                    else
                        if (i>=x1 .and. i<=x2) p(i,j)=0
                    end if
                    if (i>=x3 .and. i<=x4) p(i,j)=0
                    if (i>=x5 .and. i<=x6) p(i,j)=0
                    if (x8<x7) then
                        if (i>=x7) p(i,j)=0
                        if (i<=x8) p(i,j)=0
                    else
                    if (i>=x7 .and. i<=x8) p(i,j)=0
                end if
            end if
        end do
   end do

   do while (norm> err1 .and. iter1<5000)        ! inner-loop stopping criteria  (delta)
      de_old=de
      do i1=1,2
         do j1=1,2
            do i=i1,nx,2; ip=i+1; im=i-1
              if (i==nx) then; ip=1; end if
                 do j=j1,ny-1,2; jp=j+1; jm=j-1
                    h2=h(i,j)**2; h3=h(i,j)**3
                    b1=(h(ip,j)-h(im,j))/d2x      ! dh/dx
                    b2=(h(i,jp)-h(i,jm))/d2y      ! dh/dy
                    b3=(p(ip,j)-p(im,j))/d2x      ! dp/dx
                    b4=(p(i,jp)-p(i,jm))/d2y      ! dp/dy
                    b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2      ! dp2/dx2
                    b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2      ! dp2/dy2
                    a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
                    a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
                    a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
                    a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
                    a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
                    a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
                    delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
                    de(i,j)=de(i,j)+omega*delta
                    if (j>=y1 .and. j<=y2) then
                        if (x1>x2) then
                           if (i>=x1) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                           if (i<=x2) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        else
                           if (i>=x1 .and. i<=x2) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        end if
                        if (i>=x3 .and. i<=x4) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        if (i>=x5 .and. i<=x6) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        if (x8<x7) then
                           if (i>=x7) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                           if (i<=x8) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        else
                           if (i>=x7 .and. i<=x8) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                        end if
                    end if
                 end do
              end do
         end do
      end do
   iter1=iter1+1
   norm=sqrt(dx*dy*sum((de-de_old)**2))
   end do
   iter_in=iter_in+iter1
   p=p+de
   p(0,:)=p(nx,:)
   p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do

p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b

return;end subroutine sor
!---------------------------
subroutine load_brg(p,load,phi)  ! get the horizontal/vertical forces (fx,fy)
use parameters
implicit none
real(kind=8), intent(in):: p(0:nx,0:ny)
real(kind=8), intent(out):: load,phi
real(kind=8):: theta,px(0:nx,0:ny),py(0:nx,0:ny),fx,fy
integer:: i,j

do j=0,ny
   do i=0,nx
      theta=real(i)*360.0d0/real(nx)
      px(i,j)=p(i,j)*sin(pi/180.0d0*theta)
      py(i,j)=-p(i,j)*cos(pi/180.0d0*theta)
   end do
end do

call simps(px,fx)
call simps(py,fy)

load=sqrt(fx**2+fy**2)
phi=atan(fx/fy)*180.0d0/pi

return; end subroutine load_brg
!----------------------
subroutine simps(pn,fn) ! Simpson's integration formula
use parameters
implicit none
real(kind=8), intent(in):: pn(0:nx,0:ny)
real(kind=8), intent(out):: fn
real(kind=8):: f1,f2,s1(0:ny),sx,sy
integer:: i,j

sx=dx/3.0d0
sy=dy/3.0d0

do j=0,ny
   f1=0.0d0;f2=0.0d0

   do i=1,nx-1,2; f1=f1+pn(i,j); end do
   do i=2,nx-1,2; f2=f2+pn(i,j); end do

   s1(j)=sx*(pn(0,j)+pn(nx,j)+4.0d0*f1+2.0d0*f2)
end do
f1=0.0d0
f2=0.0d0
do j=1,ny-1,2; f1=f1+s1(j); end do
do j=2,ny-1,2; f2=f2+s1(j); end do
fn=sy*(s1(0)+s1(ny)+4.0d0*f1+2.0d0*f2)
return; end subroutine simps
!-----------------------------------------------
subroutine recess_coord

! calculate the locations of the four recesses
! alpha: the orientation angle of the hybrid journal bearing

use parameters
implicit none

n_x=nint(lr/2.0d0/dxr)
n_x1=nint(alpha*real(nx)/360.0d0)
n_x2=nint(90.0d0*real(nx)/360.0d0)

x1=n_x1-n_x
x3=x1+n_x2

if (x1 < 0) then
   x1=n_x1-n_x+nx
   x3=x1+n_x2-nx
end if

x2=n_x1+n_x
x4=x2+n_x2
x5=x3+n_x2
x6=x4+n_x2
x7=x5+n_x2
x8=x6+n_x2

if (x8 > nx) then
   x8=x6+n_x2-nx
end if

n_y=nint(br/2.0d0/dyr)
y1=ny/2-n_y
y2=ny/2+n_y

return; end subroutine recess_coord


 
Back to top
View user's profile
mkcolg



Joined: 30 Jun 2004
Posts: 6206
Location: The Portland Group Inc.

PostPosted: Wed Mar 19, 2014 12:49 pm    Post subject: Reply with quote

Hi Chia-WenChan52986,

If I'm reading your code correctly, x and v_x are initialized to random values. Since you are not setting the random seed, the same random set of initial values are used. However, when moving compilers, a different set of values will be created since a different default seed and possibly a different random number generating algorithm is used. Both the PGI and Intel results are correct based on the initial values given.

If you wish to have portable results, you will need to either use a fixed set of initial values or use your own random generator to produce the same set of values.

Hope this helps,
Mat
Back to top
View user's profile
Chia-WenChan52986



Joined: 25 Feb 2014
Posts: 11

PostPosted: Wed Mar 19, 2014 7:56 pm    Post subject: Reply with quote

Hi Mat,
Thanks for your reply. I forgot to write down that if I run the code compiled by PGI Fortran by using only one processor-core, the results are correct. However, the results will be wrong by using multi-core. Of course, the results are all correct by using single- and multi-core while the code is compiled by the Intel compiler. Do you have any idea about this? Thanks a lot.
Back to top
View user's profile
Chia-WenChan52986



Joined: 25 Feb 2014
Posts: 11

PostPosted: Thu Mar 20, 2014 12:05 am    Post subject: Reply with quote

Hey Mat,
I don't think the random no. will affect the results. If you run the code by using the compile commend pgfortran -mp=allcores filename, and using 8 cores. The results will be: (where x is the variables, and f is the result)


[img] pi,x,f: 28 4.0184321516887570E-005 2.2472007788150276E-002
4.3997613006648069E-002 NaN
pi,x,f: 5 4.5495235966134833E-005 1.3074807470546545E-002
4.0256985102921874E-002 NaN
pi,x,f: 13 5.9450583298249174E-005 2.5159378489068442E-002
5.9044047013436227E-002 NaN[/img]


However, if the compile commend is ifort /Qopenmp filename. The results will be like this:


[img] pi,x,f: 5 5.412999203477487E-005 2.284296926653587E-002
4.081297764047193E-002 -22.6035797108662
pi,x,f: 21 5.028790524903308E-005 2.202444713659489E-002
4.410197768764015E-002 -14.7622043809480
pi,x,f: 28 4.586909123643895E-005 2.134972742978799E-002
5.618972495017878E-002 -14.7622043809480
pi,x,f: 25 5.320182378504191E-005 1.997834648385616E-002
5.025304901018232E-002 -14.7622043809480[/img]


I think it's compiler's problem.

[/img]
Back to top
View user's profile
mkcolg



Joined: 30 Jun 2004
Posts: 6206
Location: The Portland Group Inc.

PostPosted: Fri Mar 21, 2014 8:59 am    Post subject: Reply with quote

Hi Chia-WenChan52986,

Thanks for the further explanation. To be clear, you expect different answer from each build given you're using random numbers to generate your initial values.

For the OpenMP "NaN" issue, this is being caused by a race condition in your code. You have each thread set the values of the parameter module. However, the default for module variables is "shared", hence the threads are overwriting each others values causing the bad answers. To fix, you need to add the "threadprivate" clause in your module. Also, you need to privatize xv and op since all variables (except for loop index) are shared by default.

Why Intel works is not clear. It may be that how they schedule the threads happens to avoid the race condition?

- Mat

Code:
!------------------------------------------------------------
 module parameters
   real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6
   real(kind=8)::rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta
   real(kind=8):: h2,h3,dx,d2x,dy,d2y,dx2,dy2,lr,br
   real(kind=8):: pa,prr
   integer:: x1,x2,x3,x4,x5,x6,x7,x8,y1,y2    ! recess coord.
   integer:: n_x,n_y,n_x1,n_x2                  ! recess coord.
   real(kind=8)::alpha,dxr,dyr                   ! recess coord.
   integer::nx,ny,ip,im,jp,jm,iter_in
 !$omp  threadprivate(a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6)
 !$omp  threadprivate(rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta)
 !$omp  threadprivate(h2,h3,dx,d2x,dy,d2y,dx2,dy2,lr,br,pa,prr)
 !$omp  threadprivate(x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
 !$omp  threadprivate(n_x,n_y,n_x1,n_x2)
 !$omp  threadprivate(alpha,dxr,dyr)
 !$omp  threadprivate(nx,ny,ip,im,jp,jm,iter_in)
 end module parameters
 !------------------------------------------------------------
 Program pso_abrg      ! Particle Swarm Algorithm
 use omp_lib
 implicit none
 !A program for hydrodynmic lubrication recessed slider bearing
 !optimization by using  PSO
 ! Nv: No. of variables
 ! Np: Population size (number of particles)
 ! v: pseudovelocity
 ! c1: Cognitive trust parameter
 ! c2: Social trust parameter
 ! w0: Inertia
 ! wd:Inertia Reduction parameter
 ! k: Bound on velocity fraction
 ! vd: Velocity reduction parameter
 ! d: Dynamic inertia/velocity reduction delay (function evaluations)
 ! r1,r2: Random factors in the [0,1] interval
 ! f: Function value
 ! ik: iteration times
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 integer::i,j,Nv,Np,d,ik,ikmax,it,ic,n,icmax,tpso1,tpso2,tpsor,thn
 real(kind=8)::c1,c2,w0,wd,k,vd,g_f,g_f_old
 real(kind=8),allocatable::x_UB(:),x_LB(:),v_x_max(:),g_x(:),g_x_new(:),r1(:),r2(:)
 real(kind=8),allocatable::v_x(:,:),x(:,:),x_best(:,:),f(:),f_best(:),p_x(:,:)
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !integer:: omp_get_num_threads
 !integer:: omp_get_num_procs
 ! thn=omp_get_num_procs()
 ! write(*,*) "The number of available processors/threads in the
 !system: ",thn
 thn=1    ! when OpenMP is not used
 write(*,*) "Enter the number of threads"
 read(*,*) thn
 call omp_set_num_threads(thn)             ! set the number of
 !threads
 !write(*,*)'Np=  (e.g. 100)'
 !read(*,*)Np
 !write(*,*)'icmax=  (e.g. 20)'
 !read(*,*)icmax
 Np=60
 icmax=60
 Nv=3

 c1=1.0d0  !2.0d0
 c2=1.0d0  !2.0d0
 w0=0.5d0
 wd=0.05d0
 k=0.2d0    !*******************
 vd=0.05d0
 d=5
 ikmax=20 !****************************

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 allocate(x_UB(1:Nv),x_LB(1:Nv),g_x(1:Nv),g_x_new(1:Nv),v_x(1:Nv,1:Np),v_x_max(1:Nv))
 allocate(x(1:Nv,1:Np),x_best(1:Nv,1:Np),p_x(1:Nv,1:Np))
 allocate(f(1:Np),f_best(1:Np),r1(1:Np),r2(1:Np))
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


 g_f=0.0d0

 x_UB(1)=60.0d-6
 x_LB(1)=40.0d-6
 x_UB(2)=30.0d-3
 x_LB(2)=10.0d-3
 x_UB(3)=60.0d-3
 x_LB(3)=40.0d-3
 do i=1,Nv
    v_x_max(i)=k*(x_UB(i)-x_LB(i))
 end do
 ik=0
 it=0
 ic=0
 do i=1,5
 call random_number(x)
 call random_number(v_x)
 end do
 g_f=1000000
 do i=1,Nv
     x(i,:)=x(i,:)*(x_UB(i)-x_LB(i))+x_LB(i)
 end do

 do i=1,Nv
      v_x(i,:)=v_x(i,:)*v_x_max(i)
 end do
 call obfun(Nv,Np,x,f,thn)
 f_best=f
 x_best=x
 p_x=x_best

 do j=1,Np
      if (f_best(j)<=g_f) then
          g_x(:)=x_best(:,j)
          g_f=f_best(j)
      end if
 end do

 open(1,file='track_x.dat')
 open(2,file='d_f.dat')
 call system_clock(tpso1,tpsor)
 do n=1,ikmax
      g_f_old=g_f
      write(2,'(1000(1x,f20.4))') g_x,-g_f
      ik=ik+1
      do i=1,Nv
           call random_number(r1)
           call random_number(r2)
           v_x(i,:)=w0*v_x(i,:)+c1*r1(:)*(p_x(i,:)-x(i,:))+c2*r2(:)*(g_x(i)-x(i,:))
      end do
      do i=1,Nv
          do j=1,Np
               if (v_x(i,j)>v_x_max(i)) then
                   v_x(i,j)=v_x_max(i)
               end if   
               x(i,j)=x(i,j)+v_x(i,j)
                if (x(i,j)<=x_LB(i)) then; x(i,j)=x_LB(i); end if
                if (x(i,j)>=x_UB(i)) then; x(i,j)=x_UB(i); end if
          end do
      end do
      write(1,'(1000(1x,f10.4))') x
      call obfun(Nv,Np,x,f,thn)
      do j=1,Np
           if (f(j)<=f_best(j)) then
               f_best(j)=f(j)
               x_best(:,j)=x(:,j)
               p_x(:,j)=x_best(:,j)
           end if
      end do
      do j=1,Np
           if (f_best(j)<=g_f) then
               g_f=f_best(j)
               g_x_new(:)=x_best(:,j)
           end if
      end do
      if (abs((g_f-g_f_old)/g_f)>1d-5) then
          it=0
      end if
      it=it+1
      g_x=g_x_new
          !write(*,*) 'g_x: ',g_x
      if (it==d) then
          w0=w0*(1.0d0-wd)
          do i=1,Nv
              v_x_max(i)=v_x_max(i)*(1.0d0-vd)
          end do
          it=0
          ic=ic+1
      end if
      if (ic==icmax) exit
      write(*,*) 'ik/g_x/g_f: ',  ik, g_x, g_f
 end do
 call system_clock(tpso2,tpsor)
 close(1)
 close(2)
 write(*,*)'elapsed_time (s)=',real(tpso2-tpso1)/real(tpsor)
 write(*,*)'g_x, g_f=',g_x, g_f
 write(*,*)'ik=',ik

 stop
 end program pso_abrg

 !------------------------------------------------------
 subroutine obfun(Nv,Np,x,f,thn)
 use parameters
 implicit none
 integer,intent(in)::Nv,Np,thn
 real(kind=8), intent(in)::x(1:Nv,1:Np)
 real(kind=8), intent(out)::f(1:Np)
 integer:: psoi
 real(kind=8)::op,xv(Nv,1)

 !$omp parallel do &
 !$omp  private (xv,op)
     do psoi=1,Np
         op = 1.0
         xv(:,1)=x(:,psoi)
         call airbear(Nv,xv,op)
         f(psoi)=-op
         write(*,*) 'pi,x,f:  ', psoi,x(:,psoi),f(psoi)
     end do
 !$omp end parallel do     
 return
 end subroutine obfun
 !------------------------------------------------------
 subroutine airbear(Nv,xv,op)
 use parameters
 implicit none
 integer,intent(in)::Nv
 real(kind=8),intent(in)::xv(Nv,1)
 real(kind=8), intent(out)::op
 real(kind=8),allocatable::p(:,:)
 real(kind=8)::eps,load,phi
         c=xv(1,1)
         lr=xv(2,1)
         diam=xv(3,1)
         call constant
         call recess_coord                   ! define the recess
         allocate(p(0:nx,0:ny))
         p=pa
         eps=0.7d0
         call sor(p,eps)
         call load_brg(p,load,phi)
         !pave=load/(diam*b)
         op=load
 end subroutine airbear
 !------------------------------------------------------
 subroutine constant
 use parameters
 implicit none

 pi=4.0d0*atan(1.0d0)
 nx=256
 ny=nint(nx/pi)
 if (mod(ny,2)/=0) then; ny=ny+1; end if

 vis=179.0d-7                    ! air viscosity (ns/m^2)
 !c=50.0d-6                       ! clearance (m)
 !diam=50.0d-3                 ! bearing diameter
 b=50.0d-3                       ! bearing  width (m)
 err1=1.0d-5                      ! allowable errors (Inner iteration)
 err2=1.0d-4                     ! allowable errors (Outer iteration)
 pa=101347.0d0                ! atmosphere pressure (N/m^2)
 rpm=6.0d4                     ! rotation speed (rpm)
 u=rpm*diam*pi/60.d0  ! bearing velocity
 omega=1.6                      ! relaxation no.
 alpha=50.0d0                 ! bearing orientation angle (0<alpha<90,
 dxr=pi*diam/real(nx)    ! x-grid size
 dyr=b/real(ny)               ! y-grid size
 !lr=10d-3                          ! recess length (m)
 br=40.0d-3                     ! recess width (m)
 prr=4.5                           ! recess pressure (atm)
 return;end subroutine constant
 !------------------------------------------------------
 subroutine sor(p,eps)
 use parameters
 implicit none

 real(kind=8), intent(in)::eps
 real(kind=8), intent(inout)::p(0:nx,0:ny)
 real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
 real(kind=8)::h0,rjac,theta
 integer::i,j,iter1,iter2,nt,thread,i1,j1,k

 dx=(diam*pi/real(nx))
 dy=(b/real(ny))
 rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
 !-------------- film thickness -------------------------------
 do i=0,nx ; theta=360.0/real(nx)*real(i)
    do j=0,ny
       h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
    end do
 end do
 !---------------------------------------------------------------
 h0=minval(h)
 !-----------normalization-----------
 dx=dx/b
 dy=dy/b
 h=h/h0
 p=p*(h0**2)/(vis*b*u)   ! ambient pressure, kg/m**2
 dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
 !-------------------------------------
 iter_in=0
 iter2=0
 p_norm=1.0d0
 do while(p_norm > err2 .and. iter2<200)       ! outer-loop stopping
    iter2=iter2+1
    de=0.0d0
    norm=1.0d0
    p_old=p
    iter1=0
    do i=1, nx
         do j=1,ny
             if (j>=y1 .and. j<=y2) then
                 if (x1>x2) then
                     if (i>=x1) p(i,j)=0
                         if (i<=x2) p(i,j)=0
                     else
                         if (i>=x1 .and. i<=x2) p(i,j)=0
                     end if
                     if (i>=x3 .and. i<=x4) p(i,j)=0
                     if (i>=x5 .and. i<=x6) p(i,j)=0
                     if (x8<x7) then
                         if (i>=x7) p(i,j)=0
                         if (i<=x8) p(i,j)=0
                     else
                     if (i>=x7 .and. i<=x8) p(i,j)=0
                 end if
             end if
         end do
    end do

    do while (norm> err1 .and. iter1<5000)        ! inner-loop stopping
       de_old=de
       do i1=1,2
          do j1=1,2
             do i=i1,nx,2; ip=i+1; im=i-1
               if (i==nx) then; ip=1; end if
                  do j=j1,ny-1,2; jp=j+1; jm=j-1
                     h2=h(i,j)**2; h3=h(i,j)**3
                     b1=(h(ip,j)-h(im,j))/d2x      ! dh/dx
                     b2=(h(i,jp)-h(i,jm))/d2y      ! dh/dy
                     b3=(p(ip,j)-p(im,j))/d2x      ! dp/dx
                     b4=(p(i,jp)-p(i,jm))/d2y      ! dp/dy
                     b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2      ! dp2/dx2
                     b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2      ! dp2/dy2
                     a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
                     a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
                     a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
                     a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
                     a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
                     a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
                     delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
                     de(i,j)=de(i,j)+omega*delta
                     if (j>=y1 .and. j<=y2) then
                         if (x1>x2) then
                            if (i>=x1) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                            if (i<=x2) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                         else
                 if (i>=x1 .and. i<=x2) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                         end if
                 if (i>=x3 .and. i<=x4) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                 if (i>=x5 .and. i<=x6) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                         if (x8<x7) then
                            if (i>=x7) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                            if (i<=x8) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                         else
                 if (i>=x7 .and. i<=x8) de(i,j)=prr*pa*(h0**2)/(vis*b*u)
                         end if
                     end if
                  end do
               end do
          end do
       end do
    iter1=iter1+1
    norm=sqrt(dx*dy*sum((de-de_old)**2))
    end do
    iter_in=iter_in+iter1
    p=p+de
    p(0,:)=p(nx,:)
    p_norm=sqrt(dx*dy*sum((p-p_old)**2))
 end do

 p=p*(vis*b*u)/h0**2
 dx=dx*b
 dy=dy*b

 return;end subroutine sor
 !---------------------------
 subroutine load_brg(p,load,phi)  ! get the horizontal/vertical forces
 use parameters
 implicit none
 real(kind=8), intent(in):: p(0:nx,0:ny)
 real(kind=8), intent(out):: load,phi
 real(kind=8):: theta,px(0:nx,0:ny),py(0:nx,0:ny),fx,fy
 integer:: i,j

 do j=0,ny
    do i=0,nx
       theta=real(i)*360.0d0/real(nx)
       px(i,j)=p(i,j)*sin(pi/180.0d0*theta)
       py(i,j)=-p(i,j)*cos(pi/180.0d0*theta)
    end do
 end do

 call simps(px,fx)
 call simps(py,fy)

 load=sqrt(fx**2+fy**2)
 phi=atan(fx/fy)*180.0d0/pi

 return; end subroutine load_brg
 !----------------------
 subroutine simps(pn,fn) ! Simpson's integration formula
 use parameters
 implicit none
 real(kind=8), intent(in):: pn(0:nx,0:ny)
 real(kind=8), intent(out):: fn
 real(kind=8):: f1,f2,s1(0:ny),sx,sy
 integer:: i,j

 sx=dx/3.0d0
 sy=dy/3.0d0

 do j=0,ny
    f1=0.0d0;f2=0.0d0

    do i=1,nx-1,2; f1=f1+pn(i,j); end do
    do i=2,nx-1,2; f2=f2+pn(i,j); end do

    s1(j)=sx*(pn(0,j)+pn(nx,j)+4.0d0*f1+2.0d0*f2)
 end do
 f1=0.0d0
 f2=0.0d0
 do j=1,ny-1,2; f1=f1+s1(j); end do
 do j=2,ny-1,2; f2=f2+s1(j); end do
 fn=sy*(s1(0)+s1(ny)+4.0d0*f1+2.0d0*f2)
 return; end subroutine simps
 !-----------------------------------------------
 subroutine recess_coord

 ! calculate the locations of the four recesses
 ! alpha: the orientation angle of the hybrid journal bearing

 use parameters
 implicit none

 n_x=nint(lr/2.0d0/dxr)
 n_x1=nint(alpha*real(nx)/360.0d0)
 n_x2=nint(90.0d0*real(nx)/360.0d0)

 x1=n_x1-n_x
 x3=x1+n_x2

 if (x1 < 0) then
    x1=n_x1-n_x+nx
    x3=x1+n_x2-nx
 end if

 x2=n_x1+n_x
 x4=x2+n_x2
 x5=x3+n_x2
 x6=x4+n_x2
 x7=x5+n_x2
 x8=x6+n_x2

 if (x8 > nx) then
    x8=x6+n_x2-nx
 end if

 n_y=nint(br/2.0d0/dyr)
 y1=ny/2-n_y
 y2=ny/2+n_y

 return; end subroutine recess_coord

% pgfortran -mp test.f90 -fast ; a.out
 Enter the number of threads
8
 pi,x,f:             25   5.9516665980696078E-005   2.8437097496238260E-002
   4.6029907490028846E-002   -1.342359780510695
 pi,x,f:              9   5.0088449394745570E-005   2.3686958953027498E-002
   4.2029409143188728E-002   -2.412770113034530
 pi,x,f:             17   4.4093232024295338E-005   2.6362292584158238E-002
   5.1496865681003784E-002   -8.826354187314926
 pi,x,f:             26   5.0093386960804480E-005   2.7163973133330042E-002
   4.2043941585420443E-002   -1.243699821905178
 pi,x,f:             47   5.8831031247096063E-005   2.1460884697961261E-002
   4.1440365191234889E-002   -2.711834122652949
...
Back to top
View user's profile
Display posts from previous:   
Post new topic   Reply to topic    PGI User Forum Forum Index -> Programming and Compiling All times are GMT - 7 Hours
Goto page 1, 2  Next
Page 1 of 2

 
Jump to:  
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum


Powered by phpBB © phpBB Group