equal load (MPI) by 바죠

병렬 프로그래밍(MPI 프로그램)에서 가장 기본적인 것 중 하나는 각 노드에 가능한 한 균등한 계산량을 골고루 분배하는 것이다. 이때, 일의 크기가 클수록, 즉 CPU 시간이 많이 걸릴수록 병렬 계산의 효율성은 높아질 수 있다. 간단한 예제들을 통해서 일의 균등 배분 방식을 알아본다. 하나의 예로서, 포트란90으로 만들어진 동등 분배 루틴을 아래와 같이 나타낼 수 있다.

!234567890
       PROGRAM mphase
       IMPLICIT NONE
       include 'mpif.h'
       character*8 string,fname8,fname9
       real*8 temper
       character*8 fnnd ; character*10 fnnt
       integer itemp,itemq,irate,isize,j,k
       integer myid,nproc,ierr,kount,iroot

       integer n1,n2,istart,ifinish
       real*8 tmp,tmq,tmr,tms,tmt

       call MPI_INIT( ierr )
       call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
       call MPI_COMM_SIZE( MPI_COMM_WORLD, nproc, ierr )

!
       isize=1
       if(myid == 0)then   ! -----[   process id = 0

       call system_clock(itemp,irate)
       call date_and_time(date=fnnd,time=fnnt)
       write(6,'(a10,2x,a8,2x,a10)') 'date,time:', fnnd,fnnt
       if(nproc > 1) print *,  nproc," processes are alive"
       if(nproc ==1) print *,  nproc," process is alive"
!
                    endif  ! -----]   process id = 0
!
   iroot=0 ; kount=1
       n1=1 ; n2=200
       if(myid == 0) write(6,*) 'myid, n1, n2 ', myid,n1,n2
       call equal_load(n1,n2,nproc,myid,istart,ifinish)
!      write(6,*) 'myid ', myid,istart,ifinish

       j=istart
       call numeral(j,string,isize)
       fname8=string
       k=j+1000
       call numeral(k,string,isize)
       fname9=string
      call phase_diagram(fname8,fname9,istart,ifinish)

!
      if(myid == 0) then     ! -------=== { process id =0
         call system_clock(itemq)
        write(6,'(2e15.8,2x,a6)') float(itemq-itemp)/float(irate)/60.,float(itemq-itemp)/float(irate)/3600.  ,' min,h'
                    endif    ! -------=== } process id =0

       call MPI_FINALIZE(ierr)
       STOP
       END PROGRAM mphase
 !234567890

       subroutine equal_load(n1,n2,nproc,myid,istart,ifinish)
       implicit none
       integer nproc,myid,istart,ifinish,n1,n2
       integer iw1,iw2
       iw1=(n2-n1+1)/nproc ; iw2=mod(n2-n1+1,nproc)
       istart=myid*iw1+n1+min(myid,iw2)
       ifinish=istart+iw1-1 ; if(iw2 > myid) ifinish=ifinish+1
!      print*, n1,n2,myid,nproc,istart,ifinish
       return
       end

 
굉장히 단순한 루틴이지만, 굉장히 유용한 루틴이다. 이 루틴에서 입력은 n1, n2, nproc, myid이다. 모두 정수형이다. n1, n2는 정량화된 일의 순번을 이야기한다. n1에서 n2까지, 즉 n2-n1+1개의 일의 단위가 있다는 것을 의미한다. nproc는 현재 사용하고 있는 노드의 숫자를 나타낸다. myid는 현재 작업하고 있는 노드의 번호이다. 0, 1, 2, 3, ..., nproc-1 중 하나의 값이 될 것이다. 출력은 istart, ifinish이다. 이 두 값은 노드마다 다른 값이 출력으로 배정될 것이다. 즉, myid에 따라서 다른 값들이 출력으로 나올 것이다. 물론, nproc는 모든 노드에서 같은 값을 가진다.
---------------------------------------------------------------------------------------------------------------------

!234567890
program equal_load_sum
! Parallel sum of the values 1..nn.
! Each rank fills and sums only its own slice istart..ifinish (obtained from
! equal_load); the partial sums are combined on rank 0 with MPI_REDUCE and
! printed there.
implicit none
include 'mpif.h'
integer, parameter :: dp = kind(1.0d0)   ! matches MPI_DOUBLE_PRECISION
integer :: nn
real(dp), allocatable :: aa(:)
integer :: nproc, myid, ierr, istart, ifinish
integer :: i
real(dp) :: xsum, xxsum

nn = 10000

call MPI_INIT(ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)

call equal_load(1, nn, nproc, myid, istart, ifinish)

! Not only the index range but also the memory is split per rank:
! each rank allocates just its own slice.
allocate(aa(istart:ifinish))

do i = istart, ifinish
   ! convert directly to double precision; float(i) would pass through
   ! default real and lose digits for large i
   aa(i) = real(i, kind=dp)
end do

! partial sum of the local slice (0 for an empty slice)
xsum = sum(aa)

! The receive buffer of MPI_REDUCE is significant on the root only, so the
! result is kept in xxsum and read on rank 0 alone.
xxsum = 0.0_dp
call MPI_REDUCE(xsum, xxsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, ierr)

if (myid == 0) then
   write(6,*) xxsum,' xsum'
end if

deallocate(aa)
call MPI_FINALIZE(ierr)
end program equal_load_sum

---------------------------------------------------------------------------------------------------------------------
   
!234567890
       program equal_load_sum
       IMPLICIT NONE
       include 'mpif.h'
       integer nn,n1,n2
       real*8, allocatable :: aall(:)
       real*8, allocatable :: bbll(:)
       integer nproc,myid,ierr
       integer i,iroot,kount,istart,ifinish

       nn=11

       n1=-1
       n2=nn

       call MPI_INIT(ierr)
       call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)
       call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)

       call equal_load(n1,n2,nproc,myid,istart,ifinish)

       if(myid == 0)then
       allocate(aall(n1:n2))
       aall=0.0d0
                    endif

       allocate(bbll(n1:n2))
       bbll=0.0d0


       do i=istart,ifinish
       bbll(i)=float(i)
       enddo

       iroot=0 ; kount=n2-n1+1
       call MPI_REDUCE(bbll,aall,kount,MPI_DOUBLE_PRECISION,MPI_SUM,iroot,MPI_COMM_WORLD,ierr)


       if(myid == 0)then
       write(6,*) (aall(i),i=n1,n2)
                    endif

       if(allocated(aall)) deallocate(aall)
       if(allocated(bbll)) deallocate(bbll)

       call MPI_FINALIZE(ierr)
       end program equal_load_sum

       subroutine equal_load(n1,n2,nproc,myid,istart,ifinish)
       implicit none
       integer nproc,myid,istart,ifinish,n1,n2
       integer iw1,iw2
       iw1=(n2-n1+1)/nproc ; iw2=mod(n2-n1+1,nproc)
       istart=myid*iw1+n1+min(myid,iw2)
       ifinish=istart+iw1-1 ; if(iw2 > myid) ifinish=ifinish+1
!      print*, n1,n2,myid,nproc,istart,ifinish
       return
       end

---------------------------------revision------------------------------
!      Serial check of equal_load: evaluates it for myid = 0 .. nproc+10,
!      i.e. also for rank numbers beyond nproc-1, and prints either the
!      assigned range with its length or a 'not a myid' line when the
!      returned range is empty.
       implicit none
       integer :: n1, n2, nproc, myid
       integer :: istart, ifinish
       integer :: id

       n1 = -21
       n2 = 9
       nproc = 10

       write(6,*) nproc,' nproc'

       do myid = 0, nproc+10
          call equal_load(n1,n2,nproc,myid,istart,ifinish)
          if (istart <= ifinish) then
             write(6,'(i8,9x,2i8,5x,i6)') myid,istart,ifinish, (ifinish-istart+1)
          else
             write(6,*) 'not a myid ',istart,ifinish
          end if
       end do

       stop
       end
       subroutine equal_load(n1,n2,nproc,myid,istart,ifinish)
       implicit none
       integer nproc,myid,istart,ifinish,n1,n2
       integer iw1,iw2
       iw1=(n2-n1+1)/nproc ; iw2=mod(n2-n1+1,nproc)
       istart=myid*iw1+n1+min(myid,iw2)
       ifinish=istart+iw1-1 ; if(iw2 > myid) ifinish=ifinish+1
!      print*, n1,n2,myid,nproc,istart,ifinish
       if(n2 < istart) ifinish=istart-1
       return
       end

            10  nproc
       0              -21     -18          4
       1              -17     -15          3
       2              -14     -12          3
       3              -11      -9          3
       4               -8      -6          3
       5               -5      -3          3
       6               -2       0          3
       7                1       3          3
       8                4       6          3
       9                7       9          3
 not a myid            10            9
 not a myid            13           12
 not a myid            16           15
 not a myid            19           18
 not a myid            22           21
 not a myid            25           24
 not a myid            28           27
 not a myid            31           30
 not a myid            34           33
 not a myid            37           36
 not a myid            40           39
FORTRAN STOP

--------------------------------------------------------------------------------------------------------------------
!234567890
!      Demonstration: assigning to an array section whose lower bound
!      exceeds its upper bound (istart=5, ifinish=4) touches no element,
!      so the array printed afterwards still holds its initial values.
       implicit none
       real*8, allocatable :: arr(:)
       integer :: istart, ifinish

       allocate(arr(10))
       arr(:) = 1.0d0

       istart  = 5
       ifinish = 4

!      zero-size section: nothing is overwritten
       arr(istart:ifinish) = 10.0d0

       write(6,*) arr

       deallocate(arr)
       stop
       end

 a.out
    1.000000000000000         1.000000000000000         1.000000000000000     
    1.000000000000000         1.000000000000000         1.000000000000000     
    1.000000000000000         1.000000000000000         1.000000000000000     
    1.000000000000000    
FORTRAN STOP


!234567890
!      Demonstration: a do loop whose start index is larger than its end
!      index (istart=6, ifinish=3) runs zero times, so the array printed
!      afterwards still holds its initial values.
       implicit none
       real*8, allocatable :: arr(:)
       integer :: istart, ifinish
       integer :: idx

       allocate(arr(10))
       arr(:) = -1.0d0

       istart  = 6
       ifinish = 3

!      zero-trip loop: the body is never executed
       do idx = istart, ifinish
          arr(idx) = 11.0d0
       end do

       write(6,*) arr

       deallocate(arr)
       stop
       end

 a.out
   -1.000000000000000        -1.000000000000000        -1.000000000000000     
   -1.000000000000000        -1.000000000000000        -1.000000000000000     
   -1.000000000000000        -1.000000000000000        -1.000000000000000     
   -1.000000000000000    
FORTRAN STOP



-----------------------


do i=n1,n2
......
end do
-------------> 아래와 같이 serial에서 parallel로 바뀝니다.
do i=n1+irank, n2, nprocs
...........
end do

cyclic distribution

-----------------------



do i=n1,n2
..........
end do
-------------> 아래와 같이 serial에서 parallel로 바뀝니다.

do ii=n1+irank*iblock, n2, nprocs*iblock
do i=ii,min(ii+iblock-1,n2)
............
end do
end do

 block-cyclic distribution

-----------------------


do i=n1,n2
......
end do
-------------> 아래와 같이 serial에서 parallel로 바뀝니다.

call para_range(n1,n2,nprocs,irank,ista,iend)
do i=ista,iend
...........
end do
여기에서 ista, iend는 노드별(irank)로 다른 값이 할당된다.

subroutine para_range(n1,n2,nprocs,irank,ista,iend)
! Block distribution: returns in ista..iend the contiguous part of the
! index range n1..n2 that belongs to rank irank out of nprocs ranks.
! The first mod(n2-n1+1,nprocs) ranks receive one item more than the others.
implicit none
integer n1,n2,nprocs,irank,ista,iend
integer ntotal,nbase,nextra

ntotal = n2 - n1 + 1
nbase  = ntotal/nprocs
nextra = mod(ntotal,nprocs)

ista = n1 + irank*nbase + min(irank,nextra)
! merge() contributes the one extra item for ranks below nextra
iend = ista + nbase - 1 + merge(1, 0, irank < nextra)
end
 
block distribution

 

-----------------------


 


cf.
http://incredible.egloos.com/3880690
http://incredible.egloos.com/2950371
http://incredible.egloos.com/3755171


핑백

덧글

댓글 입력 영역

최근 포토로그



MathJax