
Various fixes for MPI simulations (#60)
* Fixed MPI initialisation
* Fixed MPI blocks in output modules
* Fixed MPI input
* Added FID_IN to replace magic number 15
* Fixed mesh-generation in Python wrapper, updated docs
* Added nxglob, nwglob for serial execution
martijnende authored Aug 12, 2020
1 parent 215b741 commit 3171b9f
Showing 8 changed files with 95 additions and 74 deletions.
1 change: 1 addition & 0 deletions docs/running_simulations.md
@@ -233,6 +233,7 @@ QDYN offers various optional simulation features. Set the following parameters t
| `NX` | Number of fault elements along strike if `MESHDIM=2`. It must be a power of 2 if FFT is used along strike (`FFT_TYPE=1` in `constants.f90`) | `1` |
| `NW` | Number of fault elements along dip if `MESHDIM=2`. It must be a power of 2 if FFT is used along dip (`FFT_TYPE=2` in `constants.f90`) | `-1` |
| `NPROCS` | Number of processors if running in parallel and with MPI (only implemented for `MESHDIM=2` and `FFT_TYPE=1`) | `1` |
+ | `MPI_PATH` | Full path to the MPI executable (find it with `which mpirun`). On WSL, this is typically `/usr/bin/mpirun` | `/usr/local/bin/mpirun` |
| `DW` | Along-dip length (m) of each element, from deep to shallow | `1` |
| `TMAX` | Threshold for stopping criterion:<br />Final simulation time (s) when `NSTOP=0`<br />Slip velocity threshold (m/s) when `NSTOP=3` | |
| `NSTOP` | Stopping criterion:<br />`0` = Stop at `t = TMAX`<br />`1` = Stop at end of slip localization phase (**deprecated**)<br />`2` = Stop soon after first slip rate peak<br />`3` = Stop when slip velocity exceeds `TMAX` (m/s) | `0` |
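As a usage sketch, the parallel-run parameters above can be set through the Python wrapper (the `set_dict` keys `NPROC` and `MPI_PATH` appear in `src/pyqdyn.py` below; the import name and the `MESHDIM` key are assumptions for illustration, not confirmed by this diff):

```python
# Hypothetical configuration sketch; not part of this commit.
from pyqdyn import qdyn  # assumed import name

p = qdyn()
p.set_dict["MESHDIM"] = 2                   # MPI is only implemented for MESHDIM = 2 (assumed key)
p.set_dict["NX"] = 256                      # power of 2, required for FFT_TYPE = 1
p.set_dict["NW"] = 64
p.set_dict["NPROC"] = 4                     # number of MPI processes
p.set_dict["MPI_PATH"] = "/usr/bin/mpirun"  # e.g. on WSL; default is /usr/local/bin/mpirun
```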
3 changes: 3 additions & 0 deletions src/constants.f90
@@ -52,6 +52,9 @@ module constants
! 2 : Runge-Kutta-Fehlberg
integer :: SOLVER_TYPE = 0

+ ! Input unit
+ integer, parameter :: FID_IN = 15

! Output units
integer, parameter :: FID_SCREEN = 6
integer, parameter :: FID_OT = 18
71 changes: 36 additions & 35 deletions src/input.f90
@@ -18,39 +18,39 @@ subroutine read_main(pb)
use mesh, only : read_mesh_parameters, read_mesh_nodes, mesh_get_size
use output, only : ot_read_stations
use my_mpi, only : my_mpi_tag, is_MPI_parallel
- use constants, only : FAULT_TYPE, SOLVER_TYPE
+ use constants, only : FAULT_TYPE, SOLVER_TYPE, FID_IN, FID_SCREEN

type(problem_type), intent(inout) :: pb

double precision, dimension(:), allocatable :: read_buf
integer :: i, j, n, N_cols, N_creep

- write(6,*) 'Start reading input ...'
+ write(FID_SCREEN, *) 'Start reading input ...'

- open(unit=15, file='qdyn'//trim(my_mpi_tag())//'.in', action='read')
+ open(unit=FID_IN, file='qdyn'//trim(my_mpi_tag())//'.in', action='read')

- call read_mesh_parameters(15,pb%mesh)
- write(6,*) ' Mesh input complete'
+ call read_mesh_parameters(FID_IN, pb%mesh)
+ write(FID_SCREEN, *) ' Mesh input complete'

- if (pb%mesh%dim==1) read(15,*) pb%finite
- read(15,*) pb%itheta_law
- read(15,*) pb%i_rns_law
- read(15,*) pb%i_sigma_cpl
+ if (pb%mesh%dim==1) read(FID_IN, *) pb%finite
+ read(FID_IN, *) pb%itheta_law
+ read(FID_IN, *) pb%i_rns_law
+ read(FID_IN, *) pb%i_sigma_cpl
! SEISMIC: various simulation features can be turned on (1) or off (0)
if (pb%i_rns_law == 3) then
- read(15,*) pb%cns_params%N_creep
+ read(FID_IN, *) pb%cns_params%N_creep
endif
- read(15,*) pb%features%stress_coupling, pb%features%tp, pb%features%localisation
- read(15,*) pb%ot%ntout, pb%ox%ntout, pb%ot%ic, pb%ox%nxout, pb%ox%nwout, &
+ read(FID_IN, *) pb%features%stress_coupling, pb%features%tp, pb%features%localisation
+ read(FID_IN, *) pb%ot%ntout, pb%ox%ntout, pb%ot%ic, pb%ox%nxout, pb%ox%nwout, &
pb%ox%nxout_dyn, pb%ox%nwout_dyn, pb%ox%i_ox_seq, pb%ox%i_ox_dyn
- read(15,*) pb%beta, pb%smu, pb%lam, pb%D, pb%H, pb%ot%v_th
- read(15,*) pb%Tper, pb%Aper
- read(15,*) pb%dt_try, pb%dt_max,pb%tmax, pb%acc
- read(15,*) pb%NSTOP
- read(15,*) pb%DYN_FLAG,pb%DYN_SKIP
- read(15,*) pb%DYN_M,pb%DYN_th_on,pb%DYN_th_off
- read(15,*) FAULT_TYPE, SOLVER_TYPE
- write(6,*) ' Flags input complete'
+ read(FID_IN, *) pb%beta, pb%smu, pb%lam, pb%D, pb%H, pb%ot%v_th
+ read(FID_IN, *) pb%Tper, pb%Aper
+ read(FID_IN, *) pb%dt_try, pb%dt_max,pb%tmax, pb%acc
+ read(FID_IN, *) pb%NSTOP
+ read(FID_IN, *) pb%DYN_FLAG,pb%DYN_SKIP
+ read(FID_IN, *) pb%DYN_M,pb%DYN_th_on,pb%DYN_th_off
+ read(FID_IN, *) FAULT_TYPE, SOLVER_TYPE
+ write(FID_SCREEN, *) ' Flags input complete'

n = mesh_get_size(pb%mesh) ! number of nodes in this processor
allocate ( pb%tau(n), pb%sigma(n), pb%v(n), pb%theta(n), &
@@ -105,7 +105,7 @@ subroutine read_main(pb)
do i=1,n

! Read data into buffer
- read(15,*) read_buf(:)
+ read(FID_IN, *) read_buf(:)

! General fault parameters
pb%sigma(i) = read_buf(1)
@@ -152,10 +152,10 @@ subroutine read_main(pb)
allocate ( pb%a(n), pb%b(n), pb%v1(n), &
pb%v2(n), pb%mu_star(n))
do i=1,n
- read(15,*)pb%sigma(i), pb%v(i), pb%theta(i), &
- pb%a(i), pb%b(i), pb%dc(i), pb%v1(i), &
- pb%v2(i), pb%mu_star(i), pb%v_star(i), &
- pb%ot%iot(i), pb%ot%iasp(i), pb%coh(i), pb%v_pl(i)
+ read(FID_IN, *) pb%sigma(i), pb%v(i), pb%theta(i), &
+ pb%a(i), pb%b(i), pb%dc(i), pb%v1(i), &
+ pb%v2(i), pb%mu_star(i), pb%v_star(i), &
+ pb%ot%iot(i), pb%ot%iasp(i), pb%coh(i), pb%v_pl(i)
end do
endif

@@ -166,7 +166,8 @@ subroutine read_main(pb)
if (pb%features%localisation == 1) then
! Raise an error if the CNS model is not selected
if (pb%i_rns_law /= 3) then
- write(6,*) "Localisation of shear strain is compatible only with the CNS model (i_rns_law = 3)"
+ write(FID_SCREEN, *) &
+ "Localisation of shear strain is compatible only with the CNS model (i_rns_law = 3)"
stop
endif

@@ -182,7 +183,7 @@ subroutine read_main(pb)
do i=1,n

! Read data into buffer
- read(15,*) read_buf(:)
+ read(FID_IN, *) read_buf(:)

! Degree of localisation (0 <= lambda <= 1)
pb%cns_params%lambda(i) = read_buf(1)
@@ -232,21 +233,21 @@ subroutine read_main(pb)
pb%tp%k_t(n), pb%tp%k_p(n), pb%tp%l(n), &
pb%tp%P_a(n), pb%tp%T_a(n), pb%tp%dilat_factor(n) )
do i=1,n
- read(15,*)pb%tp%rhoc(i), pb%tp%beta(i), pb%tp%eta(i), pb%tp%w(i), &
- pb%tp%k_t(i), pb%tp%k_p(i), pb%tp%l(i), &
- pb%tp%P_a(i), pb%tp%T_a(i), pb%tp%dilat_factor(i)
+ read(FID_IN, *) pb%tp%rhoc(i), pb%tp%beta(i), pb%tp%eta(i), pb%tp%w(i), &
+ pb%tp%k_t(i), pb%tp%k_p(i), pb%tp%l(i), &
+ pb%tp%P_a(i), pb%tp%T_a(i), pb%tp%dilat_factor(i)
end do
endif
! End reading TP model parameters
! </SEISMIC>

- ! if (pb%mesh%dim == 2) then
- ! call read_mesh_nodes(15,pb%mesh)
+ if (pb%mesh%dim == 2 .and. is_MPI_parallel()) then
+ call read_mesh_nodes(FID_IN, pb%mesh)
! call ot_read_stations(pb%ot)
- ! endif
+ endif

- close(15)
- write(6,*) 'Input complete'
+ close(FID_IN)
+ write(FID_SCREEN, *) 'Input complete'

end subroutine read_main

15 changes: 11 additions & 4 deletions src/mesh.f90
@@ -5,7 +5,7 @@ module mesh

type mesh_type
integer :: dim = 0 ! dim = 1, 2, 3 (xD)
- integer :: nx, nw, nn, nnglob, nwglob ! along-strike, along-dip, total grid number
+ integer :: nx, nw, nn, nnglob, nwglob, nxglob ! along-strike, along-dip, total grid number
double precision :: Lfault, W, Z_CORNER ! fault length, width, lower-left corner z (follow Okada's convention)
double precision, allocatable :: DIP_W(:) ! along-dip grid size and dip (adjustable), nw count
! Local mesh coordinates
@@ -119,6 +119,8 @@ subroutine init_mesh_0D(m)
allocate(m%dx(1), m%dw(1))
m%nx = 1
m%nw = 1
+ m%nxglob = m%nx
+ m%nwglob = m%nw
m%dx = m%Lfault
m%dw = 1d0
m%x = 0d0
@@ -154,6 +156,8 @@ subroutine init_mesh_1D(m)
m%xglob => m%x
m%yglob => m%y
m%zglob => m%z
+ m%nxglob = m%nx
+ m%nwglob = m%nw

end subroutine init_mesh_1D

@@ -168,6 +172,7 @@ subroutine init_mesh_2D(m)

use constants, only : PI
use my_mpi, only: is_MPI_parallel, my_mpi_NPROCS, gather_alli, gather_allvdouble, my_mpi_tag
+ use my_mpi, only: is_MPI_master

type(mesh_type), intent(inout) :: m

@@ -205,7 +210,6 @@ subroutine init_mesh_2D(m)
m%x(j0+1:j0+m%nx) = m%x(1:m%nx)
m%y(j0+1:j0+m%nx) = m%y(j0) + 0.5d0*m%dw(i-1)*cd0 + 0.5d0*m%dw(i)*cd
m%z(j0+1:j0+m%nx) = m%z(j0) + 0.5d0*m%dw(i-1)*sd0 + 0.5d0*m%dw(i)*sd
- ! write(66,*) m%z(j0+1:j0+m%nx) !JPA Who is using this output? Shall we remove it?
m%dip(j0+1:j0+m%nx) = m%DIP_W(i)
end do

@@ -215,6 +219,7 @@ subroutine init_mesh_2D(m)
m%dipglob => m%dip
m%dwglob => m%dw
m%nwglob = m%nw
+ m%nxglob = m%nx

else
! If MPI parallel, the mesh for each processor has been already read
@@ -224,16 +229,17 @@ subroutine init_mesh_2D(m)
NPROCS = my_mpi_NPROCS()
!PG, for debugging:
nwLocal = m%nw
- write(6,*) 'iproc,nwLocal:',my_mpi_tag(),nwLocal
+ ! write(6,*) 'iproc,nwLocal:',my_mpi_tag(),nwLocal
allocate(nwLocal_perproc(0:NPROCS-1))
call gather_alli(nwLocal,nwLocal_perproc)
allocate(nwoffset_glob_perproc(0:NPROCS-1))
do iproc=0,NPROCS-1
nwoffset_glob_perproc(iproc)=sum(nwLocal_perproc(0:iproc))-nwLocal_perproc(iproc)
enddo
nwGlobal=sum(nwLocal_perproc)
+ m%nxglob = m%nx
m%nwglob = nwGlobal
- write(6,*) 'iproc,nwGlobal:',my_mpi_tag(),nwGlobal
+ ! write(6,*) 'iproc,nwGlobal:',my_mpi_tag(),nwGlobal

nnLocal= m%nx*nwLocal
allocate(nnLocal_perproc(0:NPROCS-1))
@@ -246,6 +252,7 @@ subroutine init_mesh_2D(m)
! global mesh for computing the kernel and for outputs
allocate(m%xglob(nnGlobal),m%yglob(nnGlobal),&
m%zglob(nnGlobal),m%dwglob(nwGlobal),m%dipglob(nnGlobal))

call gather_allvdouble(m%x, nnLocal,m%xglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
call gather_allvdouble(m%y, nnLocal,m%yglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
call gather_allvdouble(m%z, nnLocal,m%zglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
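The offset bookkeeping above, `nwoffset_glob_perproc(iproc) = sum(nwLocal_perproc(0:iproc)) - nwLocal_perproc(iproc)`, is an exclusive prefix sum over the per-process block sizes; the same offsets feed the `gather_allvdouble` calls. A standalone sketch with made-up sizes:

```python
# Exclusive prefix sum of per-process block sizes -> gather offsets.
# Illustration only (made-up sizes); not QDYN code.
import numpy as np

nw_local_perproc = np.array([16, 16, 8, 8])  # local nw on each of 4 procs
nw_offset_perproc = np.cumsum(nw_local_perproc) - nw_local_perproc
nw_global = int(nw_local_perproc.sum())

print(nw_offset_perproc)  # [ 0 16 32 40]
print(nw_global)          # 48
```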
53 changes: 25 additions & 28 deletions src/output.f90
@@ -256,20 +256,17 @@ subroutine ot_init(pb)
use problem_class
use constants, only: BIN_OUTPUT, FID_IASP, FID_OT, FID_VMAX, &
FILE_IASP, FILE_OT, FILE_VMAX
- use my_mpi, only : is_MPI_parallel, is_MPI_master, gather_allvi_root
- use mesh, only : mesh_get_size, nnLocal_perproc, nnoffset_glob_perproc
+ use my_mpi, only: is_MPI_parallel, is_MPI_master, gather_allvi_root
+ use mesh, only: mesh_get_size, nnLocal_perproc, nnoffset_glob_perproc

type (problem_type), intent(inout) :: pb
integer :: i, id, iasp_count, iot_count, n, niasp, niot, nnGlobal
integer, dimension(pb%mesh%nnglob) :: iasp_buf, iot_buf
integer, allocatable, dimension(:) :: iasp_list, iot_list
- logical :: call_gather

character(len=100) :: tmp
character(len=100), allocatable :: iot_name

- call_gather = is_MPI_parallel() .and. is_MPI_master()
-
! Number of mesh elements
n = mesh_get_size(pb%mesh)
nnGlobal = pb%mesh%nnglob
@@ -305,15 +302,15 @@ subroutine ot_init(pb)
pb%ot%fmt_vmax(2) = "(i15)"

! If parallel:
- if (is_MPI_parallel() .and. is_MPI_master()) then
+ if (is_MPI_parallel()) then
! Combine local OT indices
call gather_allvi_root( pb%ot%iot, n, iot_buf, nnLocal_perproc, &
nnoffset_glob_perproc, nnGlobal)
! Combine local IASP indices
call gather_allvi_root( pb%ot%iasp, n, iasp_buf, nnLocal_perproc, &
nnoffset_glob_perproc, nnGlobal)
! If serial:
- elseif (.not. is_MPI_parallel()) then
+ else
! Point global to local indices
iot_buf = pb%ot%iot
iasp_buf = pb%ot%iasp
@@ -498,7 +495,6 @@ subroutine ot_write(pb)

type (problem_type), intent(inout) :: pb
integer :: i, id, iot, istart, ivmax, k, n, niasp, niot
- logical :: call_gather

character(len=100) :: tmp
character(len=100), allocatable :: iot_name
@@ -508,22 +504,20 @@ subroutine ot_write(pb)
! Size of the container
k = pb%nobj

- ! Check if an MPI sync is needed
- call_gather = (is_MPI_parallel() .and. is_MPI_master())
- ! If yes: do sync
- if (call_gather) then
+ ! If parallel: do sync
+ if (is_MPI_parallel()) then
call pb_global(pb)
endif

- ! Get the maximum slip rate
- call get_ivmax(pb)
- ivmax = pb%ivmax
- ! Calculate potency (rate)
- call calc_potency(pb)

! If this is master proc: write output
if (is_MPI_master()) then

+ ! Get the maximum slip rate
+ call get_ivmax(pb)
+ ivmax = pb%ivmax
+ ! Calculate potency (rate)
+ call calc_potency(pb)

! Number of OT locations
niot = size(pb%ot%iot)

@@ -632,19 +626,22 @@ subroutine ox_write(pb)
skip = (pb%ox%dyn_count <= pb%dyn_skip)

! Should this proc write ox, ox_dyn, or QSB output?
- write_ox = (mod(pb%it, pb%ox%ntout) == 0 .or. last_call) .and. MPI_master
- write_ox_dyn = ((pb%ox%i_ox_dyn == 1) .and. dynamic) .and. &
- MPI_master .and. .not. skip
- write_ox_seq = write_ox .and. (pb%ox%i_ox_seq == 1) .and. MPI_master
- write_QSB = ((pb%DYN_FLAG == 1) .and. dynamic) .and. &
- MPI_master .and. .not. skip
+ write_ox = (mod(pb%it, pb%ox%ntout) == 0 .or. last_call)
+ write_ox_dyn = ((pb%ox%i_ox_dyn == 1) .and. dynamic) .and. .not. skip
+ write_ox_seq = write_ox .and. (pb%ox%i_ox_seq == 1)
+ write_QSB = ((pb%DYN_FLAG == 1) .and. dynamic) .and. .not. skip

! Call an MPI gather when:
! 1. Output is requested (either regular ox or other flavour)
! 2. AND parallel execution
- ! 3. AND this proc is MPI master
call_gather = (write_ox .or. write_ox_dyn .or. write_QSB) .and. &
- is_MPI_parallel() .and. MPI_master
+ is_MPI_parallel()

+ ! Update write output only if this proc is MPI master
+ write_ox = write_ox .and. MPI_master
+ write_ox_dyn = write_ox_dyn .and. MPI_master
+ write_ox_seq = write_ox_seq .and. MPI_master
+ write_QSB = write_QSB .and. MPI_master

! Does the output unit need to be closed?
close_unit = last_call
@@ -880,8 +877,8 @@ subroutine write_ox_lines(unit, fmt, objects, nxout, nwout, pb)
k = pb%nobj

! Number of nodes to loop over (either local or global)
- nx = pb%mesh%nx
- nw = pb%mesh%nw
+ nx = pb%mesh%nxglob
+ nw = pb%mesh%nwglob

! Write header
write(unit,'(a)') trim(pb%ox%header)
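The pattern behind these changes: MPI gathers are collective operations, so every rank must call them; only the final write is restricted to the master rank. A minimal sketch of that pattern using mpi4py (illustration only; QDYN uses its own wrappers from `my_mpi`):

```python
# Run with e.g.: mpirun -np 4 python gather_demo.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
local = np.full(3, comm.Get_rank(), dtype="d")  # per-rank data

gathered = comm.gather(local, root=0)  # collective: all ranks participate

if comm.Get_rank() == 0:  # only the master writes output
    print(np.concatenate(gathered))
```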
3 changes: 3 additions & 0 deletions src/parallel.f90
@@ -27,6 +27,9 @@ subroutine init_mpi()
integer :: ier

call MPI_INIT(ier)
+ call MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, ier)
+ call MPI_COMM_RANK(MPI_COMM_WORLD, MY_RANK, ier)

if (ier /= 0 ) stop 'Error initializing MPI'

if (NPROCS<2) call MPI_FINALIZE(ier)
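The two added calls populate `NPROCS` and `MY_RANK` immediately after `MPI_INIT`. The equivalent queries in mpi4py, shown purely to illustrate the calls:

```python
from mpi4py import MPI  # MPI_Init runs on import

comm = MPI.COMM_WORLD
nprocs = comm.Get_size()   # cf. MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, ier)
my_rank = comm.Get_rank()  # cf. MPI_COMM_RANK(MPI_COMM_WORLD, MY_RANK, ier)
print(f"rank {my_rank} of {nprocs}")
```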
18 changes: 12 additions & 6 deletions src/pyqdyn.py
@@ -196,7 +196,7 @@ def __init__(self):
set_dict["DYN_M"] = 1e18 # Target seismic moment of dynamic event

set_dict["NPROC"] = 1 # Number of processors, default 1 = serial (no MPI)
- set_dict["MPI_path"] = "/usr/local/bin/mpirun" # Path to MPI executable
+ set_dict["MPI_PATH"] = "/usr/local/bin/mpirun" # Path to MPI executable

self.set_dict = set_dict

@@ -303,12 +303,18 @@ def render_mesh(self):
mesh_dict["Y"] = np.ones(N)*settings["W"]

if dim == 2:

+ print("Warning: 2D faults currently require constant dip angle")
- theta = settings["DIP_W"]*np.pi/180.0
- mesh_dict["DW"] = np.ones(settings["NW"])*settings["DW"]
- mesh_dict["DIP_W"] = np.ones(N)*settings["DIP_W"]
- mesh_dict["X"] = (np.arange(N) % settings["NX"] + 0.5)*dx
- mesh_dict["Y"] = (np.floor(np.arange(N)/settings["NX"]) + 0.5)*settings["DW"]*np.cos(theta)

+ dw = settings["W"] / settings["NW"]
+ theta = settings["DIP_W"] * np.pi / 180.0
+ mesh_dict["DIP_W"] = np.ones(N) * settings["DIP_W"]
+ mesh_dict["DW"] = np.ones(N) * dw
+ x = (np.arange(settings["NX"]) + 0.5) * dx
+ y = (np.arange(settings["NW"]) + 0.5) * dw * np.cos(theta)
+ X, Y = np.meshgrid(x, y, indexing="xy")
+ mesh_dict["X"] = X.ravel()
+ mesh_dict["Y"] = Y.ravel()
mesh_dict["Z"] = settings["Z_CORNER"] + mesh_dict["Y"]*np.tan(theta)

self.mesh_dict.update(mesh_dict)
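The rewritten block builds the along-strike and along-dip coordinates separately and combines them with `np.meshgrid`, so the flattened node ordering is along-strike-fastest. A standalone check with made-up settings:

```python
import numpy as np

NX, NW, L, W, dip_deg = 4, 3, 40.0, 30.0, 30.0
dx, dw = L / NX, W / NW
theta = np.deg2rad(dip_deg)

x = (np.arange(NX) + 0.5) * dx                  # along-strike cell centres
y = (np.arange(NW) + 0.5) * dw * np.cos(theta)  # horizontal along-dip projection
X, Y = np.meshgrid(x, y, indexing="xy")         # shapes (NW, NX)

# Flattened row-major: strike index varies fastest, one row per dip level
assert X.ravel()[:NX].tolist() == x.tolist()
assert np.allclose(Y.ravel()[:NX], y[0])
```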