
Various fixes for MPI simulations (#60)
* Fixed MPI initialisation
* Fixed MPI blocks in output modules
* Fixed MPI input
* Added FID_IN to replace magic number 15
* Fixed mesh-generation in Python wrapper, updated docs
* Added nxglob, nwglob for serial execution
martijnende authored Aug 12, 2020
1 parent 215b741 commit 3171b9f
Showing 8 changed files with 95 additions and 74 deletions.
1 change: 1 addition & 0 deletions docs/running_simulations.md
@@ -233,6 +233,7 @@ QDYN offers various optional simulation features. Set the following parameters t
| `NX` | Number of fault elements along strike if `MESHDIM=2`. It must be a power of 2 if FFT is used along strike (`FFT_TYPE=1` in `constants.f90`) | `1` |
| `NW` | Number of fault elements along dip if `MESHDIM=2`. It must be a power of 2 if FFT is used along dip (`FFT_TYPE=2` in `constants.f90`) | `-1` |
| `NPROCS` | Number of processors if running in parallel and with MPI (only implemented for `MESHDIM=2` and `FFT_TYPE=1`) | `1` |
+ | `MPI_PATH` | Full path to the MPI executable (find it with `which mpirun`). On WSL, this is typically `/usr/bin/mpirun` | `/usr/local/bin/mpirun` |
| `DW` | Along-dip length (m) of each element, from deep to shallow | `1` |
| `TMAX` | Threshold for stopping criterion:<br />Final simulation time (s) when `NSTOP=0`<br />Slip velocity threshold (m/s) when `NSTOP=3` | |
| `NSTOP` | Stopping criterion:<br />`0` = Stop at `t = TMAX`<br />`1` = Stop at end of slip localization phase (**deprecated**)<br />`2` = Stop soon after first slip rate peak<br />`3` = Stop when slip velocity exceeds `TMAX` (m/s) | `0` |
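As a usage sketch, the parallel-run parameters above can be set through the Python wrapper (the `set_dict` keys `NPROC` and `MPI_PATH` appear in `src/pyqdyn.py` below; the import name and the `MESHDIM` key are assumptions for illustration, not confirmed by this diff):

```python
# Hypothetical configuration sketch; not part of this commit.
from pyqdyn import qdyn  # assumed import name

p = qdyn()
p.set_dict["MESHDIM"] = 2                   # MPI is only implemented for MESHDIM = 2 (assumed key)
p.set_dict["NX"] = 256                      # power of 2, required for FFT_TYPE = 1
p.set_dict["NW"] = 64
p.set_dict["NPROC"] = 4                     # number of MPI processes
p.set_dict["MPI_PATH"] = "/usr/bin/mpirun"  # e.g. on WSL; default is /usr/local/bin/mpirun
```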
3 changes: 3 additions & 0 deletions src/constants.f90
@@ -52,6 +52,9 @@ module constants
! 2 : Runge-Kutta-Fehlberg
integer :: SOLVER_TYPE = 0

+ ! Input unit
+ integer, parameter :: FID_IN = 15

! Output units
integer, parameter :: FID_SCREEN = 6
integer, parameter :: FID_OT = 18
71 changes: 36 additions & 35 deletions src/input.f90
@@ -18,39 +18,39 @@ subroutine read_main(pb)
use mesh, only : read_mesh_parameters, read_mesh_nodes, mesh_get_size
use output, only : ot_read_stations
use my_mpi, only : my_mpi_tag, is_MPI_parallel
- use constants, only : FAULT_TYPE, SOLVER_TYPE
+ use constants, only : FAULT_TYPE, SOLVER_TYPE, FID_IN, FID_SCREEN

type(problem_type), intent(inout) :: pb

double precision, dimension(:), allocatable :: read_buf
integer :: i, j, n, N_cols, N_creep

- write(6,*) 'Start reading input ...'
+ write(FID_SCREEN, *) 'Start reading input ...'

- open(unit=15, file='qdyn'//trim(my_mpi_tag())//'.in', action='read')
+ open(unit=FID_IN, file='qdyn'//trim(my_mpi_tag())//'.in', action='read')

- call read_mesh_parameters(15,pb%mesh)
- write(6,*) ' Mesh input complete'
+ call read_mesh_parameters(FID_IN, pb%mesh)
+ write(FID_SCREEN, *) ' Mesh input complete'

- if (pb%mesh%dim==1) read(15,*) pb%finite
- read(15,*) pb%itheta_law
- read(15,*) pb%i_rns_law
- read(15,*) pb%i_sigma_cpl
+ if (pb%mesh%dim==1) read(FID_IN, *) pb%finite
+ read(FID_IN, *) pb%itheta_law
+ read(FID_IN, *) pb%i_rns_law
+ read(FID_IN, *) pb%i_sigma_cpl
! SEISMIC: various simulation features can be turned on (1) or off (0)
if (pb%i_rns_law == 3) then
- read(15,*) pb%cns_params%N_creep
+ read(FID_IN, *) pb%cns_params%N_creep
endif
- read(15,*) pb%features%stress_coupling, pb%features%tp, pb%features%localisation
- read(15,*) pb%ot%ntout, pb%ox%ntout, pb%ot%ic, pb%ox%nxout, pb%ox%nwout, &
+ read(FID_IN, *) pb%features%stress_coupling, pb%features%tp, pb%features%localisation
+ read(FID_IN, *) pb%ot%ntout, pb%ox%ntout, pb%ot%ic, pb%ox%nxout, pb%ox%nwout, &
pb%ox%nxout_dyn, pb%ox%nwout_dyn, pb%ox%i_ox_seq, pb%ox%i_ox_dyn
- read(15,*) pb%beta, pb%smu, pb%lam, pb%D, pb%H, pb%ot%v_th
- read(15,*) pb%Tper, pb%Aper
- read(15,*) pb%dt_try, pb%dt_max,pb%tmax, pb%acc
- read(15,*) pb%NSTOP
- read(15,*) pb%DYN_FLAG,pb%DYN_SKIP
- read(15,*) pb%DYN_M,pb%DYN_th_on,pb%DYN_th_off
- read(15,*) FAULT_TYPE, SOLVER_TYPE
- write(6,*) ' Flags input complete'
+ read(FID_IN, *) pb%beta, pb%smu, pb%lam, pb%D, pb%H, pb%ot%v_th
+ read(FID_IN, *) pb%Tper, pb%Aper
+ read(FID_IN, *) pb%dt_try, pb%dt_max,pb%tmax, pb%acc
+ read(FID_IN, *) pb%NSTOP
+ read(FID_IN, *) pb%DYN_FLAG,pb%DYN_SKIP
+ read(FID_IN, *) pb%DYN_M,pb%DYN_th_on,pb%DYN_th_off
+ read(FID_IN, *) FAULT_TYPE, SOLVER_TYPE
+ write(FID_SCREEN, *) ' Flags input complete'

n = mesh_get_size(pb%mesh) ! number of nodes in this processor
allocate ( pb%tau(n), pb%sigma(n), pb%v(n), pb%theta(n), &
@@ -105,7 +105,7 @@ subroutine read_main(pb)
do i=1,n

! Read data into buffer
- read(15,*) read_buf(:)
+ read(FID_IN, *) read_buf(:)

! General fault parameters
pb%sigma(i) = read_buf(1)
@@ -152,10 +152,10 @@ subroutine read_main(pb)
allocate ( pb%a(n), pb%b(n), pb%v1(n), &
pb%v2(n), pb%mu_star(n))
do i=1,n
- read(15,*)pb%sigma(i), pb%v(i), pb%theta(i), &
- pb%a(i), pb%b(i), pb%dc(i), pb%v1(i), &
- pb%v2(i), pb%mu_star(i), pb%v_star(i), &
- pb%ot%iot(i), pb%ot%iasp(i), pb%coh(i), pb%v_pl(i)
+ read(FID_IN, *) pb%sigma(i), pb%v(i), pb%theta(i), &
+ pb%a(i), pb%b(i), pb%dc(i), pb%v1(i), &
+ pb%v2(i), pb%mu_star(i), pb%v_star(i), &
+ pb%ot%iot(i), pb%ot%iasp(i), pb%coh(i), pb%v_pl(i)
end do
endif

@@ -166,7 +166,8 @@ subroutine read_main(pb)
if (pb%features%localisation == 1) then
! Raise an error if the CNS model is not selected
if (pb%i_rns_law /= 3) then
- write(6,*) "Localisation of shear strain is compatible only with the CNS model (i_rns_law = 3)"
+ write(FID_SCREEN, *) &
+ "Localisation of shear strain is compatible only with the CNS model (i_rns_law = 3)"
stop
endif

@@ -182,7 +183,7 @@ subroutine read_main(pb)
do i=1,n

! Read data into buffer
- read(15,*) read_buf(:)
+ read(FID_IN, *) read_buf(:)

! Degree of localisation (0 <= lambda <= 1)
pb%cns_params%lambda(i) = read_buf(1)
@@ -232,21 +233,21 @@ subroutine read_main(pb)
pb%tp%k_t(n), pb%tp%k_p(n), pb%tp%l(n), &
pb%tp%P_a(n), pb%tp%T_a(n), pb%tp%dilat_factor(n) )
do i=1,n
- read(15,*)pb%tp%rhoc(i), pb%tp%beta(i), pb%tp%eta(i), pb%tp%w(i), &
- pb%tp%k_t(i), pb%tp%k_p(i), pb%tp%l(i), &
- pb%tp%P_a(i), pb%tp%T_a(i), pb%tp%dilat_factor(i)
+ read(FID_IN, *) pb%tp%rhoc(i), pb%tp%beta(i), pb%tp%eta(i), pb%tp%w(i), &
+ pb%tp%k_t(i), pb%tp%k_p(i), pb%tp%l(i), &
+ pb%tp%P_a(i), pb%tp%T_a(i), pb%tp%dilat_factor(i)
end do
endif
! End reading TP model parameters
! </SEISMIC>

- ! if (pb%mesh%dim == 2) then
- ! call read_mesh_nodes(15,pb%mesh)
+ if (pb%mesh%dim == 2 .and. is_MPI_parallel()) then
+ call read_mesh_nodes(FID_IN, pb%mesh)
! call ot_read_stations(pb%ot)
- ! endif
+ endif

- close(15)
- write(6,*) 'Input complete'
+ close(FID_IN)
+ write(FID_SCREEN, *) 'Input complete'

end subroutine read_main

15 changes: 11 additions & 4 deletions src/mesh.f90
@@ -5,7 +5,7 @@ module mesh

type mesh_type
integer :: dim = 0 ! dim = 1, 2, 3 (xD)
- integer :: nx, nw, nn, nnglob, nwglob ! along-strike, along-dip, total grid number
+ integer :: nx, nw, nn, nnglob, nwglob, nxglob ! along-strike, along-dip, total grid number
double precision :: Lfault, W, Z_CORNER ! fault length, width, lower-left corner z (follow Okada's convention)
double precision, allocatable :: DIP_W(:) ! along-dip grid size and dip (adjustable), nw count
! Local mesh coordinates
@@ -119,6 +119,8 @@ subroutine init_mesh_0D(m)
allocate(m%dx(1), m%dw(1))
m%nx = 1
m%nw = 1
+ m%nxglob = m%nx
+ m%nwglob = m%nw
m%dx = m%Lfault
m%dw = 1d0
m%x = 0d0
@@ -154,6 +156,8 @@ subroutine init_mesh_1D(m)
m%xglob => m%x
m%yglob => m%y
m%zglob => m%z
+ m%nxglob = m%nx
+ m%nwglob = m%nw

end subroutine init_mesh_1D

@@ -168,6 +172,7 @@ subroutine init_mesh_2D(m)

use constants, only : PI
use my_mpi, only: is_MPI_parallel, my_mpi_NPROCS, gather_alli, gather_allvdouble, my_mpi_tag
+ use my_mpi, only: is_MPI_master

type(mesh_type), intent(inout) :: m

@@ -205,7 +210,6 @@ subroutine init_mesh_2D(m)
m%x(j0+1:j0+m%nx) = m%x(1:m%nx)
m%y(j0+1:j0+m%nx) = m%y(j0) + 0.5d0*m%dw(i-1)*cd0 + 0.5d0*m%dw(i)*cd
m%z(j0+1:j0+m%nx) = m%z(j0) + 0.5d0*m%dw(i-1)*sd0 + 0.5d0*m%dw(i)*sd
- ! write(66,*) m%z(j0+1:j0+m%nx) !JPA Who is using this output? Shall we remove it?
m%dip(j0+1:j0+m%nx) = m%DIP_W(i)
end do

@@ -215,6 +219,7 @@ subroutine init_mesh_2D(m)
m%dipglob => m%dip
m%dwglob => m%dw
m%nwglob = m%nw
+ m%nxglob = m%nx

else
! If MPI parallel, the mesh for each processor has been already read
@@ -224,16 +229,17 @@ subroutine init_mesh_2D(m)
NPROCS = my_mpi_NPROCS()
!PG, for debugging:
nwLocal = m%nw
- write(6,*) 'iproc,nwLocal:',my_mpi_tag(),nwLocal
+ ! write(6,*) 'iproc,nwLocal:',my_mpi_tag(),nwLocal
allocate(nwLocal_perproc(0:NPROCS-1))
call gather_alli(nwLocal,nwLocal_perproc)
allocate(nwoffset_glob_perproc(0:NPROCS-1))
do iproc=0,NPROCS-1
nwoffset_glob_perproc(iproc)=sum(nwLocal_perproc(0:iproc))-nwLocal_perproc(iproc)
enddo
nwGlobal=sum(nwLocal_perproc)
+ m%nxglob = m%nx
m%nwglob = nwGlobal
- write(6,*) 'iproc,nwGlobal:',my_mpi_tag(),nwGlobal
+ ! write(6,*) 'iproc,nwGlobal:',my_mpi_tag(),nwGlobal

nnLocal= m%nx*nwLocal
allocate(nnLocal_perproc(0:NPROCS-1))
@@ -246,6 +252,7 @@ subroutine init_mesh_2D(m)
! global mesh for computing the kernel and for outputs
allocate(m%xglob(nnGlobal),m%yglob(nnGlobal),&
m%zglob(nnGlobal),m%dwglob(nwGlobal),m%dipglob(nnGlobal))

call gather_allvdouble(m%x, nnLocal,m%xglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
call gather_allvdouble(m%y, nnLocal,m%yglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
call gather_allvdouble(m%z, nnLocal,m%zglob,nnLocal_perproc,nnoffset_glob_perproc,nnGlobal)
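The offset bookkeeping above, `nwoffset_glob_perproc(iproc) = sum(nwLocal_perproc(0:iproc)) - nwLocal_perproc(iproc)`, is an exclusive prefix sum over the per-process block sizes; the same offsets feed the `gather_allvdouble` calls. A standalone sketch with made-up sizes:

```python
# Exclusive prefix sum of per-process block sizes -> gather offsets.
# Illustration only (made-up sizes); not QDYN code.
import numpy as np

nw_local_perproc = np.array([16, 16, 8, 8])  # local nw on each of 4 procs
nw_offset_perproc = np.cumsum(nw_local_perproc) - nw_local_perproc
nw_global = int(nw_local_perproc.sum())

print(nw_offset_perproc)  # [ 0 16 32 40]
print(nw_global)          # 48
```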
53 changes: 25 additions & 28 deletions src/output.f90
@@ -256,20 +256,17 @@ subroutine ot_init(pb)
use problem_class
use constants, only: BIN_OUTPUT, FID_IASP, FID_OT, FID_VMAX, &
FILE_IASP, FILE_OT, FILE_VMAX
- use my_mpi, only : is_MPI_parallel, is_MPI_master, gather_allvi_root
- use mesh, only : mesh_get_size, nnLocal_perproc, nnoffset_glob_perproc
+ use my_mpi, only: is_MPI_parallel, is_MPI_master, gather_allvi_root
+ use mesh, only: mesh_get_size, nnLocal_perproc, nnoffset_glob_perproc

type (problem_type), intent(inout) :: pb
integer :: i, id, iasp_count, iot_count, n, niasp, niot, nnGlobal
integer, dimension(pb%mesh%nnglob) :: iasp_buf, iot_buf
integer, allocatable, dimension(:) :: iasp_list, iot_list
- logical :: call_gather

character(len=100) :: tmp
character(len=100), allocatable :: iot_name

- call_gather = is_MPI_parallel() .and. is_MPI_master()
-
! Number of mesh elements
n = mesh_get_size(pb%mesh)
nnGlobal = pb%mesh%nnglob
@@ -305,15 +302,15 @@ subroutine ot_init(pb)
pb%ot%fmt_vmax(2) = "(i15)"

! If parallel:
- if (is_MPI_parallel() .and. is_MPI_master()) then
+ if (is_MPI_parallel()) then
! Combine local OT indices
call gather_allvi_root( pb%ot%iot, n, iot_buf, nnLocal_perproc, &
nnoffset_glob_perproc, nnGlobal)
! Combine local IASP indices
call gather_allvi_root( pb%ot%iasp, n, iasp_buf, nnLocal_perproc, &
nnoffset_glob_perproc, nnGlobal)
! If serial:
- elseif (.not. is_MPI_parallel()) then
+ else
! Point global to local indices
iot_buf = pb%ot%iot
iasp_buf = pb%ot%iasp
@@ -498,7 +495,6 @@ subroutine ot_write(pb)

type (problem_type), intent(inout) :: pb
integer :: i, id, iot, istart, ivmax, k, n, niasp, niot
- logical :: call_gather

character(len=100) :: tmp
character(len=100), allocatable :: iot_name
@@ -508,22 +504,20 @@ subroutine ot_write(pb)
! Size of the container
k = pb%nobj

- ! Check if an MPI sync is needed
- call_gather = (is_MPI_parallel() .and. is_MPI_master())
- ! If yes: do sync
- if (call_gather) then
+ ! If parallel: do sync
+ if (is_MPI_parallel()) then
call pb_global(pb)
endif

- ! Get the maximum slip rate
- call get_ivmax(pb)
- ivmax = pb%ivmax
- ! Calculate potency (rate)
- call calc_potency(pb)

! If this is master proc: write output
if (is_MPI_master()) then

+ ! Get the maximum slip rate
+ call get_ivmax(pb)
+ ivmax = pb%ivmax
+ ! Calculate potency (rate)
+ call calc_potency(pb)

! Number of OT locations
niot = size(pb%ot%iot)

@@ -632,19 +626,22 @@ subroutine ox_write(pb)
skip = (pb%ox%dyn_count <= pb%dyn_skip)

! Should this proc write ox, ox_dyn, or QSB output?
- write_ox = (mod(pb%it, pb%ox%ntout) == 0 .or. last_call) .and. MPI_master
- write_ox_dyn = ((pb%ox%i_ox_dyn == 1) .and. dynamic) .and. &
- MPI_master .and. .not. skip
- write_ox_seq = write_ox .and. (pb%ox%i_ox_seq == 1) .and. MPI_master
- write_QSB = ((pb%DYN_FLAG == 1) .and. dynamic) .and. &
- MPI_master .and. .not. skip
+ write_ox = (mod(pb%it, pb%ox%ntout) == 0 .or. last_call)
+ write_ox_dyn = ((pb%ox%i_ox_dyn == 1) .and. dynamic) .and. .not. skip
+ write_ox_seq = write_ox .and. (pb%ox%i_ox_seq == 1)
+ write_QSB = ((pb%DYN_FLAG == 1) .and. dynamic) .and. .not. skip

! Call an MPI gather when:
! 1. Output is requested (either regular ox or other flavour)
! 2. AND parallel execution
- ! 3. AND this proc is MPI master
call_gather = (write_ox .or. write_ox_dyn .or. write_QSB) .and. &
- is_MPI_parallel() .and. MPI_master
+ is_MPI_parallel()

+ ! Update write output only if this proc is MPI master
+ write_ox = write_ox .and. MPI_master
+ write_ox_dyn = write_ox_dyn .and. MPI_master
+ write_ox_seq = write_ox_seq .and. MPI_master
+ write_QSB = write_QSB .and. MPI_master

! Does the output unit need to be closed?
close_unit = last_call
@@ -880,8 +877,8 @@ subroutine write_ox_lines(unit, fmt, objects, nxout, nwout, pb)
k = pb%nobj

! Number of nodes to loop over (either local or global)
- nx = pb%mesh%nx
- nw = pb%mesh%nw
+ nx = pb%mesh%nxglob
+ nw = pb%mesh%nwglob

! Write header
write(unit,'(a)') trim(pb%ox%header)
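The pattern behind these changes: MPI gathers are collective operations, so every rank must call them; only the final write is restricted to the master rank. A minimal sketch of that pattern using mpi4py (illustration only; QDYN uses its own wrappers from `my_mpi`):

```python
# Run with e.g.: mpirun -np 4 python gather_demo.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
local = np.full(3, comm.Get_rank(), dtype="d")  # per-rank data

gathered = comm.gather(local, root=0)  # collective: all ranks participate

if comm.Get_rank() == 0:  # only the master writes output
    print(np.concatenate(gathered))
```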
3 changes: 3 additions & 0 deletions src/parallel.f90
@@ -27,6 +27,9 @@ subroutine init_mpi()
integer :: ier

call MPI_INIT(ier)
+ call MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, ier)
+ call MPI_COMM_RANK(MPI_COMM_WORLD, MY_RANK, ier)

if (ier /= 0 ) stop 'Error initializing MPI'

if (NPROCS<2) call MPI_FINALIZE(ier)
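The two added calls populate `NPROCS` and `MY_RANK` immediately after `MPI_INIT`. The equivalent queries in mpi4py, shown purely to illustrate the calls:

```python
from mpi4py import MPI  # MPI_Init runs on import

comm = MPI.COMM_WORLD
nprocs = comm.Get_size()   # cf. MPI_COMM_SIZE(MPI_COMM_WORLD, NPROCS, ier)
my_rank = comm.Get_rank()  # cf. MPI_COMM_RANK(MPI_COMM_WORLD, MY_RANK, ier)
print(f"rank {my_rank} of {nprocs}")
```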
18 changes: 12 additions & 6 deletions src/pyqdyn.py
@@ -196,7 +196,7 @@ def __init__(self):
set_dict["DYN_M"] = 1e18 # Target seismic moment of dynamic event

set_dict["NPROC"] = 1 # Number of processors, default 1 = serial (no MPI)
- set_dict["MPI_path"] = "/usr/local/bin/mpirun" # Path to MPI executable
+ set_dict["MPI_PATH"] = "/usr/local/bin/mpirun" # Path to MPI executable

self.set_dict = set_dict

@@ -303,12 +303,18 @@ def render_mesh(self):
mesh_dict["Y"] = np.ones(N)*settings["W"]

if dim == 2:

+ print("Warning: 2D faults currently require constant dip angle")
- theta = settings["DIP_W"]*np.pi/180.0
- mesh_dict["DW"] = np.ones(settings["NW"])*settings["DW"]
- mesh_dict["DIP_W"] = np.ones(N)*settings["DIP_W"]
- mesh_dict["X"] = (np.arange(N) % settings["NX"] + 0.5)*dx
- mesh_dict["Y"] = (np.floor(np.arange(N)/settings["NX"]) + 0.5)*settings["DW"]*np.cos(theta)

+ dw = settings["W"] / settings["NW"]
+ theta = settings["DIP_W"] * np.pi / 180.0
+ mesh_dict["DIP_W"] = np.ones(N) * settings["DIP_W"]
+ mesh_dict["DW"] = np.ones(N) * dw
+ x = (np.arange(settings["NX"]) + 0.5) * dx
+ y = (np.arange(settings["NW"]) + 0.5) * dw * np.cos(theta)
+ X, Y = np.meshgrid(x, y, indexing="xy")
+ mesh_dict["X"] = X.ravel()
+ mesh_dict["Y"] = Y.ravel()
mesh_dict["Z"] = settings["Z_CORNER"] + mesh_dict["Y"]*np.tan(theta)

self.mesh_dict.update(mesh_dict)
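The rewritten block builds the along-strike and along-dip coordinates separately and combines them with `np.meshgrid`, so the flattened node ordering is along-strike-fastest. A standalone check with made-up settings:

```python
import numpy as np

NX, NW, L, W, dip_deg = 4, 3, 40.0, 30.0, 30.0
dx, dw = L / NX, W / NW
theta = np.deg2rad(dip_deg)

x = (np.arange(NX) + 0.5) * dx                  # along-strike cell centres
y = (np.arange(NW) + 0.5) * dw * np.cos(theta)  # horizontal along-dip projection
X, Y = np.meshgrid(x, y, indexing="xy")         # shapes (NW, NX)

# Flattened row-major: strike index varies fastest, one row per dip level
assert X.ravel()[:NX].tolist() == x.tolist()
assert np.allclose(Y.ravel()[:NX], y[0])
```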