diff --git a/mpp/include/mpp_do_redistribute.fh b/mpp/include/mpp_do_redistribute.fh index 5acf4bb314..475af25669 100644 --- a/mpp/include/mpp_do_redistribute.fh +++ b/mpp/include/mpp_do_redistribute.fh @@ -17,97 +17,384 @@ !*********************************************************************** !> @addtogroup mpp_domains_mod !> @{ - subroutine MPP_DO_REDISTRIBUTE_3D_( f_in, f_out, d_comm, d_type ) - integer(i8_kind), intent(in) :: f_in(:), f_out(:) - type(DomainCommunicator2D), intent(in) :: d_comm - MPP_TYPE_, intent(in) :: d_type - MPP_TYPE_ :: field_in(d_comm%domain_in%x(1)%domain_data%begin:d_comm%domain_in%x(1)%domain_data%end, & - d_comm%domain_in%y(1)%domain_data%begin:d_comm%domain_in%y(1)%domain_data%end,d_comm%ke) - pointer( ptr_field_in, field_in) - MPP_TYPE_ :: field_out(d_comm%domain_out%x(1)%domain_data%begin:d_comm%domain_out%x(1)%domain_data%end, & - d_comm%domain_out%y(1)%domain_data%begin:d_comm%domain_out%y(1)%domain_data%end,d_comm%ke) - pointer( ptr_field_out, field_out) - type(domain2D), pointer :: domain_in, domain_out - integer :: i, j, k, l, n, l_size - integer :: is, ie, js, je - integer :: ke - integer :: list, pos, msgsize - integer :: to_pe, from_pe - MPP_TYPE_ :: buffer(size(mpp_domains_stack(:))) - pointer( ptr, buffer ) - integer :: buffer_pos, wordlen, errunit - -!fix ke - errunit = stderr() - l_size = size(f_out(:)) ! equal to size(f_in(:)) - ke = d_comm%ke - domain_in =>d_comm%domain_in; domain_out =>d_comm%domain_out - - buffer_pos = 0 - ptr = LOC(mpp_domains_stack) - wordlen = size(TRANSFER(buffer(1),mpp_domains_stack)) - -!pre-post recv - n = d_comm%Rlist_size - do list = 0,n-1 - if( .NOT. d_comm%R_do_buf(list) )cycle - from_pe = d_comm%cfrom_pe(list) - msgsize = d_comm%R_msize(list)*l_size - call mpp_recv( buffer(buffer_pos+1), glen=msgsize, from_pe=from_pe, block=.FALSE., tag=COMM_TAG_1 ) - buffer_pos = buffer_pos + msgsize +subroutine MPP_DO_REDISTRIBUTE_3D_( f_in, f_out, d_comm, d_type, axis_to_storage_in, & + & nchunks, chunk_shape_in, chunk_shape_out ) + + use iso_c_binding, only : c_ptr, c_null_ptr, c_f_pointer, c_intptr_t + + integer(i8_kind), intent(in) :: f_in(:), f_out(:) + type(DomainCommunicator2D), intent(in) :: d_comm + MPP_TYPE_, intent(in) :: d_type + + integer, intent(in), optional :: axis_to_storage_in(3) + + ! One entry per public redistribute field. + ! + ! 2D/3D field: + ! nchunks(field) = 1 + ! + ! 4D field: + ! nchunks(field) = size(field,4) + ! + ! 5D field: + ! nchunks(field) = size(field,4) * size(field,5) + integer, intent(in) :: nchunks(:) + + ! Storage shape of one contiguous 3D chunk for each public field. + ! + ! First dimension is the storage-dimension selector. + ! Second dimension is the public field-list selector. + integer, intent(in) :: chunk_shape_in(:,:) + integer, intent(in) :: chunk_shape_out(:,:) + + integer :: axis_to_storage(3) + + type(c_ptr) :: p_in, p_out + + MPP_TYPE_, pointer :: field_in(:,:,:) + MPP_TYPE_, pointer :: field_out(:,:,:) + + integer :: shape3(3) + + ! Logical-axis bounds: + ! 1 = x + ! 2 = y + ! 3 = z + integer :: logical_lo(3), logical_hi(3) + + ! Bounds reordered into storage-dimension order. + integer :: storage_lo(3), storage_hi(3) + + integer(c_intptr_t) :: base_addr + integer(c_intptr_t) :: chunk_addr + integer(c_intptr_t) :: chunk_offset + integer(c_intptr_t) :: chunk_elements + integer(c_intptr_t) :: element_bytes + + type(domain2D), pointer :: domain_in, domain_out + + integer :: nfields + integer :: total_chunks + + integer :: field + integer :: chunk + integer :: list + integer :: n + + integer :: s1, s2, s3 + integer :: is, ie, js, je + integer :: ke + + integer :: pos + integer :: msgsize + integer :: buffer_pos + + integer :: to_pe + integer :: from_pe + + integer :: errunit + + MPP_TYPE_ :: buffer(size(mpp_domains_stack(:))) + pointer(ptr, buffer) + + !-------------------------------------------------------------------- + ! Validate axis permutation and field metadata. + !-------------------------------------------------------------------- + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) axis_to_storage = axis_to_storage_in + + if (any(axis_to_storage < 1) .OR. any(axis_to_storage > 3) .OR. & + axis_to_storage(1) == axis_to_storage(2) .OR. & + axis_to_storage(1) == axis_to_storage(3) .OR. & + axis_to_storage(2) == axis_to_storage(3)) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: invalid axis_to_storage permutation') + endif + + nfields = size(f_in) + + if (size(f_out) /= nfields) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: input/output field-list sizes differ') + endif + + if (size(nchunks) /= nfields) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: invalid nchunks size') + endif + + if (size(chunk_shape_in,1) /= 3 .OR. & + size(chunk_shape_out,1) /= 3 .OR. & + size(chunk_shape_in,2) /= nfields .OR. & + size(chunk_shape_out,2) /= nfields) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: invalid chunk-shape metadata') + endif + + if (any(nchunks < 1)) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: chunk counts must be positive') + endif + + if (any(chunk_shape_in < 1) .OR. any(chunk_shape_out < 1)) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: chunk extents must be positive') + endif + + total_chunks = sum(nchunks) + + !-------------------------------------------------------------------- + ! General setup. + !-------------------------------------------------------------------- + errunit = stderr() + + ke = d_comm%ke + + domain_in => d_comm%domain_in + domain_out => d_comm%domain_out + + ptr = LOC(mpp_domains_stack) + + ! LOC addresses are byte addresses on the supported targets. + ! STORAGE_SIZE returns the number of bits per scalar element. + element_bytes = int(storage_size(d_type) / 8, kind=c_intptr_t) + + if (element_bytes < 1_c_intptr_t) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: invalid element size') + endif + + !-------------------------------------------------------------------- + ! Pre-post receives. + ! + ! R_msize contains the x*y*z message size for one logical 3D chunk. + !-------------------------------------------------------------------- + buffer_pos = 0 + n = d_comm%Rlist_size + + do list = 0, n-1 + + if (.NOT. d_comm%R_do_buf(list)) cycle + + from_pe = d_comm%cfrom_pe(list) + msgsize = d_comm%R_msize(list) * total_chunks + + call mpp_recv( buffer(buffer_pos+1), glen=msgsize, & + & from_pe=from_pe, block=.FALSE., tag=COMM_TAG_1 ) + + buffer_pos = buffer_pos + msgsize + + enddo + + !-------------------------------------------------------------------- + ! Pack and send. + !-------------------------------------------------------------------- + n = d_comm%Slist_size + + do list = 0, n-1 + + if (.NOT. d_comm%S_do_buf(list)) cycle + + to_pe = d_comm%cto_pe(list) + + is = d_comm%sendis(1,list) + ie = d_comm%sendie(1,list) + js = d_comm%sendjs(1,list) + je = d_comm%sendje(1,list) + + ! Logical bounds within the local input field. + logical_lo(1) = is - domain_in%x(1)%domain_data%begin + 1 + logical_hi(1) = ie - domain_in%x(1)%domain_data%begin + 1 + + logical_lo(2) = js - domain_in%y(1)%domain_data%begin + 1 + logical_hi(2) = je - domain_in%y(1)%domain_data%begin + 1 + + logical_lo(3) = 1 + logical_hi(3) = ke + + ! Map logical-axis bounds onto the storage dimensions. + storage_lo = 0 + storage_hi = 0 + + storage_lo(axis_to_storage(1)) = logical_lo(1) + storage_hi(axis_to_storage(1)) = logical_hi(1) + + storage_lo(axis_to_storage(2)) = logical_lo(2) + storage_hi(axis_to_storage(2)) = logical_hi(2) + + storage_lo(axis_to_storage(3)) = logical_lo(3) + storage_hi(axis_to_storage(3)) = logical_hi(3) + + pos = buffer_pos + + do field = 1, nfields + + shape3 = chunk_shape_in(:,field) + + ! Each input chunk must represent the same logical x/y/z shape + ! used to initialize this communicator. + if (shape3(axis_to_storage(1)) /= d_comm%isize_in .OR. & + shape3(axis_to_storage(2)) /= d_comm%jsize_in .OR. & + shape3(axis_to_storage(3)) /= ke) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: incompatible input chunk shape') + endif + + chunk_elements = int(shape3(1), kind=c_intptr_t) * & + int(shape3(2), kind=c_intptr_t) * & + int(shape3(3), kind=c_intptr_t) + + base_addr = int(f_in(field), kind=c_intptr_t) + + do chunk = 1, nchunks(field) + + ! Each chunk is one contiguous 3D slab. + chunk_offset = int(chunk-1, kind=c_intptr_t) * & + chunk_elements * element_bytes + + chunk_addr = base_addr + chunk_offset + + p_in = transfer(chunk_addr, c_null_ptr) + + call c_f_pointer(p_in, field_in, shape3) + + ! Traverse in Fortran column-major storage order: + ! storage dim 1 varies fastest. + do s3 = storage_lo(3), storage_hi(3) + do s2 = storage_lo(2), storage_hi(2) + do s1 = storage_lo(1), storage_hi(1) + + pos = pos + 1 + buffer(pos) = field_in(s1,s2,s3) + + enddo + enddo + enddo + + enddo + enddo + + if (debug) then + write(errunit,*) 'PE', pe, ' to PE ', to_pe, & + 'is,ie,js,je=', is, ie, js, je + endif + + msgsize = pos - buffer_pos + + if (msgsize /= d_comm%S_msize(list) * total_chunks) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: packed send size mismatch') + endif + + call mpp_send( buffer(buffer_pos+1), plen=msgsize, & + & to_pe=to_pe, tag=COMM_TAG_1 ) + + buffer_pos = pos + + enddo + + call mpp_sync_self(check=EVENT_RECV) + + !-------------------------------------------------------------------- + ! Unpack receive buffers. + !-------------------------------------------------------------------- + buffer_pos = 0 + n = d_comm%Rlist_size + + do list = 0, n-1 + + if (.NOT. d_comm%R_do_buf(list)) cycle + + from_pe = d_comm%cfrom_pe(list) + + is = d_comm%recvis(1,list) + ie = d_comm%recvie(1,list) + js = d_comm%recvjs(1,list) + je = d_comm%recvje(1,list) + + if (debug) then + write(errunit,*) 'PE', pe, ' from PE ', from_pe, & + 'is,ie,js,je=', is, ie, js, je + endif + + ! Logical bounds within the local output field. + logical_lo(1) = is - domain_out%x(1)%domain_data%begin + 1 + logical_hi(1) = ie - domain_out%x(1)%domain_data%begin + 1 + + logical_lo(2) = js - domain_out%y(1)%domain_data%begin + 1 + logical_hi(2) = je - domain_out%y(1)%domain_data%begin + 1 + + logical_lo(3) = 1 + logical_hi(3) = ke + + storage_lo = 0 + storage_hi = 0 + + storage_lo(axis_to_storage(1)) = logical_lo(1) + storage_hi(axis_to_storage(1)) = logical_hi(1) + + storage_lo(axis_to_storage(2)) = logical_lo(2) + storage_hi(axis_to_storage(2)) = logical_hi(2) + + storage_lo(axis_to_storage(3)) = logical_lo(3) + storage_hi(axis_to_storage(3)) = logical_hi(3) + + pos = buffer_pos + + do field = 1, nfields + + shape3 = chunk_shape_out(:,field) + + ! Input and output horizontal extents may differ, but the output + ! chunk must match the output communicator decomposition. + if (shape3(axis_to_storage(1)) /= d_comm%isize_out .OR. & + shape3(axis_to_storage(2)) /= d_comm%jsize_out .OR. & + shape3(axis_to_storage(3)) /= ke) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: incompatible output chunk shape') + endif + + chunk_elements = int(shape3(1), kind=c_intptr_t) * & + int(shape3(2), kind=c_intptr_t) * & + int(shape3(3), kind=c_intptr_t) + + base_addr = int(f_out(field), kind=c_intptr_t) + + do chunk = 1, nchunks(field) + + chunk_offset = int(chunk-1, kind=c_intptr_t) * & + chunk_elements * element_bytes + + chunk_addr = base_addr + chunk_offset + + p_out = transfer(chunk_addr, c_null_ptr) + + call c_f_pointer(p_out, field_out, shape3) + + do s3 = storage_lo(3), storage_hi(3) + do s2 = storage_lo(2), storage_hi(2) + do s1 = storage_lo(1), storage_hi(1) + + pos = pos + 1 + field_out(s1,s2,s3) = buffer(pos) + + enddo + enddo + enddo + enddo + enddo + + if (pos - buffer_pos /= d_comm%R_msize(list) * total_chunks) then + call mpp_error(FATAL, & + 'MPP_DO_REDISTRIBUTE_3D_: unpacked receive size mismatch') + endif + + buffer_pos = pos + + enddo + + call mpp_sync_self() -!send - n = d_comm%Slist_size - do list = 0,n-1 - if( .NOT. d_comm%S_do_buf(list) )cycle - to_pe = d_comm%cto_pe(list) - is=d_comm%sendis(1,list); ie=d_comm%sendie(1,list) - js=d_comm%sendjs(1,list); je=d_comm%sendje(1,list) - pos = buffer_pos - do l=1,l_size ! loop over number of fields - ptr_field_in = f_in(l) - do k = 1,ke - do j = js,je - do i = is,ie - pos = pos+1 - buffer(pos) = field_in(i,j,k) - end do - end do - end do - end do - if( debug )write( errunit,* )'PE', pe, ' to PE ', to_pe, 'is,ie,js,je=', is, ie, js, je - msgsize = pos - buffer_pos - call mpp_send( buffer(buffer_pos+1), plen=msgsize, to_pe=to_pe, tag=COMM_TAG_1 ) - buffer_pos = pos - end do - - call mpp_sync_self(check=EVENT_RECV) - -!unpack buffer - buffer_pos = 0 - n = d_comm%Rlist_size - do list = 0,n-1 - if( .NOT. d_comm%R_do_buf(list) )cycle - from_pe = d_comm%cfrom_pe(list) - is=d_comm%recvis(1,list); ie=d_comm%recvie(1,list) - js=d_comm%recvjs(1,list); je=d_comm%recvje(1,list) - if( debug )write( errunit,* )'PE', pe, ' from PE ', from_pe, 'is,ie,js,je=', is, ie, js, je - pos = buffer_pos - do l=1,l_size ! loop over number of in/out fields - ptr_field_out = f_out(l) - do k = 1,ke - do j = js,je - do i = is,ie - pos = pos+1 - field_out(i,j,k) = buffer(pos) - end do - end do - end do - end do - buffer_pos = pos - end do - - call mpp_sync_self() - end subroutine MPP_DO_REDISTRIBUTE_3D_ +end subroutine MPP_DO_REDISTRIBUTE_3D_ !> @} diff --git a/mpp/include/mpp_domains_misc.inc b/mpp/include/mpp_domains_misc.inc index fe13a75867..ff07d527ca 100644 --- a/mpp/include/mpp_domains_misc.inc +++ b/mpp/include/mpp_domains_misc.inc @@ -988,6 +988,8 @@ end subroutine init_nonblock_type #undef MPP_UPDATE_DOMAINS_5D_V_ #define MPP_UPDATE_DOMAINS_5D_V_ mpp_update_domain2D_r8_5Dv #endif +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_r8 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_r8_2D #undef MPP_REDISTRIBUTE_3D_ @@ -1010,6 +1012,8 @@ end subroutine init_nonblock_type #define MPP_UPDATE_DOMAINS_4D_ mpp_update_domain2D_c8_4D #undef MPP_UPDATE_DOMAINS_5D_ #define MPP_UPDATE_DOMAINS_5D_ mpp_update_domain2D_c8_5D +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_c8 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_c8_2D #undef MPP_REDISTRIBUTE_3D_ @@ -1031,6 +1035,8 @@ end subroutine init_nonblock_type #define MPP_UPDATE_DOMAINS_4D_ mpp_update_domain2D_i8_4D #undef MPP_UPDATE_DOMAINS_5D_ #define MPP_UPDATE_DOMAINS_5D_ mpp_update_domain2D_i8_5D +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_i8 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_i8_2D #undef MPP_REDISTRIBUTE_3D_ @@ -1062,6 +1068,8 @@ end subroutine init_nonblock_type #define MPP_UPDATE_DOMAINS_4D_V_ mpp_update_domain2D_r4_4Dv #undef MPP_UPDATE_DOMAINS_5D_V_ #define MPP_UPDATE_DOMAINS_5D_V_ mpp_update_domain2D_r4_5Dv +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_r4 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_r4_2D #undef MPP_REDISTRIBUTE_3D_ @@ -1085,6 +1093,8 @@ end subroutine init_nonblock_type #define MPP_UPDATE_DOMAINS_4D_ mpp_update_domain2D_c4_4D #undef MPP_UPDATE_DOMAINS_5D_ #define MPP_UPDATE_DOMAINS_5D_ mpp_update_domain2D_c4_5D +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_c4 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_c4_2D #undef MPP_REDISTRIBUTE_3D_ @@ -1106,6 +1116,8 @@ end subroutine init_nonblock_type #define MPP_UPDATE_DOMAINS_4D_ mpp_update_domain2D_i4_4D #undef MPP_UPDATE_DOMAINS_5D_ #define MPP_UPDATE_DOMAINS_5D_ mpp_update_domain2D_i4_5D +#undef MPP_REDISTRIBUTE_REGISTER_ +#define MPP_REDISTRIBUTE_REGISTER_ mpp_redistribute_register_i4 #undef MPP_REDISTRIBUTE_2D_ #define MPP_REDISTRIBUTE_2D_ mpp_redistribute_i4_2D #undef MPP_REDISTRIBUTE_3D_ diff --git a/mpp/include/mpp_update_domains2D.fh b/mpp/include/mpp_update_domains2D.fh index b9b0a95e09..ebd90c6f7e 100644 --- a/mpp/include/mpp_update_domains2D.fh +++ b/mpp/include/mpp_update_domains2D.fh @@ -216,179 +216,422 @@ return end subroutine MPP_UPDATE_DOMAINS_5D_ + subroutine MPP_REDISTRIBUTE_REGISTER_( domain_in, addr_in, shape_in, domain_out, addr_out, shape_out, & + & nchunks, complete, free, list_size, dc_handle, position, axis_to_storage_in) + type(domain2D), intent(in) :: domain_in, domain_out + integer(i8_kind), intent(in) :: addr_in, addr_out + integer, intent(in) :: shape_in(3), shape_out(3) + integer, intent(in) :: nchunks + + logical, intent(in), optional :: complete, free + integer, intent(in), optional :: list_size + integer, intent(in), optional :: position + integer, intent(in) :: axis_to_storage_in(3) + + type(DomainCommunicator2D), pointer, optional :: dc_handle + type(DomainCommunicator2D), pointer, save :: d_comm => NULL() + + integer(i8_kind), save :: l_addrs_in(MAX_DOMAIN_FIELDS) = -9999 + integer(i8_kind), save :: l_addrs_out(MAX_DOMAIN_FIELDS) = -9999 + + integer, save :: l_nchunks(MAX_DOMAIN_FIELDS) = 1 + integer, save :: l_shape_in(3,MAX_DOMAIN_FIELDS) = 0 + integer, save :: l_shape_out(3,MAX_DOMAIN_FIELDS) = 0 + + integer, save :: l_size = 0 + + integer, save :: isize_in = 0 + integer, save :: jsize_in = 0 + integer, save :: ke_in = 0 + integer, save :: isize_out = 0 + integer, save :: jsize_out = 0 + integer, save :: ke_out = 0 + + integer, save :: saved_axis_to_storage(3) = (/1,2,3/) + + integer :: axis_to_storage(3) + integer :: xdim, ydim, zdim + integer :: ke, lsize + + logical :: do_redist + logical :: free_comm + logical :: set_mismatch + + character(len=16) :: text + MPP_TYPE_ :: d_type + + axis_to_storage = axis_to_storage_in + + xdim = axis_to_storage(1) + ydim = axis_to_storage(2) + zdim = axis_to_storage(3) + + if (nchunks < 1) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_REGISTER_3D_: nchunks must be positive') + endif + + if (present(position)) then + if (position /= CENTER) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_REGISTER_3D_: only position=CENTER is implemented') + endif + endif + + do_redist = .true. + if (present(complete)) do_redist = complete + + free_comm = .false. + if (present(free)) free_comm = free + + if (free_comm) then + + if (addr_out > 0) then + ke = shape_out(zdim) + else + ke = shape_in(zdim) + endif + + lsize = 1 + if (present(list_size)) lsize = list_size + + call mpp_redistribute_free_comm( domain_in, addr_in, domain_out, addr_out, ke, lsize ) + + return + endif + + l_size = l_size + 1 + + if (l_size > MAX_DOMAIN_FIELDS) then + write(text,'(i0)') MAX_DOMAIN_FIELDS + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_REGISTER_3D_: MAX_DOMAIN_FIELDS=' // trim(text) // ' exceeded') + endif + + l_addrs_in(l_size) = addr_in + l_addrs_out(l_size) = addr_out + + l_nchunks(l_size) = nchunks + l_shape_in(:,l_size) = shape_in + l_shape_out(:,l_size) = shape_out + + if (l_size == 1) then + + saved_axis_to_storage = axis_to_storage + + if (addr_in > 0) then + isize_in = shape_in(xdim) + jsize_in = shape_in(ydim) + ke_in = shape_in(zdim) + endif + + if (addr_out > 0) then + isize_out = shape_out(xdim) + jsize_out = shape_out(ydim) + ke_out = shape_out(zdim) + endif + + else + + set_mismatch = any(axis_to_storage /= saved_axis_to_storage) + + set_mismatch = set_mismatch .OR. (addr_in == 0 .AND. l_addrs_in(l_size-1) /= 0) + set_mismatch = set_mismatch .OR. (addr_in > 0 .AND. l_addrs_in(l_size-1) == 0) + set_mismatch = set_mismatch .OR. (addr_out == 0 .AND. l_addrs_out(l_size-1) /= 0) + set_mismatch = set_mismatch .OR. (addr_out > 0 .AND. l_addrs_out(l_size-1) == 0) + + if (addr_in > 0) then + set_mismatch = set_mismatch .OR. (isize_in /= shape_in(xdim)) + set_mismatch = set_mismatch .OR. (jsize_in /= shape_in(ydim)) + set_mismatch = set_mismatch .OR. (ke_in /= shape_in(zdim)) + endif + + if (addr_out > 0) then + set_mismatch = set_mismatch .OR. (isize_out /= shape_out(xdim)) + set_mismatch = set_mismatch .OR. (jsize_out /= shape_out(ydim)) + set_mismatch = set_mismatch .OR. (ke_out /= shape_out(zdim)) + endif + + if (set_mismatch) then + write(text,'(i0)') l_size + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_REGISTER_3D_: incompatible grouped field at count ' // trim(text)) + endif + + endif + + if (.not. do_redist) return + + if (present(dc_handle)) d_comm => dc_handle + + if (.not. associated(d_comm)) then + + d_comm => mpp_redistribute_init_comm(domain_in, l_addrs_in(1:l_size), domain_out, l_addrs_out(1:l_size), & + isize_in, jsize_in, ke_in, isize_out, jsize_out, ke_out ) + + if (present(dc_handle)) dc_handle => d_comm + + endif + + call mpp_do_redistribute(l_addrs_in(1:l_size), l_addrs_out(1:l_size), d_comm, d_type, & + & axis_to_storage_in = axis_to_storage, nchunks = l_nchunks(1:l_size), & + & chunk_shape_in = l_shape_in(:,1:l_size), chunk_shape_out = l_shape_out(:,1:l_size) ) + + !-------------------------------------------------------------------- + ! Reset grouped state. + !-------------------------------------------------------------------- + l_size = 0 + l_addrs_in = -9999 + l_addrs_out = -9999 + + l_nchunks = 1 + l_shape_in = 0 + l_shape_out = 0 + + isize_in = 0 + jsize_in = 0 + ke_in = 0 + + isize_out = 0 + jsize_out = 0 + ke_out = 0 + + saved_axis_to_storage = (/1,2,3/) + d_comm => NULL() + + end subroutine MPP_REDISTRIBUTE_REGISTER_ + subroutine MPP_REDISTRIBUTE_2D_( domain_in, field_in, domain_out, field_out, complete, free, list_size, & - & dc_handle, position ) + & dc_handle, position, axis_to_storage_in ) type(domain2D), intent(in) :: domain_in, domain_out - MPP_TYPE_, intent(in) :: field_in (:,:) - MPP_TYPE_, intent(out) :: field_out(:,:) + MPP_TYPE_, intent(in) :: field_in(:,:) + MPP_TYPE_, intent(out) :: field_out(:,:) + logical, intent(in), optional :: complete, free integer, intent(in), optional :: list_size integer, intent(in), optional :: position - MPP_TYPE_ :: field3D_in (size(field_in, 1),size(field_in, 2),1) - MPP_TYPE_ :: field3D_out(size(field_out,1),size(field_out,2),1) - type(DomainCommunicator2D),pointer,optional :: dc_handle - pointer( ptr_in, field3D_in ) - pointer( ptr_out, field3D_out ) - - field_out = 0 - ptr_in = 0 - ptr_out = 0 - if(domain_in%initialized) ptr_in = LOC(field_in ) - if(domain_out%initialized) ptr_out = LOC(field_out) - call mpp_redistribute( domain_in, field3D_in, domain_out, field3D_out, complete, free, list_size, & - & dc_handle, position ) + integer, intent(in), optional :: axis_to_storage_in(2) + + type(DomainCommunicator2D), pointer, optional :: dc_handle + + integer(i8_kind) :: addr_in, addr_out + integer :: shape_in(3), shape_out(3) + integer :: axis_to_storage(3) + integer :: nchunks + + ! Logical x/y may occupy storage dims 1 and 2 in either order. + ! The third dimension is an internal singleton carried dimension. + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) then + if (any(axis_to_storage_in < 1) .OR. any(axis_to_storage_in > 2) .OR. & + axis_to_storage_in(1) == axis_to_storage_in(2)) then + call mpp_error(FATAL, & + 'MPP_REDISTRIBUTE_2D_: invalid axis_to_storage permutation') + endif + + axis_to_storage(1:2) = axis_to_storage_in + endif - return - end subroutine MPP_REDISTRIBUTE_2D_ + nchunks = 1 + + addr_in = 0 + addr_out = 0 + if (domain_in%initialized) addr_in = LOC(field_in) + if (domain_out%initialized) addr_out = LOC(field_out) + + shape_in = (/ size(field_in,1), & + & size(field_in,2), & + & 1 /) + + shape_out = (/ size(field_out,1), & + & size(field_out,2), & + & 1 /) + + if (domain_out%initialized) field_out = 0 + + call MPP_REDISTRIBUTE_REGISTER_( domain_in = domain_in, addr_in = addr_in, shape_in = shape_in, & + & domain_out = domain_out, addr_out = addr_out, shape_out = shape_out, & + & nchunks = nchunks, complete = complete, free = free, & + & list_size = list_size, dc_handle = dc_handle, position = position, & + & axis_to_storage_in = axis_to_storage ) + + end subroutine MPP_REDISTRIBUTE_2D_ subroutine MPP_REDISTRIBUTE_3D_( domain_in, field_in, domain_out, field_out, complete, free, list_size, & - & dc_handle, position ) + & dc_handle, position, axis_to_storage_in ) type(domain2D), intent(in) :: domain_in, domain_out - MPP_TYPE_, intent(in) :: field_in (:,:,:) - MPP_TYPE_, intent(out) :: field_out(:,:,:) + MPP_TYPE_, intent(in) :: field_in(:,:,:) + MPP_TYPE_, intent(out) :: field_out(:,:,:) + logical, intent(in), optional :: complete, free integer, intent(in), optional :: list_size integer, intent(in), optional :: position - type(DomainCommunicator2D),pointer,optional :: dc_handle - type(DomainCommunicator2D),pointer,save :: d_comm =>NULL() - logical :: do_redist,free_comm - integer :: lsize - integer(i8_kind),dimension(MAX_DOMAIN_FIELDS),save :: l_addrs_in=-9999, l_addrs_out=-9999 - integer, save :: isize_in=0,jsize_in=0,ke_in=0,l_size=0 - integer, save :: isize_out=0,jsize_out=0,ke_out=0 - logical :: set_mismatch - integer :: ke - character(len=2) :: text - MPP_TYPE_ :: d_type - integer(i8_kind) :: floc_in, floc_out + integer, intent(in), optional :: axis_to_storage_in(3) - floc_in = 0 - floc_out = 0 - if(domain_in%initialized) floc_in = LOC(field_in) - if(domain_out%initialized) floc_out = LOC(field_out) + type(DomainCommunicator2D), pointer, optional :: dc_handle - if(present(position)) then - if(position .NE. CENTER) call mpp_error( FATAL, & - 'MPP_REDISTRIBUTE_3Dold_: only position = CENTER is implemented, contact author') - endif + integer(i8_kind) :: addr_in, addr_out + integer :: shape_in(3), shape_out(3) + integer :: axis_to_storage(3) + integer :: nchunks - do_redist=.true.; if(PRESENT(complete))do_redist=complete - free_comm=.false.; if(PRESENT(free))free_comm=free - if(free_comm)then - l_addrs_in(1) = floc_in; l_addrs_out(1) = floc_out - if(l_addrs_out(1)>0)then - ke = size(field_out,3) - else - ke = size(field_in,3) - end if - lsize=1; if(PRESENT(list_size))lsize=list_size - call mpp_redistribute_free_comm(domain_in,l_addrs_in(1),domain_out,l_addrs_out(1),ke,lsize) - else - l_size = l_size+1 - if(l_size > MAX_DOMAIN_FIELDS)then - write( text,'(i2)' ) MAX_DOMAIN_FIELDS - call mpp_error(FATAL,'MPP_REDISTRIBUTE_3D: MAX_DOMAIN_FIELDS='//text//' exceeded for group redistribute.' ) - end if - l_addrs_in(l_size) = floc_in; l_addrs_out(l_size) = floc_out - if(l_size == 1)then - if(l_addrs_in(l_size) > 0)then - isize_in=size(field_in,1); jsize_in=size(field_in,2); ke_in = size(field_in,3) - end if - if(l_addrs_out(l_size) > 0)then - isize_out=size(field_out,1); jsize_out=size(field_out,2); ke_out = size(field_out,3) - endif - else - set_mismatch = .false. - set_mismatch = l_addrs_in(l_size) == 0 .AND. l_addrs_in(l_size-1) /= 0 - set_mismatch = set_mismatch .OR. (l_addrs_in(l_size) > 0 .AND. l_addrs_in(l_size-1) == 0) - set_mismatch = set_mismatch .OR. (l_addrs_out(l_size) == 0 .AND. l_addrs_out(l_size-1) /= 0) - set_mismatch = set_mismatch .OR. (l_addrs_out(l_size) > 0 .AND. l_addrs_out(l_size-1) == 0) - if(l_addrs_in(l_size) > 0)then - set_mismatch = set_mismatch .OR. (isize_in /= size(field_in,1)) - set_mismatch = set_mismatch .OR. (jsize_in /= size(field_in,2)) - set_mismatch = set_mismatch .OR. (ke_in /= size(field_in,3)) - endif - if(l_addrs_out(l_size) > 0)then - set_mismatch = set_mismatch .OR. (isize_out /= size(field_out,1)) - set_mismatch = set_mismatch .OR. (jsize_out /= size(field_out,2)) - set_mismatch = set_mismatch .OR. (ke_out /= size(field_out,3)) - endif - if(set_mismatch)then - write( text,'(i2)' ) l_size - call mpp_error(FATAL,'MPP_REDISTRIBUTE_3D: Incompatible field at count '// & - & text//' for group redistribute.' ) - endif - endif - if(do_redist)then - if(PRESENT(dc_handle))d_comm =>dc_handle ! User has kept pointer to d_comm - if(.not.ASSOCIATED(d_comm))then ! d_comm needs initialization or lookup - d_comm =>mpp_redistribute_init_comm(domain_in,l_addrs_in(1:l_size),domain_out,l_addrs_out(1:l_size), & - isize_in,jsize_in,ke_in,isize_out,jsize_out,ke_out) - if(PRESENT(dc_handle))dc_handle =>d_comm ! User wants to keep pointer to d_comm - endif - call mpp_do_redistribute( l_addrs_in(1:l_size), l_addrs_out(1:l_size), d_comm, d_type ) - l_size=0; l_addrs_in=-9999; l_addrs_out=-9999 - isize_in=0; jsize_in=0; ke_in=0 - isize_out=0; jsize_out=0; ke_out=0 - d_comm =>NULL() - endif + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) axis_to_storage = axis_to_storage_in + + if (any(axis_to_storage < 1) .OR. any(axis_to_storage > 3) .OR. & + axis_to_storage(1) == axis_to_storage(2) .OR. & + axis_to_storage(1) == axis_to_storage(3) .OR. & + axis_to_storage(2) == axis_to_storage(3)) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_3D_: invalid axis_to_storage permutation') endif - end subroutine MPP_REDISTRIBUTE_3D_ + nchunks = 1 + + addr_in = 0 + addr_out = 0 + + if (domain_in%initialized) addr_in = LOC(field_in) + if (domain_out%initialized) addr_out = LOC(field_out) + shape_in = (/ size(field_in,1), & + & size(field_in,2), & + & size(field_in,3) /) + + shape_out = (/ size(field_out,1), & + & size(field_out,2), & + & size(field_out,3) /) + + if (domain_out%initialized) field_out = 0 + + call MPP_REDISTRIBUTE_REGISTER_( domain_in = domain_in, addr_in = addr_in, shape_in = shape_in, & + & domain_out = domain_out, addr_out = addr_out, shape_out = shape_out, & + & nchunks = nchunks, complete = complete, free = free, & + & list_size = list_size, dc_handle = dc_handle, position = position, & + & axis_to_storage_in = axis_to_storage ) + + end subroutine MPP_REDISTRIBUTE_3D_ subroutine MPP_REDISTRIBUTE_4D_( domain_in, field_in, domain_out, field_out, complete, free, list_size, & - & dc_handle, position ) + & dc_handle, position, axis_to_storage_in ) type(domain2D), intent(in) :: domain_in, domain_out - MPP_TYPE_, intent(in) :: field_in (:,:,:,:) - MPP_TYPE_, intent(out) :: field_out(:,:,:,:) + MPP_TYPE_, intent(in) :: field_in(:,:,:,:) + MPP_TYPE_, intent(out) :: field_out(:,:,:,:) + logical, intent(in), optional :: complete, free integer, intent(in), optional :: list_size integer, intent(in), optional :: position - MPP_TYPE_ :: field3D_in (size(field_in, 1),size(field_in, 2),size(field_in ,3)*size(field_in ,4)) - MPP_TYPE_ :: field3D_out(size(field_out,1),size(field_out,2),size(field_out,3)*size(field_out,4)) - type(DomainCommunicator2D),pointer,optional :: dc_handle - pointer( ptr_in, field3D_in ) - pointer( ptr_out, field3D_out ) - - field_out = 0 - ptr_in = 0 - ptr_out = 0 - if(domain_in%initialized) ptr_in = LOC(field_in ) - if(domain_out%initialized) ptr_out = LOC(field_out) - call mpp_redistribute( domain_in, field3D_in, domain_out, field3D_out, complete, free, list_size, & - & dc_handle, position ) + integer, intent(in), optional :: axis_to_storage_in(3) + + type(DomainCommunicator2D), pointer, optional :: dc_handle + + integer(i8_kind) :: addr_in, addr_out + integer :: shape_in(3), shape_out(3) + integer :: axis_to_storage(3) + integer :: nchunks + + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) axis_to_storage = axis_to_storage_in + + nchunks = size(field_in,4) + + if (nchunks /= size(field_out,4)) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_4D_: input and output fourth extents differ') + endif + + if (nchunks < 1) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_4D_: fourth dimension must be nonempty') + endif + + addr_in = 0 + addr_out = 0 + + if (domain_in%initialized) addr_in = LOC(field_in) + if (domain_out%initialized) addr_out = LOC(field_out) + + ! Each fixed-fourth-index section field(:,:,:,l) is one contiguous + ! 3D chunk. Logical x/y/z may be mapped to any of these three + ! storage dimensions through axis_to_storage. + shape_in = (/ size(field_in,1), & + & size(field_in,2), & + & size(field_in,3) /) + + shape_out = (/ size(field_out,1), & + & size(field_out,2), & + & size(field_out,3) /) + + if (domain_out%initialized) field_out = 0 + + call MPP_REDISTRIBUTE_REGISTER_( domain_in = domain_in, addr_in = addr_in, shape_in = shape_in, & + & domain_out = domain_out, addr_out = addr_out, shape_out = shape_out, & + & nchunks = nchunks, complete = complete, free = free, & + & list_size = list_size, dc_handle = dc_handle, position = position, & + & axis_to_storage_in = axis_to_storage ) - return end subroutine MPP_REDISTRIBUTE_4D_ subroutine MPP_REDISTRIBUTE_5D_( domain_in, field_in, domain_out, field_out, complete, free, list_size, & - & dc_handle, position ) + & dc_handle, position, axis_to_storage_in ) type(domain2D), intent(in) :: domain_in, domain_out - MPP_TYPE_, intent(in) :: field_in (:,:,:,:,:) - MPP_TYPE_, intent(out) :: field_out(:,:,:,:,:) + MPP_TYPE_, intent(in) :: field_in(:,:,:,:,:) + MPP_TYPE_, intent(out) :: field_out(:,:,:,:,:) + logical, intent(in), optional :: complete, free integer, intent(in), optional :: list_size integer, intent(in), optional :: position - MPP_TYPE_ :: field3D_in (size(field_in, 1),size(field_in, 2), & - & size(field_in ,3)*size(field_in,4)*size(field_in ,5)) - MPP_TYPE_ :: field3D_out(size(field_out,1),size(field_out,2), & - & size(field_out,3)*size(field_out,4)*size(field_out,5)) - - type(DomainCommunicator2D),pointer,optional :: dc_handle - pointer( ptr_in, field3D_in ) - pointer( ptr_out, field3D_out ) - - field_out = 0 - ptr_in = 0 - ptr_out = 0 - if(domain_in%initialized) ptr_in = LOC(field_in ) - if(domain_out%initialized) ptr_out = LOC(field_out) - call mpp_redistribute( domain_in, field3D_in, domain_out, field3D_out, complete, free, list_size, & - & dc_handle, position ) + integer, intent(in), optional :: axis_to_storage_in(3) + + type(DomainCommunicator2D), pointer, optional :: dc_handle + + integer(i8_kind) :: addr_in, addr_out + integer :: shape_in(3), shape_out(3) + integer :: axis_to_storage(3) + integer :: nchunks + + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) axis_to_storage = axis_to_storage_in + + if (any(axis_to_storage < 1) .OR. any(axis_to_storage > 3) .OR. & + axis_to_storage(1) == axis_to_storage(2) .OR. & + axis_to_storage(1) == axis_to_storage(3) .OR. & + axis_to_storage(2) == axis_to_storage(3)) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_5D_: invalid axis_to_storage permutation') + endif + + if (size(field_in,4) /= size(field_out,4)) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_5D_: input and output fourth extents differ') + endif + + if (size(field_in,5) /= size(field_out,5)) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_5D_: input and output fifth extents differ') + endif + + if (size(field_in,4) < 1 .OR. size(field_in,5) < 1) then + call mpp_error(FATAL, 'MPP_REDISTRIBUTE_5D_: fourth and fifth dimensions must be nonempty') + endif + + nchunks = size(field_in,4) * size(field_in,5) + + addr_in = 0 + addr_out = 0 + + if (domain_in%initialized) addr_in = LOC(field_in) + if (domain_out%initialized) addr_out = LOC(field_out) + + ! Each fixed-(fourth,fifth)-index section field(:,:,:,l,m) + ! is one contiguous 3D chunk. + shape_in = (/ size(field_in,1), & + & size(field_in,2), & + & size(field_in,3) /) + + shape_out = (/ size(field_out,1), & + & size(field_out,2), & + & size(field_out,3) /) + + if (domain_out%initialized) field_out = 0 + + call MPP_REDISTRIBUTE_REGISTER_( domain_in = domain_in, addr_in = addr_in, shape_in = shape_in, & + & domain_out = domain_out, addr_out = addr_out, shape_out = shape_out, & + & nchunks = nchunks, complete = complete, free = free, & + & list_size = list_size, dc_handle = dc_handle, position = position, & + & axis_to_storage_in = axis_to_storage ) - return end subroutine MPP_REDISTRIBUTE_5D_ #ifdef VECTOR_FIELD_ diff --git a/test_fms/mpp/test_mpp_domains.F90 b/test_fms/mpp/test_mpp_domains.F90 index e681173944..c6cbaebd8f 100644 --- a/test_fms/mpp/test_mpp_domains.F90 +++ b/test_fms/mpp/test_mpp_domains.F90 @@ -83,6 +83,7 @@ program test_mpp_domains logical :: test_nonsym_edge = .false. logical :: test_group = .false. logical :: test_cubic_grid_redistribute = .false. + logical :: test_cubic_grid_redistribute_generalized_indices = .false. logical :: check_parallel = .FALSE. logical :: test_get_nbr = .FALSE. logical :: test_boundary = .false. @@ -109,7 +110,8 @@ program test_mpp_domains warn_level, wide_halo_x, wide_halo_y, nx_cubic, ny_cubic, & test_performance, test_interface, num_fields, do_sleep, num_iter, & mix_2D_3D, test_get_nbr, test_edge_update, & - test_cubic_grid_redistribute, ensemble_size, layout_cubic, & + test_cubic_grid_redistribute, test_cubic_grid_redistribute_generalized_indices, & + ensemble_size, layout_cubic, & layout_ensemble, nthreads, test_boundary, layout_tripolar, & test_group, test_global_sum, test_subset, test_nonsym_edge, & test_halosize_performance, test_adjoint, wide_halo, & @@ -202,6 +204,16 @@ program test_mpp_domains & '--------------------> Finished cubic_grid_redistribute <-------------------' endif + if( test_cubic_grid_redistribute_generalized_indices ) then + if (mpp_pe() == mpp_root_pe()) print *, & + & '--------------------> Calling cubic_grid_redistribute_generalized_indices <-------------------' + if (mpp_pe() == mpp_root_pe()) print *, & + & 'Storage Layout = (3,1,2)' + call cubic_grid_redistribute((/3,1,2/)) + if (mpp_pe() == mpp_root_pe()) print *, & + & '--------------------> Finished cubic_grid_redistribute_generalized_indices <-------------------' + endif + if(test_boundary) then if (mpp_pe() == mpp_root_pe()) print *, '--------------------> Calling test_boundary <-------------------' call test_get_boundary('torus') @@ -598,7 +610,8 @@ end subroutine test_redistribute !####################################################################### - subroutine cubic_grid_redistribute + subroutine cubic_grid_redistribute(axis_to_storage_in) + integer, intent(in), optional :: axis_to_storage_in(3) integer :: npes, npes_per_ensemble, npes_per_tile integer :: ensemble_id, tile_id, ensemble_tile_id @@ -607,6 +620,8 @@ subroutine cubic_grid_redistribute integer :: isd_ens, ied_ens, jsd_ens, jed_ens integer :: isc, iec, jsc, jec integer :: isd, ied, jsd, jed + integer :: sc(3), ec(3), sc_ens(3), ec_ens(3) + integer :: sd(3), ed(3), sd_ens(3), ed_ens(3) integer, allocatable :: my_ensemble_pelist(:), pe_start(:), pe_end(:) integer, allocatable :: global_indices(:,:), layout2D(:,:) real, allocatable :: x(:,:,:,:), x_ens(:,:,:), y(:,:,:) @@ -614,6 +629,34 @@ subroutine cubic_grid_redistribute type(domain2D) :: domain type(domain2D), allocatable :: domain_ensemble(:) character(len=128) :: mesg + integer :: axis_to_storage(3), storage_to_axis(3) + integer :: i_dim, j_dim, k_dim + + !------------------------------------------------------------------- + ! axis_to_storage maps logical axis order to storage layout + ! axis_to_storage = (3,1,2) maps + ! x -> 3rd dim + ! y -> 1st dim + ! z -> 2nd dim + ! This map is passed on to MPP_REDISTRIBUTE to select storage + ! dim by logical axis desired + ! The inverse map is defined by storage_to_axis(axis_to_storage)=(1,2,3) + ! maps storage layout to logical axis order and is used in this + ! routine to select the correct logical start/stop indices when + ! allocating storage arrays. + !------------------------------------------------------------------- + axis_to_storage = (/1,2,3/) + if (present(axis_to_storage_in)) axis_to_storage = axis_to_storage_in + + ! Inverse Map + storage_to_axis(axis_to_storage(1))=1 + storage_to_axis(axis_to_storage(2))=2 + storage_to_axis(axis_to_storage(3))=3 + + ! Selectors for logical indices by storage dim + i_dim = storage_to_axis(1) + j_dim = storage_to_axis(2) + k_dim = storage_to_axis(3) ! --- set up pelist npes = mpp_npes() @@ -698,44 +741,55 @@ subroutine cubic_grid_redistribute call mpp_get_data_domain( domain, isd, ied, jsd, jed) call mpp_get_compute_domain( domain, isc, iec, jsc, jec) - allocate(x_ens(isd_ens:ied_ens, jsd_ens:jed_ens, nz)) - allocate(x(isd:ied, jsd:jed, nz, ensemble_size)) - allocate(y(isd:ied, jsd:jed, nz)) + sd_ens = (/isd_ens, jsd_ens, 1/) + ed_ens = (/ied_ens, jed_ens, nz/) + allocate(x_ens(sd_ens(i_dim):ed_ens(i_dim), sd_ens(j_dim):ed_ens(j_dim), sd_ens(k_dim):ed_ens(k_dim) )) + + sd = (/isd, jsd, 1/) + ed = (/ied, jed, nz/) + allocate(x(sd(i_dim):ed(i_dim), sd(j_dim):ed(j_dim), sd(k_dim):ed(k_dim), ensemble_size)) + allocate(y(sd(i_dim):ed(i_dim), sd(j_dim):ed(j_dim), sd(k_dim):ed(k_dim) )) + print *, 'shape x:', shape(x) + sc_ens = (/isc_ens, jsc_ens, 1/) + ec_ens = (/iec_ens, jec_ens, nz/) x = 0 - do k = 1, nz - do j = jsc_ens, jec_ens - do i = isc_ens, iec_ens + do k = sc_ens(k_dim), ec_ens(k_dim) + do j = sc_ens(j_dim), ec_ens(j_dim) + do i = sc_ens(i_dim), ec_ens(i_dim) x_ens(i,j,k) = ensemble_id *1e6 + ensemble_tile_id*1e3 + i + j * 1.e-3 + k * 1.e-6 enddo enddo enddo + sc = (/isc, jsc, 1/) + ec = (/iec, jec, nz/) do n = 1, ensemble_size x = 0 - call mpp_redistribute( domain_ensemble(n), x_ens, domain, x(:,:,:,n) ) + call mpp_redistribute( domain_ensemble(n), x_ens, domain, x(:,:,:,n), axis_to_storage_in=axis_to_storage ) y = 0 - do k = 1, nz - do j = jsc, jec - do i = isc, iec + do k = sc(k_dim), ec(k_dim) + do j = sc(j_dim), ec(j_dim) + do i = sc(i_dim), ec(i_dim) y(i,j,k) = n *1e6 + tile_id*1e3 + i + j * 1.e-3 + k * 1.e-6 enddo enddo enddo write(mesg,'(a,i4)') "cubic_grid redistribute from ensemble", n - call compare_checksums( x(isc:iec,jsc:jec,:,n), y(isc:iec,jsc:jec,:), trim(mesg) ) + call compare_checksums( x(sc(i_dim):ec(i_dim),sc(j_dim):ec(j_dim),sc(k_dim):ec(k_dim),n), & + & y(sc(i_dim):ec(i_dim),sc(j_dim):ec(j_dim),sc(k_dim):ec(k_dim)), trim(mesg) ) enddo ! redistribute data to each ensemble. deallocate(x,y,x_ens) - allocate(x(isd:ied, jsd:jed, nz, ensemble_size)) - allocate(x_ens(isd_ens:ied_ens, jsd_ens:jed_ens, nz)) - allocate(y(isd_ens:ied_ens, jsd_ens:jed_ens, nz)) + allocate(x(sd(i_dim):ed(i_dim), sd(j_dim):ed(j_dim), sd(k_dim):ed(k_dim), ensemble_size)) + allocate(x_ens(sd_ens(i_dim):ed_ens(i_dim), sd_ens(j_dim):ed_ens(j_dim), sd_ens(k_dim):ed_ens(k_dim) )) + allocate(y(sd_ens(i_dim):ed_ens(i_dim), sd_ens(j_dim):ed_ens(j_dim), sd_ens(k_dim):ed_ens(k_dim) )) y = 0 - do k = 1, nz - do j = jsc, jec - do i = isc, iec + do k = sc(k_dim), ec(k_dim) + do j = sc(j_dim), ec(j_dim) + do i = sc(i_dim), ec(i_dim) x(i,j,k,:) = i + j * 1.e-3 + k * 1.e-6 enddo enddo @@ -743,20 +797,21 @@ subroutine cubic_grid_redistribute do n = 1, ensemble_size x_ens = 0 - call mpp_redistribute(domain, x(:,:,:,n), domain_ensemble(n), x_ens) + call mpp_redistribute(domain, x(:,:,:,n), domain_ensemble(n), x_ens, axis_to_storage_in=axis_to_storage) y = 0 if( ensemble_id == n ) then - do k = 1, nz - do j = jsc_ens, jec_ens - do i = isc_ens, iec_ens + do k = sc_ens(k_dim), ec_ens(k_dim) + do j = sc_ens(j_dim), ec_ens(j_dim) + do i = sc_ens(i_dim), ec_ens(i_dim) y(i,j,k) = i + j * 1.e-3 + k * 1.e-6 enddo enddo enddo endif write(mesg,'(a,i4)') "cubic_grid redistribute to ensemble", n - call compare_checksums( x_ens(isc_ens:iec_ens,jsc_ens:jec_ens,:), y(isc_ens:iec_ens,jsc_ens:jec_ens,:), & - & trim(mesg) ) + call compare_checksums( x_ens(sc_ens(i_dim):ec_ens(i_dim),sc_ens(j_dim):ec_ens(j_dim),sc_ens(k_dim):ec_ens(k_dim)), & + & y(sc_ens(i_dim):ec_ens(i_dim),sc_ens(j_dim):ec_ens(j_dim),sc_ens(k_dim):ec_ens(k_dim)), & + & trim(mesg) ) enddo deallocate(x, y, x_ens) diff --git a/test_fms/mpp/test_mpp_domains2.sh b/test_fms/mpp/test_mpp_domains2.sh index a9b26f2cd3..a87189640c 100755 --- a/test_fms/mpp/test_mpp_domains2.sh +++ b/test_fms/mpp/test_mpp_domains2.sh @@ -28,7 +28,7 @@ . ../test-lib.sh # TODO: Enable this test once generalized indices work is complete -SKIP_TESTS="test_mpp_domains2.12" +SKIP_TESTS="test_mpp_domains2.13" # TODO edge update, fails on non-blocking with gnu #SKIP_TESTS="$SKIP_TESTS $(basename $0 .sh).6" @@ -66,6 +66,7 @@ mix_2D_3D = .false. test_get_nbr = .false. test_edge_update = .false. test_cubic_grid_redistribute = .false. +test_cubic_grid_redistribute_generalized_indices = .false. ensemble_size = 1 layout_cubic = 0,0 layout_ensemble = 0,0 @@ -116,6 +117,10 @@ sed "s/test_cubic_grid_redistribute = .false./test_cubic_grid_redistribute = .tr test_expect_success "cubic grid redistribute" ' mpirun -n 6 ../test_mpp_domains ' +sed "s/test_cubic_grid_redistribute_generalized_indices = .false./test_cubic_grid_redistribute_generalized_indices = .true./" input_base.nml > input.nml +test_expect_success "cubic grid redistribute_generalized_indices" ' + mpirun -n 6 ../test_mpp_domains +' sed "s/test_boundary = .false./test_boundary = .true./" input_base.nml > input.nml test_expect_success "boundary" ' mpirun -n 6 ../test_mpp_domains