The `mod` function sometimes appears to return a wrong result when it is used to evaluate equatorial symmetry. This happens when the Intel compiler is used with AVX-512-related options.
Here is the subroutine in sym_rj_base_field.f90
!
!>  Split a scalar spectral field into the parts that are symmetric
!!  and anti-symmetric with respect to the equator.
!!
!!  For every node the mode index j = sph_rj%idx_global_rj(inod,2) is
!!  decoded as j = l*(l+1) + m.  The value in column ipol_fld is
!!  multiplied by 1 and stored in column ipol_sym when (l - |m|) is
!!  even (and by 0 when odd); column ipol_asym receives the opposite.
!!
!!  @param[in]    sph_rj     supplies nnod_rj and idx_global_rj
!!  @param[inout] rj_fld     field data; d_fld(:,ipol_sym) and
!!                           d_fld(:,ipol_asym) are overwritten
!!  @param[in]    ipol_fld   column of d_fld holding the input scalar
!!  @param[in]    ipol_sym   column receiving the symmetric part
!!                           (skipped when not positive)
!!  @param[in]    ipol_asym  column receiving the anti-symmetric part
!!                           (skipped when not positive)
subroutine decomp_rj_scalar(sph_rj,                                 &
     &          rj_fld, ipol_fld, ipol_sym, ipol_asym)
!
      integer(kind = kint), intent(in) :: ipol_fld, ipol_sym, ipol_asym
      type(sph_rj_grid), intent(in) :: sph_rj
      type(phys_data), intent(inout) :: rj_fld
!
      integer(kind = kint) :: inod, l_gl, m_gl, j, lm_odd
      logical :: flag_sym, flag_asym
!
!
!     NOTE(review): a non-positive column index is treated as
!     "component not requested" -- confirm against the callers.
      flag_sym =  (ipol_sym .gt. 0)
      flag_asym = (ipol_asym .gt. 0)
      if(.not. (flag_sym .or. flag_asym)) return
!
!     The degree l must be recovered exactly.  Truncating a
!     floating-point square root is not safe: when the vectorised
!     sqrt returns a value slightly below an exact integer (seen for
!     perfect squares such as j = 729 or 2916), l comes out one too
!     small, m = j - l*(l+1) becomes larger than l, and
!     mod(l - |m|, 2) evaluates to -1 instead of 0.
!$omp parallel do private(inod, j, l_gl, m_gl, lm_odd)
      do inod = 1, sph_rj%nnod_rj
        j =    sph_rj%idx_global_rj(inod,2)
        l_gl = degree_from_mode_index(j)
        m_gl = j - l_gl*(l_gl + 1)
!
!       modulo() with a positive divisor never returns a negative
!       value, so lm_odd is always 0 or 1.
        lm_odd = modulo(l_gl - abs(m_gl), 2)
!
        if(flag_sym) then
          rj_fld%d_fld(inod, ipol_sym)                              &
     &        = dble(1-lm_odd) * rj_fld%d_fld(inod, ipol_fld)
        end if
        if(flag_asym) then
          rj_fld%d_fld(inod, ipol_asym)                             &
     &        = dble(lm_odd) * rj_fld%d_fld(inod, ipol_fld)
        end if
      end do
!$omp end parallel do
!
!     Diagnostic output for a few selected modes.  It reads both
!     output columns, so it only runs when both are present.  Each
!     selected mode is reported once per node.
      if(flag_sym .and. flag_asym) then
        do inod = 1, sph_rj%nnod_rj
          j = sph_rj%idx_global_rj(inod,2)
          select case(j)
          case(729, 961, 2916, 11663, 11664)
            l_gl = degree_from_mode_index(j)
            m_gl = j - l_gl*(l_gl + 1)
            write(*,*) 'splitted', sph_rj%idx_global_rj(inod,1:2),  &
     &          l_gl, m_gl, rj_fld%d_fld(inod, ipol_fld),           &
     &          rj_fld%d_fld(inod, ipol_sym),                       &
     &          rj_fld%d_fld(inod, ipol_asym)
          end select
        end do
      end if
!
      contains
!
!>    Return the largest integer l with l*l <= j_gl.
!!    The double-precision square root only provides a first guess;
!!    the two integer loops correct it, so the result does not depend
!!    on how the compiler rounds or vectorises sqrt().
!!    Assumes j_gl >= 0.
      pure function degree_from_mode_index(j_gl) result(l_deg)
!
      integer(kind = kint), intent(in) :: j_gl
      integer(kind = kint) :: l_deg
!
      l_deg = int(sqrt(dble(j_gl)), kind = kint)
      do while(l_deg*l_deg .gt. j_gl)
        l_deg = l_deg - 1
      end do
      do while((l_deg+1)*(l_deg+1) .le. j_gl)
        l_deg = l_deg + 1
      end do
!
      end function degree_from_mode_index
!
end subroutine decomp_rj_scalar
!
In the correct case, the log should be:
Warning: no access to tty (Bad file descriptor).
Thus no job control in this shell.
term: Undefined variable.
/home/matsui/kemo_b/sph_shell_139_4
Intel(R) Parallel Studio XE 2019 Update 4 for Linux*
Copyright (C) 2009-2019 Intel Corporation. All rights reserved.
...
spherical harmonics table exists
Read gzipped merged ascii grid file: sph_lm159t240r160c_ic_80_8/in.rtp.gz
Read gzipped merged ascii spectr modes file:
sph_lm159t240r160c_ic_80_8/in.rj.gz
Read gzipped merged ascii grid file: sph_lm159t240r160c_ic_80_8/in.rtm.gz
Read gzipped merged ascii spectr modes file:
sph_lm159t240r160c_ic_80_8/in.rlm.gz
Read gzipped merged mesh file: sph_lm159t240r160c_ic_80_8/in.gfm.gz
Failed export by set_element_export_item 0
edge position is successfully syncronizad
coefficient for temperature: 1.00000000000000
coefficient for thermal diffusion: 1.00000000000000
coefficient for heat source: 3.20000000000000
coefficient for velocity: 1.00000000000000
coefficient for pressure: 20000.0000000000
coefficient for viscous diffusion: 1.00000000000000
coefficient for buoyancy: 26000000.6500000
coefficient for coriolis force: 40000.0000000000
coefficient for Lorentz force: 20000.0000000000
coefficient for magnetic field: 1.00000000000000
coefficient for magnetic potential: 1.00000000000000
coefficient for magnetic diffusion: 1.00000000000000
coefficient for induction: 1.00000000000000
check rtp -> rtm
check rtm -> rtp
check rj -> rlm
check rlm -> rj
check rtp -> rtm
check rtm -> rtp
check rj -> rlm
check rlm -> rj
test send_recv with reg. import
test send_recv with rev. import
0: Time by reg. import list: 9.878099678317086E-002
1: Time by rev. import list: 9.351379003783222E-002
Vector length for Legendre transform: 0
Block number for meridinal grid: 1
Use single transform in FFTW
Selected Fourier transform: FFTW ( 12)
Use single transform in FFTW
Communication mode for sph. transform: 1 (reversed_table)
Selected Legendre transform type: 4 (SYMMETRIC_MATMUL_BIG)
Read gzipped ascii data by MPI-IO: rst_80/rst.6500.fst.gz
Read 1 velocity
Read 2 temperature
Read 3 magnetic_field
Read 4 heat_source
Read 5 previous_momentum
Read 6 previous_induction
Read 7 previous_heat
splitted 1 729 27 -27
8.939645680558843E-034 8.939645680558843E-034 0.000000000000000E+000
splitted 1 729 27 -27
8.939645680558843E-034 8.939645680558843E-034 0.000000000000000E+000
splitted 2 729 27 -27
3.387224435688200E-031 3.387224435688200E-031 0.000000000000000E+000
splitted 2 729 27 -27
3.387224435688200E-031 3.387224435688200E-031 0.000000000000000E+000
splitted 3 729 27 -27
4.308479018990698E-029 4.308479018990698E-029 0.000000000000000E+000
splitted 3 729 27 -27
4.308479018990698E-029 4.308479018990698E-029 0.000000000000000E+000
But in the failed case, the output was:
Warning: no access to tty (Bad file descriptor).
Thus no job control in this shell.
term: Undefined variable.
/home/matsui/kemo_b/sph_shell_139_4
Intel(R) Parallel Studio XE 2019 Update 4 for Linux*
Copyright (C) 2009-2019 Intel Corporation. All rights reserved.
Simulation start: PE. 5
Simulation start: PE. 20
Simulation start: PE. 60
Simulation start: PE. 8
Simulation start: PE. 21
Simulation start: PE. 61
Simulation start: PE. 40
Simulation start: PE. 9
Simulation start: PE. 22
Simulation start: PE. 62
Simulation start: PE. 41
Simulation start: PE. 0
Simulation start: PE. 23
Simulation start: PE. 63
Simulation start: PE. 42
Simulation start: PE. 1
Simulation start: PE. 24
Simulation start: PE. 64
Simulation start: PE. 43
Simulation start: PE. 2
Simulation start: PE. 25
Simulation start: PE. 65
Simulation start: PE. 44
Simulation start: PE. 3
Simulation start: PE. 26
Simulation start: PE. 66
Simulation start: PE. 45
Simulation start: PE. 4
Simulation start: PE. 27
Simulation start: PE. 67
Simulation start: PE. 46
Simulation start: PE. 6
Simulation start: PE. 28
Simulation start: PE. 68
Simulation start: PE. 47
Simulation start: PE. 7
Simulation start: PE. 29
Simulation start: PE. 69
Simulation start: PE. 48
Simulation start: PE. 49
Simulation start: PE. 55
Simulation start: PE. 56
Simulation start: PE. 58
Simulation start: PE. 59
Simulation start: PE. 50
Simulation start: PE. 51
Simulation start: PE. 10
Simulation start: PE. 52
Simulation start: PE. 11
Simulation start: PE. 53
Simulation start: PE. 12
Simulation start: PE. 54
Simulation start: PE. 13
Simulation start: PE. 57
Simulation start: PE. 14
Simulation start: PE. 15
Simulation start: PE. 16
Simulation start: PE. 17
Simulation start: PE. 18
Simulation start: PE. 19
Simulation start: PE. 30
Simulation start: PE. 31
Simulation start: PE. 72
Simulation start: PE. 33
Simulation start: PE. 74
Simulation start: PE. 35
Simulation start: PE. 76
Simulation start: PE. 38
Simulation start: PE. 77
Simulation start: PE. 39
Simulation start: PE. 78
Simulation start: PE. 32
Simulation start: PE. 79
Simulation start: PE. 34
Simulation start: PE. 70
Simulation start: PE. 36
Simulation start: PE. 71
Simulation start: PE. 37
Simulation start: PE. 73
Simulation start: PE. 75
spherical harmonics table exists
Read gzipped merged ascii grid file: sph_lm159t240r160c_ic_80_8/in.rtp.gz
Read gzipped merged ascii spectr modes file:
sph_lm159t240r160c_ic_80_8/in.rj.gz
Read gzipped merged ascii grid file: sph_lm159t240r160c_ic_80_8/in.rtm.gz
Read gzipped merged ascii spectr modes file:
sph_lm159t240r160c_ic_80_8/in.rlm.gz
Read gzipped merged mesh file: sph_lm159t240r160c_ic_80_8/in.gfm.gz
Failed export by set_element_export_item 0
edge position is successfully syncronizad
coefficient for temperature: 1.00000000000000
coefficient for thermal diffusion: 1.00000000000000
coefficient for heat source: 3.20000000000000
coefficient for velocity: 1.00000000000000
coefficient for pressure: 20000.0000000000
coefficient for viscous diffusion: 1.00000000000000
coefficient for buoyancy: 26000000.6500000
coefficient for coriolis force: 40000.0000000000
coefficient for Lorentz force: 20000.0000000000
coefficient for magnetic field: 1.00000000000000
coefficient for magnetic potential: 1.00000000000000
coefficient for magnetic diffusion: 1.00000000000000
coefficient for induction: 1.00000000000000
check rtp -> rtm
check rtm -> rtp
check rj -> rlm
check rlm -> rj
check rtp -> rtm
check rtm -> rtp
check rj -> rlm
check rlm -> rj
test send_recv with reg. import
test send_recv with rev. import
0: Time by reg. import list: 9.703509896353353E-002
1: Time by rev. import list: 9.337021347018891E-002
Vector length for Legendre transform: 0
Block number for meridinal grid: 1
Use single transform in FFTW
Selected Fourier transform: FFTW ( 12)
Use single transform in FFTW
Communication mode for sph. transform: 1 (reversed_table)
Selected Legendre transform type: 4 (SYMMETRIC_MATMUL_BIG)
Read gzipped ascii data by MPI-IO: rst_80/rst.6500.fst.gz
Read 1 velocity
Read 2 temperature
Read 3 magnetic_field
Read 4 heat_source
Read 5 previous_momentum
Read 6 previous_induction
Read 7 previous_heat
splitted 1 729 27 -27
8.939645680558843E-034 1.787929136111769E-033 -8.939645680558843E-034
splitted 1 729 27 -27
8.939645680558843E-034 1.787929136111769E-033 -8.939645680558843E-034
splitted 2 729 27 -27
3.387224435688200E-031 6.774448871376400E-031 -3.387224435688200E-031
splitted 2 729 27 -27
3.387224435688200E-031 6.774448871376400E-031 -3.387224435688200E-031
splitted 1 2916 54 -54
-2.653351188079307E-054 -5.306702376158614E-054 2.653351188079307E-054
splitted 2 2916 54 -54
-3.942554344170903E-051 -7.885108688341806E-051 3.942554344170903E-051
splitted 3 2916 54 -54