Changeset 4366 for palm/trunk/SOURCE/transpose.f90
- Timestamp: Jan 9, 2020 8:12:43 AM (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SOURCE/transpose.f90
r4360 r4366
   25    25    ! -----------------
   26    26    ! $Id$
         27  + ! modifications for NEC vectorization
         28  + !
         29  + ! 4360 2020-01-07 11:25:50Z suehring
   27    30    ! Added missing OpenMP directives
   28    31    !
    …     …
  266   269            ONLY: cpu_log, cpu_log_nowait, log_point_s
  267   270
        271  +     USE fft_xy, &
        272  +         ONLY: f_vec, temperton_fft_vec
        273  +
  268   274        USE indices, &
  269   275            ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
    …     …
  282   288        INTEGER(iwp) :: k !<
  283   289        INTEGER(iwp) :: l !<
        290  +     INTEGER(iwp) :: mm !<
  284   291        INTEGER(iwp) :: xs !<
  285   292
    …     …
  292   299    #endif
  293   300
  294        -
  295        - !
  296        - !-- If the PE grid is one-dimensional along y, the array has only to be
  297        - !-- reordered locally and therefore no transposition has to be done.
        301  + !
        302  + !-- If the PE grid is one-dimensional along y, the array has only to be
        303  + !-- reordered locally and therefore no transposition has to be done.
  298   304        IF ( pdims(1) /= 1 ) THEN
  299   305
  300   306    #if defined( __parallel )
  301   307    !
  302        - !-- Reorder input array for transposition
  303        - !$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
  304        -        DO l = 0, pdims(1) - 1
  305        -           xs = 0 + l * nnx
  306        - #if __acc_fft_device
  307        - !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
  308        - !$ACC PRESENT(work, f_in)
  309        - #endif
  310        - !$OMP DO
  311        -           DO k = nzb_x, nzt_x
  312        -              DO i = xs, xs + nnx - 1
  313        -                 DO j = nys_x, nyn_x
  314        -                    work(j,i-xs+1,k,l) = f_in(i,j,k)
        308  + !-- Reorder input array for transposition. Data from the vectorized Temperton-fft is stored in
        309  + !-- different array format (f_vec).
        310  +        IF ( temperton_fft_vec ) THEN
        311  +
        312  +           DO l = 0, pdims(1) - 1
        313  +              xs = 0 + l * nnx
        314  +              DO k = nzb_x, nzt_x
        315  +                 DO i = xs, xs + nnx - 1
        316  +                    DO j = nys_x, nyn_x
        317  +                       mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1)
        318  +                       work(j,i-xs+1,k,l) = f_vec(mm,i)
        319  +                    ENDDO
  315   320                    ENDDO
  316   321                 ENDDO
  317   322              ENDDO
  318        - !$OMP END DO NOWAIT
  319        -        ENDDO
  320        - !$OMP END PARALLEL
        323  +
        324  +        ELSE
        325  +
        326  + !$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
        327  +           DO l = 0, pdims(1) - 1
        328  +              xs = 0 + l * nnx
        329  + #if __acc_fft_device
        330  + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
        331  + !$ACC PRESENT(work, f_in)
        332  + #endif
        333  + !$OMP DO
        334  +              DO k = nzb_x, nzt_x
        335  +                 DO i = xs, xs + nnx - 1
        336  +                    DO j = nys_x, nyn_x
        337  +                       work(j,i-xs+1,k,l) = f_in(i,j,k)
        338  +                    ENDDO
        339  +                 ENDDO
        340  +              ENDDO
        341  + !$OMP END DO NOWAIT
        342  +           ENDDO
        343  + !$OMP END PARALLEL
        344  +
        345  +        ENDIF
  321   346
  322   347    !
    …     …
  836   861            ONLY: cpu_log, cpu_log_nowait, log_point_s
  837   862
        863  +     USE fft_xy, &
        864  +         ONLY: f_vec, temperton_fft_vec
        865  +
  838   866        USE indices, &
  839   867            ONLY: nnx, nx, nxl, nxr, nyn, nys, nz
    …     …
  852   880        INTEGER(iwp) :: k !<
  853   881        INTEGER(iwp) :: l !<
        882  +     INTEGER(iwp) :: mm !<
  854   883        INTEGER(iwp) :: xs !<
  855   884
    …     …
  914   943
  915   944    !
  916        - !-- Reorder transposed array
  917        - ! $OMP PARALLEL PRIVATE ( i, j, k, l, xs )
  918        -        DO l = 0, pdims(1) - 1
  919        -           xs = 0 + l * nnx
  920        - #if __acc_fft_device
  921        - !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
  922        - !$ACC PRESENT(f_out, work)
  923        - #endif
  924        - !$OMP DO
  925        -           DO k = nzb_x, nzt_x
  926        -              DO i = xs, xs + nnx - 1
  927        -                 DO j = nys_x, nyn_x
  928        -                    f_out(i,j,k) = work(j,i-xs+1,k,l)
        945  + !-- Reorder transposed array.
        946  + !-- Data for the vectorized Temperton-fft is stored in different array format (f_vec) which saves
        947  + !-- additional data copy in fft_x.
        948  +        IF ( temperton_fft_vec ) THEN
        949  +
        950  +           DO l = 0, pdims(1) - 1
        951  +              xs = 0 + l * nnx
        952  +              DO k = nzb_x, nzt_x
        953  +                 DO i = xs, xs + nnx - 1
        954  +                    DO j = nys_x, nyn_x
        955  +                       mm = j-nys_x+1+(k-nzb_x)*(nyn_x-nys_x+1)
        956  +                       f_vec(mm,i) = work(j,i-xs+1,k,l)
        957  +                    ENDDO
  929   958                    ENDDO
  930   959                 ENDDO
  931   960              ENDDO
  932        - !$OMP END DO NOWAIT
  933        -        ENDDO
  934        - !$OMP END PARALLEL
  935        - #endif
        961  +
        962  +        ELSE
        963  +
        964  + !$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
        965  +           DO l = 0, pdims(1) - 1
        966  +              xs = 0 + l * nnx
        967  + #if __acc_fft_device
        968  + !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
        969  + !$ACC PRESENT(f_out, work)
        970  + #endif
        971  + !$OMP DO
        972  +              DO k = nzb_x, nzt_x
        973  +                 DO i = xs, xs + nnx - 1
        974  +                    DO j = nys_x, nyn_x
        975  +                       f_out(i,j,k) = work(j,i-xs+1,k,l)
        976  +                    ENDDO
        977  +                 ENDDO
        978  +              ENDDO
        979  + !$OMP END DO NOWAIT
        980  +           ENDDO
        981  + !$OMP END PARALLEL
        982  + #endif
        983  +
        984  +        ENDIF
  936   985
  937   986        ENDIF
Note: See TracChangeset for help on using the changeset viewer.