Changeset 1216 for palm/trunk
- Timestamp:
- Aug 26, 2013 9:31:42 AM (11 years ago)
- Location:
- palm/trunk
- Files:
-
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SCRIPTS/mbuild
r1211 r1216 22 22 # Current revisions: 23 23 # ------------------ 24 # 24 # RCS renamed SOURCES 25 25 # 26 26 # Former revisions: … … 369 369 cat Makefile_check|while read line 370 370 do 371 line=$(echo $line|grep RCS)372 if [[ $line == *" RCS"* ]]373 then 374 line=$(echo $line|sed 's/ RCS = //g')371 line=$(echo $line|grep SOURCES) 372 if [[ $line == *"SOURCES"* ]] 373 then 374 line=$(echo $line|sed 's/SOURCES = //g') 375 375 break 376 376 fi -
palm/trunk/SOURCE/Makefile_check
r1213 r1216 20 20 # Current revisions: 21 21 # ------------------ 22 # 22 # +tridia_solver 23 23 # 24 24 # Former revisions: … … 71 71 local_system.f90 message.f90 modules.f90 package_parin.f90 parin.f90 \ 72 72 poisfft.f90 random_function.f90 singleton.f90 \ 73 subsidence.f90 temperton_fft.f90 \73 subsidence.f90 temperton_fft.f90 tridia_solver.f90 \ 74 74 user_3d_data_averaging.f90 user_actions.f90 \ 75 75 user_additional_routines.f90 user_check_data_output.f90 \ … … 82 82 user_lpm_init.f90 user_lpm_set_attributes.f90 user_module.f90 \ 83 83 user_parin.f90 user_read_restart_data.f90 user_spectra.f90 \ 84 user_statistics.f90 \ 85 84 user_statistics.f90 86 85 87 86 OBJS=$(SOURCES:.f90=.o) … … 131 130 package_parin.o: modules.o 132 131 parin.o: modules.o 133 poisfft.o: modules.o fft_xy.o 132 poisfft.o: modules.o fft_xy.o tridia_solver.o 134 133 random_function.o: modules.o 135 134 singleton.o: singleton.f90 136 135 subsidence.o: modules.o 137 136 temperton_fft.o: modules.o 137 tridia_solver.o: modules.o 138 138 user_3d_data_averaging.o: modules.o user_module.o 139 139 user_actions.o: modules.o user_module.o -
palm/trunk/SOURCE/calc_spectra.f90
r1121 r1216 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! resorting of array moved to separate routine resort_for_zx, 23 ! one argument removed from the transpose_..d routines 23 24 ! 24 25 ! Former revisions: … … 121 122 #if defined( __parallel ) 122 123 IF ( pdims(2) /= 1 ) THEN 123 CALL transpose_zx( d, tend, d ) 124 CALL resort_for_zx( d, tend ) 125 CALL transpose_zx( tend, d ) 124 126 ELSE 125 CALL transpose_yxd( d, tend,d )127 CALL transpose_yxd( d, d ) 126 128 ENDIF 127 129 CALL calc_spectra_x( d, pr, m ) … … 153 155 154 156 #if defined( __parallel ) 155 CALL transpose_zyd( d, tend,d )157 CALL transpose_zyd( d, d ) 156 158 CALL calc_spectra_y( d, pr, m ) 157 159 #else -
palm/trunk/SOURCE/check_parameters.f90
r1215 r1216 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! check for transpose_compute_overlap (temporary) 23 23 ! 24 24 ! Former revisions: … … 319 319 LOGICAL :: found, ldum 320 320 REAL :: gradient, remote = 0.0, simulation_time_since_reference 321 322 ! 323 !-- Check for overlap combinations, which are not realized yet 324 IF ( transpose_compute_overlap ) THEN 325 IF ( numprocs == 1 ) STOP '+++ transpose-compute-overlap not implemented for single PE runs' 326 #if defined( __openacc ) 327 STOP '+++ transpose-compute-overlap not implemented for GPU usage' 328 #endif 329 ENDIF 321 330 322 331 ! -
palm/trunk/SOURCE/fft_xy.f90
r1211 r1216 286 286 287 287 288 SUBROUTINE fft_x( ar, direction )288 SUBROUTINE fft_x( ar, direction, ar_2d ) 289 289 290 290 !----------------------------------------------------------------------! … … 321 321 COMPLEX(dpk), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: ar_tmp 322 322 #endif 323 REAL, DIMENSION(0:nx,nys_x:nyn_x), OPTIONAL :: ar_2d 323 324 REAL, DIMENSION(0:nx,nys_x:nyn_x,nzb_x:nzt_x) :: ar 324 325 … … 454 455 CALL FFTW_EXECUTE_DFT_R2C( plan_xf, x_in, x_out ) 455 456 456 DO i = 0, (nx+1)/2 457 ar(i,j,k) = REAL( x_out(i) ) / ( nx+1 ) 458 ENDDO 459 DO i = 1, (nx+1)/2 - 1 460 ar(nx+1-i,j,k) = AIMAG( x_out(i) ) / ( nx+1 ) 461 ENDDO 457 IF ( PRESENT( ar_2d ) ) THEN 458 459 DO i = 0, (nx+1)/2 460 ar_2d(i,j) = REAL( x_out(i) ) / ( nx+1 ) 461 ENDDO 462 DO i = 1, (nx+1)/2 - 1 463 ar_2d(nx+1-i,j) = AIMAG( x_out(i) ) / ( nx+1 ) 464 ENDDO 465 466 ELSE 467 468 DO i = 0, (nx+1)/2 469 ar(i,j,k) = REAL( x_out(i) ) / ( nx+1 ) 470 ENDDO 471 DO i = 1, (nx+1)/2 - 1 472 ar(nx+1-i,j,k) = AIMAG( x_out(i) ) / ( nx+1 ) 473 ENDDO 474 475 ENDIF 462 476 463 477 ENDDO … … 465 479 !$OMP END PARALLEL 466 480 467 ELSE481 ELSE 468 482 !$OMP PARALLEL PRIVATE ( work, i, j, k ) 469 483 !$OMP DO … … 471 485 DO j = nys_x, nyn_x 472 486 473 x_out(0) = CMPLX( ar(0,j,k), 0.0 ) 474 DO i = 1, (nx+1)/2 - 1 475 x_out(i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k) ) 476 ENDDO 477 x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0 ) 487 IF ( PRESENT( ar_2d ) ) THEN 488 489 x_out(0) = CMPLX( ar_2d(0,j), 0.0 ) 490 DO i = 1, (nx+1)/2 - 1 491 x_out(i) = CMPLX( ar_2d(i,j), ar_2d(nx+1-i,j) ) 492 ENDDO 493 x_out((nx+1)/2) = CMPLX( ar_2d((nx+1)/2,j), 0.0 ) 494 495 ELSE 496 497 x_out(0) = CMPLX( ar(0,j,k), 0.0 ) 498 DO i = 1, (nx+1)/2 - 1 499 x_out(i) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k) ) 500 ENDDO 501 x_out((nx+1)/2) = CMPLX( ar((nx+1)/2,j,k), 0.0 ) 502 503 ENDIF 478 504 479 505 CALL FFTW_EXECUTE_DFT_C2R( plan_xi, x_out, x_in) … … 484 510 !$OMP END PARALLEL 485 511 486 ENDIF512 ENDIF 487 513 #endif 488 514 … … 766 792 ENDIF 767 793 794 ELSEIF ( fft_method == 'fftw' ) THEN 795 796 #if defined( __fftw ) 797 IF ( forward_fft ) THEN 798 799 x_in(0:nx) = ar(0:nx) 800 CALL FFTW_EXECUTE_DFT_R2C( plan_xf, x_in, x_out ) 801 802 DO i = 0, (nx+1)/2 803 ar(i) = REAL( x_out(i) ) / ( nx+1 ) 804 ENDDO 805 DO i = 1, (nx+1)/2 - 1 806 ar(nx+1-i) = AIMAG( x_out(i) ) / ( nx+1 ) 807 ENDDO 808 809 ELSE 810 811 x_out(0) = CMPLX( ar(0), 0.0 ) 812 DO i = 1, (nx+1)/2 - 1 813 x_out(i) = CMPLX( ar(i), ar(nx+1-i) ) 814 ENDDO 815 x_out((nx+1)/2) = CMPLX( ar((nx+1)/2), 0.0 ) 816 817 CALL FFTW_EXECUTE_DFT_C2R( plan_xi, x_out, x_in) 818 ar(0:nx) = x_in(0:nx) 819 820 ENDIF 821 #endif 822 768 823 ELSEIF ( fft_method == 'system-specific' ) THEN 769 824 … … 843 898 END SUBROUTINE fft_x_1d 844 899 845 SUBROUTINE fft_y( ar, direction ) 900 SUBROUTINE fft_y( ar, direction, ar_tr, nxl_y_bound, nxr_y_bound, nxl_y_l, & 901 nxr_y_l ) 846 902 847 903 !----------------------------------------------------------------------! … … 853 909 ! fft_y uses internal algorithms (Singleton or Temperton) or ! 854 910 ! system-specific routines, if they are available ! 911 ! ! 912 ! direction: 'forward' or 'backward' ! 913 ! ar, ar_tr: 3D data arrays ! 914 ! forward: ar: before ar_tr: after transformation ! 915 ! backward: ar_tr: before ar: after transfosition ! 916 ! ! 917 ! In case of non-overlapping transposition/transformation: ! 918 ! nxl_y_bound = nxl_y_l = nxl_y ! 919 ! nxr_y_bound = nxr_y_l = nxr_y ! 920 ! ! 921 ! In case of overlapping transposition/transformation ! 922 ! - nxl_y_bound and nxr_y_bound have the original values of ! 923 ! nxl_y, nxr_y. ar_tr is dimensioned using these values. ! 924 ! - nxl_y_l = nxr_y_r. ar is dimensioned with these values, so that ! 925 ! transformation is carried out for a 2D-plane only. ! 855 926 !----------------------------------------------------------------------! 856 927 … … 864 935 CHARACTER (LEN=*) :: direction 865 936 INTEGER :: i, j, jshape(1), k 937 INTEGER :: nxl_y_bound, nxl_y_l, nxr_y_bound, nxr_y_l 866 938 867 939 LOGICAL :: forward_fft … … 878 950 COMPLEX(dpk), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: ar_tmp 879 951 #endif 880 REAL, DIMENSION(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) :: ar 952 REAL, DIMENSION(0:ny,nxl_y_l:nxr_y_l,nzb_y:nzt_y) :: ar 953 REAL, DIMENSION(0:ny,nxl_y_bound:nxr_y_bound,nzb_y:nzt_y) :: ar_tr 881 954 882 955 IF ( direction == 'forward' ) THEN … … 898 971 !$OMP DO 899 972 DO k = nzb_y, nzt_y 900 DO i = nxl_y , nxr_y973 DO i = nxl_y_l, nxr_y_l 901 974 902 975 DO j = 0, ny … … 908 981 909 982 DO j = 0, (ny+1)/2 910 ar (j,i,k) = REAL( cwork(j) )983 ar_tr(j,i,k) = REAL( cwork(j) ) 911 984 ENDDO 912 985 DO j = 1, (ny+1)/2 - 1 913 ar (ny+1-j,i,k) = -AIMAG( cwork(j) )986 ar_tr(ny+1-j,i,k) = -AIMAG( cwork(j) ) 914 987 ENDDO 915 988 … … 923 996 !$OMP DO 924 997 DO k = nzb_y, nzt_y 925 DO i = nxl_y , nxr_y926 927 cwork(0) = CMPLX( ar (0,i,k), 0.0 )998 DO i = nxl_y_l, nxr_y_l 999 1000 cwork(0) = CMPLX( ar_tr(0,i,k), 0.0 ) 928 1001 DO j = 1, (ny+1)/2 - 1 929 cwork(j) = CMPLX( ar (j,i,k), -ar(ny+1-j,i,k) )930 cwork(ny+1-j) = CMPLX( ar (j,i,k), ar(ny+1-j,i,k) )931 ENDDO 932 cwork((ny+1)/2) = CMPLX( ar ((ny+1)/2,i,k), 0.0 )1002 cwork(j) = CMPLX( ar_tr(j,i,k), -ar_tr(ny+1-j,i,k) ) 1003 cwork(ny+1-j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k) ) 1004 ENDDO 1005 cwork((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0 ) 933 1006 934 1007 jshape = SHAPE( cwork ) … … 957 1030 !$OMP DO 958 1031 DO k = nzb_y, nzt_y 959 DO i = nxl_y , nxr_y1032 DO i = nxl_y_l, nxr_y_l 960 1033 961 1034 work(0:ny) = ar(0:ny,i,k) … … 963 1036 964 1037 DO j = 0, (ny+1)/2 965 ar (j,i,k) = work(2*j)1038 ar_tr(j,i,k) = work(2*j) 966 1039 ENDDO 967 1040 DO j = 1, (ny+1)/2 - 1 968 ar (ny+1-j,i,k) = work(2*j+1)1041 ar_tr(ny+1-j,i,k) = work(2*j+1) 969 1042 ENDDO 970 1043 … … 978 1051 !$OMP DO 979 1052 DO k = nzb_y, nzt_y 980 DO i = nxl_y , nxr_y1053 DO i = nxl_y_l, nxr_y_l 981 1054 982 1055 DO j = 0, (ny+1)/2 983 work(2*j) = ar (j,i,k)1056 work(2*j) = ar_tr(j,i,k) 984 1057 ENDDO 985 1058 DO j = 1, (ny+1)/2 - 1 986 work(2*j+1) = ar (ny+1-j,i,k)1059 work(2*j+1) = ar_tr(ny+1-j,i,k) 987 1060 ENDDO 988 1061 work(1) = 0.0 … … 1006 1079 !$OMP DO 1007 1080 DO k = nzb_y, nzt_y 1008 DO i = nxl_y , nxr_y1081 DO i = nxl_y_l, nxr_y_l 1009 1082 1010 1083 y_in(0:ny) = ar(0:ny,i,k) … … 1012 1085 1013 1086 DO j = 0, (ny+1)/2 1014 ar (j,i,k) = REAL( y_out(j) ) /(ny+1)1087 ar_tr(j,i,k) = REAL( y_out(j) ) / (ny+1) 1015 1088 ENDDO 1016 1089 DO j = 1, (ny+1)/2 - 1 1017 ar (ny+1-j,i,k) = AIMAG( y_out(j) ) /(ny+1)1090 ar_tr(ny+1-j,i,k) = AIMAG( y_out(j) ) / (ny+1) 1018 1091 ENDDO 1019 1092 … … 1027 1100 !$OMP DO 1028 1101 DO k = nzb_y, nzt_y 1029 DO i = nxl_y , nxr_y1030 1031 y_out(0) = CMPLX( ar (0,i,k), 0.0 )1102 DO i = nxl_y_l, nxr_y_l 1103 1104 y_out(0) = CMPLX( ar_tr(0,i,k), 0.0 ) 1032 1105 DO j = 1, (ny+1)/2 - 1 1033 y_out(j) = CMPLX( ar (j,i,k), ar(ny+1-j,i,k) )1034 ENDDO 1035 y_out((ny+1)/2) = CMPLX( ar ((ny+1)/2,i,k), 0.0 )1106 y_out(j) = CMPLX( ar_tr(j,i,k), ar_tr(ny+1-j,i,k) ) 1107 ENDDO 1108 y_out((ny+1)/2) = CMPLX( ar_tr((ny+1)/2,i,k), 0.0 ) 1036 1109 1037 1110 CALL FFTW_EXECUTE_DFT_C2R( plan_yi, y_out, y_in ) … … 1053 1126 !$OMP DO 1054 1127 DO k = nzb_y, nzt_y 1055 DO i = nxl_y , nxr_y1128 DO i = nxl_y_l, nxr_y_l 1056 1129 1057 1130 CALL DRCFT( 0, ar, 1, work, 1, ny+1, 1, 1, sqr_dny, auy1, nau1, & … … 1059 1132 1060 1133 DO j = 0, (ny+1)/2 1061 ar (j,i,k) = work(2*j)1134 ar_tr(j,i,k) = work(2*j) 1062 1135 ENDDO 1063 1136 DO j = 1, (ny+1)/2 - 1 1064 ar (ny+1-j,i,k) = work(2*j+1)1137 ar_tr(ny+1-j,i,k) = work(2*j+1) 1065 1138 ENDDO 1066 1139 … … 1074 1147 !$OMP DO 1075 1148 DO k = nzb_y, nzt_y 1076 DO i = nxl_y , nxr_y1149 DO i = nxl_y_l, nxr_y_l 1077 1150 1078 1151 DO j = 0, (ny+1)/2 1079 work(2*j) = ar (j,i,k)1152 work(2*j) = ar_tr(j,i,k) 1080 1153 ENDDO 1081 1154 DO j = 1, (ny+1)/2 - 1 1082 work(2*j+1) = ar (ny+1-j,i,k)1155 work(2*j+1) = ar_tr(ny+1-j,i,k) 1083 1156 ENDDO 1084 1157 work(1) = 0.0 … … 1103 1176 !$OMP DO 1104 1177 DO k = nzb_y, nzt_y 1105 DO i = nxl_y , nxr_y1178 DO i = nxl_y_l, nxr_y_l 1106 1179 1107 1180 work(0:ny) = ar(0:ny,i,k) … … 1110 1183 1111 1184 DO j = 0, (ny+1)/2 1112 ar (j,i,k) = work(2*j)1185 ar_tr(j,i,k) = work(2*j) 1113 1186 ENDDO 1114 1187 DO j = 1, (ny+1)/2 - 1 1115 ar (ny+1-j,i,k) = work(2*j+1)1188 ar_tr(ny+1-j,i,k) = work(2*j+1) 1116 1189 ENDDO 1117 1190 … … 1125 1198 !$OMP DO 1126 1199 DO k = nzb_y, nzt_y 1127 DO i = nxl_y , nxr_y1200 DO i = nxl_y_l, nxr_y_l 1128 1201 1129 1202 DO j = 0, (ny+1)/2 1130 work(2*j) = ar (j,i,k)1203 work(2*j) = ar_tr(j,i,k) 1131 1204 ENDDO 1132 1205 DO j = 1, (ny+1)/2 - 1 1133 work(2*j+1) = ar (ny+1-j,i,k)1206 work(2*j+1) = ar_tr(ny+1-j,i,k) 1134 1207 ENDDO 1135 1208 work(1) = 0.0 … … 1322 1395 1323 1396 ENDIF 1397 1398 ELSEIF ( fft_method == 'fftw' ) THEN 1399 1400 #if defined( __fftw ) 1401 IF ( forward_fft ) THEN 1402 1403 y_in(0:ny) = ar(0:ny) 1404 CALL FFTW_EXECUTE_DFT_R2C( plan_yf, y_in, y_out ) 1405 1406 DO j = 0, (ny+1)/2 1407 ar(j) = REAL( y_out(j) ) / (ny+1) 1408 ENDDO 1409 DO j = 1, (ny+1)/2 - 1 1410 ar(ny+1-j) = AIMAG( y_out(j) ) / (ny+1) 1411 ENDDO 1412 1413 ELSE 1414 1415 y_out(0) = CMPLX( ar(0), 0.0 ) 1416 DO j = 1, (ny+1)/2 - 1 1417 y_out(j) = CMPLX( ar(j), ar(ny+1-j) ) 1418 ENDDO 1419 y_out((ny+1)/2) = CMPLX( ar((ny+1)/2), 0.0 ) 1420 1421 CALL FFTW_EXECUTE_DFT_C2R( plan_yi, y_out, y_in ) 1422 ar(0:ny) = y_in(0:ny) 1423 1424 ENDIF 1425 #endif 1324 1426 1325 1427 ELSEIF ( fft_method == 'system-specific' ) THEN -
palm/trunk/SOURCE/header.f90
r1213 r1216 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! output for transpose_compute_overlap 23 23 ! 24 24 ! Former revisions: … … 332 332 IF ( psolver(1:7) == 'poisfft' ) THEN 333 333 WRITE ( io, 111 ) TRIM( fft_method ) 334 IF ( transpose_compute_overlap ) WRITE( io, 115 ) 334 335 ELSEIF ( psolver == 'sor' ) THEN 335 336 WRITE ( io, 112 ) nsor_ini, nsor, omega_sor … … 1653 1654 113 FORMAT (' --> Momentum advection via Piascek-Williams-Scheme (Form C3)', & 1654 1655 ' or Upstream') 1656 115 FORMAT (' FFT and transpositions are overlapping') 1655 1657 116 FORMAT (' --> Scalar advection via Piascek-Williams-Scheme (Form C3)', & 1656 1658 ' or Upstream') -
palm/trunk/SOURCE/modules.f90
r1213 r1216 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! +transpose_compute_overlap, 23 ! several variables are now defined in the serial (non-parallel) case also 23 24 ! 24 25 ! Former revisions: … … 776 777 scalar_rayleigh_damping = .TRUE., sloping_surface = .FALSE., & 777 778 stop_dt = .FALSE., synchronous_exchange = .FALSE., & 778 terminate_run = .FALSE., t urbulence= .FALSE., &779 turbulen t_inflow = .FALSE., use_cmax = .TRUE., &780 use_ initial_profile_as_reference = .FALSE., &779 terminate_run = .FALSE., transpose_compute_overlap = .FALSE., & 780 turbulence = .FALSE., turbulent_inflow = .FALSE., & 781 use_cmax = .TRUE., use_initial_profile_as_reference = .FALSE., & 781 782 use_prescribed_profile_data = .FALSE., & 782 783 use_single_reference_value = .FALSE., & … … 1489 1490 CHARACTER(LEN=2) :: send_receive = 'al' 1490 1491 CHARACTER(LEN=5) :: myid_char = '' 1491 INTEGER :: acc_rank, id_inflow = 0, id_recycling = 0, & 1492 myid = 0, num_acc_per_node = 0, req_count = 0, & 1493 target_id, npex = -1, npey = -1, numprocs = 1, & 1494 numprocs_previous_run = -1, & 1495 tasks_per_node = -9999, threads_per_task = 1 1492 INTEGER :: acc_rank, comm1dx, comm1dy, comm2d, comm_inter, & 1493 comm_palm, id_inflow = 0, id_recycling = 0, ierr, & 1494 myid = 0, myidx = 0, myidy = 0, ndim = 2, ngp_a, & 1495 ngp_o, ngp_xy, ngp_y, npex = -1, npey = -1, & 1496 numprocs = 1, numprocs_previous_run = -1, & 1497 num_acc_per_node = 0, pleft, pnorth, pright, psouth, & 1498 req_count = 0, sendrecvcount_xy, sendrecvcount_yz, & 1499 sendrecvcount_zx, sendrecvcount_zyd, & 1500 sendrecvcount_yxd, target_id, tasks_per_node = -9999, & 1501 threads_per_task = 1, type_x, type_x_int, type_xy, & 1502 type_y, type_y_int 1496 1503 1497 1504 INTEGER :: pdims(2) = 1, req(100) … … 1507 1514 CHARACTER (LEN=MPI_MAX_PORT_NAME) :: port_name 1508 1515 #endif 1509 1510 INTEGER :: comm1dx, comm1dy, comm2d, comm_inter, comm_palm, ierr, myidx, &1511 myidy, ndim = 2, ngp_a, ngp_o, ngp_xy, ngp_y, pleft, pnorth, &1512 pright, psouth, sendrecvcount_xy, sendrecvcount_yz, &1513 sendrecvcount_zx, sendrecvcount_zyd, sendrecvcount_yxd, &1514 type_x, type_x_int, type_xy, type_y, type_y_int1515 1516 1516 1517 INTEGER :: ibuf(12), pcoord(2) -
palm/trunk/SOURCE/parin.f90
r1196 r1216 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! +transpose_compute_overlap in inipar 23 23 ! 24 24 ! Former revisions: … … 256 256 subs_vertical_gradient_level, surface_heatflux, surface_pressure, & 257 257 surface_scalarflux, surface_waterflux, & 258 s_surface, & 259 s_surface_initial_change, s_vertical_gradient, & 258 s_surface, s_surface_initial_change, s_vertical_gradient, & 260 259 s_vertical_gradient_level, timestep_scheme, & 261 260 topography, topography_grid_convention, top_heatflux, & 262 261 top_momentumflux_u, top_momentumflux_v, top_salinityflux, & 263 turbulence, turbulent_inflow, ug_surface, ug_vertical_gradient, & 262 transpose_compute_overlap, turbulence, turbulent_inflow, & 263 ug_surface, ug_vertical_gradient, & 264 264 ug_vertical_gradient_level, use_surface_fluxes, use_cmax, & 265 265 use_top_fluxes, use_ug_for_galilei_tr, use_upstream_for_tke, & -
palm/trunk/SOURCE/poisfft.f90
r1213 r1216 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! resorting of arrays moved to separate routines resort_for_..., 23 ! one argument, used as temporary work array, removed from all transpose 24 ! routines 25 ! overlapping fft / transposition implemented 23 26 ! 24 27 ! Former revisions: … … 221 224 222 225 #if ! defined ( __check ) 223 SUBROUTINE poisfft( ar, work)224 225 USE control_parameters, ONLY : fft_method 226 SUBROUTINE poisfft( ar, ar_inv_test ) 227 228 USE control_parameters, ONLY : fft_method, transpose_compute_overlap 226 229 USE cpulog 227 230 USE interfaces … … 230 233 IMPLICIT NONE 231 234 232 REAL, DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar, work 235 INTEGER :: ind_even, ind_odd, ind_third, ii, iind, inew, jj, jind, & 236 jnew, ki, kk, knew, n, nblk, nnx_y, nny_z, nnz_t, nnz_x, & 237 nxl_y_bound, nxr_y_bound 238 INTEGER, DIMENSION(4) :: isave 239 240 REAL, DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar 241 REAL, DIMENSION(nys:nyn,nxl:nxr,1:nz) :: ar_inv_test ! work array tend from pres 242 !$acc declare create( ar_inv ) 243 REAL, DIMENSION(nys:nyn,nxl:nxr,1:nz) :: ar_inv 244 245 REAL, DIMENSION(:,:,:), ALLOCATABLE :: f_in, f_inv, f_out_y, f_out_z 246 REAL, DIMENSION(:,:,:,:), ALLOCATABLE :: ar1 233 247 234 248 … … 244 258 !-- 1d-domain-decomposition along x: 245 259 !-- FFT along y and transposition y --> x 246 CALL ffty_tr_yx( ar, work,ar )260 CALL ffty_tr_yx( ar, ar ) 247 261 248 262 ! … … 252 266 ! 253 267 !-- Transposition x --> y and backward FFT along y 254 CALL tr_xy_ffty( ar, work,ar )268 CALL tr_xy_ffty( ar, ar ) 255 269 256 270 ELSEIF ( pdims(1) == 1 .AND. pdims(2) > 1 ) THEN … … 259 273 !-- 1d-domain-decomposition along y: 260 274 !-- FFT along x and transposition x --> y 261 CALL fftx_tr_xy( ar, work,ar )275 CALL fftx_tr_xy( ar, ar ) 262 276 263 277 ! … … 267 281 ! 268 282 !-- Transposition y --> x and backward FFT along x 269 CALL tr_yx_fftx( ar, work,ar )270 271 ELSE 283 CALL tr_yx_fftx( ar, ar ) 284 285 ELSEIF ( .NOT. transpose_compute_overlap ) THEN 272 286 273 287 ! … … 275 289 !-- Transposition z --> x 276 290 CALL cpu_log( log_point_s(5), 'transpo forward', 'start' ) 277 CALL transpose_zx( ar, work, ar ) 291 CALL resort_for_zx( ar, ar_inv ) 292 CALL transpose_zx( ar_inv, ar ) 278 293 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 279 294 … … 291 306 !-- Transposition x --> y 292 307 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 293 CALL transpose_xy( ar, work, ar ) 308 CALL resort_for_xy( ar, ar_inv ) 309 CALL transpose_xy( ar_inv, ar ) 294 310 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 295 311 … … 298 314 !$acc update host( ar ) 299 315 ENDIF 300 CALL fft_y( ar, 'forward' ) 316 CALL fft_y( ar, 'forward', ar_tr = ar, & 317 nxl_y_bound = nxl_y, nxr_y_bound = nxr_y, & 318 nxl_y_l = nxl_y, nxr_y_l = nxr_y ) 301 319 IF ( fft_method /= 'system-specific' ) THEN 302 320 !$acc update device( ar ) … … 307 325 !-- Transposition y --> z 308 326 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 309 CALL transpose_yz( ar, work, ar ) 327 CALL resort_for_yz( ar, ar_inv ) 328 CALL transpose_yz( ar_inv, ar ) 310 329 CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' ) 311 330 … … 320 339 !-- Transposition z --> y 321 340 CALL cpu_log( log_point_s(8), 'transpo invers', 'start' ) 322 CALL transpose_zy( ar, work, ar ) 341 CALL transpose_zy( ar, ar_inv ) 342 CALL resort_for_zy( ar_inv, ar ) 323 343 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 324 344 … … 327 347 !$acc update host( ar ) 328 348 ENDIF 329 CALL fft_y( ar, 'backward' ) 349 CALL fft_y( ar, 'backward', ar_tr = ar, & 350 nxl_y_bound = nxl_y, nxr_y_bound = nxr_y, & 351 nxl_y_l = nxl_y, nxr_y_l = nxr_y ) 330 352 IF ( fft_method /= 'system-specific' ) THEN 331 353 !$acc update device( ar ) … … 336 358 !-- Transposition y --> x 337 359 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 338 CALL transpose_yx( ar, work, ar ) 360 CALL transpose_yx( ar, ar_inv ) 361 CALL resort_for_yx( ar_inv, ar ) 339 362 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 340 363 … … 352 375 !-- Transposition x --> z 353 376 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 354 CALL transpose_xz( ar, work, ar ) 377 CALL transpose_xz( ar, ar_inv ) 378 CALL resort_for_xz( ar_inv, ar ) 355 379 CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' ) 356 380 381 ELSE 382 383 ! 384 !-- 2d-domain-decomposition or no decomposition (1 PE run) with 385 !-- overlapping transposition / fft 386 ALLOCATE( f_out_y(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), & 387 f_out_z(0:nx,nys_x:nyn_x,nzb_x:nzt_x) ) 388 ! 389 !-- Transposition z --> x + subsequent fft along x 390 ALLOCATE( f_inv(nys:nyn,nxl:nxr,1:nz) ) 391 CALL resort_for_zx( ar, f_inv ) 392 ! 393 !-- Save original indices and gridpoint counter 394 isave(1) = nz 395 isave(2) = nzb_x 396 isave(3) = nzt_x 397 isave(4) = sendrecvcount_zx 398 ! 399 !-- Set new indices for transformation 400 nblk = nz / pdims(1) 401 nz = pdims(1) 402 nnz_x = 1 403 nzb_x = 1 + myidx * nnz_x 404 nzt_x = ( myidx + 1 ) * nnz_x 405 sendrecvcount_zx = nnx * nny * nnz_x 406 407 ALLOCATE( ar1(0:nx,nys_x:nyn_x,nzb_x:nzt_x,2) ) 408 ALLOCATE( f_in(nys:nyn,nxl:nxr,1:nz) ) 409 410 DO kk = 1, nblk+1 411 ind_odd = MOD( kk, 2 ) + 1 412 ind_even = MOD( kk+1, 2 ) + 1 413 !$OMP sections private(ki,knew,n) 414 !$OMP section 415 IF ( kk <= nblk ) THEN 416 417 IF ( kk == 1 ) THEN 418 CALL cpu_log( log_point_s(5), 'transpo forward', 'start' ) 419 ELSE 420 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 421 ENDIF 422 423 DO knew = 1, nz 424 ki = kk + nblk * ( knew - 1 ) 425 f_in(:,:,knew) = f_inv(:,:,ki) 426 ENDDO 427 428 CALL transpose_zx( f_in, ar1(:,:,:,ind_odd)) 429 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 430 431 ENDIF 432 433 !$OMP section 434 IF ( kk >= 2 ) THEN 435 436 IF ( kk == 2 ) THEN 437 CALL cpu_log( log_point_s(4), 'fft_x', 'start' ) 438 ELSE 439 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' ) 440 ENDIF 441 442 n = isave(2) + kk - 2 443 CALL fft_x( ar1(:,:,:,ind_even), 'forward', ar_2d = f_out_z(:,:,n)) 444 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' ) 445 446 ENDIF 447 !$OMP end sections 448 449 ENDDO 450 ! 451 !-- Restore original indices/counters 452 nz = isave(1) 453 nzb_x = isave(2) 454 nzt_x = isave(3) 455 sendrecvcount_zx = isave(4) 456 457 DEALLOCATE( ar1, f_in, f_inv ) 458 459 ! 460 !-- Transposition x --> y + subsequent fft along y 461 ALLOCATE( f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) ) 462 CALL resort_for_xy( f_out_z, f_inv ) 463 ! 464 !-- Save original indices and gridpoint counter 465 isave(1) = nx 466 isave(2) = nxl_y 467 isave(3) = nxr_y 468 isave(4) = sendrecvcount_xy 469 ! 470 !-- Set new indices for transformation 471 nblk = ( ( nx+1 ) / pdims(2) ) - 1 472 nx = pdims(2) 473 nnx_y = 1 474 nxl_y = myidy * nnx_y 475 nxr_y = ( myidy + 1 ) * nnx_y - 1 476 sendrecvcount_xy = nnx_y * ( nyn_x-nys_x+1 ) * ( nzt_x-nzb_x+1 ) 477 478 ALLOCATE( ar1(0:ny,nxl_y:nxr_y,nzb_y:nzt_y,2) ) 479 ALLOCATE( f_in(nys_x:nyn_x,nzb_x:nzt_x,0:nx) ) 480 481 DO ii = 0, nblk+1 482 ind_odd = MOD( ii+1, 2 ) + 1 483 ind_even = MOD( ii+2, 2 ) + 1 484 !$OMP sections private(ki,knew,n) 485 !$OMP section 486 IF ( ii <= nblk ) THEN 487 488 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 489 490 DO inew = 0, nx-1 491 iind = ii + ( nblk + 1 ) * inew 492 f_in(:,:,inew) = f_inv(:,:,iind) 493 ENDDO 494 495 CALL transpose_xy( f_in, ar1(:,:,:,ind_odd) ) 496 497 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 498 499 ENDIF 500 501 !$OMP section 502 IF ( ii >= 1 ) THEN 503 504 IF ( ii == 1 ) THEN 505 CALL cpu_log( log_point_s(7), 'fft_y', 'start' ) 506 ELSE 507 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' ) 508 ENDIF 509 510 nxl_y_bound = isave(2) 511 nxr_y_bound = isave(3) 512 n = isave(2) + ii - 1 513 ! CALL fft_y( ar1(:,:,:,ind_even), 'forward', ar_3d = f_out_y, & 514 ! ni = n ) 515 CALL fft_y( ar1(:,:,:,ind_even), 'forward', ar_tr = f_out_y, & 516 nxl_y_bound = nxl_y_bound, nxr_y_bound = nxr_y_bound, & 517 nxl_y_l = n, nxr_y_l = n ) 518 519 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' ) 520 521 ENDIF 522 !$OMP end sections 523 524 ENDDO 525 ! 526 !-- Restore original indices/counters 527 nx = isave(1) 528 nxl_y = isave(2) 529 nxr_y = isave(3) 530 sendrecvcount_xy = isave(4) 531 532 DEALLOCATE( ar1, f_in, f_inv ) 533 534 ! 535 !-- Transposition y --> z + subsequent tridia + resort for z --> y 536 ALLOCATE( f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) ) 537 CALL resort_for_yz( f_out_y, f_inv ) 538 ! 539 !-- Save original indices and gridpoint counter 540 isave(1) = ny 541 isave(2) = nys_z 542 isave(3) = nyn_z 543 isave(4) = sendrecvcount_yz 544 ! 545 !-- Set new indices for transformation 546 nblk = ( ( ny+1 ) / pdims(1) ) - 1 547 ny = pdims(1) 548 nny_z = 1 549 nys_z = myidx * nny_z 550 nyn_z = ( myidx + 1 ) * nny_z - 1 551 sendrecvcount_yz = ( nxr_y-nxl_y+1 ) * nny_z * ( nzt_y-nzb_y+1 ) 552 553 ALLOCATE( ar1(nxl_z:nxr_z,nys_z:nyn_z,1:nz,3) ) 554 ALLOCATE( f_in(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) ) 555 556 DO jj = 0, nblk+2 557 ind_odd = MOD( jj+3, 3 ) + 1 558 ind_even = MOD( jj+2, 3 ) + 1 559 ind_third = MOD( jj+1, 3 ) + 1 560 !$OMP sections private(ki,knew,n) 561 !$OMP section 562 IF ( jj <= nblk ) THEN 563 ! 564 !-- Forward Fourier Transformation 565 !-- Transposition y --> z 566 CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' ) 567 568 DO jnew = 0, ny-1 569 jind = jj + ( nblk + 1 ) * jnew 570 f_in(:,:,jnew) =f_inv(:,:,jind) 571 ENDDO 572 573 CALL transpose_yz( f_in, ar1(:,:,:,ind_odd) ) 574 575 IF ( jj == nblk ) THEN 576 CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' ) 577 ELSE 578 CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' ) 579 ENDIF 580 581 ENDIF 582 583 IF ( jj >= 2 ) THEN 584 ! 585 !-- Inverse Fourier Transformation 586 !-- Transposition z --> y 587 !-- Only one thread should call MPI routines, therefore forward and 588 !-- backward tranpose are in the same section 589 IF ( jj == 2 ) THEN 590 CALL cpu_log( log_point_s(8), 'transpo invers', 'start' ) 591 ELSE 592 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 593 ENDIF 594 595 CALL transpose_zy( ar1(:,:,:,ind_third), f_in ) 596 597 DO jnew = 0, ny-1 598 jind = jj-2 + ( nblk + 1 ) * jnew 599 f_inv(:,:,jind) = f_in(:,:,jnew) 600 ENDDO 601 602 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 603 604 ENDIF 605 606 !$OMP section 607 IF ( jj >= 1 .AND. jj <= nblk+1 ) THEN 608 ! 609 !-- Solve the tridiagonal equation system along z 610 CALL cpu_log( log_point_s(6), 'tridia', 'start' ) 611 612 n = isave(2) + jj - 1 613 CALL tridia_substi_overlap( ar1(:,:,:,ind_even), n ) 614 615 CALL cpu_log( log_point_s(6), 'tridia', 'stop' ) 616 ENDIF 617 !$OMP end sections 618 619 ENDDO 620 ! 621 !-- Restore original indices/counters 622 ny = isave(1) 623 nys_z = isave(2) 624 nyn_z = isave(3) 625 sendrecvcount_yz = isave(4) 626 627 CALL resort_for_zy( f_inv, f_out_y ) 628 629 DEALLOCATE( ar1, f_in, f_inv ) 630 631 ! 632 !-- fft along y backward + subsequent transposition y --> x 633 ALLOCATE( f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) ) 634 ! 635 !-- Save original indices and gridpoint counter 636 isave(1) = nx 637 isave(2) = nxl_y 638 isave(3) = nxr_y 639 isave(4) = sendrecvcount_xy 640 ! 641 !-- Set new indices for transformation 642 nblk = (( nx+1 ) / pdims(2) ) - 1 643 nx = pdims(2) 644 nnx_y = 1 645 nxl_y = myidy * nnx_y 646 nxr_y = ( myidy + 1 ) * nnx_y - 1 647 sendrecvcount_xy = nnx_y * ( nyn_x-nys_x+1 ) * ( nzt_x-nzb_x+1 ) 648 649 ALLOCATE( ar1(0:ny,nxl_y:nxr_y,nzb_y:nzt_y,2) ) 650 ALLOCATE( f_in(nys_x:nyn_x,nzb_x:nzt_x,0:nx) ) 651 652 DO ii = 0, nblk+1 653 ind_odd = MOD( ii+1, 2 ) + 1 654 ind_even = MOD( ii+2, 2 ) + 1 655 !$OMP sections private(ki,knew,n) 656 !$OMP section 657 IF ( ii <= nblk ) THEN 658 659 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' ) 660 661 n = isave(2) + ii 662 nxl_y_bound = isave(2) 663 nxr_y_bound = isave(3) 664 665 ! CALL fft_y( ar1(:,:,:,ind_even), 'backward', ar_3d = f_out_y, & 666 ! ni = n ) 667 CALL fft_y( ar1(:,:,:,ind_even), 'backward', ar_tr = f_out_y, & 668 nxl_y_bound = nxl_y_bound, nxr_y_bound = nxr_y_bound, & 669 nxl_y_l = n, nxr_y_l = n ) 670 671 IF ( ii == nblk ) THEN 672 CALL cpu_log( log_point_s(7), 'fft_y', 'stop' ) 673 ELSE 674 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' ) 675 ENDIF 676 677 ENDIF 678 679 !$OMP section 680 IF ( ii >= 1 ) THEN 681 682 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 683 684 CALL transpose_yx( ar1(:,:,:,ind_odd), f_in ) 685 686 DO inew = 0, nx-1 687 iind = ii-1 + (nblk+1) * inew 688 f_inv(:,:,iind) = f_in(:,:,inew) 689 ENDDO 690 691 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 692 693 ENDIF 694 !$OMP end sections 695 696 ENDDO 697 ! 698 !-- Restore original indices/counters 699 nx = isave(1) 700 nxl_y = isave(2) 701 nxr_y = isave(3) 702 sendrecvcount_xy = isave(4) 703 704 CALL resort_for_yx( f_inv, f_out_z ) 705 706 DEALLOCATE( ar1, f_in, f_inv ) 707 708 ! 709 !-- fft along x backward + subsequent final transposition x --> z 710 ALLOCATE( f_inv(nys:nyn,nxl:nxr,1:nz) ) 711 ! 712 !-- Save original indices and gridpoint counter 713 isave(1) = nz 714 isave(2) = nzb_x 715 isave(3) = nzt_x 716 isave(4) = sendrecvcount_zx 717 ! 718 !-- Set new indices for transformation 719 nblk = nz / pdims(1) 720 nz = pdims(1) 721 nnz_x = 1 722 nzb_x = 1 + myidx * nnz_x 723 nzt_x = ( myidx + 1 ) * nnz_x 724 sendrecvcount_zx = nnx * nny * nnz_x 725 726 ALLOCATE( ar1(0:nx,nys_x:nyn_x,nzb_x:nzt_x,2) ) 727 ALLOCATE( f_in(nys:nyn,nxl:nxr,1:nz) ) 728 729 DO kk = 1, nblk+1 730 ind_odd = MOD( kk, 2 ) + 1 731 ind_even = MOD( kk+1, 2 ) + 1 732 !$OMP sections private(ki,knew,n) 733 !$OMP section 734 IF ( kk <= nblk ) THEN 735 736 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' ) 737 738 n = isave(2) + kk - 1 739 CALL fft_x( ar1(:,:,:,ind_even), 'backward', f_out_z(:,:,n)) 740 741 IF ( kk == nblk ) THEN 742 CALL cpu_log( log_point_s(4), 'fft_x', 'stop' ) 743 ELSE 744 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' ) 745 ENDIF 746 747 ENDIF 748 749 !$OMP section 750 IF ( kk >= 2 ) THEN 751 752 CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' ) 753 754 CALL transpose_xz( ar1(:,:,:,ind_odd), f_in ) 755 756 DO knew = 1, nz 757 ki = kk-1 + nblk * (knew-1) 758 f_inv(:,:,ki) = f_in(:,:,knew) 759 ENDDO 760 761 IF ( kk == nblk+1 ) THEN 762 CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' ) 763 ELSE 764 CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' ) 765 ENDIF 766 767 ENDIF 768 !$OMP end sections 769 770 ENDDO 771 ! 772 !-- Restore original indices/counters 773 nz = isave(1) 774 nzb_x = isave(2) 775 nzt_x = isave(3) 776 sendrecvcount_zx = isave(4) 777 778 CALL resort_for_xz( f_inv, ar ) 779 780 DEALLOCATE( ar1, f_in, f_inv ) 781 357 782 ENDIF 358 783 … … 363 788 364 789 365 SUBROUTINE ffty_tr_yx( f_in, work,f_out )790 SUBROUTINE ffty_tr_yx( f_in, f_out ) 366 791 367 792 !------------------------------------------------------------------------------! … … 489 914 490 915 491 SUBROUTINE tr_xy_ffty( f_in, work,f_out )916 SUBROUTINE tr_xy_ffty( f_in, f_out ) 492 917 493 918 !------------------------------------------------------------------------------! … … 738 1163 739 1164 740 SUBROUTINE fftx_tr_xy( f_in, work,f_out )1165 SUBROUTINE fftx_tr_xy( f_in, f_out ) 741 1166 742 1167 !------------------------------------------------------------------------------! … … 848 1273 849 1274 850 SUBROUTINE tr_yx_fftx( f_in, work,f_out )1275 SUBROUTINE tr_yx_fftx( f_in, f_out ) 851 1276 852 1277 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/transpose.f90
r1112 r1216 1 SUBROUTINE transpose_xy( f_in, work, f_out)1 SUBROUTINE resort_for_xy( f_in, f_inv ) 2 2 3 3 !--------------------------------------------------------------------------------! … … 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! re-sorting of the transposed / to be transposed arrays moved to separate 23 ! routines resort_for_... 23 24 ! 24 25 ! Former revisions: … … 69 70 ! Initial revision 70 71 ! 71 ! 72 !------------------------------------------------------------------------------! 73 ! Description: 74 ! ------------ 75 ! Resorting data for the transposition from x to y. The transposition itself 76 ! is carried out in transpose_xy 77 !------------------------------------------------------------------------------! 78 79 USE indices 80 USE transpose_indices 81 82 IMPLICIT NONE 83 84 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 85 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 86 87 88 INTEGER :: i, j, k 89 90 ! 91 !-- Rearrange indices of input array in order to make data to be send 92 !-- by MPI contiguous 93 !$OMP PARALLEL PRIVATE ( i, j, k ) 94 !$OMP DO 95 !$acc kernels present( f_in, f_inv ) 96 !$acc loop 97 DO i = 0, nx 98 DO k = nzb_x, nzt_x 99 !$acc loop vector( 32 ) 100 DO j = nys_x, nyn_x 101 f_inv(j,k,i) = f_in(i,j,k) 102 ENDDO 103 ENDDO 104 ENDDO 105 !$acc end kernels 106 !$OMP END PARALLEL 107 108 END SUBROUTINE resort_for_xy 109 110 111 SUBROUTINE transpose_xy( f_inv, f_out ) 112 113 !------------------------------------------------------------------------------! 72 114 ! Description: 73 115 ! ------------ … … 87 129 INTEGER :: i, j, k, l, ys 88 130 89 REAL :: f_in (0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)131 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 90 132 91 133 REAL, DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work 92 134 93 !$acc declare create( f_inv )94 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx)95 96 97 !98 !-- Rearrange indices of input array in order to make data to be send99 !-- by MPI contiguous100 !$OMP PARALLEL PRIVATE ( i, j, k )101 !$OMP DO102 !$acc kernels present( f_in )103 !$acc loop104 DO i = 0, nx105 DO k = nzb_x, nzt_x106 !$acc loop vector( 32 )107 DO j = nys_x, nyn_x108 f_inv(j,k,i) = f_in(i,j,k)109 ENDDO110 ENDDO111 ENDDO112 !$acc end kernels113 !$OMP END PARALLEL114 135 115 136 IF ( numprocs /= 1 ) THEN … … 124 145 work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 125 146 comm1dy, ierr ) 126 !$acc update device( work )127 147 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 128 148 … … 131 151 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 132 152 !$OMP DO 153 !$acc data copyin( work ) 133 154 DO l = 0, pdims(2) - 1 134 155 ys = 0 + l * ( nyn_x - nys_x + 1 ) … … 145 166 !$acc end kernels 146 167 ENDDO 168 !$acc end data 147 169 !$OMP END PARALLEL 148 170 #endif … … 154 176 !$OMP PARALLEL PRIVATE ( i, j, k ) 155 177 !$OMP DO 156 !$acc kernels present( f_ out )178 !$acc kernels present( f_inv, f_out ) 157 179 !$acc loop 158 180 DO k = nzb_y, nzt_y … … 172 194 173 195 174 SUBROUTINE transpose_xz( f_in, work, f_out ) 196 SUBROUTINE resort_for_xz( f_inv, f_out ) 197 198 !------------------------------------------------------------------------------! 199 ! Description: 200 ! ------------ 201 ! Resorting data after the transposition from x to z. The transposition itself 202 ! is carried out in transpose_xz 203 !------------------------------------------------------------------------------! 204 205 USE indices 206 USE transpose_indices 207 208 IMPLICIT NONE 209 210 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 211 REAL :: f_out(1:nz,nys:nyn,nxl:nxr) 212 213 214 INTEGER :: i, j, k 215 216 ! 217 !-- Rearrange indices of input array in order to make data to be send 218 !-- by MPI contiguous. 219 !-- In case of parallel fft/transposition, scattered store is faster in 220 !-- backward direction!!! 221 !$OMP PARALLEL PRIVATE ( i, j, k ) 222 !$OMP DO 223 !$acc kernels present( f_inv, f_out ) 224 !$acc loop 225 DO k = 1, nz 226 DO i = nxl, nxr 227 !$acc loop vector( 32 ) 228 DO j = nys, nyn 229 f_out(k,j,i) = f_inv(j,i,k) 230 ENDDO 231 ENDDO 232 ENDDO 233 !$acc end kernels 234 !$OMP END PARALLEL 235 236 END SUBROUTINE resort_for_xz 237 238 239 SUBROUTINE transpose_xz( f_in, f_inv ) 175 240 176 241 !------------------------------------------------------------------------------! … … 192 257 INTEGER :: i, j, k, l, xs 193 258 194 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_ out(1:nz,nys:nyn,nxl:nxr)259 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_inv(nys:nyn,nxl:nxr,1:nz) 195 260 196 261 REAL, DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work 197 198 !$acc declare create( f_inv )199 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz)200 262 201 263 … … 210 272 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 211 273 !$OMP DO 274 !$acc data copyout( work ) 212 275 DO l = 0, pdims(1) - 1 213 276 xs = 0 + l * nnx … … 224 287 !$acc end kernels 225 288 ENDDO 289 !$acc end data 226 290 !$OMP END PARALLEL 227 291 … … 230 294 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 231 295 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 232 !$acc update host( work )233 296 CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 234 297 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & … … 236 299 !$acc update device( f_inv ) 237 300 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 238 239 !240 !-- Reorder transposed array in a way that the z index is in first position241 !$OMP PARALLEL PRIVATE ( i, j, k )242 !$OMP DO243 !$acc kernels present( f_out )244 !$acc loop245 DO k = 1, nz246 DO i = nxl, nxr247 !$acc loop vector( 32 )248 DO j = nys, nyn249 f_out(k,j,i) = f_inv(j,i,k)250 ENDDO251 ENDDO252 ENDDO253 !$acc end kernels254 !$OMP END PARALLEL255 301 #endif 256 302 … … 261 307 !$OMP PARALLEL PRIVATE ( i, j, k ) 262 308 !$OMP DO 263 !$acc kernels present( f_in )309 !$acc kernels present( f_in, f_inv ) 264 310 !$acc loop 265 311 DO i = nxl, nxr … … 274 320 !$OMP END PARALLEL 275 321 276 !$OMP PARALLEL PRIVATE ( i, j, k )277 !$OMP DO278 !$acc kernels present( f_out )279 !$acc loop280 DO k = 1, nz281 DO i = nxl, nxr282 !$acc loop vector( 32 )283 DO j = nys, nyn284 f_out(k,j,i) = f_inv(j,i,k)285 ENDDO286 ENDDO287 ENDDO288 !$acc end kernels289 !$OMP END PARALLEL290 291 322 ENDIF 292 323 … … 294 325 295 326 296 SUBROUTINE transpose_yx( f_in, work, f_out ) 327 SUBROUTINE resort_for_yx( f_inv, f_out ) 328 329 !------------------------------------------------------------------------------! 330 ! Description: 331 ! ------------ 332 ! Resorting data after the transposition from y to x. The transposition itself 333 ! is carried out in transpose_yx 334 !------------------------------------------------------------------------------! 335 336 USE indices 337 USE transpose_indices 338 339 IMPLICIT NONE 340 341 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 342 REAL :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 343 344 345 INTEGER :: i, j, k 346 347 ! 348 !-- Rearrange indices of input array in order to make data to be send 349 !-- by MPI contiguous 350 !$OMP PARALLEL PRIVATE ( i, j, k ) 351 !$OMP DO 352 !$acc kernels present( f_inv, f_out ) 353 !$acc loop 354 DO i = 0, nx 355 DO k = nzb_x, nzt_x 356 !$acc loop vector( 32 ) 357 DO j = nys_x, nyn_x 358 f_out(i,j,k) = f_inv(j,k,i) 359 ENDDO 360 ENDDO 361 ENDDO 362 !$acc end kernels 363 !$OMP END PARALLEL 364 365 END SUBROUTINE resort_for_yx 366 367 368 SUBROUTINE transpose_yx( f_in, f_inv ) 297 369 298 370 !------------------------------------------------------------------------------! … … 314 386 INTEGER :: i, j, k, l, ys 315 387 316 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_ out(0:nx,nys_x:nyn_x,nzb_x:nzt_x)388 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 317 389 318 390 REAL, DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work 319 320 !$acc declare create( f_inv )321 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx)322 391 323 392 … … 329 398 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 330 399 !$OMP DO 400 !$acc data copyout( work ) 331 401 DO l = 0, pdims(2) - 1 332 402 ys = 0 + l * ( nyn_x - nys_x + 1 ) … … 343 413 !$acc end kernels 344 414 ENDDO 415 !$acc end data 345 416 !$OMP END PARALLEL 346 417 … … 349 420 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 350 421 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 351 !$acc update host( work )352 422 CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 353 423 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & … … 363 433 !$OMP PARALLEL PRIVATE ( i, j, k ) 364 434 !$OMP DO 365 !$acc kernels present( f_in )435 !$acc kernels present( f_in, f_inv ) 366 436 !$acc loop 367 437 DO i = nxl_y, nxr_y … … 378 448 ENDIF 379 449 380 !381 !-- Reorder transposed array in a way that the x index is in first position382 !$OMP PARALLEL PRIVATE ( i, j, k )383 !$OMP DO384 !$acc kernels present( f_out )385 !$acc loop386 DO i = 0, nx387 DO k = nzb_x, nzt_x388 !$acc loop vector( 32 )389 DO j = nys_x, nyn_x390 f_out(i,j,k) = f_inv(j,k,i)391 ENDDO392 ENDDO393 ENDDO394 !$acc end kernels395 !$OMP END PARALLEL396 397 450 END SUBROUTINE transpose_yx 398 451 399 452 400 SUBROUTINE transpose_yxd( f_in, work,f_out )453 SUBROUTINE transpose_yxd( f_in, f_out ) 401 454 402 455 !------------------------------------------------------------------------------! … … 466 519 467 520 468 SUBROUTINE transpose_yz( f_in, work, f_out ) 521 SUBROUTINE resort_for_yz( f_in, f_inv ) 522 523 !------------------------------------------------------------------------------! 524 ! Description: 525 ! ------------ 526 ! Resorting data for the transposition from y to z. The transposition itself 527 ! is carried out in transpose_yz 528 !------------------------------------------------------------------------------! 529 530 USE indices 531 USE transpose_indices 532 533 IMPLICIT NONE 534 535 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 536 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 537 538 539 INTEGER :: i, j, k 540 541 ! 542 !-- Rearrange indices of input array in order to make data to be send 543 !-- by MPI contiguous 544 !$OMP PARALLEL PRIVATE ( i, j, k ) 545 !$OMP DO 546 !$acc kernels present( f_in, f_inv ) 547 !$acc loop 548 DO j = 0, ny 549 DO k = nzb_y, nzt_y 550 !$acc loop vector( 32 ) 551 DO i = nxl_y, nxr_y 552 f_inv(i,k,j) = f_in(j,i,k) 553 ENDDO 554 ENDDO 555 ENDDO 556 !$acc end kernels 557 !$OMP END PARALLEL 558 559 END SUBROUTINE resort_for_yz 560 561 562 SUBROUTINE transpose_yz( f_inv, f_out ) 469 563 470 564 !------------------------------------------------------------------------------! … … 486 580 INTEGER :: i, j, k, l, zs 487 581 488 REAL :: f_in (0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz)582 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 489 583 490 584 REAL, DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work 491 585 492 !$acc declare create( f_inv ) 493 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 494 495 496 ! 497 !-- Rearrange indices of input array in order to make data to be send 498 !-- by MPI contiguous 499 !$OMP PARALLEL PRIVATE ( i, j, k ) 500 !$OMP DO 501 !$acc kernels present( f_in ) 502 !$acc loop 503 DO j = 0, ny 504 DO k = nzb_y, nzt_y 505 !$acc loop vector( 32 ) 506 DO i = nxl_y, nxr_y 507 f_inv(i,k,j) = f_in(j,i,k) 508 ENDDO 509 ENDDO 510 ENDDO 511 !$acc end kernels 512 !$OMP END PARALLEL 513 514 ! 515 !-- Move data to different array, because memory location of work1 is 516 !-- needed further below (work1 = work2). 586 587 ! 517 588 !-- If the PE grid is one-dimensional along y, only local reordering 518 589 !-- of the data is necessary and no transposition has to be done. … … 521 592 !$OMP PARALLEL PRIVATE ( i, j, k ) 522 593 !$OMP DO 523 !$acc kernels present( f_ out )594 !$acc kernels present( f_inv, f_out ) 524 595 !$acc loop 525 596 DO j = 0, ny … … 545 616 work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 546 617 comm1dx, ierr ) 547 !$acc update device( work )548 618 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 549 619 … … 552 622 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 553 623 !$OMP DO 624 !$acc data copyin( work ) 554 625 DO l = 0, pdims(1) - 1 555 626 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 556 !$acc kernels present( f_out , work)627 !$acc kernels present( f_out ) 557 628 !$acc loop 558 629 DO j = nys_z, nyn_z … … 566 637 !$acc end kernels 567 638 ENDDO 639 !$acc end data 568 640 !$OMP END PARALLEL 569 641 #endif … … 574 646 575 647 576 SUBROUTINE transpose_zx( f_in, work, f_out ) 648 SUBROUTINE resort_for_zx( f_in, f_inv ) 649 650 !------------------------------------------------------------------------------! 651 ! Description: 652 ! ------------ 653 ! Resorting data for the transposition from z to x. The transposition itself 654 ! is carried out in transpose_zx 655 !------------------------------------------------------------------------------! 656 657 USE indices 658 USE transpose_indices 659 660 IMPLICIT NONE 661 662 REAL :: f_in(1:nz,nys:nyn,nxl:nxr) 663 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 664 665 666 INTEGER :: i, j, k 667 668 ! 669 !-- Rearrange indices of input array in order to make data to be send 670 !-- by MPI contiguous 671 !$OMP PARALLEL PRIVATE ( i, j, k ) 672 !$OMP DO 673 !$acc kernels present( f_in, f_inv ) 674 !$acc loop 675 DO k = 1,nz 676 DO i = nxl, nxr 677 !$acc loop vector( 32 ) 678 DO j = nys, nyn 679 f_inv(j,i,k) = f_in(k,j,i) 680 ENDDO 681 ENDDO 682 ENDDO 683 !$acc end kernels 684 !$OMP END PARALLEL 685 686 END SUBROUTINE resort_for_zx 687 688 689 SUBROUTINE transpose_zx( f_inv, f_out ) 577 690 578 691 !------------------------------------------------------------------------------! … … 594 707 INTEGER :: i, j, k, l, xs 595 708 596 REAL :: f_in (1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x)709 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 597 710 598 711 REAL, DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work 599 712 600 !$acc declare create( f_inv ) 601 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 602 603 604 ! 605 !-- Rearrange indices of input array in order to make data to be send 606 !-- by MPI contiguous 607 !$OMP PARALLEL PRIVATE ( i, j, k ) 608 !$OMP DO 609 !$acc kernels present( f_in ) 610 !$acc loop 611 DO k = 1,nz 612 DO i = nxl, nxr 613 !$acc loop vector( 32 ) 614 DO j = nys, nyn 615 f_inv(j,i,k) = f_in(k,j,i) 616 ENDDO 617 ENDDO 618 ENDDO 619 !$acc end kernels 620 !$OMP END PARALLEL 621 622 ! 623 !-- Move data to different array, because memory location of work1 is 624 !-- needed further below (work1 = work2). 713 714 ! 625 715 !-- If the PE grid is one-dimensional along y, only local reordering 626 716 !-- of the data is necessary and no transposition has to be done. … … 629 719 !$OMP PARALLEL PRIVATE ( i, j, k ) 630 720 !$OMP DO 631 !$acc kernels present( f_ out )721 !$acc kernels present( f_inv, f_out ) 632 722 !$acc loop 633 723 DO k = 1, nz … … 653 743 work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 654 744 comm1dx, ierr ) 655 !$acc update device( work )656 745 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 657 746 … … 660 749 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 661 750 !$OMP DO 751 !$acc data copyin( work ) 662 752 DO l = 0, pdims(1) - 1 663 753 xs = 0 + l * nnx 664 !$acc kernels present( f_out , work)754 !$acc kernels present( f_out ) 665 755 !$acc loop 666 756 DO k = nzb_x, nzt_x … … 674 764 !$acc end kernels 675 765 ENDDO 766 !$acc end data 676 767 !$OMP END PARALLEL 677 768 #endif … … 682 773 683 774 684 SUBROUTINE transpose_zy( f_in, work, f_out ) 775 SUBROUTINE resort_for_zy( f_inv, f_out ) 776 777 !------------------------------------------------------------------------------! 778 ! Description: 779 ! ------------ 780 ! Resorting data after the transposition from z to y. The transposition itself 781 ! is carried out in transpose_zy 782 !------------------------------------------------------------------------------! 783 784 USE indices 785 USE transpose_indices 786 787 IMPLICIT NONE 788 789 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 790 REAL :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 791 792 793 INTEGER :: i, j, k 794 795 ! 796 !-- Rearrange indices of input array in order to make data to be send 797 !-- by MPI contiguous 798 !$OMP PARALLEL PRIVATE ( i, j, k ) 799 !$OMP DO 800 !$acc kernels present( f_inv, f_out ) 801 !$acc loop 802 DO k = nzb_y, nzt_y 803 DO j = 0, ny 804 !$acc loop vector( 32 ) 805 DO i = nxl_y, nxr_y 806 f_out(j,i,k) = f_inv(i,k,j) 807 ENDDO 808 ENDDO 809 ENDDO 810 !$acc end kernels 811 !$OMP END PARALLEL 812 813 END SUBROUTINE resort_for_zy 814 815 816 SUBROUTINE transpose_zy( f_in, f_inv ) 685 817 686 818 !------------------------------------------------------------------------------! … … 702 834 INTEGER :: i, j, k, l, zs 703 835 704 REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), f_ out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)836 REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 705 837 706 838 REAL, DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work 707 708 !$acc declare create( f_inv )709 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny)710 839 711 840 … … 720 849 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 721 850 !$OMP DO 851 !$acc data copyout( work ) 722 852 DO l = 0, pdims(1) - 1 723 853 zs = 1 + l * ( nzt_y - nzb_y + 1 ) … … 734 864 !$acc end kernels 735 865 ENDDO 866 !$acc end data 736 867 !$OMP END PARALLEL 737 868 … … 740 871 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 741 872 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 742 !$acc update host( work )743 873 CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 744 874 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & … … 753 883 !$OMP PARALLEL PRIVATE ( i, j, k ) 754 884 !$OMP DO 755 !$acc kernels present( f_in )885 !$acc kernels present( f_in, f_inv ) 756 886 !$acc loop 757 887 DO k = nzb_y, nzt_y … … 768 898 ENDIF 769 899 770 !771 !-- Reorder transposed array in a way that the y index is in first position772 !$OMP PARALLEL PRIVATE ( i, j, k )773 !$OMP DO774 !$acc kernels present( f_out )775 !$acc loop776 DO k = nzb_y, nzt_y777 DO i = nxl_y, nxr_y778 !$acc loop vector( 32 )779 DO j = 0, ny780 f_out(j,i,k) = f_inv(i,k,j)781 ENDDO782 ENDDO783 ENDDO784 !$acc end kernels785 !$OMP END PARALLEL786 787 900 END SUBROUTINE transpose_zy 788 901 789 902 790 SUBROUTINE transpose_zyd( f_in, work,f_out )903 SUBROUTINE transpose_zyd( f_in, f_out ) 791 904 792 905 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/tridia_solver.f90
r1213 r1216 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! +tridia_substi_overlap for handling overlapping fft / transposition 23 23 ! 24 24 ! Former revisions: … … 57 57 END INTERFACE tridia_substi 58 58 59 PUBLIC tridia_substi, tridia_init, tridia_1dd 59 INTERFACE tridia_substi_overlap 60 MODULE PROCEDURE tridia_substi_overlap 61 END INTERFACE tridia_substi_overlap 62 63 PUBLIC tridia_substi, tridia_substi_overlap, tridia_init, tridia_1dd 60 64 61 65 CONTAINS … … 260 264 261 265 266 SUBROUTINE tridia_substi_overlap( ar, jj ) 267 268 !------------------------------------------------------------------------------! 269 ! Substitution (Forward and Backward) (Thomas algorithm) 270 !------------------------------------------------------------------------------! 271 272 USE arrays_3d, ONLY: tri 273 USE control_parameters 274 275 IMPLICIT NONE 276 277 INTEGER :: i, j, jj, k 278 279 REAL :: ar(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 280 281 !$acc declare create( ar1 ) 282 REAL, DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1 283 284 ! 285 !-- Forward substitution 286 DO k = 0, nz - 1 287 !$acc kernels present( ar, tri ) 288 !$acc loop 289 DO j = nys_z, nyn_z 290 DO i = nxl_z, nxr_z 291 292 IF ( k == 0 ) THEN 293 ar1(i,j,k) = ar(i,j,k+1) 294 ELSE 295 ar1(i,j,k) = ar(i,j,k+1) - tri(i,jj,k,2) * ar1(i,j,k-1) 296 ENDIF 297 298 ENDDO 299 ENDDO 300 !$acc end kernels 301 ENDDO 302 303 ! 304 !-- Backward substitution 305 !-- Note, the 1.0E-20 in the denominator is due to avoid divisions 306 !-- by zero appearing if the pressure bc is set to neumann at the top of 307 !-- the model domain. 308 DO k = nz-1, 0, -1 309 !$acc kernels present( ar, tri ) 310 !$acc loop 311 DO j = nys_z, nyn_z 312 DO i = nxl_z, nxr_z 313 314 IF ( k == nz-1 ) THEN 315 ar(i,j,k+1) = ar1(i,j,k) / ( tri(i,jj,k,1) + 1.0E-20 ) 316 ELSE 317 ar(i,j,k+1) = ( ar1(i,j,k) - ddzuw(k,2) * ar(i,j,k+2) ) & 318 / tri(i,jj,k,1) 319 ENDIF 320 ENDDO 321 ENDDO 322 !$acc end kernels 323 ENDDO 324 325 ! 326 !-- Indices i=0, j=0 correspond to horizontally averaged pressure. 327 !-- The respective values of ar should be zero at all k-levels if 328 !-- acceleration of horizontally averaged vertical velocity is zero. 329 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN 330 IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN 331 !$acc kernels loop present( ar ) 332 DO k = 1, nz 333 ar(nxl_z,nys_z,k) = 0.0 334 ENDDO 335 ENDIF 336 ENDIF 337 338 END SUBROUTINE tridia_substi_overlap 339 340 262 341 SUBROUTINE split 263 342
Note: See TracChangeset
for help on using the changeset viewer.