Changeset 164 for palm/trunk
- Timestamp: May 15, 2008 8:46:15 AM
- Location: palm/trunk
- Files: 11 edited
Legend (the changeset viewer's color coding is rendered below as line prefixes):
- Unmodified: no prefix
- Added: "+"
- Removed: "-"
palm/trunk/SCRIPTS/mrun (r149 → r164)

  #                       true, mrun tries "ln -f" on local output and resorts
  #                       to "cp" or "cp -r" on error
+ # 15/04/08 - Siggi - argument -c introduced to most of the subjob calls,
+ #                    which allows the user to choose his own job catalog
+ #                    by setting job_catalog in the configuration file
+ #                    (default is ~/job_queue),
+ #                    workaround for mpixec with -env option,
+ #                    adjustments for lcxt4 (Bergen Center for Computational
+ #                    Science)

  # VARIABLENVEREINBARUNGEN + DEFAULTWERTE
…
  input_list=""
  interpreted_config_file=""
+ job_catalog="~/job_queue"
  job_on_file=""
  keep_data_from_previous_run=false
…
  lopts="$lopts $netcdf_lib $dvrp_lib"
  ROPTS="$ropts"
- if [[ ( $(echo $host | cut -c1-3) = nec || $(echo $host | cut -c1-3) = ibm || $host = lctit || $host = lcfimm ) && -n $numprocs ]]
+ if [[ ( $(echo $host | cut -c1-3) = nec || $(echo $host | cut -c1-3) = ibm || $host = lctit || $host = lcfimm || $host = lcxt4 ) && -n $numprocs ]]
  then
     XOPT="-X $numprocs"
…
  then
     printf "\n\n"
-    mpiexec -machinefile hostfile -n $ii a.out $ROPTS
+    if [[ $host = lcxt4 ]]
+    then
+       aprun -n $ii -N $tasks_per_node a.out $ROPTS
+    else
+       mpiexec -machinefile hostfile -n $ii a.out $ROPTS
+    fi
  else
     (( iii = ii / 2 ))
+    echo "atmosphere_to_ocean" > runfile_atmos
+    echo "ocean_to_atmosphere" > runfile_ocean
+
     printf "\n coupled run ($iii atmosphere, $iii ocean)"
     printf "\n\n"
-    mpiexec -machinefile hostfile -n $iii -env coupling_mode atmosphere_to_ocean a.out $ROPTS &
-    mpiexec -machinefile hostfile -n $iii -env coupling_mode ocean_to_atmosphere a.out $ROPTS &
+
+    if [[ $host == lcxt4 ]]
+    then
+       aprun -n $iii -N $tasks_per_node a.out < runfile_atmos $ROPTS &
+       aprun -n $iii -N $tasks_per_node a.out < runfile_ocean $ROPTS &
+    else
+
+          # WORKAROUND BECAUSE mpiexec WITH -env option IS NOT AVAILABLE ON SOME SYSTEMS
+       mpiexec -machinefile hostfile -n $iii a.out $ROPTS < runfile_atmos &
+       mpiexec -machinefile hostfile -n $iii a.out $ROPTS < runfile_ocean &
+    #  mpiexec -machinefile hostfile -n $iii -env coupling_mode atmosphere_to_ocean a.out $ROPTS &
+    #  mpiexec -machinefile hostfile -n $iii -env coupling_mode ocean_to_atmosphere a.out $ROPTS &
+    fi
     wait
  fi
…
  if [[ "$LOGNAME" = b323013 ]]
  then
-    subjob -v -q c1 -X 0 -m 1000 -t 900 transfer_${localout[$i]}
+    subjob -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog transfer_${localout[$i]}
  else
-    subjob -d -v -q c1 -X 0 -m 1000 -t 900 transfer_${localout[$i]}
+    subjob -d -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog transfer_${localout[$i]}
  fi
…
  if [[ $LOGNAME = b323013 ]]
  then
-    subjob -v -q c1 -X 0 -m 1000 -t 900 transfer_${localout[$i]}
+    subjob -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog transfer_${localout[$i]}
  else
-    subjob -d -v -q c1 -X 0 -m 1000 -t 900 transfer_${localout[$i]}
+    subjob -d -v -q c1 -X 0 -m 1000 -t 900 -c $job_catalog transfer_${localout[$i]}
  fi
…
  if [[ $localhost = ibmh || $localhost = ibmb ]]
  then
- #  subjob -d -v -q cdata -X 0 -m 1000 -t 43200 archive_${frelout[$i]}
-    subjob -v -q cdata -X 0 -m 1000 -t 43200 archive_${frelout[$i]}
+ #  subjob -d -v -q cdata -X 0 -m 1000 -t 43200 -c $job_catalog archive_${frelout[$i]}
+    subjob -v -q cdata -X 0 -m 1000 -t 43200 -c $job_catalog archive_${frelout[$i]}
  elif [[ $localhost = nech ]]
  then
…
-    subjob -v -d -q cdata -X 0 -m 1000 -t 43200 archive_${frelout[$i]}
+    subjob -v -d -q cdata -X 0 -m 1000 -t 43200 -c $job_catalog archive_${frelout[$i]}
     printf " Archiving of $tmp_data_catalog/${frelout[$i]} initiated (batch job submitted)\n"
     file_saved=true
…
- subjob $job_on_file -h $host -u $remote_username -g $group_number -q $queue -m $memory -N $node_usage -t $cpumax $XOPT $TOPT $OOPT -n $fname -v $jobfile
+ subjob $job_on_file -h $host -u $remote_username -g $group_number -q $queue -m $memory -N $node_usage -t $cpumax $XOPT $TOPT $OOPT -n $fname -v -c $job_catalog $jobfile
  rm -rf $jobfile
palm/trunk/SCRIPTS/subjob (r129 → r164)

  # 19/10/07 - Siggi - a ";" was missing in the last change done by Marcus
  # 30/10/07 - Marcus - further adjustments for queues on lctit
+ # 15/05/08 - Siggi - adjustments for lcxt4 (Bergen Center for Computational
+ #                    Science)
…
  (gregale)               local_addres=130.75.105.109; local_host=lcmuk;;
  (hababai)               local_addres=130.75.105.108; local_host=lcmuk;;
+ (hexagon.bccs.uib.no)   local_addres=129.177.20.113; local_host=lcxt4;;
  (hreg*-en0|hanni*-en0)  local_addres=130.75.4.10;    local_host=ibmh;;
  (irifi)                 local_addres=130.75.105.104; local_host=lcmuk;;
  (levanto)               local_addres=130.75.105.45;  local_host=lcmuk;;
  (maestro)               local_addres=130.75.105.2;   local_host=lcmuk;;
+ (nid*)                  local_addres=129.177.20.113; local_host=lcxt4;;
  (nobel*)                local_addres=150.183.5.101;  local_host=ibms;;
  (orkan)                 local_addres=130.75.105.3;   local_host=lcmuk;;
…
  (lcfimm)  remote_addres=172.20.4.2; submcom=/opt/torque/bin/qsub;;
  (lctit)   queue=lctit; remote_addres=172.17.75.161; submcom=/n1ge/TITECH_GRID/tools/bin/n1ge;;
+ (lcxt4)   remote_addres=129.177.20.113; submcom=/opt/torque/2.3.0/bin/qsub;;
  (nech)    qsubmem=memsz_job; qsubtime=cputim_job; remote_addres=136.172.44.147; submcom="/usr/local/bin/qsub";;
  (neck)    qsubmem=memsz_job; qsubtime=cputim_job; remote_addres=133.5.178.11; submcom="/usr/bin/nqsII/qsub";;
…
  %%END%%

+ elif [[ $remote_host = lcxt4 ]]
+ then
+
+    if [[ $numprocs != 0 ]]
+    then
+       cat > $job_to_send << %%END%%
+ #!/bin/ksh
+ #PBS -S /bin/ksh
+ #PBS -N $job_name
+ #PBS -A nersc
+ #PBS -l walltime=$timestring
+ #PBS -l mppwidth=${numprocs}
+ #PBS -l mppnppn=${tasks_per_node}
+ #PBS -m abe
+ #PBS -M igore@nersc.no
+ #PBS -o $remote_dayfile
+ #PBS -e $remote_dayfile
+
+ %%END%%
+
  else
     cat > $job_to_send << %%END%%
…
  then
     eval $submcom $job_on_remhost
- elif [[ $local_host = lcfimm ]]
- then
-    eval $submcom $job_on_remhost
-    echo "$submcom $job_on_remhost"
-    chmod u+x $job_on_remhost
- elif [[ $local_host = lctit ]]
+ elif [[ $local_host = lcfimm || $local_host = lctit || $localhost = lcxt4 ]]
  then
     eval $submcom $job_on_remhost
palm/trunk/SOURCE/CURRENT_MODIFICATIONS (r158 → r164)

  User-defined spectra.

+ Argument -c introduced to most of the subjob calls, which allows the user to
+ choose his own job catalog by setting job_catalog in the configuration file
+ (default is ~/job_queue). Workaround for mpixec with -env option.
+ Adjustments for lcxt4 (Bergen Center for Computational Science) (mrun, subjob)
+
  advec_particles, calc_spectra, check_open, check_parameters, data_output_spectra, header, init_particles, init_pegrid, init_3d_model, modules, netcdf, parin, particle_boundary_conds, plant_canopy_model, prognostic_equations, read_var_list, read_3d_binary, time_integration, user_interface, write_var_list, write_3d_binary
…
  informations are now contained in file _0000. (parin, check_open)

- check_open, init_3d_model, modules, parin, read_var_list, read_3d_binary, write_var_list, write_3d_binary
+ Transpositions for the 2D domain decomposition have been optimized by using
+ f_inv as an automatic array instead of providing the memory by a dummy argument.
+ This spares one copy loop per transposition. Order of indices in the 3D loops
+ in some of the transpose routines have been rearranged for better cache utilization.
+ Both have been suggested by Roland Richter (SGI) as part of the
+ HLRN-II benchmark process. (transpose)
+
+ Workaround for getting information about the coupling mode. (palm)
+
+ advec_s_ups, advec_u_ups, advec_v_ups, advec_w_ups, calc_spectra, check_open, init_3d_model, modules, palm, parin, poisfft, read_var_list, read_3d_binary, transpose, write_var_list, write_3d_binary
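Editor's note on the transpose change above: the gist is that the scratch array is now declared inside each routine as an automatic array instead of being passed in as a dummy argument, so the re-ordering can write from f_in to f_inv directly and the extra copy loop disappears. A minimal sketch of the idea (subroutine and variable names here are illustrative, not the actual PALM interfaces):

   ! Minimal sketch, not the PALM code itself: f_inv is an automatic array,
   ! so the routine provides its own scratch memory and a single loop does
   ! the re-ordering in one step.
   SUBROUTINE transpose_sketch( f_in, f_out, nx, ny )
      IMPLICIT NONE
      INTEGER, INTENT(IN) ::  nx, ny
      REAL, INTENT(IN)    ::  f_in(nx,ny)
      REAL, INTENT(OUT)   ::  f_out(ny,nx)
      REAL    ::  f_inv(ny,nx)   ! automatic array replacing the former dummy argument
      INTEGER ::  i, j

      DO  i = 1, nx              ! one re-ordering loop instead of re-order plus copy
         DO  j = 1, ny
            f_inv(j,i) = f_in(i,j)
         ENDDO
      ENDDO
      f_out = f_inv              ! stands in for the MPI_ALLTOALL step of the real routines
   END SUBROUTINE transpose_sketch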
palm/trunk/SOURCE/advec_s_ups.f90 (r4 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Arguments removed from transpose routines
  !
  ! Former revisions:
…
  !
  !-- Transpose the component to be advected: z --> x
-    CALL transpose_zx( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_zx( v_ad, tend, v_ad )

  #else
…
  !
  !-- Transpose the advecting componnet: z --> x
-    CALL transpose_zx( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )

  #endif
…
  !
  !-- Transpose the advecting component: z --> y
-    CALL transpose_zx( d, tend, d, tend, d )
-    CALL transpose_xy( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )
+    CALL transpose_xy( d, tend, d )

  !
  !-- Transpose the component to be advected: x --> y
-    CALL transpose_xy( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_xy( v_ad, tend, v_ad )

  #endif
…
  !
  !-- Transpose the component to be advected: y --> z (= y --> x + x --> z)
-    CALL transpose_yx( v_ad, tend, v_ad, tend, v_ad )
-    CALL transpose_xz( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_yx( v_ad, tend, v_ad )
+    CALL transpose_xz( v_ad, tend, v_ad )
palm/trunk/SOURCE/advec_u_ups.f90 (r4 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Arguments removed from transpose routines
  !
  ! Former revisions:
…
  !
  !-- Transpose the component to be advected: z --> x
-    CALL transpose_zx( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_zx( v_ad, tend, v_ad )

  !
…
  !
  !-- Transpose the advecting component: z --> y
-    CALL transpose_zx( d, tend, d, tend, d )
-    CALL transpose_xy( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )
+    CALL transpose_xy( d, tend, d )

  !
  !-- Transpose the component to be advected: x --> y
-    CALL transpose_xy( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_xy( v_ad, tend, v_ad )

  #endif
…
  !
  !-- Transpose the component to be advected: y --> z (= y --> x + x --> z)
-    CALL transpose_yx( v_ad, tend, v_ad, tend, v_ad )
-    CALL transpose_xz( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_yx( v_ad, tend, v_ad )
+    CALL transpose_xz( v_ad, tend, v_ad )
palm/trunk/SOURCE/advec_v_ups.f90 (r4 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Arguments removed from transpose routines
  !
  ! Former revisions:
…
  !
  !-- Transpose the component to be advected: z --> x
-    CALL transpose_zx( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_zx( v_ad, tend, v_ad )

  #else
…
  !
  !-- Transpose the advecting component: z --> x
-    CALL transpose_zx( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )

  #endif
…
  !
  !-- Transpose the advecting component: z --> y
-    CALL transpose_zx( d, tend, d, tend, d )
-    CALL transpose_xy( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )
+    CALL transpose_xy( d, tend, d )

  !
  !-- Transpose the component to be advected: x --> y
-    CALL transpose_xy( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_xy( v_ad, tend, v_ad )

  #endif
…
  !
  !-- Transpose the component to be advected: y --> z (= y --> x + x --> z)
-    CALL transpose_yx( v_ad, tend, v_ad, tend, v_ad )
-    CALL transpose_xz( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_yx( v_ad, tend, v_ad )
+    CALL transpose_xz( v_ad, tend, v_ad )
palm/trunk/SOURCE/advec_w_ups.f90 (r4 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Arguments removed from transpose routines
  !
  ! Former revisions:
…
  !
  !-- Transpose the component to be advected: z --> x
-    CALL transpose_zx( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_zx( v_ad, tend, v_ad )

  #else
…
  !
  !-- Transpose the component to be advected: z --> x
-    CALL transpose_zx( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )

  #endif
…
  !
  !-- Transpose the advecting component: z --> y
-    CALL transpose_zx( d, tend, d, tend, d )
-    CALL transpose_xy( d, tend, d, tend, d )
+    CALL transpose_zx( d, tend, d )
+    CALL transpose_xy( d, tend, d )

  !
  !-- Transpose the component to be advected: x --> y
-    CALL transpose_xy( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_xy( v_ad, tend, v_ad )

  #endif
…
  !
  !-- Transpose the component to be advected: y --> z (= y --> x + x --> z)
-    CALL transpose_yx( v_ad, tend, v_ad, tend, v_ad )
-    CALL transpose_xz( v_ad, tend, v_ad, tend, v_ad )
+    CALL transpose_yx( v_ad, tend, v_ad )
+    CALL transpose_xz( v_ad, tend, v_ad )
palm/trunk/SOURCE/calc_spectra.f90 (r146 → r164)

  ! Actual revisions:
  ! -----------------
- ! user-defined spectra
+ ! user-defined spectra, arguments removed from transpose routines
  !
  ! Former revisions:
…
  #if defined( __parallel )
     IF ( pdims(2) /= 1 )  THEN
-       CALL transpose_zx( d, tend, d, tend, d )
+       CALL transpose_zx( d, tend, d )
     ELSE
-       CALL transpose_yxd( d, tend, d, tend, d )
+       CALL transpose_yxd( d, tend, d )
     ENDIF
     CALL calc_spectra_x( d, pr, m )
…
  #if defined( __parallel )
-    CALL transpose_zyd( d, tend, d, tend, d )
+    CALL transpose_zyd( d, tend, d )
     CALL calc_spectra_y( d, pr, m )
  #else
palm/trunk/SOURCE/palm.f90 (r114 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Workaround for getting information about the coupling mode
  !
  ! Former revisions:
…
  !
  !-- Get information about the coupling mode from the environment variable
- !-- which has been set by the mpiexec command
-    CALL local_getenv( 'coupling_mode', 13, coupling_mode, i )
-    IF ( i == 0 )  coupling_mode = 'uncoupled'
+ !-- which has been set by the mpiexec command.
+ !-- This method is currently not used because the mpiexec command is not
+ !-- available on some machines
+ !    CALL local_getenv( 'coupling_mode', 13, coupling_mode, i )
+ !    IF ( i == 0 )  coupling_mode = 'uncoupled'
+ !    IF ( coupling_mode == 'ocean_to_atmosphere' )  coupling_char = '_O'
+
+ !
+ !-- Get information about the coupling mode from standard input (PE0 only) and
+ !-- distribute it to the other PEs
+    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
+    IF ( myid == 0 )  THEN
+       READ (*,*,ERR=10,END=10)  coupling_mode
+ 10    IF ( TRIM( coupling_mode ) == 'atmosphere_to_ocean' )  THEN
+          i = 1
+       ELSEIF ( TRIM( coupling_mode ) == 'ocean_to_atmosphere' )  THEN
+          i = 2
+       ELSE
+          i = 0
+       ENDIF
+    ENDIF
+    CALL MPI_BCAST( i, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr )
+    IF ( i == 0 )  THEN
+       coupling_mode = 'uncoupled'
+    ELSEIF ( i == 1 )  THEN
+       coupling_mode = 'atmosphere_to_ocean'
+    ELSEIF ( i == 2 )  THEN
+       coupling_mode = 'ocean_to_atmosphere'
+    ENDIF
  IF ( coupling_mode == 'ocean_to_atmosphere' )  coupling_char = '_O'
  #endif
palm/trunk/SOURCE/poisfft.f90 (r139 → r164)

  ! Actual revisions:
  ! -----------------
- !
+ ! Arguments removed from transpose routines
  !
  ! Former revisions:
…
  !-- Transposition z --> x
     CALL cpu_log( log_point_s(5), 'transpo forward', 'start' )
-    CALL transpose_zx( ar, work, ar, work, ar )
+    CALL transpose_zx( ar, work, ar )
     CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
…
  !-- Transposition x --> y
     CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
-    CALL transpose_xy( ar, work, ar, work, ar )
+    CALL transpose_xy( ar, work, ar )
     CALL cpu_log( log_point_s(5), 'transpo forward', 'pause' )
…
  !-- Transposition y --> z
     CALL cpu_log( log_point_s(5), 'transpo forward', 'continue' )
-    CALL transpose_yz( ar, work, ar, work, ar )
+    CALL transpose_yz( ar, work, ar )
     CALL cpu_log( log_point_s(5), 'transpo forward', 'stop' )
…
  !-- Transposition z --> y
     CALL cpu_log( log_point_s(8), 'transpo invers', 'start' )
-    CALL transpose_zy( ar, work, ar, work, ar )
+    CALL transpose_zy( ar, work, ar )
     CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
…
  !-- Transposition y --> x
     CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
-    CALL transpose_yx( ar, work, ar, work, ar )
+    CALL transpose_yx( ar, work, ar )
     CALL cpu_log( log_point_s(8), 'transpo invers', 'pause' )
…
  !-- Transposition x --> z
     CALL cpu_log( log_point_s(8), 'transpo invers', 'continue' )
-    CALL transpose_xz( ar, work, ar, work, ar )
+    CALL transpose_xz( ar, work, ar )
     CALL cpu_log( log_point_s(8), 'transpo invers', 'stop' )
palm/trunk/SOURCE/transpose.f90 (r4 → r164)

- SUBROUTINE transpose_xy( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_xy( f_in, work, f_out )

  ! Actual revisions:
  ! -----------------
- !
+ ! f_inv changed from subroutine argument to automatic array in order to do
+ ! re-ordering from f_in to f_inv in one step, one array work is needed instead
+ ! of work1 and work2
  !
  ! Former revisions:
…
     REAL ::  f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa),  &
              f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
              f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
-             work1(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)

  #if defined( __parallel )
…
  !-- Rearrange indices of input array in order to make data to be send
  !-- by MPI contiguous
-    DO  k = nzb_x, nzt_xa
-       DO  j = nys_x, nyn_xa
-          DO  i = 0, nxa
-             work1(j,k,i) = f_in(i,j,k)
-          ENDDO
-       ENDDO
-    ENDDO
-
- !
- !-- Move data to different array, because memory location of work1 is
- !-- needed further below (work1 = work2)
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
           DO  j = nys_x, nyn_xa
-             f_inv(j,k,i) = work1(j,k,i)
+             f_inv(j,k,i) = f_in(i,j,k)
           ENDDO
        ENDDO
     ENDDO
…
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
-                       work2(1),             sendrecvcount_xy, MPI_REAL, &
+                       work(1),              sendrecvcount_xy, MPI_REAL, &
                        comm1dy, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
           DO  j = ys, ys + nyn_xa - nys_x
              m = m + 1
-             f_out(j,i,k) = work2(m)
+             f_out(j,i,k) = work(m)
           ENDDO
…

- SUBROUTINE transpose_xz( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_xz( f_in, work, f_out )
…
     REAL ::  f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
-             f_inv(nxl:nxra,nys:nyna,1:nza),        &
+             f_inv(nys:nyna,nxl:nxra,1:nza),        &
              f_out(1:nza,nys:nyna,nxl:nxra),        &
-             work1(1:nza,nys:nyna,nxl:nxra), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
        xs = 0 + l * nnx
        DO  k = nzb_x, nzt_xa
-          DO  j = nys_x, nyn_xa
-             DO  i = xs, xs + nnx - 1
+          DO  i = xs, xs + nnx - 1
+             DO  j = nys_x, nyn_xa
                 m = m + 1
-                work2(m) = f_in(i,j,k)
+                work(m) = f_in(i,j,k)
              ENDDO
           ENDDO
…
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
-    CALL MPI_ALLTOALL( work2(1),         sendrecvcount_zx, MPI_REAL, &
-                       f_inv(nxl,nys,1), sendrecvcount_zx, MPI_REAL, &
+    CALL MPI_ALLTOALL( work(1),          sendrecvcount_zx, MPI_REAL, &
+                       f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
                        comm1dx, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
  !
  !-- Reorder transposed array in a way that the z index is in first position
-    DO  i = nxl, nxra
-       DO  j = nys, nyna
-          DO  k = 1, nza
-             work1(k,j,i) = f_inv(i,j,k)
+    DO  k = 1, nza
+       DO  i = nxl, nxra
+          DO  j = nys, nyna
+             f_out(k,j,i) = f_inv(j,i,k)
           ENDDO
        ENDDO
     ENDDO
  ELSE
     DO  i = nxl, nxra
        DO  j = nys, nyna
           DO  k = 1, nza
-             work1(k,j,i) = f_in(i,j,k)
+             f_inv(j,i,k) = f_in(i,j,k)
           ENDDO
        ENDDO
     ENDDO
+
+    DO  k = 1, nza
+       DO  i = nxl, nxra
+          DO  j = nys, nyna
+             f_out(k,j,i) = f_inv(j,i,k)
+          ENDDO
+       ENDDO
+    ENDDO
+
  ENDIF

- !
- !-- Move data to output array
-    DO  i = nxl, nxra
-       DO  j = nys, nyna
-          DO  k = 1, nza
-             f_out(k,j,i) = work1(k,j,i)
-          ENDDO
-       ENDDO
-    ENDDO

  #endif
…

- SUBROUTINE transpose_yx( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_yx( f_in, work, f_out )
…
     REAL ::  f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya),  &
              f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), &
              f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
-             work1(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
           DO  j = ys, ys + nyn_xa - nys_x
              m = m + 1
-             work2(m) = f_in(j,i,k)
+             work(m) = f_in(j,i,k)
           ENDDO
        ENDDO
…
  !-- Transpose array
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
-    CALL MPI_ALLTOALL( work2(1),             sendrecvcount_xy, MPI_REAL, &
+    CALL MPI_ALLTOALL( work(1),              sendrecvcount_xy, MPI_REAL, &
                        f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, &
                        comm1dy, ierr )
…
  !-- Reorder transposed array in a way that the x index is in first position
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
           DO  j = nys_x, nyn_xa
-             work1(i,j,k) = f_inv(j,k,i)
-          ENDDO
-       ENDDO
-    ENDDO
-
- !
- !-- Move data to output array
-    DO  k = nzb_x, nzt_xa
-       DO  j = nys_x, nyn_xa
-          DO  i = 0, nxa
-             f_out(i,j,k) = work1(i,j,k)
+             f_out(i,j,k) = f_inv(j,k,i)
           ENDDO
        ENDDO
     ENDDO
…

- SUBROUTINE transpose_yxd( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_yxd( f_in, work, f_out )
…
     REAL ::  f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), &
              f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
-             work1(nxl:nxra,1:nza,nys:nyna), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
     DO  k = 1, nza
        DO  j = nys, nyna
           DO  i = nxl, nxra
-             work1(i,k,j) = f_in(k,j,i)
-          ENDDO
-       ENDDO
-    ENDDO
-
- !
- !-- Move data to different array, because memory location of work1 is
- !-- needed further below (work1 = work2)
-    DO  j = nys, nyna
-       DO  k = 1, nza
-          DO  i = nxl, nxra
-             f_inv(i,k,j) = work1(i,k,j)
+             f_inv(i,k,j) = f_in(k,j,i)
           ENDDO
        ENDDO
     ENDDO
…
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, &
-                       work2(1),         sendrecvcount_xy, MPI_REAL, &
+                       work(1),          sendrecvcount_xy, MPI_REAL, &
                        comm1dx, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
           DO  i = xs, xs + nnx - 1
              m = m + 1
-             f_out(i,j,k) = work2(m)
+             f_out(i,j,k) = work(m)
           ENDDO
        ENDDO
…

- SUBROUTINE transpose_yz( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_yz( f_in, work, f_out )
…
     REAL ::  f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya),  &
              f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
              f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), &
-             work1(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
  !-- Rearrange indices of input array in order to make data to be send
  !-- by MPI contiguous
-    DO  k = nzb_y, nzt_ya
-       DO  i = nxl_y, nxr_ya
-          DO  j = 0, nya
-             work1(i,k,j) = f_in(j,i,k)
+    DO  j = 0, nya
+       DO  k = nzb_y, nzt_ya
+          DO  i = nxl_y, nxr_ya
+             f_inv(i,k,j) = f_in(j,i,k)
           ENDDO
        ENDDO
     ENDDO
…
        DO  k = nzb_y, nzt_ya
           DO  i = nxl_y, nxr_ya
-             f_out(i,j,k) = work1(i,k,j)
+             f_out(i,j,k) = f_inv(i,k,j)
           ENDDO
        ENDDO
     ENDDO
     RETURN
- ELSE
-    DO  j = 0, nya
-       DO  k = nzb_y, nzt_ya
-          DO  i = nxl_y, nxr_ya
-             f_inv(i,k,j) = work1(i,k,j)
-          ENDDO
-       ENDDO
-    ENDDO
  ENDIF
…
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
-                       work2(1),             sendrecvcount_yz, MPI_REAL, &
+                       work(1),              sendrecvcount_yz, MPI_REAL, &
                        comm1dx, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
           DO  i = nxl_z, nxr_za
              m = m + 1
-             f_out(i,j,k) = work2(m)
+             f_out(i,j,k) = work(m)
           ENDDO
        ENDDO
…

- SUBROUTINE transpose_zx( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_zx( f_in, work, f_out )
…
     INTEGER ::  i, j, k, l, m, xs

-    REAL ::  f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,nys:nyna,1:nza), &
+    REAL ::  f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
              f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), &
-             work1(nxl:nxra,nys:nyna,1:nza), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
  !-- Rearrange indices of input array in order to make data to be send
  !-- by MPI contiguous
-    DO  i = nxl, nxra
-       DO  j = nys, nyna
-          DO  k = 1,nza
-             work1(i,j,k) = f_in(k,j,i)
+    DO  k = 1,nza
+       DO  i = nxl, nxra
+          DO  j = nys, nyna
+             f_inv(j,i,k) = f_in(k,j,i)
           ENDDO
        ENDDO
     ENDDO
…
  IF ( pdims(1) == 1 )  THEN
     DO  k = 1, nza
-       DO  j = nys, nyna
-          DO  i = nxl, nxra
-             f_out(i,j,k) = work1(i,j,k)
+       DO  i = nxl, nxra
+          DO  j = nys, nyna
+             f_out(i,j,k) = f_inv(j,i,k)
           ENDDO
        ENDDO
     ENDDO
     RETURN
- ELSE
-    DO  k = 1, nza
-       DO  j = nys, nyna
-          DO  i = nxl, nxra
-             f_inv(i,j,k) = work1(i,j,k)
-          ENDDO
-       ENDDO
-    ENDDO
  ENDIF
…
  !-- Transpose array
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
-    CALL MPI_ALLTOALL( f_inv(nxl,nys,1), sendrecvcount_zx, MPI_REAL, &
-                       work2(1),         sendrecvcount_zx, MPI_REAL, &
+    CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &
+                       work(1),          sendrecvcount_zx, MPI_REAL, &
                        comm1dx, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
        xs = 0 + l * nnx
        DO  k = nzb_x, nzt_xa
-          DO  j = nys_x, nyn_xa
-             DO  i = xs, xs + nnx - 1
+          DO  i = xs, xs + nnx - 1
+             DO  j = nys_x, nyn_xa
                 m = m + 1
-                f_out(i,j,k) = work2(m)
+                f_out(i,j,k) = work(m)
              ENDDO
           ENDDO
…

- SUBROUTINE transpose_zy( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_zy( f_in, work, f_out )
…
     REAL ::  f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza),  &
              f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), &
              f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), &
-             work1(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
           DO  i = nxl_z, nxr_za
              m = m + 1
-             work2(m) = f_in(i,j,k)
+             work(m) = f_in(i,j,k)
           ENDDO
        ENDDO
…
  !-- Transpose array
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
-    CALL MPI_ALLTOALL( work2(1),             sendrecvcount_yz, MPI_REAL, &
+    CALL MPI_ALLTOALL( work(1),              sendrecvcount_yz, MPI_REAL, &
                        f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, &
                        comm1dx, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
  !
  !-- Reorder transposed array in a way that the y index is in first position
+    DO  j = 0, nya
+       DO  k = nzb_y, nzt_ya
+          DO  i = nxl_y, nxr_ya
+             f_out(j,i,k) = f_inv(i,k,j)
+          ENDDO
+       ENDDO
+    ENDDO
+ ELSE
+ !
+ !-- Reorder the array in a way that the y index is in first position
+    DO  k = nzb_y, nzt_ya
+       DO  j = 0, nya
+          DO  i = nxl_y, nxr_ya
+             f_inv(i,k,j) = f_in(i,j,k)
+          ENDDO
+       ENDDO
+    ENDDO
+ !
+ !-- Move data to output array
     DO  k = nzb_y, nzt_ya
        DO  i = nxl_y, nxr_ya
           DO  j = 0, nya
-             work1(j,i,k) = f_inv(i,k,j)
-          ENDDO
-       ENDDO
-    ENDDO
- ELSE
- !
- !-- Reorder the array in a way that the y index is in first position
-    DO  k = nzb_y, nzt_ya
-       DO  i = nxl_y, nxr_ya
-          DO  j = 0, nya
-             work1(j,i,k) = f_in(i,j,k)
+             f_out(j,i,k) = f_inv(i,k,j)
           ENDDO
        ENDDO
     ENDDO
+
  ENDIF

- !
- !-- Move data to output array
-    DO  k = nzb_y, nzt_ya
-       DO  i = nxl_y, nxr_ya
-          DO  j = 0, nya
-             f_out(j,i,k) = work1(j,i,k)
-          ENDDO
-       ENDDO
-    ENDDO
-
  #endif
…

- SUBROUTINE transpose_zyd( f_in, work1, f_inv, work2, f_out )
+ SUBROUTINE transpose_zyd( f_in, work, f_out )
…
     REAL ::  f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), &
              f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), &
-             work1(nys:nyna,nxl:nxra,1:nza), work2(nnx*nny*nnz)
+             work(nnx*nny*nnz)
…
        DO  j = nys, nyna
           DO  k = 1, nza
-             work1(j,i,k) = f_in(k,j,i)
+             f_inv(j,i,k) = f_in(k,j,i)
           ENDDO
        ENDDO
…
        DO  i = nxl, nxra
           DO  j = nys, nyna
-             f_out(j,i,k) = work1(j,i,k)
+             f_out(j,i,k) = f_inv(j,i,k)
           ENDDO
        ENDDO
     ENDDO
     RETURN
- ELSE
-    DO  k = 1, nza
-       DO  i = nxl, nxra
-          DO  j = nys, nyna
-             f_inv(j,i,k) = work1(j,i,k)
-          ENDDO
-       ENDDO
-    ENDDO
  ENDIF
…
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )
     CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, &
-                       work2(1),         sendrecvcount_zyd, MPI_REAL, &
+                       work(1),          sendrecvcount_zyd, MPI_REAL, &
                        comm1dy, ierr )
     CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
…
           DO  j = ys, ys + nny - 1
              m = m + 1
-             f_out(j,i,k) = work2(m)
+             f_out(j,i,k) = work(m)
           ENDDO
        ENDDO
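Editor's note on the loop re-orderings above: they follow Fortran's column-major storage, where making the leftmost index of the array being written the innermost loop lets consecutive iterations touch adjacent memory. A minimal sketch of the pattern (illustrative names and bounds, not the actual PALM interfaces):

   SUBROUTINE reorder_sketch( f_in, f_inv, nx, ny, nz )
      IMPLICIT NONE
      INTEGER, INTENT(IN) ::  nx, ny, nz
      REAL, INTENT(IN)    ::  f_in(nz,ny,nx)
      REAL, INTENT(OUT)   ::  f_inv(ny,nx,nz)
      INTEGER ::  i, j, k

   !
   !-- Fortran stores arrays column-major, so f_inv(j,i,k) and f_inv(j+1,i,k)
   !-- are adjacent in memory; with j as the innermost loop the writes to
   !-- f_inv are contiguous
      DO  k = 1, nz
         DO  i = 1, nx
            DO  j = 1, ny
               f_inv(j,i,k) = f_in(k,j,i)
            ENDDO
         ENDDO
      ENDDO
   END SUBROUTINE reorder_sketch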
Note: See TracChangeset for help on using the changeset viewer.