Changeset 164 for palm/trunk/SOURCE/transpose.f90
 Timestamp:
 May 15, 2008 8:46:15 AM (13 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

palm/trunk/SOURCE/transpose.f90
r4 r164 1 SUBROUTINE transpose_xy( f_in, work 1, f_inv, work2, f_out )1 SUBROUTINE transpose_xy( f_in, work, f_out ) 2 2 3 3 !! 4 4 ! Actual revisions: 5 5 !  6 ! 6 ! f_inv changed from subroutine argument to automatic array in order to do 7 ! reordering from f_in to f_inv in one step, one array work is needed instead 8 ! of work1 and work2 7 9 ! 8 10 ! Former revisions: … … 44 46 f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), & 45 47 f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & 46 work 1(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), work2(nnx*nny*nnz)48 work(nnx*nny*nnz) 47 49 48 50 #if defined( __parallel ) … … 51 53 ! Rearrange indices of input array in order to make data to be send 52 54 ! by MPI contiguous 53 DO k = nzb_x, nzt_xa54 DO j = nys_x, nyn_xa55 DO i = 0, nxa56 work1(j,k,i) = f_in(i,j,k)57 ENDDO58 ENDDO59 ENDDO60 61 !62 ! Move data to different array, because memory location of work1 is63 ! needed further below (work1 = work2)64 55 DO i = 0, nxa 65 56 DO k = nzb_x, nzt_xa 66 57 DO j = nys_x, nyn_xa 67 f_inv(j,k,i) = work1(j,k,i)58 f_inv(j,k,i) = f_in(i,j,k) 68 59 ENDDO 69 60 ENDDO … … 74 65 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 75 66 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 76 work 2(1),sendrecvcount_xy, MPI_REAL, &67 work(1), sendrecvcount_xy, MPI_REAL, & 77 68 comm1dy, ierr ) 78 69 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 87 78 DO j = ys, ys + nyn_xa  nys_x 88 79 m = m + 1 89 f_out(j,i,k) = work 2(m)80 f_out(j,i,k) = work(m) 90 81 ENDDO 91 82 ENDDO … … 98 89 99 90 100 SUBROUTINE transpose_xz( f_in, work 1, f_inv, work2, f_out )91 SUBROUTINE transpose_xz( f_in, work, f_out ) 101 92 102 93 !! … … 119 110 120 111 REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & 121 f_inv(n xl:nxra,nys:nyna,1:nza), &112 f_inv(nys:nyna,nxl:nxra,1:nza), & 122 113 f_out(1:nza,nys:nyna,nxl:nxra), & 123 work 1(1:nza,nys:nyna,nxl:nxra), work2(nnx*nny*nnz)114 work(nnx*nny*nnz) 124 115 125 116 #if defined( __parallel ) … … 135 126 xs = 0 + l * nnx 136 127 DO k = nzb_x, nzt_xa 137 DO j = nys_x, nyn_xa138 DO i = xs, xs + nnx  1128 DO i = xs, xs + nnx  1 129 DO j = nys_x, nyn_xa 139 130 m = m + 1 140 work 2(m) = f_in(i,j,k)131 work(m) = f_in(i,j,k) 141 132 ENDDO 142 133 ENDDO … … 147 138 ! Transpose array 148 139 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 149 CALL MPI_ALLTOALL( work 2(1),sendrecvcount_zx, MPI_REAL, &150 f_inv(n xl,nys,1), sendrecvcount_zx, MPI_REAL, &140 CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, & 141 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 151 142 comm1dx, ierr ) 152 143 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 154 145 ! 155 146 ! Reorder transposed array in a way that the z index is in first position 156 DO i = nxl, nxra157 DO j = nys, nyna158 DO k = 1, nza159 work1(k,j,i) = f_inv(i,j,k)147 DO k = 1, nza 148 DO i = nxl, nxra 149 DO j = nys, nyna 150 f_out(k,j,i) = f_inv(j,i,k) 160 151 ENDDO 161 152 ENDDO … … 167 158 DO j = nys, nyna 168 159 DO k = 1, nza 169 work1(k,j,i) = f_in(i,j,k) 170 ENDDO 171 ENDDO 172 ENDDO 160 f_inv(j,i,k) = f_in(i,j,k) 161 ENDDO 162 ENDDO 163 ENDDO 164 165 DO k = 1, nza 166 DO i = nxl, nxra 167 DO j = nys, nyna 168 f_out(k,j,i) = f_inv(j,i,k) 169 ENDDO 170 ENDDO 171 ENDDO 172 173 173 ENDIF 174 174 175 !176 ! Move data to output array177 DO i = nxl, nxra178 DO j = nys, nyna179 DO k = 1, nza180 f_out(k,j,i) = work1(k,j,i)181 ENDDO182 ENDDO183 ENDDO184 175 185 176 #endif … … 188 179 189 180 190 SUBROUTINE transpose_yx( f_in, work 1, f_inv, work2, f_out )181 SUBROUTINE transpose_yx( f_in, work, f_out ) 191 182 192 183 !! … … 211 202 f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), & 212 203 f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & 213 work 1(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), work2(nnx*nny*nnz)204 work(nnx*nny*nnz) 214 205 215 206 #if defined( __parallel ) … … 224 215 DO j = ys, ys + nyn_xa  nys_x 225 216 m = m + 1 226 work 2(m) = f_in(j,i,k)217 work(m) = f_in(j,i,k) 227 218 ENDDO 228 219 ENDDO … … 233 224 ! Transpose array 234 225 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 235 CALL MPI_ALLTOALL( work 2(1),sendrecvcount_xy, MPI_REAL, &226 CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & 236 227 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 237 228 comm1dy, ierr ) … … 243 234 DO k = nzb_x, nzt_xa 244 235 DO j = nys_x, nyn_xa 245 work1(i,j,k) = f_inv(j,k,i) 246 ENDDO 247 ENDDO 248 ENDDO 249 250 ! 251 ! Move data to output array 252 DO k = nzb_x, nzt_xa 253 DO j = nys_x, nyn_xa 254 DO i = 0, nxa 255 f_out(i,j,k) = work1(i,j,k) 236 f_out(i,j,k) = f_inv(j,k,i) 256 237 ENDDO 257 238 ENDDO … … 263 244 264 245 265 SUBROUTINE transpose_yxd( f_in, work 1, f_inv, work2, f_out )246 SUBROUTINE transpose_yxd( f_in, work, f_out ) 266 247 267 248 !! … … 287 268 REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), & 288 269 f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & 289 work 1(nxl:nxra,1:nza,nys:nyna), work2(nnx*nny*nnz)270 work(nnx*nny*nnz) 290 271 291 272 #if defined( __parallel ) … … 297 278 DO j = nys, nyna 298 279 DO i = nxl, nxra 299 work1(i,k,j) = f_in(k,j,i) 300 ENDDO 301 ENDDO 302 ENDDO 303 304 ! 305 ! Move data to different array, because memory location of work1 is 306 ! needed further below (work1 = work2) 307 DO j = nys, nyna 308 DO k = 1, nza 309 DO i = nxl, nxra 310 f_inv(i,k,j) = work1(i,k,j) 280 f_inv(i,k,j) = f_in(k,j,i) 311 281 ENDDO 312 282 ENDDO … … 317 287 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 318 288 CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, & 319 work 2(1),sendrecvcount_xy, MPI_REAL, &289 work(1), sendrecvcount_xy, MPI_REAL, & 320 290 comm1dx, ierr ) 321 291 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 330 300 DO i = xs, xs + nnx  1 331 301 m = m + 1 332 f_out(i,j,k) = work 2(m)302 f_out(i,j,k) = work(m) 333 303 ENDDO 334 304 ENDDO … … 341 311 342 312 343 SUBROUTINE transpose_yz( f_in, work 1, f_inv, work2, f_out )313 SUBROUTINE transpose_yz( f_in, work, f_out ) 344 314 345 315 !! … … 364 334 f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), & 365 335 f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), & 366 work 1(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), work2(nnx*nny*nnz)336 work(nnx*nny*nnz) 367 337 368 338 #if defined( __parallel ) … … 371 341 ! Rearrange indices of input array in order to make data to be send 372 342 ! by MPI contiguous 373 DO k = nzb_y, nzt_ya374 DO i = nxl_y, nxr_ya375 DO j = 0, nya376 work1(i,k,j) = f_in(j,i,k)343 DO j = 0, nya 344 DO k = nzb_y, nzt_ya 345 DO i = nxl_y, nxr_ya 346 f_inv(i,k,j) = f_in(j,i,k) 377 347 ENDDO 378 348 ENDDO … … 388 358 DO k = nzb_y, nzt_ya 389 359 DO i = nxl_y, nxr_ya 390 f_out(i,j,k) = work1(i,k,j)360 f_out(i,j,k) = f_inv(i,k,j) 391 361 ENDDO 392 362 ENDDO 393 363 ENDDO 394 364 RETURN 395 ELSE396 DO j = 0, nya397 DO k = nzb_y, nzt_ya398 DO i = nxl_y, nxr_ya399 f_inv(i,k,j) = work1(i,k,j)400 ENDDO401 ENDDO402 ENDDO403 365 ENDIF 404 366 … … 407 369 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 408 370 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 409 work 2(1),sendrecvcount_yz, MPI_REAL, &371 work(1), sendrecvcount_yz, MPI_REAL, & 410 372 comm1dx, ierr ) 411 373 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 420 382 DO i = nxl_z, nxr_za 421 383 m = m + 1 422 f_out(i,j,k) = work 2(m)384 f_out(i,j,k) = work(m) 423 385 ENDDO 424 386 ENDDO … … 431 393 432 394 433 SUBROUTINE transpose_zx( f_in, work 1, f_inv, work2, f_out )395 SUBROUTINE transpose_zx( f_in, work, f_out ) 434 396 435 397 !! … … 451 413 INTEGER :: i, j, k, l, m, xs 452 414 453 REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(n xl:nxra,nys:nyna,1:nza), &415 REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), & 454 416 f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & 455 work 1(nxl:nxra,nys:nyna,1:nza), work2(nnx*nny*nnz)417 work(nnx*nny*nnz) 456 418 457 419 #if defined( __parallel ) … … 460 422 ! Rearrange indices of input array in order to make data to be send 461 423 ! by MPI contiguous 462 DO i = nxl, nxra463 DO j = nys, nyna464 DO k = 1,nza465 work1(i,j,k) = f_in(k,j,i)424 DO k = 1,nza 425 DO i = nxl, nxra 426 DO j = nys, nyna 427 f_inv(j,i,k) = f_in(k,j,i) 466 428 ENDDO 467 429 ENDDO … … 475 437 IF ( pdims(1) == 1 ) THEN 476 438 DO k = 1, nza 477 DO j = nys, nyna478 DO i = nxl, nxra479 f_out(i,j,k) = work1(i,j,k)439 DO i = nxl, nxra 440 DO j = nys, nyna 441 f_out(i,j,k) = f_inv(j,i,k) 480 442 ENDDO 481 443 ENDDO 482 444 ENDDO 483 445 RETURN 484 ELSE485 DO k = 1, nza486 DO j = nys, nyna487 DO i = nxl, nxra488 f_inv(i,j,k) = work1(i,j,k)489 ENDDO490 ENDDO491 ENDDO492 446 ENDIF 493 447 … … 495 449 ! Transpose array 496 450 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 497 CALL MPI_ALLTOALL( f_inv(n xl,nys,1), sendrecvcount_zx, MPI_REAL, &498 work 2(1),sendrecvcount_zx, MPI_REAL, &451 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 452 work(1), sendrecvcount_zx, MPI_REAL, & 499 453 comm1dx, ierr ) 500 454 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 506 460 xs = 0 + l * nnx 507 461 DO k = nzb_x, nzt_xa 508 DO j = nys_x, nyn_xa509 DO i = xs, xs + nnx  1462 DO i = xs, xs + nnx  1 463 DO j = nys_x, nyn_xa 510 464 m = m + 1 511 f_out(i,j,k) = work 2(m)465 f_out(i,j,k) = work(m) 512 466 ENDDO 513 467 ENDDO … … 520 474 521 475 522 SUBROUTINE transpose_zy( f_in, work 1, f_inv, work2, f_out )476 SUBROUTINE transpose_zy( f_in, work, f_out ) 523 477 524 478 !! … … 543 497 f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), & 544 498 f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & 545 work 1(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), work2(nnx*nny*nnz)499 work(nnx*nny*nnz) 546 500 547 501 #if defined( __parallel ) … … 560 514 DO i = nxl_z, nxr_za 561 515 m = m + 1 562 work 2(m) = f_in(i,j,k)516 work(m) = f_in(i,j,k) 563 517 ENDDO 564 518 ENDDO … … 569 523 ! Transpose array 570 524 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 571 CALL MPI_ALLTOALL( work 2(1),sendrecvcount_yz, MPI_REAL, &525 CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, & 572 526 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 573 527 comm1dx, ierr ) … … 576 530 ! 577 531 ! Reorder transposed array in a way that the y index is in first position 532 DO j = 0, nya 533 DO k = nzb_y, nzt_ya 534 DO i = nxl_y, nxr_ya 535 f_out(j,i,k) = f_inv(i,k,j) 536 ENDDO 537 ENDDO 538 ENDDO 539 ELSE 540 ! 541 ! Reorder the array in a way that the y index is in first position 542 DO k = nzb_y, nzt_ya 543 DO j = 0, nya 544 DO i = nxl_y, nxr_ya 545 f_inv(i,k,j) = f_in(i,j,k) 546 ENDDO 547 ENDDO 548 ENDDO 549 ! 550 ! Move data to output array 578 551 DO k = nzb_y, nzt_ya 579 552 DO i = nxl_y, nxr_ya 580 553 DO j = 0, nya 581 work1(j,i,k) = f_inv(i,k,j) 582 ENDDO 583 ENDDO 584 ENDDO 585 ELSE 586 ! 587 ! Reorder the array in a way that the y index is in first position 588 DO k = nzb_y, nzt_ya 589 DO i = nxl_y, nxr_ya 590 DO j = 0, nya 591 work1(j,i,k) = f_in(i,j,k) 592 ENDDO 593 ENDDO 594 ENDDO 554 f_out(j,i,k) = f_inv(i,k,j) 555 ENDDO 556 ENDDO 557 ENDDO 558 595 559 ENDIF 596 560 597 !598 ! Move data to output array599 DO k = nzb_y, nzt_ya600 DO i = nxl_y, nxr_ya601 DO j = 0, nya602 f_out(j,i,k) = work1(j,i,k)603 ENDDO604 ENDDO605 ENDDO606 607 561 #endif 608 562 … … 610 564 611 565 612 SUBROUTINE transpose_zyd( f_in, work 1, f_inv, work2, f_out )566 SUBROUTINE transpose_zyd( f_in, work, f_out ) 613 567 614 568 !! … … 634 588 REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), & 635 589 f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), & 636 work 1(nys:nyna,nxl:nxra,1:nza), work2(nnx*nny*nnz)590 work(nnx*nny*nnz) 637 591 638 592 #if defined( __parallel ) … … 644 598 DO j = nys, nyna 645 599 DO k = 1, nza 646 work1(j,i,k) = f_in(k,j,i)600 f_inv(j,i,k) = f_in(k,j,i) 647 601 ENDDO 648 602 ENDDO … … 658 612 DO i = nxl, nxra 659 613 DO j = nys, nyna 660 f_out(j,i,k) = work1(j,i,k)614 f_out(j,i,k) = f_inv(j,i,k) 661 615 ENDDO 662 616 ENDDO 663 617 ENDDO 664 618 RETURN 665 ELSE666 DO k = 1, nza667 DO i = nxl, nxra668 DO j = nys, nyna669 f_inv(j,i,k) = work1(j,i,k)670 ENDDO671 ENDDO672 ENDDO673 619 ENDIF 674 620 … … 677 623 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 678 624 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, & 679 work 2(1),sendrecvcount_zyd, MPI_REAL, &625 work(1), sendrecvcount_zyd, MPI_REAL, & 680 626 comm1dy, ierr ) 681 627 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) … … 690 636 DO j = ys, ys + nny  1 691 637 m = m + 1 692 f_out(j,i,k) = work 2(m)638 f_out(j,i,k) = work(m) 693 639 ENDDO 694 640 ENDDO
Note: See TracChangeset
for help on using the changeset viewer.