Changeset 1113 for palm/trunk/SOURCE/pres.f90
- Timestamp:
- Mar 10, 2013 2:48:14 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/pres.f90
r1112 r1113 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! GPU-porting of several loops, some loops rearranged 23 23 ! 24 24 ! Former revisions: … … 403 403 !-- Solver for 2d-decomposition 404 404 CALL poisfft( d, tend ) 405 !$acc update host( d ) 405 406 406 ELSEIF ( psolver == 'poisfft_hybrid' ) THEN 407 407 ! … … 410 410 !-- are some optimization problems in poisfft 411 411 CALL poisfft_hybrid( d ) 412 412 413 ENDIF 413 414 … … 416 417 !-- z-direction 417 418 !$OMP PARALLEL DO 419 !$acc kernels present( d, tend ) 420 !$acc loop 418 421 DO i = nxl, nxr 419 422 DO j = nys, nyn 423 !$acc loop vector( 32 ) 420 424 DO k = nzb+1, nzt 421 425 tend(k,j,i) = d(k,j,i) … … 423 427 ENDDO 424 428 ENDDO 429 !$acc end kernels 425 430 426 431 ! … … 432 437 !-- Neumann (dp/dz = 0) 433 438 !$OMP PARALLEL DO 439 !$acc kernels present( nzb_s_inner, tend ) 434 440 DO i = nxlg, nxrg 435 441 DO j = nysg, nyng … … 437 443 ENDDO 438 444 ENDDO 445 !$acc end kernels 439 446 440 447 ELSE … … 442 449 !-- Dirichlet 443 450 !$OMP PARALLEL DO 451 !$acc kernels present( tend ) 444 452 DO i = nxlg, nxrg 445 453 DO j = nysg, nyng … … 447 455 ENDDO 448 456 ENDDO 457 !$acc end kernels 449 458 450 459 ENDIF … … 456 465 !-- Neumann 457 466 !$OMP PARALLEL DO 467 !$acc kernels present( tend ) 458 468 DO i = nxlg, nxrg 459 469 DO j = nysg, nyng … … 461 471 ENDDO 462 472 ENDDO 473 !$acc end kernels 463 474 464 475 ELSE … … 466 477 !-- Dirichlet 467 478 !$OMP PARALLEL DO 479 !$acc kernels present( tend ) 468 480 DO i = nxlg, nxrg 469 481 DO j = nysg, nyng … … 471 483 ENDDO 472 484 ENDDO 485 !$acc end kernels 473 486 474 487 ENDIF … … 476 489 ! 477 490 !-- Exchange boundaries for p 491 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 492 on_device = .TRUE. ! to be removed after complete porting 493 ELSE ! of ghost point exchange 494 !$acc update host( tend ) 495 ENDIF 478 496 CALL exchange_horiz( tend, nbgp ) 497 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 498 on_device = .FALSE. ! to be removed after complete porting 499 ELSE ! of ghost point exchange 500 !$acc update device( tend ) 501 ENDIF 479 502 480 503 ELSEIF ( psolver == 'sor' ) THEN … … 531 554 !$OMP PARALLEL PRIVATE (i,j,k) 532 555 !$OMP DO 556 !$acc kernels present( p, tend, weight_substep ) 557 !$acc loop 533 558 DO i = nxl-1, nxr+1 534 559 DO j = nys-1, nyn+1 560 !$acc loop vector( 32 ) 535 561 DO k = nzb, nzt+1 536 562 p(k,j,i) = tend(k,j,i) * & … … 539 565 ENDDO 540 566 ENDDO 567 !$acc end kernels 541 568 !$OMP END PARALLEL 542 569 … … 544 571 !$OMP PARALLEL PRIVATE (i,j,k) 545 572 !$OMP DO 573 !$acc kernels present( p, tend, weight_substep ) 574 !$acc loop 546 575 DO i = nxl-1, nxr+1 547 576 DO j = nys-1, nyn+1 577 !$acc loop vector( 32 ) 548 578 DO k = nzb, nzt+1 549 579 p(k,j,i) = p(k,j,i) + tend(k,j,i) * & … … 552 582 ENDDO 553 583 ENDDO 584 !$acc end kernels 554 585 !$OMP END PARALLEL 555 586 … … 571 602 !$OMP PARALLEL PRIVATE (i,j,k) 572 603 !$OMP DO 604 !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres ) 605 !$acc loop 573 606 DO i = nxl, nxr 574 607 DO j = nys, nyn 575 DO k = nzb_w_inner(j,i)+1, nzt 576 w(k,j,i) = w(k,j,i) - dt_3d * & 577 ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * & 578 weight_pres(intermediate_timestep_count) 579 ENDDO 580 DO k = nzb_u_inner(j,i)+1, nzt 581 u(k,j,i) = u(k,j,i) - dt_3d * & 582 ( tend(k,j,i) - tend(k,j,i-1) ) * ddx * & 583 weight_pres(intermediate_timestep_count) 584 ENDDO 585 DO k = nzb_v_inner(j,i)+1, nzt 586 v(k,j,i) = v(k,j,i) - dt_3d * & 587 ( tend(k,j,i) - tend(k,j-1,i) ) * ddy * & 588 weight_pres(intermediate_timestep_count) 608 !$acc loop vector( 32 ) 609 DO k = 1, nzt 610 IF ( k > nzb_w_inner(j,i) ) THEN 611 w(k,j,i) = w(k,j,i) - dt_3d * & 612 ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * & 613 weight_pres(intermediate_timestep_count) 614 ENDIF 615 ENDDO 616 !$acc loop vector( 32 ) 617 DO k = 1, nzt 618 IF ( k > nzb_u_inner(j,i) ) THEN 619 u(k,j,i) = u(k,j,i) - dt_3d * & 620 ( tend(k,j,i) - tend(k,j,i-1) ) * ddx * & 621 weight_pres(intermediate_timestep_count) 622 ENDIF 623 ENDDO 624 !$acc loop vector( 32 ) 625 DO k = 1, nzt 626 IF ( k > nzb_v_inner(j,i) ) THEN 627 v(k,j,i) = v(k,j,i) - dt_3d * & 628 ( tend(k,j,i) - tend(k,j-1,i) ) * ddy * & 629 weight_pres(intermediate_timestep_count) 630 ENDIF 589 631 ENDDO 590 !591 !-- Sum up the volume flow through the right and north boundary592 IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. &593 i == nx ) THEN594 !$OMP CRITICAL595 DO k = nzb_2d(j,i) + 1, nzt596 volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)597 ENDDO598 !$OMP END CRITICAL599 ENDIF600 IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. &601 j == ny ) THEN602 !$OMP CRITICAL603 DO k = nzb_2d(j,i) + 1, nzt604 volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)605 ENDDO606 !$OMP END CRITICAL607 ENDIF608 632 609 633 ENDDO 610 634 ENDDO 635 !$acc end kernels 611 636 !$OMP END PARALLEL 637 638 ! 639 !-- Sum up the volume flow through the right and north boundary 640 IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. & 641 nxr == nx ) THEN 642 643 !$OMP PARALLEL PRIVATE (j,k) 644 !$OMP DO 645 DO j = nys, nyn 646 !$OMP CRITICAL 647 DO k = nzb_2d(j,nx) + 1, nzt 648 volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k) 649 ENDDO 650 !$OMP END CRITICAL 651 ENDDO 652 !$OMP END PARALLEL 653 654 ENDIF 655 656 IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. & 657 nyn == ny ) THEN 658 659 !$OMP PARALLEL PRIVATE (i,k) 660 !$OMP DO 661 DO i = nxl, nxr 662 !$OMP CRITICAL 663 DO k = nzb_2d(ny,i) + 1, nzt 664 volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k) 665 ENDDO 666 !$OMP END CRITICAL 667 ENDDO 668 !$OMP END PARALLEL 669 670 ENDIF 612 671 613 672 ! … … 645 704 ! 646 705 !-- Exchange of boundaries for the velocities 706 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 707 on_device = .TRUE. ! to be removed after complete porting 708 ELSE ! of ghost point exchange 709 !$acc update host( u, v, w ) 710 ENDIF 647 711 CALL exchange_horiz( u, nbgp ) 648 712 CALL exchange_horiz( v, nbgp ) 649 713 CALL exchange_horiz( w, nbgp ) 714 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 715 on_device = .FALSE. ! to be removed after complete porting 716 ELSE ! of ghost point exchange 717 !$acc update device( u, v, w ) 718 ENDIF 650 719 651 720 ! … … 679 748 ENDDO 680 749 #else 750 !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w ) 751 !$acc loop 681 752 DO i = nxl, nxr 682 753 DO j = nys, nyn 754 !$acc loop vector( 32 ) 755 DO k = 1, nzt 756 IF ( k > nzb_s_inner(j,i) ) THEN 757 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 758 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 759 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 760 ENDIF 761 ENDDO 762 ENDDO 763 ENDDO 764 !$acc end kernels 765 ! 766 !-- Compute possible PE-sum of divergences for flow_statistics 767 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum) 768 !$OMP DO SCHEDULE( STATIC ) 769 DO i = nxl, nxr 770 DO j = nys, nyn 683 771 DO k = nzb_s_inner(j,i)+1, nzt 684 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &685 ( v(k,j+1,i) - v(k,j,i) ) * ddy + &686 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)687 772 threadsum = threadsum + ABS( d(k,j,i) ) 688 773 ENDDO … … 701 786 CALL cpu_log( log_point_s(1), 'divergence', 'stop' ) 702 787 703 !$acc update device( u, v, w )704 705 788 CALL cpu_log( log_point(8), 'pres', 'stop' ) 706 707 789 708 790
Note: See TracChangeset
for help on using the changeset viewer.