Changeset 4717 for palm/trunk/SOURCE/pres.f90
- Timestamp:
- Sep 30, 2020 10:27:40 PM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/pres.f90
r4651 r4717 25 25 ! ----------------- 26 26 ! $Id$ 27 ! Fixes and optimizations of OpenMP parallelization, formatting of OpenMP 28 ! directives (J. Resler) 29 ! 30 ! 4651 2020-08-27 07:17:45Z raasch 27 31 ! preprocessor branch for ibm removed 28 32 ! 29 33 ! 4649 2020-08-25 12:11:17Z raasch 30 34 ! File re-formatted to follow the PALM coding standard 31 !32 35 ! 33 36 ! 4457 2020-03-11 14:20:43Z raasch … … 171 174 REAL(wp) :: ddt_3d !< 172 175 REAL(wp) :: d_weight_pres !< 173 REAL(wp) :: localsum !<174 176 REAL(wp) :: threadsum !< 175 177 REAL(wp) :: weight_pres_l !< … … 363 365 364 366 IF ( psolver(1:9) == 'multigrid' ) THEN 365 !$OMP PARALLEL DO SCHEDULE( STATIC ) PRIVATE (i,j,k)367 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 366 368 DO i = nxl-1, nxr+1 367 369 DO j = nys-1, nyn+1 … … 372 374 ENDDO 373 375 ELSE 374 !$OMP PARALLEL DO SCHEDULE( STATIC ) PRIVATE (i,j,k)376 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 375 377 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 376 378 !$ACC PRESENT(d) … … 384 386 ENDIF 385 387 386 localsum = 0.0_wp 387 threadsum = 0.0_wp 388 389 !$OMP PARALLEL PRIVATE (i,j,k) 390 !$OMP DO SCHEDULE( STATIC ) 388 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 391 389 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 392 390 !$ACC PRESENT(u, v, w, rho_air, rho_air_zw, ddzw, wall_flags_total_0) & … … 403 401 ENDDO 404 402 ENDDO 405 !$OMP END PARALLEL406 403 407 404 ! … … 410 407 IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. & 411 408 intermediate_timestep_count == 0 ) THEN 412 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)413 !$OMP DOSCHEDULE( STATIC )409 threadsum = 0.0_wp 410 !$OMP PARALLEL DO PRIVATE (i,j,k) REDUCTION(+:threadsum) SCHEDULE( STATIC ) 414 411 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 415 412 !$ACC REDUCTION(+:threadsum) COPY(threadsum) & … … 422 419 ENDDO 423 420 ENDDO 424 localsum = localsum + threadsum * dt_3d * weight_pres_l 425 !$OMP END PARALLEL 421 threadsum = threadsum + threadsum * dt_3d * weight_pres_l 426 422 ENDIF 427 423 … … 430 426 IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. & 431 427 intermediate_timestep_count == 0 ) THEN 432 sums_divold_l(0:statistic_regions) = localsum428 sums_divold_l(0:statistic_regions) = threadsum 433 429 ENDIF 434 430 … … 445 441 ! 446 442 !-- Store computed perturbation pressure and set boundary condition in z-direction 447 !$OMP PARALLEL DO PRIVATE (i,j,k) 443 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 448 444 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 449 445 !$ACC PRESENT(d, tend) … … 465 461 !-- natural and urban surfaces 466 462 !-- Upward facing 467 !$OMP PARALLEL DO PRIVATE( i, j, k)463 !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC ) 468 464 !$ACC PARALLEL LOOP PRIVATE(i, j, k) & 469 465 !$ACC PRESENT(bc_h, tend) … … 476 472 ! 477 473 !-- Downward facing 478 !$OMP PARALLEL DO PRIVATE( i, j, k)474 !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC ) 479 475 !$ACC PARALLEL LOOP PRIVATE(i, j, k) & 480 476 !$ACC PRESENT(bc_h, tend) … … 491 487 !-- urban surfaces 492 488 !-- Upward facing 493 !$OMP PARALLEL DO PRIVATE( i, j, k)489 !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC ) 494 490 DO m = 1, bc_h(0)%ns 495 491 i = bc_h(0)%i(m) … … 500 496 ! 501 497 !-- Downward facing 502 !$OMP PARALLEL DO PRIVATE( i, j, k)498 !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC ) 503 499 DO m = 1, bc_h(1)%ns 504 500 i = bc_h(1)%i(m) … … 515 511 ! 516 512 !-- Neumann 517 !$OMP PARALLEL DO PRIVATE (i,j ,k)513 !$OMP PARALLEL DO PRIVATE (i,j) SCHEDULE( STATIC ) 518 514 DO i = nxlg, nxrg 519 515 DO j = nysg, nyng … … 525 521 ! 526 522 !-- Dirichlet 527 !$OMP PARALLEL DO PRIVATE (i,j ,k)523 !$OMP PARALLEL DO PRIVATE (i,j) SCHEDULE( STATIC ) 528 524 !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(i, j) & 529 525 !$ACC PRESENT(tend) … … 590 586 !-- Ghost layers are added in the output routines (except sor-method: see below) 591 587 IF ( intermediate_timestep_count <= 1 ) THEN 592 !$OMP PARALLEL PRIVATE (i,j,k) 593 !$OMP DO 588 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 594 589 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 595 590 !$ACC PRESENT(p, tend) … … 601 596 ENDDO 602 597 ENDDO 603 !$OMP END PARALLEL604 598 605 599 ELSEIF ( intermediate_timestep_count > 1 ) THEN 606 !$OMP PARALLEL PRIVATE (i,j,k) 607 !$OMP DO 600 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 608 601 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 609 602 !$ACC PRESENT(p, tend) … … 615 608 ENDDO 616 609 ENDDO 617 !$OMP END PARALLEL618 610 619 611 ENDIF … … 635 627 !-- the velocities, zero-gradient conditions for the pressure are set, so that no modification is 636 628 !-- imposed at the boundaries. 637 !$OMP PARALLEL PRIVATE (i,j,k) 638 !$OMP DO 629 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 639 630 !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(i, j, k) & 640 631 !$ACC PRESENT(u, v, w, tend, ddzu, wall_flags_total_0) … … 660 651 ENDDO 661 652 ENDDO 662 !$OMP END PARALLEL663 653 664 654 ! … … 675 665 IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. nxr == nx ) THEN 676 666 677 !$OMP PARALLEL PRIVATE (j,k) 678 !$OMP DO 667 !$OMP PARALLEL DO PRIVATE (j,k) REDUCTION (volume_flow_l(1)) 679 668 DO j = nys, nyn 680 !$OMP CRITICAL681 669 DO k = nzb+1, nzt 682 670 volume_flow_l(1) = volume_flow_l(1) + u(k,j,nxr) * dzw(k) & 683 671 * MERGE( 1.0_wp, 0.0_wp, BTEST( wall_flags_total_0(k,j,nxr), 1 ) ) 684 672 ENDDO 685 !$OMP END CRITICAL 686 ENDDO 687 !$OMP END PARALLEL 673 ENDDO 688 674 689 675 ENDIF … … 691 677 IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. nyn == ny ) THEN 692 678 693 !$OMP PARALLEL PRIVATE (i,k) 694 !$OMP DO 695 DO i = nxl, nxr 696 !$OMP CRITICAL 679 !$OMP PARALLEL DO PRIVATE (i,k) REDUCTION (volume_flow_l(2)) 680 DO i = nxl, nxr 697 681 DO k = nzb+1, nzt 698 682 volume_flow_l(2) = volume_flow_l(2) + v(k,nyn,i) * dzw(k) & 699 683 * MERGE( 1.0_wp, 0.0_wp, BTEST( wall_flags_total_0(k,nyn,i), 2 ) ) 700 684 ENDDO 701 !$OMP END CRITICAL 702 ENDDO 703 !$OMP END PARALLEL 685 ENDDO 704 686 705 687 ENDIF … … 719 701 / volume_flow_area(1:2) 720 702 721 !$OMP PARALLEL PRIVATE (i,j,k) 722 !$OMP DO 703 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 723 704 DO i = nxl, nxr 724 705 DO j = nys, nyn … … 733 714 ENDDO 734 715 ENDDO 735 736 !$OMP END PARALLEL737 716 738 717 ENDIF … … 757 736 IF ( topography /= 'flat' ) d = 0.0_wp 758 737 759 localsum = 0.0_wp 760 threadsum = 0.0_wp 761 762 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum) 763 !$OMP DO SCHEDULE( STATIC ) 738 !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC ) 764 739 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 765 740 !$ACC PRESENT(u, v, w, rho_air, rho_air_zw, ddzw, wall_flags_total_0) & … … 778 753 ! 779 754 !-- Compute possible PE-sum of divergences for flow_statistics 780 !$OMP DO SCHEDULE( STATIC ) 755 threadsum = 0.0_wp 756 !$OMP PARALLEL DO PRIVATE (i,j,k) REDUCTION(+:threadsum) SCHEDULE( STATIC ) 781 757 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) & 782 758 !$ACC REDUCTION(+:threadsum) COPY(threadsum) & … … 790 766 ENDDO 791 767 792 localsum = localsum + threadsum793 !$OMP END PARALLEL794 795 768 ! 796 769 !-- For completeness, set the divergence sum of all statistic regions to those of the total 797 770 !-- domain 798 sums_divnew_l(0:statistic_regions) = localsum771 sums_divnew_l(0:statistic_regions) = threadsum 799 772 800 773 CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
Note: See TracChangeset
for help on using the changeset viewer.