在fortran中查找和修改2D数组中的最高n值



我有一个2D实数数组,我想找到n个最高值,并将这些最高值分配给1,将所有其他值分配给0。

下面的代码通过在循环中使用MAXLOC来找到最大值,将其更改为-9999,从而将其从循环的下一次迭代中排除,从而正确地完成了这一操作。最后,所有-9999的值都指定为1。问题是,这种方法慢得令人难以置信。这个样本数据集有8604个单元格,其中选择了最高的50个,这很好,但实际数据有108183756个单元格,我需要找到其中最高的1672137个。如果有任何帮助,我将不胜感激。

PROGRAM mapalgebra
IMPLICIT NONE
REAL, DIMENSION(64,126) :: n
REAL, DIMENSION(64,126) :: a
REAL, DIMENSION(64,126) :: r
REAL, DIMENSION(64,126) :: ine
REAL, DIMENSION(64,126) :: s
REAL, DIMENSION(64,126) :: p
REAL, DIMENSION(64,126) :: TTP
INTEGER, DIMENSION(64,126) :: newlu
INTEGER :: row,col,max_rows,max_cols,i, j,demand, si
INTEGER, ALLOCATABLE :: AR1(:)

newlu = 0

max_rows=64
max_cols=126
OPEN(UNIT=1, FILE="urb.txt") 
OPEN(UNIT=2, FILE="acc.txt") 
OPEN(UNIT=4, FILE="ran.txt")  
OPEN(UNIT=5, FILE="lu1.txt") 
OPEN(UNIT=7, FILE="suit.txt") 
OPEN(UNIT=8, FILE="pob.txt") 
DO row = 1,max_rows
READ(1,*) (n(row,col),col=1,max_cols)
END DO
DO row = 1,max_rows
READ(2,*) (a(row,col),col=1,max_cols)
END DO

DO row = 1,max_rows
READ(4,*) (r(row,col),col=1,max_cols)
END DO
DO row = 1,max_rows
READ(5,*) (ine(row,col),col=1,max_cols)
END DO

DO row = 1,max_rows
READ(7,*) (s(row,col),col=1,max_cols)
END DO
DO row = 1,max_rows
READ(8,*) (p(row,col),col=1,max_cols)
END DO

demand = 50

print *, "Land use demand is : ", demand
TTP = (ine+n+r+a+s)*p   !population weighted model, (inertia+nhood+randomness+accessibility+suitability)*population

si = SIZE(SHAPE(TTP))   ! Get number of dimensions
! in array
print *, "TTP has : ", si  , " dimensions"                    

ALLOCATE (AR1(si))       ! Allocate AR1 to number
! of dimensions in array

DO i=1,demand
AR1=MAXLOC (TTP)
!print *, "MAXLOC (TTP) : ", AR1
TTP(AR1(1),AR1(2)) = -9999
newlu(AR1(1),AR1(2)) = 1
END DO

OPEN(UNIT=3, FILE="output.txt", ACTION="write", STATUS="replace")
WRITE(3,11) 'ncols 126'
WRITE(3,11) 'nrows 64'
WRITE(3,11) 'xllcorner 461229.21505206'
WRITE(3,11) 'yllcorner 4478822.89048722'
WRITE(3,11) 'cellsize 99.38142966 '
WRITE(3,11) 'NODATA_value 0 '
11 format(A,I3)
DO i=1,max_rows
WRITE(3,*) (newlu(i,j), j=1,max_cols)
END DO
CLOSE (1)
CLOSE (2)
CLOSE (3)
CLOSE (4)
CLOSE (5)
CLOSE (7)
CLOSE (8)

END PROGRAM mapalgebra

这里有一个解决方案。其思想是计算为>= v0的元素的数量,并通过平分找到v0,直到元素的数量为k。总体复杂度为O(N)。不涉及排序,也不移动任何数据。在k个最大元素中,有几个元素等于最小值的情况下,会有一个(非常)轻微的复杂性。

Program test
implicit none
real, allocatable :: a(:)
integer, parameter :: n=108183756, k=1672137
integer :: k0, i, kk
real :: v0, v1, v2, vmin, vmax
allocate(a(n))
call random_number(a)
vmax = maxval(a,dim=1)
vmin = minval(a,dim=1)
write(*,*) vmin,vmax

if (count(a == vmax) > k) then
! special case
kk = k
do i = 1, n
if (a(i) == vmax .and. kk > 0) then
a(i) = 1.0
kk = kk - 1
else
a(i) = 0.0
end if
end do

else

! ...
v1 = vmax
v2 = vmin ! the v2 initial value could be refined
! in the case where k << n

! Reduce the [v1,v2] interval by bisection
v0 = (v1+v2)/2.0
k0 = count(a >= v0)
do
write(*,*) "-----",v1,v2,v0,k0
if (k0 == k) exit   ! perfect value
if (v0 == v2 .or. v0 == v1) exit   ! see below
if (k0 > k) then
v2 = v0
else
v1 = v0
end if
v0 = (v1+v2)/2.0
k0 = count(a >= v0)
end do

! If the previous loop exited because v0 was equal to 
! either v1 or v2, then it means that there were no 
! more possible real value between v1 and v2.
! In turns it means that there are several elements of 
! a(:) that are equal to the minimum value (v2) among
! the k largest. This has to be taken into account
if (k0 == k) then
kk = 0
else
v0 = v1
kk = k-count(a >= v0)
end if
do i = 1, n
if (a(i) >= v0) then
a(i) = 1.0
else if (a(i) >= v2 .and. kk > 0) then
a(i) = 1.0
kk = kk - 1
else
a(i) = 0.0
end if
end do

end if

write(*,*) count(a == 1.0)

End

编辑:可能的优化

  • 正如评论中所提到的,v2(而不是vmax)的更好的初始猜测可以节省一些迭代。例如v2 = vmax - (vmax-vmin)*k/n(这将适用于值的均匀分布)。如果最初的猜测太大,那么在主循环之前需要一个额外的循环
  • 在某个时刻,当v1和v2之间的元素计数为"0"时;"足够低";,对这些元素进行排序会更快,而不是继续迭代

根据@lastchance的建议,我用一个不如@PierU优雅的解决方案来回答我自己的问题,但我认为值得发布,因为它略有不同。新代码(到完整数据集的下载链接-151Mb)使用MERGE函数内的掩蔽方法找到最高的n个值(可变需求);蒙版的";包含TTP中大于或等于某个阈值(仓大小)的所有值。它通过根据新阵列中的单元数调整仓大小来做到这一点,而无需对阵列TTP进行排序;蒙版的";在MERGE操作之后。如果单元格太多,则会减小仓位大小;如果单元格太少,则会增大仓位大小。如果算法";棒";这由变量"0"标识;lt;其仅取值>0,如果箱子大小已经减小,然后在没有找到解决方案的情况下增加。一旦新阵列中的单元计数等于所需值的数量(可变需求),或者一旦不能进行进一步改进,循环就终止。我相信这个代码可以参考@PierU建议的解决方案进行改进。谢谢大家!

PROGRAM mapalgebra
IMPLICIT NONE
!gfortran assigntest2.f90 -mcmodel=medium -o assigntest2
REAL, DIMENSION(8966,12066) :: TTP
REAL, DIMENSION(8966,12066) :: masked
REAL :: maxv,minv,rangev, bincut,binsize,binstart,binstartold,increment
!real, intent(in) :: my_array(:)
!real, allocatable :: masked(::)
REAL, DIMENSION(8966,12066) :: newlu
INTEGER :: row,col,max_rows,max_cols,i, j,demand, si,counts,gt,lt
INTEGER, ALLOCATABLE :: AR1(:)

newlu = 0.0

max_rows=8966
max_cols=12066
OPEN(UNIT=1, FILE="TTPbig.txt") 
DO row = 1,max_rows
READ(1,*) (TTP(row,col),col=1,max_cols)
END DO

demand = 1672137

print *, "Land use demand is : ", demand
maxv = maxval(TTP)
minv = minval(TTP)
rangev = maxv-minv
binstart = 10.0
counts = 0
gt = 0
lt = 0
increment = 1.0
print *, "TTP has maxval : ", maxv  , " minval ", minv, " and range ", rangev 

DO WHILE ( counts .NE. demand )
if (binstartold .EQ. binstart) EXIT
bincut = rangev/binstart
binsize = maxv-bincut
print *, "Bin cut off = ", bincut  , " bin = ", binsize 
print *, "binstartold = ", binstartold  , " binstart = ", binstart
if (lt>0) then
increment= increment/10
gt=0
lt=0
end if
binstartold = binstart
masked = MERGE(TTP, 0., TTP>=binsize)
counts = count(masked/=0)
if (counts > demand) then
gt=gt+1
print *,counts
print *, "too many cells, reducing binsize ", binstart
binstart = binstart+increment
print *,binstart
else if (counts < demand) then
lt=lt+gt
print *,counts
print *, "too few cells, increasing binsize ", binstart
binstart = binstart-increment
print *,binstart
end if
END DO 
print *, "count = ",counts," and demand= ",demand
where(masked/=0) newlu = 1

!DO i=1,demand
!AR1=MAXLOC (TTP)
!!print *, "MAXLOC (TTP) : ", AR1
!TTP(AR1(1),AR1(2)) = -9999
!newlu(AR1(1),AR1(2)) = 1
!END DO

OPEN(UNIT=3, FILE="output.txt", ACTION="write", STATUS="replace")
WRITE(3,11) 'ncols 12066'
WRITE(3,11) 'nrows 8966'
WRITE(3,11) 'xllcorner -42274.14800538'
WRITE(3,11) 'yllcorner 3975389.72101546'
WRITE(3,11) 'cellsize 99.38142966 '
WRITE(3,11) 'NODATA_value 0 '
11 format(A,I3)
DO i=1,max_rows
WRITE(3,*) (newlu(i,j), j=1,max_cols)
END DO
CLOSE (1)
CLOSE (2)
CLOSE (3)
CLOSE (4)
CLOSE (5)
CLOSE (7)
CLOSE (8)

END PROGRAM mapalgebra

尝试以下算法

  1. 首先找到与array的第demand秩相对应的阵列的阈值x。它使用可移植库中的qsort()算法对项目进行排序,然后从列表末尾(最高值)中选择所需项目

    function find_top_n_value(array, demand) result(x)
    use ifport
    real, intent(in) :: array(:)
    integer, intent(in) :: demand
    real :: sorted(size(array))
    real :: x
    integer :: n
    n = size(array)
    sorted = array
    call qsort(sorted, n, SIZEOF(x), compr)
    x = sorted(n-demand+1)
    end function
    INTEGER(2) FUNCTION compr(arg1, arg2)
    real, intent(in) :: arg1, arg2
    if(arg1>arg2) then
    compr = 1
    else if(arg2>arg1) then
    compr = -1
    else
    compr = 0
    end if
    end function
    
  2. 现在使用where()语句将名为masked的数组副本转换为0和1值,具体取决于它们在阈值上的位置

    masked = array
    where(masked>= x)
    masked = 1.0
    else where
    masked = 0.0
    end where
    

在我的示例代码中

real :: x
real, parameter :: array(*) = [6.0, 5.5, 2.0, 8.0, 9.0, 4.0, 8.5]
real :: masked(size(array))
integer :: demand
print *, "Input  Array: ", array
demand = 3
x = find_top_n_value( array, 3)
print '(*(g0))', "Top #", demand ," value is ", x
masked = array
where(masked>= x)
masked = 1.0
else where
masked = 0.0
end where
print *, "Masked Array: ", masked

结果是:

Input  Array:    6.000000       5.500000       2.000000       8.000000
9.000000       4.000000       8.500000
Top #3 value is 8.000000
Masked Array:   0.0000000E+00  0.0000000E+00  0.0000000E+00   1.000000
1.000000      0.0000000E+00   1.000000

您可以看到8或更多的值为1.0,其余为零。

与其他海报一样,这首先确定阈值,然后"二进制";(因为缺少更好的单词)数组。

此版本使用QuickSelect(非QuickSort),希望使其平均为O(N),而不是排序算法中的O(N log N)-请参阅https://en.wikipedia.org/wiki/Quickselect

module libs
use iso_fortran_env
implicit none
contains
!----------------------------------------------
integer function partition( A, left, right ) result( i )
real, intent(inout) :: A(:)
integer, intent(in) :: left, right
real AP
integer j
i = left
call pivot( A(left:right) )
Ap = A(left)
do j = left + 1, right
if ( A(j) < Ap ) then
call swap( A(i+1), A(j) )
i = i + 1
end if
end do
call swap( A(left), A(i) )
end function partition
!----------------------------------------------
recursive real function quickSelect( A, left, right, n ) result( res )
real, intent(inout) :: A(:)
integer, intent(in) :: left, right, n
integer p
if ( left == right ) then
res = A(left)
else
p = partition( A, left, right )             ! A(p) will be in its correct place
if ( p == n ) then
res = A(p)                               ! if p == n then we're done
else if ( p < n ) then
res = quickSelect( A, p + 1, right, n )  ! check the RIGHT side ONLY 
else
res = quickSelect( A, left , p - 1, n )  ! check the LEFT side ONLY
end if
end if
end function quickSelect
!----------------------------------------------
subroutine swap( a, b )
real, intent(inout) :: a, b
real t
t = a
a = b
b = t
end subroutine swap
!----------------------------------------------
subroutine pivot( A )
real, intent(inout) :: A(:)
integer p
real r
call random_number( r )
p = 1 + size( A ) * r
if ( p /= 1 ) call swap( A(1), A(p) )
end subroutine pivot
!----------------------------------------------
subroutine binarise( A, An, n )
real, intent(inout) :: A(:)
real, intent(in) :: An
real dummy(size(A))
integer, intent(in) :: n
integer counter, i
counter = 0
dummy = 0
do i = 1, size( A )
if ( A(i) > An ) then
dummy(i) = 1.0
counter = counter + 1
end if
end do
do i = 1, size( A )
if ( A(i) == An ) then
dummy(i) = 1.0
counter = counter + 1
if ( counter == n ) exit    ! ignore further repeats
end if
end do
A = dummy
end subroutine binarise
end module libs
!=======================================================================
program main
use libs
implicit none
real, allocatable :: A(:), dummy(:)
real An
integer left, right
integer n
character(len=*), parameter :: FMT = "( A, *( f4.1, 1x ) )"
A = [ 5, 6, 4, 1, 8, 1, 5, 9 ]                ! array to be "binarised"
n = 4                                         ! nth LARGEST element
write( *, FMT ) "A(original)  = ", A
write( *, *   ) "n = ", n
! Find the nth value
left = 1;   right = size( A )
dummy = A
An = quickSelect( dummy, left, right, size(A) - n + 1 ) ! the cut-off value (which might be repeated)
! Binarise A
call binarise( A, An, n )
write( *, FMT ) "A(binarised) = ", A
end program main
!=======================================================================

输出(注意,重复的5中只有一个获得"1.0"值):

A(original)  =  5.0  6.0  4.0  1.0  8.0  1.0  5.0  9.0
n =            4
A(binarised) =  1.0  1.0  0.0  0.0  1.0  0.0  0.0  1.0

到目前为止,已经有四个答案:

  • 矿井,采用平分法
  • OP,采用@lastchance在评论中建议的装箱方法
  • @快速选择方法的最后机会
  • @JohnAlexiou采用快速排序方法

正如一些贡献者所说,这个问题本质上是关于在无序列表中按降序找到第k个元素。

我已经完善并改进了我的平分例程(使其递归,并修复了一些问题)。我还编写了一个带有装箱方法的例程。我将两者与快速排序和快速选择方法进行比较。所有这些都是为了找到递增顺序中的第k个元素而编写的。此外,我不再处理有几个元素等于搜索值的情况(不过输出仍然正确)。

以下是n=108183756, k=16721370在不同情况下的基准:

$ gfortran -O3 klargest2.f90 && ./a.out 
Input = uniform distribution
--------------------------------
quicksort           v =  0.154617488     time =   13.4908419    
quickselect         v =  0.154617488     time =   1.18194902    
bisection           v =  0.154617488     time =  0.724147022    
binning             v =  0.154617488     time =  0.649156988    
Input = non-uniform distribution
--------------------------------
quicksort           v =   2.39065681E-02 time =   13.4206972    
quickselect         v =   2.39065681E-02 time =   1.28310299    
bisection           v =   2.39065681E-02 time =  0.781505048    
binning             v =   2.39065681E-02 time =   1.13891697    
Input = highly non-uniform distribution
--------------------------------
quicksort           v =   3.69637343E-03 time =   14.0137014    
quickselect         v =   3.69637343E-03 time =   1.20810306    
bisection           v =   3.69637343E-03 time =  0.823186994    
binning             v =   3.69637343E-03 time =   1.64031005     

观察结果:

  • 正如预期的那样,快速排序方法比其他方法慢得多。原因是必须不断地复制、移动数据等。平均复杂度为O(N*log(N)),但在最坏的情况下(即使不太可能)也可能为O(N+*2)
  • quikselect方法要快10倍。原因是quickselect只对到达第k个元素所需的内容进行排序,因此复杂性为O(N)(但最坏的情况仍然是O(N**2))

比较装仓和平分:

  • 在均匀分布上装箱稍微快一点
  • 非均匀分布的平分速度更快
  • 平分码更简单

注意,装仓方法实际上与平分方法非常相似,但我们没有将值范围拆分为2,而是将其拆分为NBIN(在我的示例中为10)。装箱方法的效率完全取决于猜测搜索值在哪个箱子里:这对于均匀分布来说很容易,但在一般情况下就不那么容易了

比较平分和快速选择

  • 平分速度更快
  • 两者都具有O(N)的复杂度,但在最坏的情况下(尽管不太可能),快速选择的复杂度是O(N**2),而对于平分,它仍然是O(N)
  • 平分不会修改输入数组
  • 快速选择代码要简单得多

以下是完整的测试代码,您可以运行和修改:

MODULE kth_element
implicit none
CONTAINS    
!*************************************************************************************
real recursive function bisection(a,k,strict,v1,v2,k1,k2) result(val)
!*************************************************************************************
! Finding the k-th value of a(:) in increasing order by bisection of a [v1;v2] interval,
! mostly without sorting or copying the data (see below)
! - At each step we have count(a<=v1) < k <= count(a<=v2)
! - If the number of elements in the interval falls below a hard-coded threshold, 
!   we stop the bisection and explicitly sort the remaining elements.
!   Drawback: a bit more memory occupation and (limited) data duplication
!   Advantage: if strict is .true., sorting is more efficient for a small number 
!              of elements
! - If the number of elements in the interval falls below 1/10th the input number
!   of elements, these elements are copied to a new array.
!   Drawback: a bit more memory occupation and (limited) data duplication
!   Advantage: much less elements to count, and more cache friendly
! - if strict is .true., the returned value is a value of the input array, otherwise
!   it is an arbitrary value such that count(a<=val) == k, potentially saving 
!   some final recursion step
!
! The complexity is O(n*log(n)) 
!*************************************************************************************
real, intent(in) :: a(:)
integer, intent(in) :: k
logical, intent(in),  optional :: strict
real, intent(in), optional :: v1, v2
integer, intent(in), optional :: k1, k2
integer, parameter :: NTOSORT = 10000
integer :: n, k0, kk1, kk2, i, j
real :: v0, vv1, vv2, c
logical :: strict___
real, allocatable :: b(:)
!*************************************************************************************
n = size(a)
strict___ = .true.; if (present(strict))  strict___ = strict

if (strict___ .and. n <= NTOSORT) then
b = a(:)
call quickselect(b,1,n,k)
val = b(k)
return
end if
if (.not.present(v1)) then
! Search for the min value vv1 and max value vv2 (faster than using minval/maxval)
! Generally in the code, k = count(a <= v)
vv1 = a(1); kk1 = 1
vv2 = a(1); kk2 = n
do i = 2, n
if (a(i) >  vv2) then
vv2 = a(i)
else if (a(i) <  vv1) then
vv1 = a(i)
kk1 = 1
else if (a(i) == vv1) then
kk1 = kk1 + 1
end if
end do

! trivial cases
if (k <= kk1) then
val = vv1
return
end if
if (k == n) then
val = vv2
return
end if
else
vv1 = v1; kk1 = k1
vv2 = v2; kk2 = k2
end if

! Reduce the [v1,v2] interval by bisection
v0 = 0.5*(vv1+vv2)
! if the middle value falls back on v1 or v2, then no middle value can be obtained
! we are at the solution
if (v0 == vv2 .or. v0 == vv1) then 
val = vv2
else
! actual bisection
k0 = count(a <= v0)
if (.not.strict___ .and. k0 == k) then
! v0 is not necessarily a value present in a(:)
val = v0
else
if (k0 >= k) then
vv2 = v0; kk2 = k0
else
vv1 = v0; kk1 = k0
end if
if (kk2-kk1 <= n/10) then
call extract(a,vv1,vv2,b,kk2-kk1)
val = bisection(b,k-kk1,strict)
else
val = bisection(a,k,strict,vv1,vv2,kk1,kk2)
end if
end if
end if

end function bisection

!*************************************************************************************
recursive subroutine quicksort(a,ifirst,ilast)
!*************************************************************************************
! Author: t-nissie
! License: GPLv3
! Gist: https://gist.github.com/t-nissie/479f0f16966925fa29ea 
!*************************************************************************************
real, intent(inout) :: a(:)
integer, intent(in) :: ifirst, ilast
real :: x, t
integer n, i, j
!*************************************************************************************  
n = size(a)
x = a( (ifirst+ilast) / 2 )
i = ifirst
j = ilast
do
do while (a(i) < x)
i=i+1
end do
do while (x < a(j))
j=j-1
end do
if (i >= j) exit
t = a(i);  a(i) = a(j);  a(j) = t
i=i+1
j=j-1
end do
if (ifirst < i-1) call quicksort(a,ifirst,i-1)
if (j+1 < ilast)  call quicksort(a,j+1,ilast)
end subroutine quicksort

!*************************************************************************************
recursive subroutine quickselect(a,ifirst,ilast,k)
!*************************************************************************************
! Author: t-nissie
! License: GPLv3
! Gist: https://gist.github.com/t-nissie/479f0f16966925fa29ea 
!*************************************************************************************
real, intent(inout) :: a(:)
integer, intent(in) :: ifirst, ilast, k
real :: x, t
integer n, i, j
!*************************************************************************************  
n = size(a)
x = a( (ifirst+ilast) / 2 )
i = ifirst
j = ilast
do
do while (a(i) < x)
i=i+1
end do
do while (x < a(j))
j=j-1
end do
if (i >= j) exit
t = a(i);  a(i) = a(j);  a(j) = t
i=i+1
j=j-1
end do
if (ifirst < i-1 .and. k <= i-1) call quickselect(a,ifirst,i-1,k)
if (j+1 < ilast  .and. k >= j+1) call quickselect(a,j+1,ilast,k)
end subroutine quickselect
!*************************************************************************************  
pure subroutine extract(a,v1,v2,b,m)
!*************************************************************************************  
real, intent(in) :: a(:), v1, v2
real, allocatable, intent(out) :: b(:)
integer, intent(in) :: m
integer :: i, j
!*************************************************************************************  
allocate( b(m) )
j = 0
do i = 1, size(a)
if (a(i) > v1 .and. a(i) <= v2) then
j = j + 1
b(j) = a(i)
end if
end do
end subroutine extract
!*************************************************************************************
real recursive function binning(a,k,strict,status) result(val)
!*************************************************************************************
real, intent(in) :: a(:)
integer, intent(in) :: k
logical, intent(in),  optional :: strict
integer, intent(out), optional :: status

integer, parameter :: NTOSORT = 10000, NBIN = 10
integer :: kk(0:NBIN), n, i, ibin, nbin___, k1, k2
real :: v(0:NBIN), vmin, vmax, v1, v2
logical :: strict___
real, allocatable :: b(:)
!*************************************************************************************
n = size(a)
if (present(status)) status = 0
strict___ = .true.; if (present(strict))  strict___ = strict
if (k<1 .or. k>n) then
if (present(status)) then
status = 1
return
else
error stop "wrong k value or empty array"
end if
end if

if (n <= NTOSORT) then
b = a(:)
call quickselect(b,1,n,k)
val = b(k)
return
end if
! Search for the min and max values (faster than using minval/maxval)
vmin = a(1); vmax = a(1)
do i = 2, n
if (a(i) > vmax) then
vmax = a(i)
else if (a(i) < vmin) then
vmin = a(i)
end if
end do

! trivial case
if (vmin == vmax) then
val = vmin
return
end if

! building nbin bin boundaries
v = [(vmin + ibin*(vmax-vmin)/NBIN, ibin=0,NBIN)]
v(nbin) = vmax  ! forcing, just in case
! reducing the number of bins if some boundaries and equal to each others
nbin___ = NBIN
ibin = 1
do
if (ibin > nbin___) exit
if (v(ibin) == v(ibin-1)) then
v(ibin-1:nbin___-1) = v(ibin:nbin___)
nbin___ = nbin___ - 1
else
ibin = ibin + 1
end if
end do

kk(:) = -1 ! bin counter initialization
! estimate in which bin the k-th value may be, based on a uniform distribution hypothesis
ibin = ceiling(nbin___*real(k)/n)
! count the number of elements for this bin, and explore the next bins if not enough
do 
if (ibin == nbin___) then
kk(ibin) = n
else
kk(ibin) = count( a <= v(ibin) )
end if
if (kk(ibin) >= k) exit
ibin = ibin+1
end do
! explore the previous bins is needed
do 
if (ibin == 0) exit
if (kk(ibin-1) >= 0) exit ! the previous bin has already been explored and rejected
kk(ibin-1) = count( a <= v(ibin-1) )
if (kk(ibin-1) < k) exit ! the current bin is the right one 
ibin = ibin - 1
end do
if (ibin == 0) then
val = v(ibin)
else if (.not.strict___ .and. kk(ibin) == k) then
val = v(ibin)
else
! recursion on this bin
v1 = v(ibin-1); k1 = kk(ibin-1)
v2 = v(ibin)  ; k2 = kk(ibin)
call extract(a,v1,v2,b,k2-k1)
val = binning(b,k-k1,strict,status)
end if

end function binning
END MODULE kth_element


Program test
use kth_element
implicit none
real, allocatable :: a(:), b(:)
integer, parameter :: n=108183756, k=16721370
integer :: i, nseed, iseed
integer(8) :: tic, toc, rate
real :: v0, v1, v2, v3, v4

allocate(a(n))
call random_seed(nseed)

!iseed = 0
!do 

!call random_seed(put=[(iseed+i,i=0,nseed-1)])
call random_number(a)

do i = 1, 3

write(*,*)
if (i == 1) then
write(*,*) "Input = uniform distribution"
else if (i == 2) then
a(:) = a(:)**2
write(*,*) "Input = non-uniform distribution"
else if (i == 3) then
a(:) = a(:)**1.5
write(*,*) "Input = highly non-uniform distribution"
end if
write(*,*) "--------------------------------"
call system_clock(tic,rate)
b = a(:)
call quicksort(b,1,n)
v0 = b(k)
call system_clock(toc,rate)
write(*,*) "quicksort           v =", v0, "time =",(toc-tic)/real(rate)
call system_clock(tic,rate)
b = a(:)
call quickselect(b,1,n,k)
v1 = b(k)
call system_clock(toc,rate)
write(*,*) "quickselect         v =", v1, "time =",(toc-tic)/real(rate)
call system_clock(tic,rate)
v2 = bisection(a,k)
call system_clock(toc,rate)
write(*,*) "bisection           v =", v2, "time =",(toc-tic)/real(rate)

call system_clock(tic,rate)
v3 = binning(a,k)
call system_clock(toc,rate)
write(*,*) "binning             v =", v3, "time =",(toc-tic)/real(rate)
!if (v3 /= v2) then
!   write(*,*) "!!!!!!!!!!!!!!!!!", iseed
!   stop
!end if

end do
!iseed = iseed + 1
!end do

End Program

相关内容

  • 没有找到相关文章

最新更新