有效地找到列表中最大值的索引



假设我们有一个列表:[1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]

我想写一个函数,按顺序返回前3个最大值的索引。

例如,在这种情况下,结果将是:[8, 5, 7]

我知道一种方法是编写嵌套循环,但是,还有其他有效的方法可以实现这一点吗?

使用heapq.nlargest查找n个最大值。

from heapq import nlargest
from operator import itemgetter
l = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
indices_of_3_largest = [i for i,_ in nlargest(3, enumerate(l), itemgetter(1))]
print(indices_of_3_largest)
# [8, 5, 7]

zip用索引对列表进行排序,并取前3个值

lst = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
indices = [i for _, i in sorted(zip(lst, range(len(lst))), reverse=True)[:3]]
print(indices) # [8, 5, 7]

这可以在一步中生成结果,但使用了不太优雅的dunder方法作为关键:

>>> lst = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
>>> heapq.nlargest(3, range(len(lst)), lst.__getitem__)
[8, 5, 7]

lambda函数可以更轻松地完成这项工作。

代码:

def sort_index(lst, rev=True):
index = range(len(lst))
s = sorted(index, reverse=rev, key=lambda i: lst[i])
return s
score = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
sort_index(score)[:3]

结果:

[8, 5, 7]
l = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
list(map(lambda x: l.index(x), sorted(l, reverse=True)[:3] )) # [8, 5, 7]

使用连续的max到列表的副本。

lst = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
lst_cp = lst.copy()
indeces_max = []
for _ in range(3):
m = max(lst_cp)
indeces_max.append(lst.index(m))
lst_cp.remove(m)
del lst_cp # just to remember that is not needed
print(indeces_max)

您可以使用np.argsort:

import numpy as np
ar = [1, 3.5, -1, 7, 10, 20, 5, 17, 31, -5]
ar_argsort = np.argsort(ar)
reversed_argsort = ar_argsort[::-1]
indices_3_max = reversed_argsort[0:3]
print(indices_3_max)

结果:

[8 5 7]

一行中的上述简明版本:

ar_argsort = np.argsort(ar)[::-1][0:3]

虽然所有6个答案(现在大约24小时后可用(都能有效地提供所需的结果,但我想知道哪种解决方案最有效。

我使用timeit进行比较(只有运行时,没有内存使用,5k运行,增加了输入列表——所有都一样(。

按递增顺序排列的结果:

consecutive_max: 2.604 s
heapq_oneStep:   3.052 s
np_argsort:      3.234 s
lambda_unnamed:  6.182 s
heapq_nlargest:  4.522 s
lambda_function: 9.960 s
zip_sort:        15.402 s

评估代码:

import timeit
timeit_runs = 5_000

# heapq_oneStep by Mechanic Pic
# https://stackoverflow.com/a/73568080/11815313
def heapq_oneStep():
timeit_setup_heapq_oneStep = '''
import heapq
import random
random.seed(42)

integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
'''
testcode_heapq_oneStep = ''' 
heapq.nlargest(3, range(len(lst)), lst.__getitem__)
'''

return timeit.timeit(stmt=testcode_heapq_oneStep, 
setup=timeit_setup_heapq_oneStep, number = timeit_runs)

# heapq_nlargest by Stef
# https://stackoverflow.com/a/73567876/11815313
def heapq_nlargest():
timeit_setup_heapq_nlargest = '''
from heapq import nlargest
from operator import itemgetter
import random
random.seed(42)

integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
'''
testcode_heapq_nlargest = ''' 
[i for i,_ in nlargest(3, enumerate(lst), itemgetter(1))]
'''

return timeit.timeit(stmt=testcode_heapq_nlargest, 
setup=timeit_setup_heapq_nlargest, number = timeit_runs)

# zip_sort by Guy
# https://stackoverflow.com/a/73567874/11815313
def zip_sort():
timeit_setup_zip_sort = '''
import random
random.seed(42)

integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
'''
testcode_zip_sort = ''' 
[i for _, i in sorted(zip(lst, range(len(lst))), reverse=True)[:3]]
'''

return timeit.timeit(stmt=testcode_zip_sort, 
setup=timeit_setup_zip_sort, number = timeit_runs)

# lambda_function by Mehmaam
# https://stackoverflow.com/a/73568027/11815313 
def lambda_function():
timeit_setup_lambda_function = '''
import random
random.seed(42)

integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
'''
testcode_lambda_function = ''' 
def sort_index(lst, rev=True):
index = range(len(lst))
s = sorted(index, reverse=rev, key=lambda i: lst[i])
return s
sort_index(lst)[:3]
'''

return timeit.timeit(stmt=testcode_lambda_function, 
setup=timeit_setup_lambda_function, number = timeit_runs)

# lambda_unnamed by uozcan12
# https://stackoverflow.com/a/73569339/11815313 
def lambda_unnamed():
timeit_setup_lambda_unnamed = '''
import random
random.seed(42)

integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
'''
testcode_lambda_unnamed = ''' 
list(map(lambda x: lst.index(x), sorted(lst, reverse=True)[:3] ))
'''

return timeit.timeit(stmt=testcode_lambda_unnamed, 
setup=timeit_setup_lambda_unnamed, number = timeit_runs)
# np_argsort by MagnusO_O
# https://stackoverflow.com/a/73567884/11815313 
def np_argsort():
timeit_setup_np_argsort = '''
import numpy as np
import random
random.seed(42)
integer_list = random.sample(range(-10_000, 10_000), 10_000) 
lst = [x/100 for x in integer_list] 
lst_nparr = np.array(lst)
'''
testcode_np_argsort = ''' 
lst_nparr_max3 = np.argsort(lst_nparr)[::-1][0:3]
lst_nparr_max3.tolist()
'''

return timeit.timeit(stmt=testcode_np_argsort, 
setup=timeit_setup_np_argsort, number = timeit_runs)

# consecutive_max by cards
def consecutive_max():
timeit_setup_consecutive_max = '''
import random
random.seed(42)
integer_list = random.sample(range(-10_000, 10_000), 10_000)
lst = [x/100 for x in integer_list]
'''
testcode_consecutive_max = '''
lst_cp = lst.copy()
indeces_max = []
for _ in range(3):
m = max(lst_cp)
indeces_max.append(lst.index(m))
lst_cp.remove(m)
'''
return timeit.timeit(stmt=testcode_consecutive_max,
setup=timeit_setup_consecutive_max, number = timeit_runs)
time_heapq_oneStep = heapq_oneStep()
time_heapq_nlargest = heapq_nlargest()
time_zip_sort = zip_sort()
time_lambda_function = lambda_function()
time_lambda_unnamed = lambda_unnamed()
time_np_argsort = np_argsort()
time_consecutive_max = consecutive_max()
print(f'''consecutive_max:  {time_consecutive_max:.3f} s''')
print(f'''np_argsort:      {time_np_argsort:.3f} s''')
print(f'''heapq_oneStep:   {time_heapq_oneStep:.3f} s''')
print(f'''lambda_unnamed:  {time_lambda_unnamed:.3f} s''')
print(f'''heapq_nlargest:  {time_heapq_nlargest:.3f} s''')
print(f'''lambda_function: {time_lambda_function:.3f} s''')
print(f'''zip_sort:        {time_zip_sort:.3f} s''')

最新更新