假设我有一个由文件路径字符串组成的列表,如下所示:
['/#path#/AAA_123444_10D', '/#path#/AAA_123444_11D', '/#path#/BBB_987666_12D', '/#path#/CCC_987666_15D', '/#path#/DDD_123444_18D']
每个文件名中的ID是第一个下划线后面的6位数字。我想返回一个文件路径列表,其中包含每个ID对应的、文件大小最大的那个文件。在这个例子中,假设文件/#path#/AAA_123444_11D
是ID为123444的文件中最大的,文件/#path#/BBB_987666_12D
是ID为987666的文件中在磁盘上最大的。此时,我想返回以下列表:
['/#path#/AAA_123444_11D', '/#path#/BBB_987666_12D']
列表应该只包含每个ID中的一个。顺序无关紧要。
这是我最终采用的做法:
import os
import ntpath
# file selector
def multipleFileSelector(Title='Choose Files'):
    """Show a multi-file "open" dialog and return the chosen paths.

    Args:
        Title: Caption displayed on the dialog window.

    Returns:
        The result of ``askopenfilenames`` passed through Tcl's
        ``splitlist`` -- a tuple of path strings (presumably empty when the
        user cancels the dialog; confirm against the tkinter documentation).
    """
    # Imported locally so the function is usable even though the module's
    # import block only brings in ``os`` and ``ntpath``.
    import tkinter
    from tkinter import filedialog

    root = tkinter.Tk()
    root.withdraw()  # hide the empty root window; only the dialog is wanted
    try:
        filepath = filedialog.askopenfilenames(parent=root, title=Title)
        # Split while the root still exists, so the call never depends on the
        # Tcl interpreter remaining usable after the window is destroyed.
        filelist_Path = root.tk.splitlist(filepath)
    finally:
        # Always tear the hidden root down, even if the dialog raised.
        root.destroy()
    return filelist_Path
# checks if ID exists in the list already or not
def idListCheck(comp, lst, id_index=2):
    """Return True if any path in *lst* carries *comp* as its ID field.

    The file name (last path component, with either ``/`` or ``\\`` accepted
    as separator) is split on ``_`` and the field at *id_index* is compared
    with *comp*.

    Args:
        comp: ID string to look for.
        lst: Iterable of file path strings.
        id_index: Non-negative position of the ID among the ``_``-separated
            fields of the file name. Defaults to 2 (the third field).
            NOTE(review): for names shaped like ``AAA_123444_10D`` the
            six-digit ID is field 1 -- confirm which layout the real files use.

    Returns:
        True on the first matching path, False if none matches (including
        when *lst* is empty).
    """
    for path in lst:
        fields = ntpath.basename(path).split('_')
        # A name with too few fields has no ID at this position, so it can
        # never match; skip it instead of failing with IndexError.
        if id_index < len(fields) and fields[id_index] == comp:
            return True
    return False
# Problem really starts here
def _file_id(path):
    """Return the ID field taken from the file name of *path*.

    The ID is the third ``_``-separated field of the base name.
    NOTE(review): for names shaped like ``AAA_123444_10D`` the six-digit ID
    is field 1, not field 2 -- confirm which layout the real files use.
    """
    return ntpath.basename(path).split('_')[2]


def _largest_per_id(paths, size_of=os.path.getsize):
    """Return, for each ID, the path of its largest file.

    Args:
        paths: Iterable of file path strings.
        size_of: Callable mapping a path to its size; defaults to
            ``os.path.getsize``.

    Returns:
        A list with exactly one path per distinct ID, ordered by the first
        appearance of each ID in *paths*. Among equally large files the one
        that comes first in *paths* is chosen.
    """
    groups = {}
    for path in paths:
        # One pass: bucket every path under its ID (dicts keep insertion order).
        groups.setdefault(_file_id(path), []).append(path)
    # max() always returns a member of the group, so an ID whose files are
    # all 0 bytes still yields one of its own files.
    return [max(group, key=size_of) for group in groups.values()]


filelist_Path = list(multipleFileSelector())
for path in filelist_Path:
    print(_file_id(path))
sorted_filelist = _largest_per_id(filelist_Path)
print(sorted_filelist)