我应该如何确保所有唯一项目都在唯一项目python列表中



所以我使用python dhash库对唯一图像和相似图像进行排序,在比较图像的过程中,我将每个图像与当前图像两侧的10个邻居进行比较,但我的脚本从唯一列表中删除了所有相似的图像,而没有从相似列表中保留一个图像到唯一列表,我该如何解决这个问题呢?这是我迄今为止写的代码:

def compare_image(curr_img, curr_img_hash, to_compare):
"""
Function for comparing two images
"""
global norm_cache
if to_compare in norm_cache:
print(f"cached val found for {to_compare}")
gray2 = norm_cache[to_compare]
h2r,h2c =  dhash.dhash_row_col(gray2)
hash2 = dhash.format_hex(h2r,h2c)
else:
print("No cached_val found, Computing and storing in norm_cache")
gray2 = _get_image(to_compare)
h2r,h2c =  dhash.dhash_row_col(gray2)
hash2 = dhash.format_hex(h2r,h2c)
norm_cache[to_compare] = gray2  # Update cache...
print(f"Values ----> {curr_img} : {curr_img_hash}, {to_compare} : {hash2}")
if distance.hamming(curr_img_hash,hash2) <= threshold:
print("images are same")
return "similar"
else:
print("images are different")
return "different"

def find_duplicates(folder) -> tuple:
"""
Main function to find duplicates.
"""
uniques: list = list()
similar: list = list()
buffer:  list = list()


image_list = os.listdir(folder)
for image_index in range(0,len(image_list)):
curr_img = image_list[image_index]
full_path = os.path.join(folder, curr_img)
gray1 = _get_image(full_path)
h1r,h1c =  dhash.dhash_row_col(gray1)
hash1 = dhash.format_hex(h1r,h1c)
# compare left 10s
x = image_index - 10
if x < 0:
x = 0
if x < image_index:
for prev_image_index in range(x, image_index):
if os.path.isfile(full_path) and os.path.splitext(full_path)[-1] in image_exts:
prev_image = image_list[prev_image_index]
prev_full_path = os.path.join(folder,prev_image)
result = compare_image(curr_img, hash1,prev_full_path)
if result == 'similar':
if prev_full_path not in similar:
if prev_full_path not in buffer:
buffer.append(prev_full_path)
similar.append(prev_full_path)
if result == 'different':
if prev_full_path not in uniques:
uniques.append(prev_full_path) 
#compare right 10s
x = image_index + 10 + 1
if x > len(image_list):
x = len(image_list)
if x > image_index:
for j in range(image_index+1,x):
if os.path.isfile(full_path) and os.path.splitext(full_path)[-1] in image_exts:
ahead = image_list[j]
ahead_full_path = os.path.join(folder,ahead)
result = compare_image(curr_img, hash1,ahead_full_path)
if result == 'similar':
if ahead_full_path not in similar:
similar.append(ahead_full_path)
if result == 'different':
if ahead_full_path not in uniques:
uniques.append(ahead_full_path) 
for i in similar:
if i in uniques and i in buffer:
uniques.remove(i)
print(f"{len(similar)} Similar Found...")
print(f"{len(uniques)} Unique Found...")
return similar, uniques

您可以将所有元素传递给python集,该集自动只保留唯一值。之后,您可以使用sorted对列表进行排序,并将其转换回列表

mySet = {'a', 'd', 'c', 'b', 'b'}
myList = list(sorted(mySet))

可能不是最有效的解决方案,但它确实起到了的作用

最新更新