我有一个数据帧(DataFrame),其中一列的每个单元格都是一个图像 URL 数组。
>>>df.IMAGES.head()
0 ["https://cf-medias.avendrealouer.fr/image/_87...
1 ["http://photos.ubiflow.net/440414/165474561/p...
2 ["https://v.seloger.com/s/width/965/visuels/0/...
3 ["https://pix.yanport.com/ads/e9e07ed0-812f-11...
4 ["https://v.seloger.com/s/width/966/visuels/0/...
我想检查每对行的图像是否相似。所以我做了一个函数来检查两个图像是否相似。如何将我的函数应用于每一对行?
from PIL import Image
import imagehash
import requests
from io import BytesIO
def image_similarity(imageAurl, imageB, cutoff=5):
    """Report whether the images behind two URLs look alike.

    Each image is downloaded, reduced to an average hash, and the two
    hashes are compared by their difference.

    Args:
        imageAurl: URL of the first image (a single URL string).
        imageB: URL of the second image (a single URL string).
        cutoff: Hash difference below which the images count as similar.

    Returns:
        True if the hash difference is below ``cutoff``, otherwise False.

    Raises:
        requests.HTTPError: If either download answers with an error status.
    """
    hashes = []
    for url in (imageAurl, imageB):
        response = requests.get(url)
        # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        # The image is already decoded from the response bytes here; it must
        # not be passed through Image.open a second time.
        with Image.open(BytesIO(response.content)) as img:
            hashes.append(imagehash.average_hash(img))

    is_similar = bool(hashes[0] - hashes[1] < cutoff)
    print('images are similar' if is_similar else 'images are not similar')
    # Return the verdict so callers such as DataFrame.apply can store it.
    return is_similar
感谢Kshitij Saxena,我尝试了:
# Look up IMAGES by the labels `index - 1` and store the result as NextImage.
df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
# Row by row, pass the IMAGES and NextImage cells to image_similarity.
# NOTE(review): the error message quoted further down shows a cell holding a
# whole bracketed list of URLs rather than one URL -- confirm what
# image_similarity is supposed to receive.
df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
但是我收到以下错误:
---------------------------------------------------------------------------
InvalidSchema Traceback (most recent call last)
<ipython-input-25-16b99a7b864a> in <module>
1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-25-16b99a7b864a> in <lambda>(x)
1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
<ipython-input-21-3acdcb76f890> in image_similarity(imageAurl, imageB)
7
8 def image_similarity(imageAurl,imageB):
----> 9 responseA = requests.get(imageAurl)
10 imgA = Image.open(BytesIO(response.content))
11 responseB = requests.get(imageBurl)
~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in send(self, request, **kwargs)
638
639 # Get the appropriate adapter to use
--> 640 adapter = self.get_adapter(url=request.url)
641
642 # Start time (approximately) of the request
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in get_adapter(self, url)
729
730 # Nothing matches :-/
--> 731 raise InvalidSchema("No connection adapters were found for '%s'" % url)
732
733 def close(self):
InvalidSchema: ('No connection adapters were found for '["https://cf-medias.avendrealouer.fr/image/_873908158_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908159_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908160_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908161_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908162_d.jpg"]'', 'occurred at index 0')
错误似乎是因为我把每个单元格中的整个 URL 数组当作单个 URL 传给了 requests.get。
这应该有效:
df['NextImage'] = df['Image'][df['IMAGES'].index - 1]
df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
确保函数返回相似性判断结果,而不仅仅是打印它!否则 IsSimilar 列中得到的将全部是 None。