使用python数据帧计算谷歌位置距离



我正试图使用谷歌位置数据和Python Pandas数据框,找出我曾出现在某个地点附近(一英里左右)的日期。

首先将latitudeE7/longitudeE7(乘以10^7的整数)转换为以度为单位的纬度和经度:

# Load the Takeout location history file and flatten its 'locations' list
# into a DataFrame. JSON text is UTF-8, so do not rely on the platform
# default encoding.
with open(Takeout_google_location_history, encoding="utf-8") as f:
    data = json.load(f)
df = json_normalize(data['locations'])
# The E7 columns are scaled by 1e7; divide to get decimal degrees.
# NOTE: the columns keep their "E7" names even though they now hold degrees.
df['latitudeE7'] = df['latitudeE7'].div(10000000.0)
df['longitudeE7'] = df['longitudeE7'].div(10000000.0)
df.head()

然后计算距离:

import haversine as hs
from haversine import Unit
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300,-99.070050)
# The second "point" here is a tuple of two whole DataFrame columns (Series),
# not a pair of scalar coordinates; this is the call that raises the
# "cannot convert the series to <class 'float'>" TypeError.
df['diff'] = hs.haversine(loc1,(df['latitudeE7'],df['longitudeE7']),unit=Unit.MILES)
df.head()

得到这个错误:

~\Anaconda2\envs\notebook\lib\site-packages\haversine\haversine.py in 
haversine(point1, point2, unit)
92     lat1 = radians(lat1)
93     lng1 = radians(lng1)
---> 94     lat2 = radians(lat2)
95     lng2 = radians(lng2)
96 
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183         if len(self) == 1:
184             return converter(self.iloc[0])
--> 185         raise TypeError(f"cannot convert the series to {converter}")
186 
187     wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>      

我不知道该如何处理数据以使其成为浮点值。

我试过:

# Attempted fix: cast the scaled column to float dtype. This changes the
# element type only; the column is still a Series (one value per row),
# so it does not address the TypeError.
df['latitudeE7'] = df['latitudeE7'].div(10000000.0).astype(float)

以及使用手写距离:

import math
def distance(origin, destination):
    """Return the great-circle (haversine) distance between two points in km.

    Args:
        origin: ``(latitude, longitude)`` pair in decimal degrees.
        destination: ``(latitude, longitude)`` pair in decimal degrees.

    Returns:
        The distance in kilometres, using an Earth radius of 6371 km.

    Raises:
        TypeError: If a coordinate cannot be converted to a single float,
            e.g. when a whole pandas Series is passed instead of a scalar.
            Call this function once per row for column data.
    """
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371  # km

    dlat = math.radians(float(lat2) - lat1)
    dlon = math.radians(float(lon2) - lon1)
    sin_half_dlat = math.sin(dlat / 2)
    sin_half_dlon = math.sin(dlon / 2)
    a = (sin_half_dlat * sin_half_dlat +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         sin_half_dlon * sin_half_dlon)
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c

仍然得到相同的错误:

~\AppData\Local\Temp/ipykernel_22916/3664391511.py in distance(origin, destination)
26     radius = 6371  # km
27 
---> 28     dlat = math.radians(float(lat2) - lat1)
29     dlon = math.radians(float(lon2) - lon1)
30     a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183         if len(self) == 1:
184             return converter(self.iloc[0])
--> 185         raise TypeError(f"cannot convert the series to {converter}")
186 
187     wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>

不能直接将pd.Series传递给haversine函数。

代码:

from haversine import haversine, Unit
import pandas as pd
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300, -99.070050)
# Sample dataframe
df = pd.DataFrame({'latitudeE7': [0, 0], 'longitudeE7': [0, 0]})
# Calculation
# haversine() takes one pair of scalar coordinates per call, so passing the
# whole columns -- haversine(loc1, (df['latitudeE7'], df['longitudeE7'])) --
# raises "cannot convert the series to <class 'float'>".
# Pair the two columns up row by row instead. Zipping the columns gives the
# same per-row result as df.apply(..., axis=1) without building a Series
# object for every row, which matters for large location histories.
df['diff'] = [
    haversine(loc1, point, unit=Unit.MILES)
    for point in zip(df['latitudeE7'], df['longitudeE7'])
]

输出:

latitudeE7longitudeE7diff
00
0

最新更新