使用两个数据帧计算多个点的haversine最近距离



我想知道数据帧2中最近的位置到数据帧一中每行的距离是多少。(在我的数据帧一中,每个坐标距离最近的地方是什么(

查看下面我的所有代码

我有两个数据帧:(在原始数据帧中,我有数千行(

数据帧1被称为";位置":*

|CLUSTER|     |CLIENT|     |LATITUDE|           |LENGHT|
0            X1        19.45685402  -70.68645898
1            X1        19.39320504  -70.52567322
2            X1        18.614736        -68.71711383
3            X2        18.47977644  -69.93177289
4            X2        19.76546997  -70.51085451
5            X3        18.55835346  -68.38226906
6            X3        19.79037017  -70.68748243
7            X4        19.2232559   -70.52629188
8            X4        18.42865751  -68.9703434
9            X5        19.37935119  -70.51440314
10           X5        18.68743273  -68.45068029
11           X6        19.44126162  -70.73159162
12           X6        19.6678557   -70.36758867
13           X7        18.7816069   -70.2598325
14           X8        19.48708304  -70.74375908
15           X8        18.93720371  -70.40746487
16           X9        19.299298        -69.5559162
17          X10        18.60044506  -68.41991221
18          X10        19.30702896  -69.54500792
19          X11        19.3783253   -70.618205

数据帧2被称为";Coordinates_cords":

| PLACE|               | LATITUDE|           | LENGHT| 
supermarket       18.63609095 -68.39650565
school           19.44512055 -70.66851055
restarant        18.48377033 -69.93910793
spa              18.46608496 -69.92713481
supermarket      18.45646778 -69.9395694
restaurant       18.4845644  -69.9300583
school           18.47284417 -69.9345797
def haversine_np(lon1, lat1, lon2, lat2):


lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6371 * c
return km
def Top_nearest(distancia,distancias,todos = False,limit= 1.0):
results = []
for d in distancias:
results.append(haversine_np(distancia[0],distancia[1],d[0],d[1]))
results= np.array(results)
if not todos:
print(results.argmin())
indexes = np.where(results < limit)
else:
indexes = np.where(results>= 0)

return list(indexes[0]),results[indexes]
nearest_coordinates = list()
for index,row in place_locations.iterrows():
indexes,distances=Top_nearest(row[['LATITUDE', 'LENGHT']].values,
Coordinates_coords[['LATITUDE', 'LENGHT']].reset_index(drop=True).values,
todos=True)
nearest_coordinates.append(distances[0])
nearest_coordinates [:5]
place_locations['Distance_locations'] = nearest_coordinates 
place_locations

我得到的结果不正确,微积分中有一些东西我无法识别。我得到的Distance_location列,不要给我最近的距离位置

我之前已经发布过这篇文章。位置、距离、最近的

使用库来计算距离是最简单的。geopy对我来说很好

import geopy.distance
import pandas as pd
import io
df1 = pd.read_csv(io.StringIO("""CLUSTER     CLIENT     LATITUDE           LENGHT
0            X1        19.45685402  -70.68645898
1            X1        19.39320504  -70.52567322
2            X1        18.614736        -68.71711383
3            X2        18.47977644  -69.93177289
4            X2        19.76546997  -70.51085451
5            X3        18.55835346  -68.38226906
6            X3        19.79037017  -70.68748243
7            X4        19.2232559   -70.52629188
8            X4        18.42865751  -68.9703434
9            X5        19.37935119  -70.51440314
10           X5        18.68743273  -68.45068029
11           X6        19.44126162  -70.73159162
12           X6        19.6678557   -70.36758867
13           X7        18.7816069   -70.2598325
14           X8        19.48708304  -70.74375908
15           X8        18.93720371  -70.40746487
16           X9        19.299298        -69.5559162
17          X10        18.60044506  -68.41991221
18          X10        19.30702896  -69.54500792
19          X11        19.3783253   -70.618205"""), sep="s+")
df2 = pd.read_csv(io.StringIO(""" PLACE                LATITUDE            LENGHT 
supermarket       18.63609095 -68.39650565
school           19.44512055 -70.66851055
restarant        18.48377033 -69.93910793
spa              18.46608496 -69.92713481
supermarket      18.45646778 -69.9395694
restaurant       18.4845644  -69.9300583
school           18.47284417 -69.9345797"""), sep="s+")
# no need to calc distance in miles and kms,  there for reference
df3 = (df1
.assign(foo=1)
.merge(df2.assign(foo=1), on="foo")
.assign(distance_km=lambda dfa: dfa.apply(lambda r: 
geopy.distance.geodesic(
(r["LATITUDE_x"],r["LENGHT_y"]), 
(r["LATITUDE_y"],r["LENGHT_y"])).km, axis=1))
.assign(distance_miles=lambda dfa: dfa.apply(lambda r: 
geopy.distance.geodesic(
(r["LATITUDE_x"],r["LENGHT_y"]), 
(r["LATITUDE_y"],r["LENGHT_y"])).miles, axis=1))

)
# now find nearest PLACE to a CLIENT and count
(df3.sort_values(["CLIENT","distance_km"])
.groupby(["CLIENT"]).agg({"PLACE":"first","distance_km":"first"})
.reset_index()
.groupby("PLACE")["CLIENT"].count()
.to_frame().reset_index().sort_values("CLIENT",ascending=False)
)

输出

PLACE  CLIENT
2       school       5
3  supermarket       4
0    restarant       1
1   restaurant       1

最新更新