我想知道数据帧2中最近的位置到数据帧一中每行的距离是多少。(在我的数据帧一中,每个坐标距离最近的地方是什么(
查看下面我的所有代码
我有两个数据帧:(在原始数据帧中,我有数千行(
数据帧1被称为";位置":*
|CLUSTER| |CLIENT| |LATITUDE| |LENGHT|
0 X1 19.45685402 -70.68645898
1 X1 19.39320504 -70.52567322
2 X1 18.614736 -68.71711383
3 X2 18.47977644 -69.93177289
4 X2 19.76546997 -70.51085451
5 X3 18.55835346 -68.38226906
6 X3 19.79037017 -70.68748243
7 X4 19.2232559 -70.52629188
8 X4 18.42865751 -68.9703434
9 X5 19.37935119 -70.51440314
10 X5 18.68743273 -68.45068029
11 X6 19.44126162 -70.73159162
12 X6 19.6678557 -70.36758867
13 X7 18.7816069 -70.2598325
14 X8 19.48708304 -70.74375908
15 X8 18.93720371 -70.40746487
16 X9 19.299298 -69.5559162
17 X10 18.60044506 -68.41991221
18 X10 19.30702896 -69.54500792
19 X11 19.3783253 -70.618205
数据帧2被称为";Coordinates_cords":
| PLACE| | LATITUDE| | LENGHT|
supermarket 18.63609095 -68.39650565
school 19.44512055 -70.66851055
restarant 18.48377033 -69.93910793
spa 18.46608496 -69.92713481
supermarket 18.45646778 -69.9395694
restaurant 18.4845644 -69.9300583
school 18.47284417 -69.9345797
def haversine_np(lon1, lat1, lon2, lat2):
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6371 * c
return km
def Top_nearest(distancia,distancias,todos = False,limit= 1.0):
results = []
for d in distancias:
results.append(haversine_np(distancia[0],distancia[1],d[0],d[1]))
results= np.array(results)
if not todos:
print(results.argmin())
indexes = np.where(results < limit)
else:
indexes = np.where(results>= 0)
return list(indexes[0]),results[indexes]
nearest_coordinates = list()
for index,row in place_locations.iterrows():
indexes,distances=Top_nearest(row[['LATITUDE', 'LENGHT']].values,
Coordinates_coords[['LATITUDE', 'LENGHT']].reset_index(drop=True).values,
todos=True)
nearest_coordinates.append(distances[0])
nearest_coordinates [:5]
place_locations['Distance_locations'] = nearest_coordinates
place_locations
我得到的结果不正确,微积分中有一些东西我无法识别。我得到的Distance_location列,不要给我最近的距离位置
我之前已经发布过这篇文章。位置、距离、最近的
使用库来计算距离是最简单的。geopy对我来说很好
import geopy.distance
import pandas as pd
import io
df1 = pd.read_csv(io.StringIO("""CLUSTER CLIENT LATITUDE LENGHT
0 X1 19.45685402 -70.68645898
1 X1 19.39320504 -70.52567322
2 X1 18.614736 -68.71711383
3 X2 18.47977644 -69.93177289
4 X2 19.76546997 -70.51085451
5 X3 18.55835346 -68.38226906
6 X3 19.79037017 -70.68748243
7 X4 19.2232559 -70.52629188
8 X4 18.42865751 -68.9703434
9 X5 19.37935119 -70.51440314
10 X5 18.68743273 -68.45068029
11 X6 19.44126162 -70.73159162
12 X6 19.6678557 -70.36758867
13 X7 18.7816069 -70.2598325
14 X8 19.48708304 -70.74375908
15 X8 18.93720371 -70.40746487
16 X9 19.299298 -69.5559162
17 X10 18.60044506 -68.41991221
18 X10 19.30702896 -69.54500792
19 X11 19.3783253 -70.618205"""), sep="s+")
df2 = pd.read_csv(io.StringIO(""" PLACE LATITUDE LENGHT
supermarket 18.63609095 -68.39650565
school 19.44512055 -70.66851055
restarant 18.48377033 -69.93910793
spa 18.46608496 -69.92713481
supermarket 18.45646778 -69.9395694
restaurant 18.4845644 -69.9300583
school 18.47284417 -69.9345797"""), sep="s+")
# no need to calc distance in miles and kms, there for reference
df3 = (df1
.assign(foo=1)
.merge(df2.assign(foo=1), on="foo")
.assign(distance_km=lambda dfa: dfa.apply(lambda r:
geopy.distance.geodesic(
(r["LATITUDE_x"],r["LENGHT_y"]),
(r["LATITUDE_y"],r["LENGHT_y"])).km, axis=1))
.assign(distance_miles=lambda dfa: dfa.apply(lambda r:
geopy.distance.geodesic(
(r["LATITUDE_x"],r["LENGHT_y"]),
(r["LATITUDE_y"],r["LENGHT_y"])).miles, axis=1))
)
# now find nearest PLACE to a CLIENT and count
(df3.sort_values(["CLIENT","distance_km"])
.groupby(["CLIENT"]).agg({"PLACE":"first","distance_km":"first"})
.reset_index()
.groupby("PLACE")["CLIENT"].count()
.to_frame().reset_index().sort_values("CLIENT",ascending=False)
)
输出
PLACE CLIENT
2 school 5
3 supermarket 4
0 restarant 1
1 restaurant 1