我写了这些代码来读取数据:
import pandas as pd
import pathlib
def load_coordinates(structure, segments=None):
    """Load the coordinates table of a structure, optionally limited to segments.

    Reads the ``;``-separated file ``coordinates_<structure>.csv`` from the
    ``data/<structure>`` directory under the parent of this module's
    directory.

    Args:
        structure: Name of the structure; used for both the directory name
            and the file name.
        segments: Optional collection of values to keep from the ``segment``
            column. ``None`` or an empty collection keeps every row.

    Returns:
        The loaded DataFrame. When ``segments`` is given, only the matching
        rows are kept and the index is renumbered from zero.
    """
    filedir = pathlib.Path(__file__).parent.parent / "data" / structure
    filepath = filedir / f"coordinates_{structure}.csv"
    # Bind the frame to a name instead of returning it straight away, so the
    # segment filter below is actually reachable.
    df = pd.read_csv(filepath, sep=";")
    if not segments:
        return df
    return df.loc[df["segment"].isin(segments)].reset_index(drop=True)
def load_population(structure, segments=None):
    """Load the population table of a structure, optionally limited to segments.

    Reads the ``;``-separated file ``population_<structure>.csv`` from the
    ``data/<structure>`` directory under the parent of this module's
    directory.

    Args:
        structure: Name of the structure; used for both the directory name
            and the file name.
        segments: Optional collection of values to keep from the ``segment``
            column. ``None`` or an empty collection keeps every row.

    Returns:
        The loaded DataFrame. When ``segments`` is given, only the matching
        rows are kept and the index is renumbered from zero.
    """
    filedir = pathlib.Path(__file__).parent.parent / "data" / structure
    filepath = filedir / f"population_{structure}.csv"
    # The frame must be kept in a variable; returning the read_csv result
    # directly would skip the segment filter entirely.
    df = pd.read_csv(filepath, sep=";")
    if not segments:
        return df
    return df.loc[df["segment"].isin(segments)].reset_index(drop=True)
def load_ambul_praxen(structure, segments=None):
    """Load the ``ambul_praxen`` table of a structure without its ``planet`` column.

    Reads the ``;``-separated file ``ambul_praxen_<structure>.csv`` from the
    ``data/<structure>`` directory under the parent of this module's
    directory and drops the ``planet`` column.

    Args:
        structure: Name of the structure; used for both the directory name
            and the file name.
        segments: Optional collection of values to keep from the ``segment``
            column. ``None`` or an empty collection keeps every row.

    Returns:
        The loaded DataFrame. When ``segments`` is given, only the matching
        rows are kept and the index is renumbered from zero.
    """
    filedir = pathlib.Path(__file__).parent.parent / "data" / structure
    filepath = filedir / f"ambul_praxen_{structure}.csv"
    # Keep the frame in a variable so that the segment filter can run on it.
    df = pd.read_csv(filepath, sep=";").drop(columns=["planet"])
    if not segments:
        return df
    return df.loc[df["segment"].isin(segments)].reset_index(drop=True)
def load_docs_matrix(structure, docs_selection, sum_up_vo, segments=None):
    """Load selected columns of a structure's docs matrix.

    Reads the ``;``-separated file ``docs_matrix_<structure>.csv`` from the
    ``data/<structure>`` directory under the parent of this module's
    directory.

    Args:
        structure: Name of the structure; used for both the directory name
            and the file name.
        docs_selection: Column names to keep next to ``segment``. ``None``
            selects every column other than ``segment``.
        sum_up_vo: When truthy, the selected columns are replaced by a
            single ``vo`` column holding their row-wise sum.
        segments: Optional collection of values to keep from the ``segment``
            column. ``None`` or an empty collection keeps every row.

    Returns:
        A DataFrame with ``segment`` plus either the selected columns or the
        ``vo`` sum. When ``segments`` is given, only the matching rows are
        kept and the index is renumbered from zero.
    """
    filedir = pathlib.Path(__file__).parent.parent / "data" / structure
    filepath = filedir / f"docs_matrix_{structure}.csv"
    # Keep the frame in a variable; returning the read_csv result directly
    # would skip the column selection and the optional sum.
    df = pd.read_csv(filepath, sep=";")
    if docs_selection is None:
        # No explicit selection: fall back to every non-key column instead of
        # failing on `["segment"] + None`.
        docs_selection = [col for col in df.columns if col != "segment"]
    else:
        docs_selection = list(docs_selection)
    df = df.loc[:, ["segment", *docs_selection]]
    if sum_up_vo:
        # assign() returns a new frame, which avoids writing into a slice of
        # the frame that was read from disk.
        df = df.assign(vo=df[docs_selection].sum(axis=1)).drop(columns=docs_selection)
    if segments:
        df = df.loc[df["segment"].isin(segments)].reset_index(drop=True)
    return df
def load_weights(structure, request, segments=None, docs_selection=None, sum_up_vo=None):
    """Load the weights table selected by ``request``.

    Args:
        structure: Name of the structure, forwarded to the loader.
        request: ``"population"`` selects ``load_population``,
            ``"ambul_praxen"`` selects ``load_ambul_praxen``; any other
            value selects ``load_docs_matrix``.
        segments: Optional collection of ``segment`` values to keep.
        docs_selection: Column selection forwarded to ``load_docs_matrix``;
            only used for the docs-matrix request.
        sum_up_vo: Sum flag forwarded to ``load_docs_matrix``; only used for
            the docs-matrix request.

    Returns:
        The DataFrame produced by the selected loader, restricted to
        ``segments`` when given.
    """
    # Every loader needs more than `structure`; forward the arguments this
    # function received instead of dropping them.
    if request == "population":
        return load_population(structure, segments)
    if request == "ambul_praxen":
        return load_ambul_praxen(structure, segments)
    weights = load_docs_matrix(structure, docs_selection, sum_up_vo)
    if segments:
        # Filter here so the segment restriction applies whether or not
        # load_docs_matrix filters by segment itself.
        weights = weights.loc[weights["segment"].isin(segments)].reset_index(drop=True)
    return weights
def load_data(structure, request, segments=None, docs_selection=None,
              sum_up_vo=None):
    """Load coordinates and the requested weights and join them on ``segment``.

    Args:
        structure: Name of the structure, forwarded to the loaders.
        request: Which weights table to load; forwarded to ``load_weights``.
        segments: Optional collection of ``segment`` values to keep.
        docs_selection: Forwarded to ``load_weights``.
        sum_up_vo: Forwarded to ``load_weights``.

    Returns:
        One DataFrame: the inner join of the coordinates and the weights on
        the ``segment`` column.
    """
    # Pass `segments` along: load_coordinates expects it, and without it the
    # coordinates would never be restricted to the requested segments.
    coordinates = load_coordinates(structure, segments)
    weights = load_weights(structure, request, segments, docs_selection, sum_up_vo)
    return coordinates.merge(weights, on="segment", how="inner")
在调用函数之前,我想添加这个:
# NOTE(review): indentation was lost in this excerpt; each `return` below is
# meant to sit inside its `if`/`else` branch, within the body of a function
# that has `df` and `segments` in scope.
# Keep only the rows whose `segment` value is listed in `segments` and
# renumber the index; when `segments` is empty or None, return `df` unchanged.
if segments:
return df.loc[df.segment.isin(segments)].reset_index(drop=True)
else:
return df
到 `load_docs_matrix` 函数中。
我是这样调用函数的:
# NOTE(review): load_data is declared above with five parameters (structure,
# request, segments, docs_selection, sum_up_vo). The loader functions listed
# below are passed as extra positional arguments, so the call supplies more
# arguments than the signature accepts.
# NOTE(review): `load population` (with a space) is not a valid name;
# presumably `load_population` was meant.
request = "ambul_praxen"
structure = "1868"
load_data(structure,request,
load_coordinates,
load_ambul_praxen,
load population,
load_weights)
我想要实现的是用 `load_data` 函数把所有数据集合并到一个 DataFrame 中。调用函数后,我得到了一个错误,提示我传入了 7 个参数,而函数最多只接受 5 个。我试着调整参数,但似乎不起作用。
有什么办法可以解决这个问题吗?
我不确定是否理解了您的问题,但如果您只是想调用这些读取 DataFrame 的函数,只需这样写:
def load_data(input_df, structure, request, segments=None, docs_selection=None,
              sum_up_vo=None):
    """Load every table of a structure and place them side by side.

    Args:
        input_df: Not used by this function; kept so existing calls keep
            working.
        structure: Name of the structure, forwarded to every loader.
        request: Which weights table ``load_weights`` should load.
        segments: Optional collection of ``segment`` values, forwarded to
            the loaders that accept it.
        docs_selection: Column selection forwarded to ``load_docs_matrix``
            and ``load_weights``.
        sum_up_vo: Sum flag forwarded to ``load_docs_matrix`` and
            ``load_weights``.

    Returns:
        One DataFrame holding the coordinates, population, ambul_praxen,
        docs-matrix and weights frames concatenated column-wise.
    """
    # Each loader needs more than `structure`; forward the optional
    # arguments so the calls match the loaders' signatures.
    coordinates = load_coordinates(structure, segments)
    population = load_population(structure, segments)
    ambul_praxen = load_ambul_praxen(structure, segments)
    docs_matrix = load_docs_matrix(structure, docs_selection, sum_up_vo)
    weights = load_weights(structure, request, segments, docs_selection, sum_up_vo)
    # Column-wise concatenation aligns the frames on their row index; columns
    # that several frames share (such as "segment") are repeated, not merged.
    return pd.concat(
        [coordinates, population, ambul_praxen, docs_matrix, weights], axis=1
    )
或者把它们逐个返回……