>我现在的函数如下所示。即使它输出了正确的结果,命令行(CMD)中最终还是会出现一大段错误信息 https://i.stack.imgur.com/PBTwz.png(ValueError: Invalid file path or buffer object type: <class 'pandas.core.frame.DataFrame'>)。有谁知道为什么输出明明正确,我却仍然会收到这个错误?
import pandas as pd
import numpy as np
import argparse
# Module-level default values.
# NOTE(review): nothing visible in this file reads these three names -- the
# functions below receive same-named values as arguments -- so confirm
# whether they are still needed.
target_col = "CountryRefs"
sep = ","
input_file = 'Test_set'
def arg_parse(argv=None):
    """Parse the command-line options of the splitter script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``argparse`` falls back to ``sys.argv[1:]``, so
            existing ``arg_parse()`` calls behave as before.

    Returns:
        A dict with the keys ``input_file``, ``sep`` and ``target_col``,
        ready to be unpacked into ``splitter(**result)``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--input_file", required=True,
        help="path of the CSV file to read",
    )
    parser.add_argument(
        "-s", "--sep", required=True,
        help="separator used to split the target column",
    )
    parser.add_argument(
        "-t", "--target_col", required=True,
        help="name of the column to split and explode",
    )
    return vars(parser.parse_args(argv))
def splitter(input_file, target_col, sep, new_col=None, *argv):
    """Split a delimited column into one row per value.

    Args:
        input_file: Path or file-like object handed to ``pandas.read_csv``.
            An already loaded ``DataFrame`` is accepted as well (it is
            copied, not modified); passing one used to raise
            ``ValueError: Invalid file path or buffer object type``.
        target_col: Name of the column whose cells are split on ``sep``.
        sep: Separator passed to ``Series.str.split``.
        new_col: Optional name under which the exploded values are
            returned. When ``None`` the values stay under ``target_col``.
        *argv: Names of additional columns to include in the result.

    Returns:
        A ``DataFrame`` holding the exploded column (named ``new_col`` if
        given, otherwise ``target_col``) followed by the ``argv`` columns.
        Rows whose exploded value is missing, empty or whitespace-only are
        dropped. Surrounding whitespace of kept values is not stripped.

    Raises:
        KeyError: If ``target_col`` or a column in ``argv`` does not exist.
    """
    if isinstance(input_file, pd.DataFrame):
        # Work on a copy so the caller's frame keeps its original column.
        df = input_file.copy()
    else:
        df = pd.read_csv(input_file)

    df[target_col] = df[target_col].str.split(sep)
    exploded = df.explode(target_col)

    # ``\s`` (not a literal "s"): blank out empty / whitespace-only pieces.
    # Assign the result back instead of a chained ``inplace=True`` call,
    # which acts on a temporary under pandas copy-on-write.
    exploded[target_col] = exploded[target_col].replace(
        r'^\s*$', np.nan, regex=True
    )
    exploded.dropna(subset=[target_col], inplace=True)

    if new_col is None:
        return exploded[[target_col, *argv]]

    exploded[new_col] = exploded[target_col]
    return exploded[[new_col, *argv]]
if __name__ == '__main__':
    # Script entry point: read the CLI options, run the split with them as
    # keyword arguments, and print the resulting frame.
    cli_options = arg_parse()
    result = splitter(**cli_options)
    print(result)
你需要的应该是类似下面这样的代码:
import pandas as pd
import numpy as np
import argparse
# Module-level defaults.
# NOTE(review): nothing visible in this file reads these two names -- the
# functions below take same-named values as arguments -- so confirm whether
# they are still needed.
target_col = "CountryRefs"
sep = ","

# Small in-memory sample table used by the script entry point. It contains
# an empty string, a whitespace-only string and a None value.
data = {
    'CountryRefs': ['Italy, Germany', 'Japan , France', '', 'Alaska'],
    'Authors': ['Dom', 'Xavier', 'Kathleen', 'Joe'],
    'Friends': ['Amy Pete', 'Joe', None, 'Franklin'],
    'Colors': ['red.blue', ' ', 'yellow', 'black.blue'],
}
df = pd.DataFrame(
    data,
    columns=['CountryRefs', 'Authors', 'Friends', 'Colors'],
)
def arg_parse(cli_args=None):
    """Parse the command-line options of the splitter script.

    Args:
        cli_args: Optional list of argument strings to parse. When ``None``
            (the default) ``argparse`` falls back to ``sys.argv[1:]``, so
            existing ``arg_parse()`` calls behave as before.

    Returns:
        A dict with the keys ``argv`` (list of extra column names, possibly
        empty), ``sep`` and ``target_col``, ready to be unpacked into
        ``splitter(df, **result)``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'argv', type=str, nargs='*', default=[],
        help="extra columns to include in the output",
    )
    # ``required=True`` made the default unreachable; the option is now
    # optional so that ',' really is used when -s is omitted.
    parser.add_argument(
        "-s", "--sep", dest="sep", default=',',
        help="separator used to split the target column (default: ',')",
    )
    # Stays required: the former default '1' could never take effect and
    # does not name a column, so it was dropped.
    parser.add_argument(
        "-t", "--target_col", dest="target_col", required=True,
        help="name of the column to split and explode",
    )
    return vars(parser.parse_args(cli_args))
def splitter(df, target_col, sep, new_col=None, argv=None):
    """Split a delimited column of ``df`` into one row per value.

    Args:
        df: Source ``DataFrame``. It is copied, so the caller's frame is
            left untouched and the function can be called repeatedly.
        target_col: Name of the column whose cells are split on ``sep``.
        sep: Separator passed to ``Series.str.split``.
        new_col: Optional name under which the exploded values are
            returned. When ``None`` the values stay under ``target_col``.
        argv: Optional iterable of additional column names to include in
            the result. ``None`` means no extra columns.

    Returns:
        A ``DataFrame`` holding the exploded column (named ``new_col`` if
        given, otherwise ``target_col``) followed by the ``argv`` columns.
        Rows whose exploded value is missing, empty or whitespace-only are
        dropped. Surrounding whitespace of kept values is not stripped.

    Raises:
        KeyError: If ``target_col`` or a column in ``argv`` does not exist.
    """
    # ``None`` instead of a shared mutable ``[]`` default.
    extra_cols = [] if argv is None else list(argv)

    # Copy first: overwriting the column on ``df`` itself would replace the
    # caller's strings with lists.
    work = df.copy()
    work[target_col] = work[target_col].str.split(sep)
    exploded = work.explode(target_col)

    # ``\s`` (not a literal "s"): blank out empty / whitespace-only pieces.
    # Assign the result back instead of a chained ``inplace=True`` call,
    # which acts on a temporary under pandas copy-on-write.
    exploded[target_col] = exploded[target_col].replace(
        r'^\s*$', np.nan, regex=True
    )
    exploded.dropna(subset=[target_col], inplace=True)

    if new_col is None:
        return exploded[[target_col, *extra_cols]]

    exploded[new_col] = exploded[target_col]
    return exploded[[new_col, *extra_cols]]
if __name__ == '__main__':
    # Script entry point: read the CLI options, run the split on the
    # module-level sample frame, and print the resulting frame.
    cli_options = arg_parse()
    result = splitter(df, **cli_options)
    print(result)
然后通过下面的命令来运行这段代码:
python sep.py -t CountryRefs -s ','
或者像这样
python sep.py -t CountryRefs -s ',' Friends Colors