我正在学习 Python,并尝试编写一个简单的数据清理程序。我想把 `title` 的值从 `read_excel` 函数传递给 `output` 函数,但程序一直报错:name 'title' is not defined。下面是我的代码:
import os
import pandas as pd
import math
class Item():
    """One data row: a name, a cost, a target gender and a prime flag.

    Instances compare equal and hash on all four fields, so duplicates
    collapse when items are stored in a set or used as dict keys.
    """

    def __init__(self, name, cost, gender, prime):
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    def has_all_properties(self):
        """Return True when every field holds a usable value.

        A cost of ``None`` or NaN counts as missing.
        """
        cost = self.__cost
        # Test for None before isnan: math.isnan(None) raises TypeError.
        return bool(
            self.__name
            and cost is not None
            and not math.isnan(cost)
            and self.__gender
            and self.__prime
        )

    def clean(self, wanted_cost, wanted_gender, wanted_prime):
        """Return True when the item matches the wanted gender and prime
        flag and its cost does not exceed *wanted_cost*."""
        return bool(
            self.__name
            and self.__gender == wanted_gender
            and self.__cost <= wanted_cost
            and self.__prime == wanted_prime
        )

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on a missing private attribute.
        if not isinstance(other, Item):
            return NotImplemented
        return (
            self.__name == other.__name
            and self.__cost == other.__cost
            and self.__gender == other.__gender
            and self.__prime == other.__prime
        )

    def __hash__(self):
        return hash((self.__name, self.__cost, self.__gender, self.__prime))

    def __repr__(self):
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def tuple(self):
        """Return the fields as ``(name, cost, gender, prime)``."""
        return self.__name, self.__cost, self.__gender, self.__prime
def read_excel(filetype):
    """Read every file in the current directory whose name ends with
    *filetype*, stack the sheets into one frame and return its rows.

    The column titles and the row data are printed before returning.
    """
    workdir = os.path.abspath('')
    df = pd.DataFrame()
    for file in os.listdir(workdir):
        if not file.endswith(filetype):
            continue
        df = df.append(pd.read_excel(file), ignore_index=True)
    # Ask pandas to put None wherever a cell is null, then keep only the
    # four columns of interest.
    df = df.where(df.notnull(), None)
    df = df[['name', 'cost', 'used_by', 'prime']]
    title = list(df.columns.values)
    print(title)
    array = df.values.tolist()
    print(array)
    return array
    # Never reached: the function has already returned on the line above,
    # so `title` does not leave this function.
    return output(title)
def process(array, wanted_cost=20, wanted_gender="male", wanted_prime="yes"):
    """Build items from *array*, drop duplicates and incomplete rows, and
    return the items that satisfy the filter.

    Args:
        array: iterable of ``(name, cost, gender, prime)`` rows.
        wanted_cost: upper bound (inclusive) on the item cost.
        wanted_gender: gender value an item must have.
        wanted_prime: prime flag an item must have.

    Returns:
        A list of the matching ``Item`` objects, in first-seen order.
    """
    # dict.fromkeys de-duplicates like a set but keeps input order, so the
    # result does not change from run to run.
    unique = list(dict.fromkeys(Item(*row) for row in array))
    print(unique)
    result = [
        item
        for item in unique
        if item.has_all_properties()
        and item.clean(wanted_cost, wanted_gender, wanted_prime)
    ]
    print(result)
    # Hand the list back so the caller receives data rather than None.
    return result
def output(where, sort_data, title):
    """Write *sort_data* to an Excel file at *where*.

    Each element of *sort_data* must provide a ``tuple()`` method; *title*
    supplies the column headers.
    """
    rows = []
    for obj in sort_data:
        rows.append(obj.tuple())
    frame = pd.DataFrame(rows, columns=title)
    frame.to_excel(where, index=False, header=True)
if __name__ == "__main__":
    # Pipeline: load the workbooks, filter the rows, write the result.
    inputfile = read_excel('.XLSX')
    processdata = process(inputfile)
    # `title` is not assigned anywhere at module level; it exists only as
    # a local variable inside read_excel().
    result = output('clean_data.xlsx', processdata, title)
你能告诉我怎么做吗?谢谢你的帮助
执行 `return` 之后函数会立即退出,因此写在 `return` 后面的语句永远不会被执行。你可以像下面这样一次返回多个值:
def read_excel(filetype):
    """Read every file in the current directory whose name ends with
    *filetype*, combine them, and return a two-element tuple: the row
    data and whatever ``output(title)`` returns.
    """
    here = os.path.abspath('')
    matching = [entry for entry in os.listdir(here) if entry.endswith(filetype)]
    df = pd.DataFrame()
    for entry in matching:
        df = df.append(pd.read_excel(entry), ignore_index=True)
    not_null = df.notnull()
    df = df.where(not_null, None)
    df = df[['name', 'cost', 'used_by', 'prime']]
    title = list(df.columns.values)
    print(title)
    array = df.values.tolist()
    print(array)
    # NOTE(review): output() is called here with a single argument;
    # confirm that this matches its signature.
    return array, output(title)
这样会返回一个由这些值组成的元组 `(array, output(title))`。
执行 `return` 语句后,函数就会退出,这意味着 `return output(title)` 实际上永远不会被执行。另外,`output()` 不返回任何值,`DataFrame.to_excel()` 只是把数据写入 Excel 文件。你在 `read_excel()` 中应该这样写:
def read_excel(filetype):
    """Read every file in the current directory whose name ends with
    *filetype*, combine them, call ``output(title)`` and return the rows.
    """
    wanted = ['name', 'cost', 'used_by', 'prime']
    df = pd.DataFrame()
    for file in os.listdir(os.path.abspath('')):
        if file.endswith(filetype):
            sheet = pd.read_excel(file)
            df = df.append(sheet, ignore_index=True)
    df = df.where(df.notnull(), None)
    df = df[wanted]
    title = list(df.columns.values)
    print(title)
    array = df.values.tolist()
    print(array)
    # The call now sits before the return, so it is executed.
    # NOTE(review): output() is called here with a single argument;
    # confirm that this matches its signature.
    output(title)
    return array
我找到了一个最容易理解的方法来解决目前的问题:把 `read_excel` 拆分开,让它只返回 DataFrame,再另外定义 `get_header` 和 `get_list` 两个函数。下面是我的解决方案:
import os
import pandas as pd
import math
class Item():
    """One data row: a name, a cost, a target gender and a prime flag.

    Instances compare equal and hash on all four fields, so duplicates
    collapse when items are stored in a set or used as dict keys.
    """

    def __init__(self, name, cost, gender, prime):
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    def has_all_properties(self):
        """Return True when every field holds a usable value.

        A cost of ``None`` or NaN counts as missing.
        """
        cost = self.__cost
        # Test for None before isnan: math.isnan(None) raises TypeError.
        return bool(
            self.__name
            and cost is not None
            and not math.isnan(cost)
            and self.__gender
            and self.__prime
        )

    def clean(self, wanted_cost, wanted_gender, wanted_prime):
        """Return True when the item matches the wanted gender and prime
        flag and its cost does not exceed *wanted_cost*."""
        return bool(
            self.__name
            and self.__gender == wanted_gender
            and self.__cost <= wanted_cost
            and self.__prime == wanted_prime
        )

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on a missing private attribute.
        if not isinstance(other, Item):
            return NotImplemented
        return (
            self.__name == other.__name
            and self.__cost == other.__cost
            and self.__gender == other.__gender
            and self.__prime == other.__prime
        )

    def __hash__(self):
        return hash((self.__name, self.__cost, self.__gender, self.__prime))

    def __repr__(self):
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def tuple(self):
        """Return the fields as ``(name, cost, gender, prime)``."""
        return self.__name, self.__cost, self.__gender, self.__prime
def read_excel(filetype):
    """Load all matching workbooks from the current directory into one frame.

    Args:
        filetype: filename suffix to match, e.g. ``'.XLSX'``. The match
            uses ``str.endswith`` and is therefore case-sensitive.

    Returns:
        A DataFrame restricted to the columns ``name``, ``cost``,
        ``used_by`` and ``prime``. When no file matches, the frame is
        empty but still carries those four columns.

    Raises:
        KeyError: if the combined data lacks one of the four columns.
    """
    columns = ['name', 'cost', 'used_by', 'prime']
    cwd = os.path.abspath('')
    frames = [
        pd.read_excel(file)
        for file in os.listdir(cwd)
        if file.endswith(filetype)
    ]
    if not frames:
        # Nothing to read: return an empty frame with the expected header
        # instead of failing on the column selection below.
        return pd.DataFrame(columns=columns)
    # DataFrame.append was removed in pandas 2.0; concat replaces it and
    # avoids re-copying the accumulated frame on every file.
    df = pd.concat(frames, ignore_index=True)
    df = df.where(df.notnull(), None)
    return df[columns]
def get_list(dataframe):
    """Return the rows of *dataframe* as a list of lists, printing them first."""
    cell_values = dataframe.values
    rows = cell_values.tolist()
    print(rows)
    return rows
def get_header(dataframe):
    """Return the column labels of *dataframe* as a list, printing them first."""
    labels = dataframe.columns.values
    header = list(labels)
    print(header)
    return header
def process(array, wanted_cost=20, wanted_gender="male", wanted_prime="yes"):
    """Build items from *array*, drop duplicates and incomplete rows, and
    return the rows that satisfy the filter.

    Args:
        array: iterable of ``(name, cost, gender, prime)`` rows.
        wanted_cost: upper bound (inclusive) on the item cost.
        wanted_gender: gender value an item must have.
        wanted_prime: prime flag an item must have.

    Returns:
        A list of ``(name, cost, gender, prime)`` tuples, in first-seen
        order.
    """
    # dict.fromkeys de-duplicates like a set but keeps input order, so the
    # written file does not change from run to run.
    unique = list(dict.fromkeys(Item(*row) for row in array))
    print(unique)
    result = [
        item
        for item in unique
        if item.has_all_properties()
        and item.clean(wanted_cost, wanted_gender, wanted_prime)
    ]
    print(result)
    return [item.tuple() for item in result]
def output(where, sort_data, title):
    """Write *sort_data* to an Excel file at *where*, using *title* as the
    column headers and omitting the index column."""
    frame = pd.DataFrame(sort_data, columns=title)
    frame.to_excel(where, header=True, index=False)
if __name__ == "__main__":
    # Load once, then derive both the rows and the header from the same
    # frame so they stay in step.
    inputfile = read_excel('.XLSX')
    array = get_list(inputfile)
    header = get_header(inputfile)

    # Filter the rows and write the surviving ones with the original header.
    processdata = process(array)
    result = output('clean_data.xlsx', processdata, header)