无法识别Python数据类型



我有下面的代码。这段代码创建并在仪表板上显示新冠肺炎数据。我需要为此添加一个日期选择器,并显示适用于该范围的数据。

我得到错误";ValueError:"的格式编号1;2020-01-01";未被识别";当这样做时。日期选择器输入的值无法转换为日期时间格式,以便我从初始数据帧中筛选出属于该日期范围的记录。非常感谢您的帮助。

由于这种不匹配,每当我试图打印接收数据的数据类型时,或者在比较日期的行中,我都会遇到错误。

import pandas as pd
from dash import html
import plotly.graph_objects as go
from dash import dcc
import dash
import plotly.express as px
from dash.dependencies import Input, Output
from datetime import date
from datetime import datetime
import datetime
df = pd.read_excel("https://covid.ourworldindata.org/data/owid-covid-data.xlsx")
#
from numpy import dtype
app = dash.Dash()
# df = pd.read_csv('new2.csv', index_col=0)
print(df)
print(df.columns.tolist())
df.drop(
df.columns.difference(['continent', 'location', 'date', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths']),
1, inplace=True)
print(df)
df.to_csv('new3.csv', encoding='utf-8', index=False)
app.layout = html.Div(id='parent', children=[
html.H1(id='H1', children='Covid Dashboard', style={'textAlign': 'center', 
'marginTop': 40, 'marginBottom': 40}),
dcc.DatePickerRange(
id='my-date-picker-range',
min_date_allowed=date(2020, 1, 1),
max_date_allowed=date.today(),
initial_visible_month=date(2020, 1, 1),
# end_date=date.today()
display_format='YYYY-MM-DD',
),
html.Div(id='output-container-date-picker-range'),
dcc.Dropdown(id='dropdown',
options=[
{'label': 'Total cases', 'value': 'total_cases'},
{'label': 'New cases', 'value': 'new_cases'},
{'label': 'Total_deaths', 'value': 'total_deaths'},
{'label': 'New deaths', 'value': 'new_deaths'},
],
value='total_cases'),
dcc.Graph(id='bar_plot')
])

@app.callback(Output(component_id='bar_plot', component_property='figure'),
[Input(component_id='dropdown', component_property='value'),
Input('my-date-picker-range', 'start_date'),
Input('my-date-picker-range', 'end_date')
])
def graph_update(dropdown_value, start_date, end_date):
print(dropdown_value)
print(start_date)
print(end_date)
start_date1 = '{}'.format(start_date)
end_date1 = '{}'.format(end_date)

# to get the world subset since the dataset contains multiple locations
worldwide_subset = df.loc[df['location'] == 'World']
if start_date is not None and end_date is not None:
# date_object = date.fromisoformat(start_date)
# print(date_object)
start_date_object = datetime.datetime.strptime(start_date1, '%Y-%m-%d').date()
end_date_object = datetime.datetime.strptime(end_date1, '%Y-%m-%d').date()
print('...............')

mask = (df['date'] > start_date_object) & (df['date'] <= end_date_object)
print(df.loc[mask])
used_df = df.loc[mask]
print(used_df)
else:
used_df = worldwide_subset
fig = go.Figure([go.Scatter(x=used_df['date'], y=df['{}'.format(dropdown_value)], 
line=dict(color='firebrick', width=4))
])
fig.update_layout(title=dropdown_value + ' over time',
xaxis_title='date',
yaxis_title=dropdown_value
)
return fig

if __name__ == '__main__':
app.run_server()

错误跟踪

[2021-12-01 21:12:24,710] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/flask/app.py", line 2073, in wsgi_app
response = self.full_dispatch_request()
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/flask/app.py", line 1518, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/flask/app.py", line 1516, in full_dispatch_request
rv = self.dispatch_request()
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/flask/app.py", line 1502, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/dash/dash.py", line 1336, in dispatch
response.set_data(func(*args, outputs_list=outputs_list))
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/dash/_callback.py", line 151, in add_context
output_value = func(*func_args, **func_kwargs)  # %% callback invoked %%
File "dimi2.py", line 86, in graph_update
mask = (df['date'] > start_date_object) & (df['date'] <= end_date_object)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/pandas/core/ops/common.py", line 69, in new_method
return method(self, other)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/pandas/core/arraylike.py", line 48, in __gt__
return self._cmp_method(other, operator.gt)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/pandas/core/series.py", line 5502, in _cmp_method
res_values = ops.comparison_op(lvalues, rvalues, op)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py", line 284, in comparison_op
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
File "/home/sithijathewahettige/PycharmProjects/djangoProject/dash/venv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py", line 73, in comp_method_OBJECT_ARRAY
result = libops.scalar_compare(x.ravel(), y, op)
File "pandas/_libs/ops.pyx", line 107, in pandas._libs.ops.scalar_compare
TypeError: '>' not supported between instances of 'str' and 'datetime.date'

获得TypeError: '>' not supported between instances of 'str' and 'datetime.date'的错误意味着您应该先将字符串(str(转换为date,然后才能对其进行筛选。这里有一个例子:

import pandas as pd
df = pd.DataFrame({
'date':['2020-01-01','2020-02-02','2020-03-03','2021-12-01']})
print(df)

数据帧:

date
0  2020-01-01
1  2020-02-02
2  2020-03-03
3  2021-12-01

如果你是type(df['date'][0]),那么你会看到它是str,但如果是pd.to_datetime(df['date'])[0],那么类型是Timestamp('2020-01-01 00:00:00')

执行pd.to_datetime(df['date'])将输出:

0   2020-01-01
1   2020-02-02
2   2020-03-03
3   2021-12-01
Name: date, dtype: datetime64[ns]

你可以过滤日期做:

df[(df['date'] > '2020-01-20') & (df['date'] < '2021-03-20')]

输出:

date
1  2020-02-02
2  2020-03-03

最新更新