我试图运行以下代码:
import time
import pandas as pd
import numpy as np
CITY_DATA = {'chicago': 'chicago.csv',
'new york city': 'new_york_city.csv',
'washington': 'washington.csv'}
def get_filters():
"""
Asks user to specify a city, month, and day to analyze.
Returns:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
"""
print('Hello! Let's explore some US bikeshare data!')
# get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
while True:
city = input('Which city you would like to explore : "chicago" , "new york city" , or "washington" :' )
if city not in ('chicago', 'new york city', 'washington'):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for month (all, january, february, ... , june)
while True:
month = input('Enter "all" for all data or chose a month : "january" , "february" , "march", "april" , "may" or "june " :')
if month not in ("all", "january", "february", "march", "april", "may", "june"):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for day of week (all, monday, tuesday, ... sunday)
while True:
day = input('Enter "all" for all days or chose a day : "saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday": ')
if day not in ("all","saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday"):
print(" You entered wrong choice , please try again")
continue
else:
break
print('-'*60)
return city, month, day
def load_data(city, month, day):
"""
Loads data for the specified city and filters by month and day if applicable.
Args:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
Returns:
df - Pandas DataFrame containing city data filtered by month and day
"""
df = pd.read_csv(CITY_DATA[city])
# convert the Start Time column to datetime
df['Start Time'] = pd.to_datetime(df['Start Time'])
# extract month , day of week , and hour from Start Time to new columns
df['month'] = df['Start Time'].dt.month
df['day_of_week'] = df['Start Time'].dt.day_name
df['hour'] = df['Start Time'].dt.hour
# filter by month if applicable
if month != 'all':
# use the index of the month_list to get the corresponding int
months = ['january', 'february', 'march', 'april', 'may', 'june']
month = months.index(month) + 1
# filter by month to create the new dataframe
df = df[df['month'] == month]
# filter by day of week if applicable
if day != 'all':
# filter by day of week to create the new dataframe
df = df[df['day_of_week'] == day.title()]
return df
def time_stats(df):
"""Displays statistics on the most frequent times of travel."""
print('nCalculating The Most Frequent Times of Travel...n')
start_time = time.time()
# display the most common month
popular_month = df['month'].mode()[0]
print('n The most popular month is : n', popular_month)
# display the most common day of week
popular_day = df['day_of_week'].mode()[0]
print('n The most popular day of the week is : n', str(popular_day))
# display the most common start hour
popular_hour = df['hour'].mode()[0]
print('n The most popular hour of the day is :n ', popular_hour)
print("nThis took %s seconds.n" % (time.time() - start_time))
print('-'*60)
def station_stats(df):
"""Displays statistics on the most popular stations and trip."""
print('nCalculating The Most Popular Stations and Trip...n')
start_time = time.time()
# display most commonly used start station
start_station = df['Start Station'].value_counts().idxmax()
print('n The most commonly used start station is : n', start_station)
# display most commonly used end station
end_station = df['End Station'].value_counts().idxmax()
print('nThe most commonly used end station is: n', end_station)
# display most frequent combination of start station and end station trip
combination = df.groupby(['Start Station','End Station']).value_counts().idxmax()
print('nThe most frequent combination of start station and end station are: n', combination)
print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def trip_duration_stats(df):
"""Displays statistics on the total and average trip duration."""
start_time = time.time()
travel_time = sum(df['Trip Duration'])
print('Total travel time:', travel_time / 86400, " Days")
# display total travel time
total_time = sum(df['Trip Duration'])
print('nThe total travel time is {} seconds: n', total_time)
# display mean travel time
mean_time = df['Trip Duration'].mean()
print('n The average travel time is n', mean_time)
print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def user_stats(df):
"""Displays statistics on bikeshare users."""
print('nCalculating User Stats...n')
start_time = time.time()
# TO DO: Display counts of user types
user_types = df['User Type'].value_counts()
#print(user_types)
print('User Types:n', user_types)
# TO DO: Display counts of gender
print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def main():
while True:
city, month, day = get_filters()
df = load_data(city, month, day)
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)
restart = input('nWould you like to restart? Enter yes or no.n')
if restart.lower() != 'yes':
break
if __name__ == "__main__":
main()
和我收到以下错误,有人能帮助吗错误:
> Traceback (most recent call last):
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreindexesrange.py", line 391, in get_loc
return self._range.index(new_key)
^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: 0 is not in range
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 203, in <module>
main()
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 192, in main
time_stats(df)
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 100, in time_stats
popular_month = df['month'].mode()[0]
~~~~~~~~~~~~~~~~~~^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreseries.py", line 981, in __getitem__
Calculating The Most Frequent Times of Travel...
return self._get_value(key)
^^^^^^^^^^^^^^^^^^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreseries.py", line 1089, in _get_value
loc = self.index.get_loc(label)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreindexesrange.py", line 393, in get_loc
raise KeyError(key) from err
KeyError: 0
我希望过滤pandas DataFrame以返回月份、星期几和小时来执行一些统计。
KeyError
表示该密钥无效,因为它不存在。在这种情况下,在尝试获取第一模式时获得KeyError
的一个原因是当dataframe中的列'month'
为空时,因此mode()返回空集合,因此在尝试获取其第一个元素时获得KeyError: 0
。
要避免这种情况,您可以替换:
popular_month = df['month'].mode()[0]
:
try:
# try to get first mode of column 'month'
popular_month = df['month'].mode()[0]
except KeyError:
# if there's no data on column 'month'
popular_month = "unknown"
因为如果'month'列上没有数据,那么试图获取它的模式是没有意义的。
关于处理异常的更多信息:https://docs.python.org/3/tutorial/errors.html#handling-exceptions
当我尝试(不使用过滤器)通过选择"所有";在第二个和第三个输入中,我得到以下结果:
Calculating The Most Frequent Times of Travel...
最流行的月份是:6
最受欢迎的一天是:
绑定方法PandasDelegate._add_delegate_accessors. _create_delegator_method..pandas.core.indexes.accessors. F <</p>
DatetimeProperties对象在0x0000022B7CD5E890>>
一天中最受欢迎的时间是:17日
这需要0.0260775089263916秒。
计算最受欢迎的站点和行程…
最常用的起跑站是:
Streeter Dr &大Ave
最常用的端站是:
Streeter Dr &大Ave
最常见的起始站和结束站的组合是:
('2112 W Peterson Ave', '2112 W Peterson Ave', 1064651, Timestamp('2017-06-02 07:59:13'), '2017-06-02 08:25:42', 1589, 'Subscriber', 'Female', 1963.0, 6,相关内容
最新更新
- 如何在启动GCP VM实例时使用自定义python命令调度该实例?
- 图像消失时,我选择一个文件与Flet Python
- 是否有一种方法允许CORS处理飞行前响应的自定义标头
- OSError: [Errno 57] Socket is not connected (python macos中的S
- 使用lag()或类似的方法,具有开始值和结束值的帐户滚动
- 我如何重写子类中重写超类的函数依次(Python)?
- 无法使用WooCommerce API将产品添加到相应的类别/子类别
- 用户登录到应用程序注册时Azure AD管理员同意错误
- Unity 3d:在不知道我将使用的确切预制的情况下,我如何在运行时启动预制?
- c -构建简单shell时文件重定向的问题
- AWS Lambda实际上记录到控制台以外的任何本地接收器(因此是CloudWatch)吗?
- 我如何在Django中为mymyy输入一个自定义用户模型管理器?
- discord.js v13交互按钮删除原始消息
- 在Flutter中默认禁用BlocBuilder中的TextButton
- 如果数据已经被获取,如何防止Redux工具包中的加载状态
- Javascript:在深度嵌套的数组和对象中查找匹配的属性值
- 将本地时间转换为用户首选时区,将用户首选时区转换为GMT
- 为什么 AWS Web 应用程序的前端和后端在这些关系图中直接连接?
- 是否有一个顺序Java正则表达式?
- 如何向右移动一个字符串n个字符
- 我应该从存储库返回任务<IEnumerable<T>>还是IAsyncEnumerable<T>?
- Python处理程序错误-没有足够的值来解包
- 使用 javascript 获取最接近和最高的 id
- 如何利用向量化技术提高MATLAB程序的效率
- 如何将PostgreSQL查询转换为LINQ或lambda表达式
- Python mysql-connector-python:如何使用mysql-connector-python从终端连
- 新的json数据不显示在android
- 如果不同时更改当前头像,则无法更新用户详细信息
- Kotlin HTTP GET 数据并插入到文本视图中
- 滑块在移动设备上不跟随手指
热门标签:
javascript python java c# php android html jquery c++ css ios sql mysql arrays asp.net json python-3.x ruby-on-rails .net sql-server django objective-c excel regex ruby linux ajax iphone xml vba spring asp.net-mvc database wordpress string postgresql wpf windows xcode bash git oracle list vb.net multithreading eclipse algorithm macos powershell visual-studio image forms numpy scala function api selenium