在尝试访问DataFrame列的模式时出现KeyError



我试图运行以下代码:

import time
import pandas as pd
import numpy as np
CITY_DATA = {'chicago': 'chicago.csv',
'new york city': 'new_york_city.csv',
'washington': 'washington.csv'}
def get_filters():
"""
Asks user to specify a city, month, and day to analyze.
Returns:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
"""
print('Hello! Let's explore some US bikeshare data!')
# get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
while True:
city = input('Which city you would like to explore : "chicago" , "new york city" , or "washington"  :' )
if city not in ('chicago', 'new york city', 'washington'):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for month (all, january, february, ... , june)
while True:
month = input('Enter "all" for all data or chose  a month : "january" , "february" , "march", "april" , "may" or "june "  :')
if month not in ("all", "january", "february", "march", "april", "may", "june"):
print(" You entered wrong choice , please try again")
continue
else:
break
# get user input for day of week (all, monday, tuesday, ... sunday)
while True:
day = input('Enter "all" for all days or chose a day : "saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday":  ')
if day not in ("all","saturday", "sunday", "monday", "tuesday", "wednesday", "thursday", "friday"):
print(" You entered wrong choice , please try again")
continue
else:
break
print('-'*60)
return city, month, day

def load_data(city, month, day):
"""
Loads data for the specified city and filters by month and day if applicable.
Args:
(str) city - name of the city to analyze
(str) month - name of the month to filter by, or "all" to apply no month filter
(str) day - name of the day of week to filter by, or "all" to apply no day filter
Returns:
df - Pandas DataFrame containing city data filtered by month and day
"""
df = pd.read_csv(CITY_DATA[city])
# convert the Start Time column to datetime
df['Start Time'] = pd.to_datetime(df['Start Time'])
# extract month , day of week , and hour from Start Time to new columns
df['month'] = df['Start Time'].dt.month
df['day_of_week'] = df['Start Time'].dt.day_name
df['hour'] = df['Start Time'].dt.hour
# filter by month if applicable
if month != 'all':
# use the index of the month_list to get the corresponding int
months = ['january', 'february', 'march', 'april', 'may', 'june']
month = months.index(month) + 1
# filter by month to create the new dataframe
df = df[df['month'] == month]
# filter by day of week if applicable
if day != 'all':
# filter by day of week to create the new dataframe
df = df[df['day_of_week'] == day.title()]
return df


def time_stats(df):
"""Displays statistics on the most frequent times of travel."""
print('nCalculating The Most Frequent Times of Travel...n')
start_time = time.time()
# display the most common month
popular_month = df['month'].mode()[0]
print('n The most popular month is  : n', popular_month)

# display the most common day of week
popular_day = df['day_of_week'].mode()[0]
print('n The most popular day of the week is  :  n', str(popular_day))
# display the most common start hour
popular_hour = df['hour'].mode()[0]
print('n The most popular hour of the day is :n ', popular_hour)
print("nThis took %s seconds.n" % (time.time() - start_time))
print('-'*60)

def station_stats(df):
"""Displays statistics on the most popular stations and trip."""
print('nCalculating The Most Popular Stations and Trip...n')
start_time = time.time()
# display most commonly used start station
start_station = df['Start Station'].value_counts().idxmax()
print('n The most commonly used start station is :  n', start_station)

# display most commonly used end station
end_station = df['End Station'].value_counts().idxmax()
print('nThe most commonly used end station is:  n', end_station)

# display most frequent combination of start station and end station trip
combination = df.groupby(['Start Station','End Station']).value_counts().idxmax()
print('nThe most frequent combination of start station and end station are:  n', combination)

print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)

def trip_duration_stats(df):
"""Displays statistics on the total and average trip duration."""
start_time = time.time()
travel_time = sum(df['Trip Duration'])
print('Total travel time:', travel_time / 86400, " Days")
# display total travel time
total_time = sum(df['Trip Duration'])
print('nThe total travel time is {} seconds: n', total_time)

# display mean travel time
mean_time = df['Trip Duration'].mean()
print('n The average travel time is n', mean_time)

print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)

def user_stats(df):
"""Displays statistics on bikeshare users."""
print('nCalculating User Stats...n')
start_time = time.time()
# TO DO: Display counts of user types
user_types = df['User Type'].value_counts()
#print(user_types)
print('User Types:n', user_types)
# TO DO: Display counts of gender


print("nThis took %s seconds." % (time.time() - start_time))
print('-'*40)
def main():
while True:
city, month, day = get_filters()
df = load_data(city, month, day)
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)
restart = input('nWould you like to restart? Enter yes or no.n')
if restart.lower() != 'yes':
break

if __name__ == "__main__":
main()

和我收到以下错误,有人能帮助吗错误:

> Traceback (most recent call last):
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreindexesrange.py", line 391, in get_loc
return self._range.index(new_key)
^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: 0 is not in range
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 203, in <module>
main()
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 192, in main
time_stats(df)
File "C:UsersDELLPycharmProjectsProfessionalBikeshare.py", line 100, in time_stats
popular_month = df['month'].mode()[0]
~~~~~~~~~~~~~~~~~~^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreseries.py", line 981, in __getitem__
Calculating The Most Frequent Times of Travel...
return self._get_value(key)
^^^^^^^^^^^^^^^^^^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreseries.py", line 1089, in _get_value
loc = self.index.get_loc(label)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:UsersDELLPycharmProjectsProfessionalvenvLibsite-packagespandascoreindexesrange.py", line 393, in get_loc
raise KeyError(key) from err
KeyError: 0

我希望过滤pandas DataFrame以返回月份、星期几和小时来执行一些统计。

KeyError表示该密钥无效,因为它不存在。在这种情况下,在尝试获取第一模式时获得KeyError的一个原因是当dataframe中的列'month'为空时,因此mode()返回空集合,因此在尝试获取其第一个元素时获得KeyError: 0

要避免这种情况,您可以替换:

popular_month = df['month'].mode()[0]

:

try:
# try to get first mode of column 'month'
popular_month = df['month'].mode()[0]
except KeyError:
# if there's no data on column 'month'
popular_month = "unknown"

因为如果'month'列上没有数据,那么试图获取它的模式是没有意义的。

关于处理异常的更多信息:https://docs.python.org/3/tutorial/errors.html#handling-exceptions

当我尝试(不使用过滤器)通过选择"所有";在第二个和第三个输入中,我得到以下结果:

Calculating The Most Frequent Times of Travel...

最流行的月份是:6

最受欢迎的一天是:
绑定方法PandasDelegate._add_delegate_accessors. _create_delegator_method..pandas.core.indexes.accessors. F <</p> DatetimeProperties对象在0x0000022B7CD5E890>>

一天中最受欢迎的时间是:17日

这需要0.0260775089263916秒。


计算最受欢迎的站点和行程…

最常用的起跑站是:
Streeter Dr &大Ave

最常用的端站是:
Streeter Dr &大Ave

最常见的起始站和结束站的组合是:
('2112 W Peterson Ave', '2112 W Peterson Ave', 1064651, Timestamp('2017-06-02 07:59:13'), '2017-06-02 08:25:42', 1589, 'Subscriber', 'Female', 1963.0, 6,

相关内容

最新更新