使用Python聚合CSV记录



我的CSV文件如下所示:

Mike,6
Mike,5
Bill,3
Bill,1
Sally,4
Sally,2

我想对它进行处理,把相同名称的计数相加,得到如下结果:

Mike,11
Bill,4
Sally,6

查看pandas库。

# The sample file shown in the question has no header row, so pandas must not
# treat the first record as column labels; name the two columns explicitly so
# that groupby('name') can find its key column.
df = pd.read_csv('data.csv', header=None, names=['name', 'count'])
df_grouped = df.groupby('name').sum()  # one row per name, counts summed

更多详情请参阅 pandas 的 groupby 文档:

http://pandas.pydata.org/pandas-docs/stable/groupby.html

def records_from_file(fname, column_names):
    """Yield one dict per non-blank line of a comma-separated text file.

    Args:
        fname: Path of the file to read.
        column_names: Sequence of keys; the i-th key is paired with the
            i-th comma-separated field of each line.  Extra fields on a
            line are ignored.

    Yields:
        dict mapping each entry of ``column_names`` to the matching field,
        as an unconverted string.

    Raises:
        IndexError: If a non-blank line has fewer fields than
            ``column_names`` has entries.
    """
    with open(fname, 'r') as input_handler:
        for line in input_handler:
            line = line.rstrip('\n')  # drop only the line terminator
            if not line:
                continue  # e.g. a trailing blank line at end of file
            fields = line.split(',')  # split once per line, not per column
            yield {key: fields[i] for i, key in enumerate(column_names)}
# Generator over the rows of names.csv; each row is a dict keyed by
# 'name' and 'count'.
record_stream = records_from_file(
    'names.csv',
    ['name', 'count'],
)
class Object:
    """Record pairing a name with its count.

    Both constructor arguments are stored unchanged as the ``name`` and
    ``count`` attributes.
    """

    def __init__(self, name, count):
        self.name, self.count = name, count
# Row counter (starts at zero) and the initially empty collection that will
# hold one record per distinct name.
# NOTE: the name `list` hides the builtin of the same name in this module.
rownum, list = 0, []
def checkList(name, count):
    """Add ``count`` to the running total stored for ``name``.

    Searches the module-level ``list`` for a record whose ``name`` attribute
    equals ``name``.  If one exists its ``count`` is increased in place;
    otherwise a new ``Object(name, count)`` is appended.

    Args:
        name: Key identifying the record.
        count: Amount to add; anything accepted by ``int()``.

    Raises:
        ValueError: If ``count`` cannot be converted by ``int()``.
    """
    # Convert once, before the search, so that a newly created record also
    # stores an int (previously the conversion only ran inside the loop and
    # was skipped when the collection was still empty).
    count = int(count)
    for entry in list:
        if entry.name == name:
            entry.count += count
            return
    list.append(Object(name, count))
for record in record_stream:  # Fold every parsed row into the running totals
    name = record['name']
    count = int(record['count'])
    checkList(name, count)
    rownum += 1  # count every row (previously incremented only once, after the loop)
for each in list:  # Report one "name count" line per distinct name
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print('%s %s' % (each.name, each.count))
import pandas as pd
# The sample data has no header row: header=None stops pandas from consuming
# the first record as column labels (which silently dropped one data row),
# and names= labels the two columns directly.
df = pd.read_csv('names.csv', header=None, names=['name', 'count'])
df_grouped = df.groupby('name').sum()  # one row per name, counts summed
print(df_grouped)  # single-argument call form works on Python 2 and 3

最新更新