从txt文件创建字典:以年份为键,以值列表为值



我有下面的txt文件

2014,STAR,105,234
2014,COMET,2877,1426
2014,ASTEROID,73,68
2014,PLANET,134,143
2014,"ACTIVE STARS,DEADSTARS",166,125
2015,STAR,69,24
2015,ASTEROID,59,32
2015,PLANET,42,13
2015,STAR,79,33
2015,BLACK HOLES,8,3
2015,"ACTIVE STARS,DEADSTARS",19,16
2015,ASTEROID,12,0
2016,STAR,120,47
2016,"ACTIVE STARS,DEADSTARS",4,1
2016,PLANET,14,12
2016,ASTEROID,21,1

我需要写一个函数来读取该文件,并创建一个字典:以年份为键,以一个列表为值,列表中包含txt文件里该年份第3列和第4列各自的总和。

def file_read(fname):
    """Sum the last two numeric columns of a CSV-style text file per year.

    Each non-blank line is expected to look like ``year,name,value3,value4``,
    where ``name`` may be a quoted field that itself contains commas
    (e.g. ``2014,"ACTIVE STARS,DEADSTARS",166,125``).

    Args:
        fname: Path of the text file to read.

    Returns:
        A dict mapping each year (as the string found in the file) to a
        ``(sum_of_third_column, sum_of_fourth_column)`` tuple.

    Raises:
        ValueError: If one of the two value columns is not an integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    # Local import so the function is self-contained in this snippet.
    import csv

    totals = {}
    # newline='' is what the csv module asks for when it is handed a file.
    with open(fname, 'r', newline='') as document:
        for row in csv.reader(document):
            # Skip empty or whitespace-only lines instead of crashing on them.
            if not any(field.strip() for field in row):
                continue
            year = row[0].strip()
            # The values are always the last two fields, so this works both
            # for properly quoted names and for names split by a stray comma.
            third = int(row[-2])
            fourth = int(row[-1])
            # Accumulate per year; a plain dict(zip(...)) would keep only the
            # last row seen for each year.
            running3, running4 = totals.get(year, (0, 0))
            totals[year] = (running3 + third, running4 + fourth)
    return totals

我不知道你到底想要什么。也许这对你有用:

import pandas as pd
def file_read(fname):
    """Return per-year sums of the third and fourth columns of a CSV file.

    The file has no header row; every line is ``year,name,value3,value4``.

    Args:
        fname: Path of the comma-separated file to read.

    Returns:
        A dict mapping each year to ``[sum_of_col3, sum_of_col4]``.
    """
    # header=None is essential: without it pandas consumes the first data
    # row as column names and that row is silently left out of the sums.
    df = pd.read_csv(
        fname,
        sep=',',
        header=None,
        names=['Year', 'col2', 'col3', 'col4'],
    )
    # One groupby for both columns keeps every sum attached to its own year,
    # instead of pairing sorted group sums with years in order of appearance.
    sums = df.groupby('Year')[['col3', 'col4']].sum()
    years = sums.index.tolist()
    col3_totals = sums['col3'].tolist()
    col4_totals = sums['col4'].tolist()
    return {
        year: [total3, total4]
        for year, total3, total4 in zip(years, col3_totals, col4_totals)
    }

输出:

{2014: [3250, 1762], 2015: [288, 121], 2016: [159, 61]}

您可以尝试以下操作:

import csv 
from itertools import groupby
def conv(s):
    """Return ``s`` converted to ``int``, or ``s`` itself if that fails.

    Only ``ValueError`` is handled; any other exception raised by ``int``
    propagates to the caller.
    """
    try:
        number = int(s)
    except ValueError:
        # Not an integer literal: hand the value back untouched.
        return s
    return number

# Build ``result``: year -> (sum of third column, sum of fourth column).
result = {}
# newline='' is what the csv module asks for when it is handed a file.
with open(ur_file, newline='') as f_in:
    # Drop blank rows so the row[0] lookups below cannot fail on them.
    rows = [row for row in csv.reader(f_in) if row]

# groupby only merges *adjacent* rows with the same key, so sort by year
# first; otherwise a year that reappears later would overwrite its own
# earlier partial sums.
rows.sort(key=lambda x: x[0])
for k, g in groupby(rows, key=lambda x: x[0]):
    sl = list(g)
    # conv(k) turns the year into an int where possible, so the keys are
    # numbers rather than the raw strings read from the file.
    result[conv(k)] = (
        sum(conv(e[2]) for e in sl),
        sum(conv(e[3]) for e in sl),
    )
>>> result
{2014: (3355, 1996), 2015: (288, 121), 2016: (159, 61)}

最新更新