从多行/多列的 CSV 文件创建数据类



我正在尝试导入一个 csv 文件,并将其中的值分配给不同的数据类。在此之后,我需要对这些数据类做各种统计分析,如均值、众数、中位数等。

我遇到的问题是以一种我以后可以使用的方式导入它们。我写了以下代码:

from dataclasses import dataclass, field
from typing import List
import csv
from csv import DictReader
@dataclass
class Grades:
    """One course result: a course name paired with the grade obtained in it."""

    class_course: str  # name of the course
    # Annotated as int, but dataclasses neither check nor convert the value,
    # so whatever the caller passes in is stored unchanged.
    class_grades: int
@dataclass
class Student:
    """A student record: a name plus the Grades entries attached to it."""

    name: str
    # default_factory gives every instance its own fresh list rather than
    # one list shared by all instances.
    grades: List[Grades] = field(default_factory=list)
def create_student_db():
    """Read "Sample Data/Sample2.csv" and collect its contents into one Student.

    The header row supplies the course names (every column except
    "Student Name"). For each data row, the first cell is appended to
    ``s.name`` and one Grades entry per remaining cell is appended to
    ``s.grades``. Grade values are stored as the strings read from the file;
    no numeric conversion is performed.

    Returns:
        The single Student instance that holds every name and every grade.
    """
    s = Student([], [])
    courses = []
    with open("Sample Data/Sample2.csv") as read_obj:
        # DictReader is used only to obtain the column names from the header.
        csv_dict_reader = DictReader(read_obj)
        # Both readers pull lines from the same file object, so once the
        # header has been consumed via ``fieldnames`` below, csv_reader
        # starts at the first data row.
        csv_reader = csv.reader(read_obj, delimiter=",")
        column_names = csv_dict_reader.fieldnames
        # Build the list of course names: every column but the name column.
        for col in column_names:
            if col == "Student Name":
                continue
            else:
                courses.append(col)
        for row in csv_reader:
            n = row[0]  # the first cell is the student's name
            s.name.append(n)  # every name goes into the same Student
            i = 0
            # Pair courses[i] with row[i + 1]: grades start one cell after
            # the name, hence the offset.
            while i < len(row) - 1:
                g = Grades(courses[i], row[i + 1])
                s.grades.append(g)  # every grade goes into the same Student
                i += 1
    return s
# Build the database from the CSV file and print the resulting Student object.
a = create_student_db()
print(a)

输入文件如下(csv格式):

Student Name,Course_1,Course_2,Course_3,Course_4
Johnny Rotten,10,20,20,40
Sid Vicious,90,50,30,10
Lars Larsson,90,10,30,60
John Jameson,90,90,90,90

输出如下:

Student(name=['Johnny Rotten', 'Sid Vicious', 'Lars Larsson', 'John
Jameson'], grades=[Grades(class_course='Course_1', class_grades='10'),
Grades(class_course='Course_2', class_grades='20'),
Grades(class_course='Course_3', class_grades='20'),
Grades(class_course='Course_4', class_grades='40'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='50'),
Grades(class_course='Course_3', class_grades='30'),
Grades(class_course='Course_4', class_grades='10'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='10'),
Grades(class_course='Course_3', class_grades='30'),
Grades(class_course='Course_4', class_grades='60'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='90'),
Grades(class_course='Course_3', class_grades='90'),
Grades(class_course='Course_4', class_grades='90')])

显然,这是一次学术练习,但我在听讲座后遇到了问题。

有人能建议我该如何构造这些数据类,使其结构合理,并且能够提取单个学生的均值、众数等统计值吗?

你已经很接近了。问题在于,你把所有学生的名字和成绩都放进了同一个 Student 对象里,这说不通。

  • 有一个Student的列表,其中每个Student将有一个name和一个grades
  • 我建议把类名从 Grades 改为 Course,其属性为 name 和 grade;并把 Student 中的属性名从 grades 改为 courses。这样更容易理解:学生注册了若干课程,每门课程都会有一个成绩
  • 此外,请记住将成绩转换为int。那样你可以稍后进行计算

from dataclasses import dataclass, field
from typing import List
import csv
from csv import DictReader
@dataclass
class Course:
    """A course a student is enrolled in, together with the grade obtained."""

    name: str  # course name
    grade: int  # grade obtained in that course
@dataclass
class Student:
    """A student: a name plus the list of Course entries the student took."""

    name: str
    # default_factory gives every Student its own fresh list rather than
    # one list shared by all instances.
    courses: List[Course] = field(default_factory=list)
def create_student_db(path="Sample Data/Sample2.csv"):
    """Load a grades CSV file into a list of Student records.

    The file must start with a header row containing a "Student Name"
    column; every other column is treated as a course whose cells hold
    integer grades.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one Student per non-blank data row, in file order. Each
        Student carries one Course per course column that has a value in
        that row. An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If there are data rows but no "Student Name" column.
        ValueError: If a grade cell is not a valid integer.
    """
    students = []
    # newline="" is what the csv module asks for, so that it can interpret
    # line endings (including ones inside quoted fields) itself.
    with open(path, newline="") as read_obj:
        # One DictReader both consumes the header and yields the rows, so no
        # second reader has to share the same file position. It also skips
        # blank lines on its own.
        csv_dict_reader = DictReader(read_obj)
        # fieldnames is None for an empty file; treat that as "no columns".
        column_names = csv_dict_reader.fieldnames or []
        course_names = [col for col in column_names if col != "Student Name"]
        for row in csv_dict_reader:
            student = Student(name=row["Student Name"])
            for course_name in course_names:
                cell = row[course_name]
                # DictReader pads short rows with None; skip those cells so a
                # row with fewer grades simply yields fewer Course entries.
                if cell is not None:
                    student.courses.append(Course(course_name, int(cell)))
            students.append(student)
    return students
# Load every student from the CSV file and print the resulting list.
students = create_student_db()
print(students)

输出:

[Student(name='Johnny Rotten', courses=[Course(name='Course_1', grade=10), Course(name='Course_2', grade=20), Course(name='Course_3', grade=20), Course(name='Course_4', grade=40)]),
Student(name='Sid Vicious', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=50), Course(name='Course_3', grade=30), Course(name='Course_4', grade=10)]),
Student(name='Lars Larsson', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=10), Course(name='Course_3', grade=30), Course(name='Course_4', grade=60)]),
Student(name='John Jameson', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=90), Course(name='Course_3', grade=90), Course(name='Course_4', grade=90)])]

正如@martineau所建议的,您可以使用statistics模块中的函数。

示例:

例如,想知道某个学生(这里取列表中的第一个学生)所有课程的平均成绩,可以这样写:

import statistics as st
# Mean grade over all courses of the first student in the list.
st.mean(course.grade for course in students[0].courses)

输出:

22.5

我不清楚你的学生数据库应该是什么格式,所以我只是把它列为Student实例的列表——我认为在完成剩下的练习时,它很容易与statistics模块结合使用。

另一种选择是使用字典:把每个学生映射到一个由课程名称及对应成绩组成的子字典。

import csv
from csv import DictReader
from dataclasses import dataclass, field
from pprint import pprint, pp
from typing import List

# Path of the CSV file that create_studentDB() opens.
INPUT_FILEPATH = "Sample2.csv"
@dataclass
class Grade:
    """The grade obtained in one course."""

    class_course: str  # name of the course
    class_grade: int  # grade obtained in that course
@dataclass
class Student:
    """A student: a name plus the list of Grade entries for that student."""

    name: str
    # default_factory gives every Student its own fresh list rather than
    # one list shared by all instances.
    grades: List[Grade] = field(default_factory=list)
def create_studentDB(filepath=None):
    """Read a grades CSV file into a list of Student records.

    The first row is the header: its first column is the student-name column
    and every following column is a course name. In each data row the first
    cell is the student's name and the remaining cells are integer grades,
    paired with the course names by position.

    Args:
        filepath: Path of the CSV file. When omitted, the module-level
            INPUT_FILEPATH is used.

    Returns:
        A list with one Student per non-blank data row, in file order. An
        empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a grade cell is not a valid integer.
    """
    if filepath is None:
        # Looked up at call time so a changed INPUT_FILEPATH is honoured.
        filepath = INPUT_FILEPATH
    students = []
    with open(filepath, newline='') as read_obj:
        csv_reader = csv.reader(read_obj, delimiter=',')
        # Header row; the None default avoids StopIteration on an empty file.
        fieldnames = next(csv_reader, None)
        if fieldnames is None:
            return students
        courses = fieldnames[1:]
        for row in csv_reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            name = row[0]
            grades = [
                Grade(course, int(grade))
                for course, grade in zip(courses, row[1:])
            ]
            students.append(Student(name, grades))
    return students
# Load the student records and pretty-print the resulting list.
studentDB = create_studentDB()
pprint(studentDB)

结果:

[Student(name='Johnny Rotten', grades=[Grade(class_course='Course_1', class_grade=10), Grade(class_course='Course_2', class_grade=20), Grade(class_course='Course_3', class_grade=20), Grade(class_course='Course_4', class_grade=40)]),
Student(name='Sid Vicious', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=50), Grade(class_course='Course_3', class_grade=30), Grade(class_course='Course_4', class_grade=10)]),
Student(name='Lars Larsson', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=10), Grade(class_course='Course_3', class_grade=30), Grade(class_course='Course_4', class_grade=60)]),
Student(name='John Jameson', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=90), Grade(class_course='Course_3', class_grade=90), Grade(class_course='Course_4', class_grade=90)])]

最新更新