我正在尝试导入一个 CSV 文件,并将其中的值分配给不同的数据类(dataclass)。之后,我需要对这些数据类进行各种分析,例如均值、众数、中位数等。
我遇到的问题是:如何以便于后续使用的方式导入这些数据。我写了以下代码:
from dataclasses import dataclass, field
from typing import List
import csv
from csv import DictReader
@dataclass
class Grades:
    """A single course result: a course name paired with its grade."""

    class_course: str  # name of the course
    # Declared as int, but dataclasses do not coerce values: whatever the
    # caller passes in is stored unchanged.
    class_grades: int
@dataclass
class Student:
    """A student name together with the grades recorded for it."""

    name: str
    # default_factory gives each instance its own list rather than one
    # list shared between all instances.
    grades: List[Grades] = field(default_factory=list)
def create_student_db():
    """Load "Sample Data/Sample2.csv" into a single Student object.

    Every name in the file is appended to the same ``name`` list and every
    grade to the same ``grades`` list, so the grouping of grades by student
    is lost. Grades are stored as the raw strings read from the file.

    Returns:
        The one Student instance holding all names and all grades.
    """
    s = Student([], [])  # one shared object; name starts out as a list, not a str
    courses = []
    with open("Sample Data/Sample2.csv") as read_obj:
        # The DictReader is only used to pull the header row off the file.
        csv_dict_reader = DictReader(read_obj)
        # Plain reader on the same handle: it continues after the header row.
        csv_reader = csv.reader(read_obj, delimiter = ",")
        column_names = csv_dict_reader.fieldnames
        for col in column_names:
            if col == "Student Name":
                continue
            else:
                courses.append(col)  # every other column is a course name
        for row in csv_reader:
            n = row[0]  # first cell of a row is the student name
            s.name.append(n)
            i = 0
            while i < len(row)-1:
                # courses[i] labels the cell at row[i + 1] (offset by the name column)
                g = Grades(courses[i], row[i + 1])
                s.grades.append(g)
                i += 1
    return s
# Build the database and print the resulting Student object.
a = create_student_db()
print(a)
输入文件如下(CSV 格式):
Student Name,Course_1,Course_2,Course_3,Course_4
Johnny Rotten,10,20,20,40
Sid Vicious,90,50,30,10
Lars Larsson,90,10,30,60
John Jameson,90,90,90,90
输出如下:
Student(name=['Johnny Rotten', 'Sid Vicious', 'Lars Larsson', 'John
Jameson'], grades=[Grades(class_course='Course_1', class_grades='10'),
Grades(class_course='Course_2', class_grades='20'),
Grades(class_course='Course_3', class_grades='20'),
Grades(class_course='Course_4', class_grades='40'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='50'),
Grades(class_course='Course_3', class_grades='30'),
Grades(class_course='Course_4', class_grades='10'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='10'),
Grades(class_course='Course_3', class_grades='30'),
Grades(class_course='Course_4', class_grades='60'),
Grades(class_course='Course_1', class_grades='90'),
Grades(class_course='Course_2', class_grades='90'),
Grades(class_course='Course_3', class_grades='90'),
Grades(class_course='Course_4', class_grades='90')])
显然,这是一次学术练习,但我在听完讲座后还是遇到了问题。
有人能建议我如何完善这些数据类,使其结构合理,并且可以提取单个学生的均值、众数等统计值吗?
您就快成功了。让您困惑的地方在于:您把所有学生的名字和成绩都放进了同一个 Student 对象里,这说不通。
- 应该有一个 `Student` 的列表,其中每个 `Student` 都有一个 `name` 和一个 `grades`
- 我建议将类名从 `Grades` 更改为 `Course`,其属性为 `name` 和 `grade`;并将 Student 中的属性名从 `grades` 改为 `courses`。这样更容易理解:学生注册了若干门课程,每门课程都有一个成绩
- 此外,请记得将成绩转换为 `int`,这样稍后才能进行计算
from dataclasses import dataclass, field
from typing import List
import csv
from csv import DictReader
@dataclass
class Course:
    """A course a student is enrolled in, together with the grade obtained."""

    name: str  # course name
    grade: int  # numeric grade, kept as int so it can be used in calculations
@dataclass
class Student:
    """A student and the courses (with grades) they are enrolled in."""

    name: str
    # default_factory gives each student an independent list.
    courses: List[Course] = field(default_factory=list)
def create_student_db():
    """Read "Sample Data/Sample2.csv" and build one Student per data row.

    The first row is treated as the header: its first column labels the
    student name and every remaining column is a course name. Each later
    row becomes a Student whose courses pair those course names with the
    row's grades converted to int.

    Returns:
        A list of Student objects in file order; empty if the file has no
        rows at all.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a grade cell cannot be converted to int.
    """
    students = []
    # newline="" lets the csv module handle line endings itself.
    with open("Sample Data/Sample2.csv", newline="") as read_obj:
        # One reader is enough; sharing a handle between a DictReader and a
        # plain reader is fragile because both advance the same file position.
        csv_reader = csv.reader(read_obj, delimiter=",")
        header = next(csv_reader, None)
        if header is None:  # empty file: nothing to load
            return students
        courses = header[1:]  # skip the student-name column
        for row in csv_reader:
            if not row:  # csv.reader yields [] for blank lines
                continue
            student = Student(name=row[0])
            for course_name, grade in zip(courses, row[1:]):
                student.courses.append(Course(course_name, int(grade)))
            students.append(student)
    return students
# Build the list of Student objects and print it.
students = create_student_db()
print(students)
输出:
[Student(name='Johnny Rotten', courses=[Course(name='Course_1', grade=10), Course(name='Course_2', grade=20), Course(name='Course_3', grade=20), Course(name='Course_4', grade=40)]),
Student(name='Sid Vicious', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=50), Course(name='Course_3', grade=30), Course(name='Course_4', grade=10)]),
Student(name='Lars Larsson', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=10), Course(name='Course_3', grade=30), Course(name='Course_4', grade=60)]),
Student(name='John Jameson', courses=[Course(name='Course_1', grade=90), Course(name='Course_2', grade=90), Course(name='Course_3', grade=90), Course(name='Course_4', grade=90)])]
正如 @martineau 所建议的,您可以使用 `statistics` 模块中的函数。
示例:如果您想知道某个学生所有课程的平均成绩,可以这样写:
import statistics as st
# Mean grade over every course taken by the first student in the list.
st.mean(course.grade for course in students[0].courses)
输出:
22.5
我不清楚您的学生数据库应该采用什么格式,所以我只是把它做成了一个由 `Student` 实例组成的列表——我认为在完成练习的其余部分时,这种形式很容易与 `statistics` 模块结合使用。
另一种选择是使用字典:以学生姓名为键,映射到一个子字典,子字典再把课程名称映射到对应的成绩。
import csv
from csv import DictReader
from dataclasses import dataclass, field
from pprint import pprint, pp
from typing import List
# Path of the csv file to load; a relative path is resolved against the
# current working directory when the file is opened.
INPUT_FILEPATH = "Sample2.csv"
@dataclass
class Grade:
    """The grade a student obtained in one course."""

    class_course: str  # name of the course
    class_grade: int  # numeric grade for that course
@dataclass
class Student:
    """A student and the list of grades they obtained."""

    name: str
    # default_factory gives each student an independent list.
    grades: List[Grade] = field(default_factory=list)
def create_studentDB():
    """Read INPUT_FILEPATH and return a list with one Student per data row.

    The first row is the header; every column after the first is a course
    name. In each later row the first cell is the student name and the
    remaining cells are that student's grades, converted to int.

    Returns:
        A list of Student objects in file order; empty if the file has no
        rows at all.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a grade cell cannot be converted to int.
    """
    students = []
    with open(INPUT_FILEPATH, newline='') as read_obj:
        csv_reader = csv.reader(read_obj, delimiter=',')
        # Supplying a default keeps an empty file from raising StopIteration.
        fieldnames = next(csv_reader, None)  # Header row.
        if fieldnames is None:
            return students
        courses = fieldnames[1:]
        for row in csv_reader:
            if not row:  # csv.reader yields [] for blank lines
                continue
            name = row[0]
            grades = [Grade(course, int(grade)) for course, grade in zip(courses, row[1:])]
            students.append(Student(name, grades))
    return students
# Build the student list and pretty-print it.
studentDB = create_studentDB()
pprint(studentDB)
结果:
[Student(name='Johnny Rotten', grades=[Grade(class_course='Course_1', class_grade=10), Grade(class_course='Course_2', class_grade=20), Grade(class_course='Course_3', class_grade=20), Grade(class_course='Course_4', class_grade=40)]),
Student(name='Sid Vicious', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=50), Grade(class_course='Course_3', class_grade=30), Grade(class_course='Course_4', class_grade=10)]),
Student(name='Lars Larsson', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=10), Grade(class_course='Course_3', class_grade=30), Grade(class_course='Course_4', class_grade=60)]),
Student(name='John Jameson', grades=[Grade(class_course='Course_1', class_grade=90), Grade(class_course='Course_2', class_grade=90), Grade(class_course='Course_3', class_grade=90), Grade(class_course='Course_4', class_grade=90)])]