将数据从嵌套的字典和扁平列表或多对多表加载到sql中



为了简化,我有如下列表:

lst = [ 
{
“person_id”: HZT998, “name”: ‘john’, “skills”: [‘python’, ‘sql’, ‘r’], 
“extras”: {“likes_swimming”: False, “likes_cooking”: True}},
{
“person_id”: HTY954, “name”: ‘peter, “skills”: [‘python’, ‘r’, ‘c#’], 
“extras”: {“likes_swimming”: True, “likes_cooking”: False}}
]

我想将它们插入到SQL表中,如下所示:

人表:

likees_swimmingtbody> <<tr>2
person_inner_id (PK)person_idnamelikees_cooking
1HZT998约翰
HTY954彼得真正

假设列表中的键值&;lst&;(如。"person_id"等)总是存在,您只需要将这3个表的复杂列表修改为规范化列表:

lst = [ 
{
"person_id": "HZT998", "name": "john", "skills": ["python", "sql", "r"], "extras": {"likes_swimming": False, "likes_cooking": True}
}, 
{
"person_id": "HTY954", "name": "peter", "skills": ["python", "r", "c#"], "extras": {"likes_swimming": True, "likes_cooking": False}
} 
]
data_for_first_table = []    # saves each user dictonary without 'skills'
data_dict_first_table = {}   # user-dictonary-instance without 'skills'
data_for_second_table = []   # save each 'skill' separately
data_for_third_table = []    # save each skill-user dictonary matches
data_dict_third_table = {}   # skill-user-dictonary-instance

######################################
# modify complex List for SQL-Qeruies
######################################
for entry in lst:
for key in entry:
if key == "skills":
for skills in entry[key]:
#print(skills)
data_for_second_table.append(skills)
# Third Table - "Match Table"
data_dict_third_table[skills] = entry["person_id"]
elif key == "extras":
for extra in entry[key]:
#print(extra + ": " + str(entry[key][extra]))
data_dict_first_table[extra] = entry[key][extra]        
else:
#print(key + ": " + entry[key])
data_dict_first_table[key] = entry[key]

# store user-dictonary-instance without 'skills'
dict_copy  = data_dict_first_table.copy()
data_for_first_table.append(dict_copy)
# store skill-user-dictonary-instance
dict_copy  = data_dict_third_table.copy()
data_for_third_table.append(dict_copy)

### remove duplicates from 2nd list ###
data_for_second_table = list(dict.fromkeys(data_for_second_table))

print("####### TEST1 #########")
print(data_for_first_table)
print("####### TEST2 #########")
print(data_for_second_table)
print("####### TEST3 #########")
print(data_for_third_table)

…为了避免重复的数据条目(例如:设置People.person_idSkills.skillto主键.然后,您只需要执行sql查询:

######################
# Create SQL-Queries
######################
print("/* #### SQL-TEST #### */")
# INSERT(S)/UPDATE(S) for "Peaople"-Table
for entry in data_for_first_table:
raw_sql1 = "INSERT INTO People ( " + str([k for k in entry]).replace("[","").replace("]","").replace("'","") + " ) VALUES ( " + str([str(entry[k]) for k in entry]).replace("[","").replace("]","") + " ) ON DUPLICATE KEY UPDATE " + str([ k + " = " + str(entry[k]) for k in entry if k != 'person_id'] ).replace("[","").replace("]","").replace("'","").replace("= ","= '").replace(",","',") + "';"
print(raw_sql1)
# ONLY INSERT(S) for "Skills"-Table
for entry in data_for_second_table:
raw_sql2 = "INSERT IGNORE INTO Skills ( skill ) VALUES ( '" + entry + "' );"
print(raw_sql2)
# ONLY INSERT(S) for "Skills_People"-Table  
for entry in data_for_third_table:
for multiple_entry in entry:
raw_sql3= "INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( '" + entry[multiple_entry]  + "', '" + multiple_entry + "' );"
print(raw_sql3)
注意:这只适用于table-column-names完全匹配list-key-names.此外,键和值(Type = 'CHAR')通过简单的python字符串操作".replace()"
/* #### SQL-TEST #### */
INSERT INTO People ( person_id, name, likes_swimming, likes_cooking ) VALUES ( 'HZT998', 'john', 'False', 'True' ) ON DUPLICATE KEY UPDATE name = 'john', likes_swimming = 'False', likes_cooking = 'True';
INSERT INTO People ( person_id, name, likes_swimming, likes_cooking ) VALUES ( 'HTY954', 'peter', 'True', 'False' ) ON DUPLICATE KEY UPDATE name = 'peter', likes_swimming = 'True', likes_cooking = 'False';
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'python' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'sql' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'r' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'c#' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'python' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'sql' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'r' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'python' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'sql' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'r' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'c#' );

最新更新