MySQL连接器+Python的内存泄漏



我在Windows 10(i7-9700K CPU 3.60 GHz(ram 16 Go上使用MySQL 5.7.24和Laragon。Python 3.7通过Anaconda+Mysql连接器8.0.18。我正在对我的数据库进行一些查询和更新,该数据库中最大的表有1200万行。

我做了一个循环(使用缓冲和准备好的游标(,每次占用1000行,然后进行计算和更新,然后关闭连接。最后,如果数据库中还有行要更新(我有一个布尔值(,则循环调用自己,每个循环关闭每个游标和连接,并使用新的连接重新启动。

但内存永远不会释放,我的电脑每次都会在四五个小时后冻结。我读过一些关于这个主题的信息,但不清楚是bug还是其他什么。

你的建议是什么?感谢

编辑:这是我的一个代码,他清理了我数据库中的一些马的名字。

import mysql.connector
from mysql.connector import errorcode

def main():
CONNECTION_STRING = {
'user': 'user',
'passwd': 'pass',
'host': 'localhost',
'database': 'my_database',
'raise_on_warnings': True
}

try:
cnx = mysql.connector.connect(**CONNECTION_STRING)
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
else:
print("Vous êtes connecté")
curGet = cnx.cursor(buffered=True)
curU = cnx.cursor(prepared=True)
curS2 = cnx.cursor()
curS3 = cnx.cursor()

# queries
get_cheval = ('''
SELECT cheval.id as cId, cheval.nom_pt as nPt
FROM cheval
WHERE nom_pt IS NOT NULL
AND nom IS NULL 
LIMIT 1000
;''')

update_cheval = ('''
UPDATE cheval
SET nom = %s, pays_id = %s
WHERE id = %s     
;''')

select_contry_alpah2 = ('''
SELECT id FROM pays
WHERE alpha2_pt = %s
;''')

select_contry_alpha3 = ('''
SELECT id pId FROM pays
WHERE alpha3_pt = %s
;''')

curGet.execute(get_cheval)
list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
is_there_row = False  # for the loop
for (cId, nPt) in curGet:
is_there_row = True
split_nom_pt = nPt.strip().split(' ')
print(split_nom_pt)
contry = split_nom_pt[-1].replace('(', '').replace(')', '')

if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
curS3.execute(select_contry_alpha3, (contry,))
pays_id_tmp = curS3.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None

elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
curS2.execute(select_contry_alpah2, (contry,))
pays_id_tmp = curS2.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None

else:
pays_id = 75
nom = nPt.strip()

curU.execute(update_cheval, (nom, pays_id, cId))
cnx.commit()
print('updated:' + str(nom) + '; ' + str(pays_id))

curGet.close()
curS2.close()
curS3.close()
curU.close()
cnx.close()
# LOOP
if is_there_row:
is_there_row = False
main()
else:
print('End')

if __name__ == '__main__':
main()

正如我在评论中所说,代码正在泄漏内存,因为只有在所有调用返回后才能完成第一个main,考虑到函数变量无法从内存中删除。

import mysql.connector
from mysql.connector import errorcode
# First suggestion: Move constants outside of the function
# this way they'll be created only once
connection_string = {
'user': 'user',
'passwd': 'pass',
'host': 'localhost',
'database': 'my_database',
'raise_on_warnings': True
}
# queries
get_cheval = ('SELECT cheval.id as cId, cheval.nom_pt as nPt'
'FROM cheval'
'WHERE nom_pt IS NOT NULL'
'AND nom IS NULL'
'LIMIT 1000;') # Maybe you're missing an offset here? This query should always return the same first 1000 results. (Maybe you're updating the horses so that they don't match the query thought) 
update_cheval = ('UPDATE cheval'
'SET nom = %s, pays_id = %s'
'WHERE id = %s;')
select_contry_alpah2 = ('SELECT id FROM pays'
'WHERE alpha2_pt = %s;')
select_contry_alpha3 = ('SELECT id pId FROM pays'
'WHERE alpha3_pt = %s;')

def main():
done=False
while not done:
try:
cnx = mysql.connector.connect(**connection_string)
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
continue # Go back and try again

print("Vous êtes connecté")
curGet = cnx.cursor(buffered=True)
curU = cnx.cursor(prepared=True)
curS2 = cnx.cursor()
curS3 = cnx.cursor()

while cnx.is_connected():
curGet.execute(get_cheval)
result = curGet.fetchone()
if result is None:
done = True 
break;
list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
for (cId, nPt) in curGet:
split_nom_pt = nPt.strip().split(' ')
print(split_nom_pt)
contry = split_nom_pt[-1].replace('(', '').replace(')', '')

if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
curS3.execute(select_contry_alpha3, (contry,))
pays_id_tmp = curS3.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None

elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
curS2.execute(select_contry_alpah2, (contry,))
pays_id_tmp = curS2.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None

else:
pays_id = 75
nom = nPt.strip()

curU.execute(update_cheval, (nom, pays_id, cId))
cnx.commit()
print('updated:' + str(nom) + '; ' + str(pays_id))     

curGet.close()
curS2.close()
curS3.close()
curU.close()
cnx.close()

if __name__ == '__main__':
main()

最新更新