Python中的cx_Oracle executemany()将分页的API JSON数据加载到Oracle数据库

tbls=['tbl1'，'tbl2'，'tbl3']

for tbl in tbls:
apidata = []
offset = 0
limit = 3000

while True:
print("----")
baseurl = f'someurl'
url = baseurl + tbl + '?sysparm_offset=' + '{}'.format(offset) + '&sysparm_limit=' + '{}'.format(limit)
print("Requesting", url)
response = requests.get(url, auth=(apiuser, apipwd), headers=headers )
data = response.json()
json_string = json.dumps(data)
df = pd.DataFrame(data)
try:
with cx_Oracle.connect(dbuser, dbpass, dsn, encoding='UTF-8') as connection:
cursor = connection.cursor()
sql = 'insert into ' + tbl + '(req_data) values (:req_data)'
cursor.execute(sql, [json_string]) #works fine with this sql but super slow

#cursor.executemany(sql, ((i,) for i in data['result'])) #TypeError: parameters should be a list of sequences/dictionaries or an integer specifying the number of times to execute the statement
#cursor.executemany(sql, df.values.tolist()) #Python value of type dict not supported.
connection.commit()
except cx_Oracle.Error as error:
print('CX_Oracle Error occurred:')
print(error)
if len(data['result']) == 0:
break
else:
apidata.extend(data['result'])
offset = offset + 3000
continue

我试图提取json格式的api数据，然后尝试插入到Oracle中的json表中。我的代码遍历tbls列表中的每个tbl，每次提取3000行分页的api数据，并循环使用api表，直到它完成提取。一旦它完成拉动，它就对tbl中的剩余"tbl"执行同样的操作。当使用cursor.execute但速度非常慢(api表有>150k行)时，这可以很好地工作。

当我尝试使用executemany时，它似乎对我不起作用。我知道对于execute，我必须提供一个列表，我在data['result']中尝试过(I，)for I)，也尝试过panda数据帧df.values.tolist()。我仍然是一个Python迷，如果你能指出我在这里做错了什么，我将不胜感激。我看了很多关于死刑的文章/例子，但我在这方面仍然有困难。

apidata样本｛'结果'：[｛'所有者'：｛'链接'：'https://someurl.com/api/now/table/sys_user/39aec4146fedda00f3ab4ecbbb3ee4ec'，'value'：'39aec4146fedda00f3ab4ecbb3ee4ec'｝，'sys_id'：'00c67bbbdbcbcb2c01e05fb541d96196b'，'sys_updated_by'：'xa0380'，'ssys_created_on'：'2017-02-14 22:25:04'，'document'：｛'link'：'https://someurl.com/api/now/table/vtb_task/b1b6bb7bdbbcb2c01e05fb541d961923'，'value'：'b1b6bb7bdbcbcbc01e05fb541d961923'｝，'name'：''，'sys_mod_count'：'0'，'sys_updated_on'：'2017-02-14 22:25:04'，'sys_tags'：''，'sys_created_by'：'xa0380'，'table'：'vtb_task'｝]}

apidata的长度因表而异，但由于我在oracle中将其作为json插入，所以这无关紧要。

Oracle json表ddl：创建表tbl1(req_data blob检查(req_data是json))；

Oracle版本-19cCX_Oracle版本-8.3

提前谢谢。如果您需要任何其他信息，请告诉我。

尝试与executemany()绑定，如下所示：

import cx_Oracle as oracledb
import os
import sys
if sys.platform.startswith('darwin'):
oracledb.init_oracle_client(lib_dir=os.environ.get('HOME')+'/Downloads/instantclient_19_8')
un = os.environ.get('PYTHON_USERNAME')
pw = os.environ.get('PYTHON_PASSWORD')
cs = os.environ.get('PYTHON_CONNECTSTRING')
connection = oracledb.connect(user=un, password=pw, dsn=cs)
# For the SELECT statement
def output_type_handler(cursor, name, default_type, size, precision, scale):
if default_type == oracledb.CLOB:
return cursor.var(oracledb.LONG_STRING, arraysize=cursor.arraysize)
if default_type == oracledb.BLOB:
return cursor.var(oracledb.LONG_BINARY, arraysize=cursor.arraysize)
connection.outputtypehandler = output_type_handler

d1 = """{'result': [{'owner': {'link': 'https://someurl.com/api/now/table/sys_user/39aec4146fedda00f3ab4ecbbb3ee4ec', 'value': '39aec4146fedda00f3ab4ecbbb3ee4ec'}, 'sys_id': '00c67bbbdbbcb2c01e05fb541d96196b', 'sys_updated_by': 'xa0380', 'sys_created_on': '2017-02-14 22:25:04', 'document': {'link': 'https://someurl.com/api/now/table/vtb_task/b1b6bb7bdbbcb2c01e05fb541d961923', 'value': 'b1b6bb7bdbbcb2c01e05fb541d961923'}, 'name': '', 'sys_mod_count': '0', 'sys_updated_on': '2017-02-14 22:25:04', 'sys_tags': '', 'sys_created_by': 'xa0380', 'table': 'vtb_task'}]}"""
d2 = """{'result': [{'owner': {'link': 'https://someurl2.com/api/now/table/sys_user/xyz', 'value': 'xyz'}, 'sys_id': 'xyz', 'sys_updated_by': 'xa0380', 'sys_created_on': '2017-02-14 22:25:04', 'document': {'link': 'https://someurl.com/api/now/table/vtb_task/b1b6bb7bdbbcb2c01e05fb541d961923', 'value': 'xyz'}, 'name': '', 'sys_mod_count': '0', 'sys_updated_on': '2017-02-14 22:25:04', 'sys_tags': '', 'sys_created_by': 'xa0380', 'table': 'vtb_task'}]}"""
data = [
{"req_data": d1},
{"req_data": d2}
]
sql = 'insert into tbl1 (req_data) values (:req_data)'
with connection.cursor() as cursor:
cursor.executemany(sql, data)

with connection.cursor() as cursor:
cursor.execute("select * from tbl1")
r = cursor.fetchall()
print(r)

或者，您可以更改语句以使用命名的绑定变量，然后构造如下数据：

data = [
(d1,),
(d2,)
]
sql = 'insert into tbl1 (req_data) values (:1)'

其他加快应用程序速度的方法：

避免连接太多次。而是连接到while环路之外
试试cx_Oracle的新主要版本，它有一个"精简"模式。更新被重命名为"python oracledb"，请参阅发布公告

相关内容

最新更新

热门标签：