当我将查询结果插入Oracle甚至Netezza表时,下面的代码与queryResultToTable()
方法配合良好,所有记录都按预期加载。
import cx_Oracle
import pandas
import sys
from multiprocessing.pool import ThreadPool # Import ThreadPool to enable parallel execution
from sqlalchemy import create_engine, inspect # Import create_engine to use Pandas database function, e.g. dataframe.to_sql()
from sqlalchemy.dialects.oracle import
BFILE, BLOB, CHAR, CLOB, DATE,
DOUBLE_PRECISION, FLOAT, INTERVAL, LONG, NCLOB,
NUMBER, NVARCHAR, NVARCHAR2, RAW, TIMESTAMP, VARCHAR,
VARCHAR2
import netezza_dialect
class databaseOperation():
def queryResultToTable(self, sourceDBEngineURL, targetDBEngineURL, targetSchemaName, targetTableName, targetDataTypes, queryScript):
sourceDBEngine = create_engine(sourceDBEngineURL)
try:
with sourceDBEngine.connect() as sourceDBConnection:
try:
queryResult = pandas.read_sql(queryScript,sourceDBConnection)
except Exception as e:
print(e)
except Exception as e:
print(e)
return
targetDBEngine = create_engine(targetDBEngineURL)
try:
with targetDBEngine.connect() as targetDBConnection:
targetDBConnection.execution_options(autocommit = True) # sumbit commit() automatically
try:
queryResult.to_sql(targetTableName, targetDBConnection, targetSchemaName, if_exists = 'append', index = False, dtype = targetDataTypes, method = None)
# !!! method = 'multi' doesn't work for Oracle database
except Exception as e:
print(e)
except Exception as e:
print(e)
return
if __name__=='__main__':
db = databaseOperation()
sourceORAEngineURL = "....." # the format like "oracle+cx_oracle://user:pwd@server_address1/db1"
targetORAEngineURL = "....." # the format like "oracle+cx_oracle://user:pwd@server_address2/db2"
sql = "SELECT abc, def, ggg FROM table_name WHERE abc = 'txt'"
ORA_targetSCHEMANAME = 'hr'
ORA_targetTABLENAME = 'cmpresult'
ORA_tagetDATATYPES = {
'abc': NVARCHAR2(20),
'def': NVARCHAR2(100),
'ggg': NVARCHAR2(100)
}
db.queryResultToTable(sourceORAEngineURL, targetORAEngineURL, ORA_targetSCHEMANAME, ORA_targetTABLENAME, ORA_tagetDATATYPES, sql)
sys.exit(0)
但当我将method=None改为method=multi时,比如:
queryResult.to_sql(targetTableName, targetDBConnection, targetSchemaName, if_exists = 'append', index = False, dtype = targetDataTypes, method = 'multi')
使用相同的方法,Netezza运行良好,但Oracle得到的消息如下:
'CompileError' object has no attribute 'orig'
除此之外,没有显示更多信息,我也不知道是什么问题。我也试着打开或关闭Connection.execution_options(autocommit = True)
,但没有改变。有人能帮我吗?
oracle数据库似乎不支持这种方式。根据熊猫文件
在单个INSERT子句中传递多个值。它使用了一个特殊的SQL并非所有后端都支持语法。
SQL Alchemy文档注释:
两阶段事务不是由于驱动程序支持不佳,在cx_Oracle下支持。截至cx_Oracle6.0b1,两阶段事务的接口已被更改为更多地直接传递到底层OCI层自动化支持该系统的附加逻辑不是在SQLAlchemy中实现。
此问题建议使用cx_Oracle 的executemany