我的目标是将数据从h2o帧导出到sql。
我在使用 iterrows 时收到以下错误,并希望用 .apply 函数替换它。如能提供从 H2OFrame 将数据导出到 SQL 的示例(首选)或参考资料,将不胜感激。
代码片段是:
import numpy as np
import pandas as pd
from pandas import DataFrame
import pyodbc
import h2o
# Initialise the H2O session before any H2OFrame is created.
h2o.init()

# Sample payload: nine string columns, two rows each.
data = {
    'COL_1': ['C1 First value', 'C1 Second value'],
    'COL_2': ['C2 First value', 'C2 Second value'],
    'COL_3': ['C3 First value', 'C3 Second value'],
    'COL_4': ['C4 First value', 'C4 Second value'],
    'COL_5': ['C5 First value', 'C5 Second value'],
    'COL_6': ['C6 First value', 'C6 Second value'],
    'COL_7': ['C7 First value', 'C7 Second value'],
    'COL_8': ['C8 First value', 'C8 Second value'],
    'COL_9': ['C9 First value', 'C9 Second value'],
}

# The dict keys are already in the desired order (COL_1 .. COL_9), so they
# double as the explicit column list.
df = DataFrame(data, columns=list(data))

# Hand the pandas frame over to H2O.
h2oframe = h2o.H2OFrame(df)
# Connection details were redacted before posting (odbcName and serverName
# are placeholders).
odbcName = 'xxxxxx'
serverName = 'xxxxxxx'
odbcConnection = pyodbc.connect ('Driver={Sql Server};Server='+serverName+';Database='+odbcName+';Trusted_Connection=yes')
cursor = odbcConnection.cursor()

# TempTable has already been created in the database.
# NOTE: this loop is the statement that fails. H2OFrame does not provide
# iterrows(), so the call raises the AttributeError quoted in the traceback
# that follows. Converting first with h2oframe.as_data_frame() gives a pandas
# DataFrame, which does support iterrows().
for index, row in h2oframe.iterrows():
    # One parameterised INSERT per row; values are bound to the ? markers
    # rather than being formatted into the SQL text.
    cursor.execute(
        "INSERT INTO TempTable (COL_1,COL_2,COL_3,COL_4,COL_5,COL_6,COL_7,COL_8,COL_9) VALUES(?,?,?,?,?,?,?,?,?)",
        row.COL_1, row.COL_2, row.COL_3, row.COL_4, row.COL_5,
        row.COL_6, row.COL_7, row.COL_8, row.COL_9,
    )

# Commit once after all rows have been inserted, then release the cursor.
odbcConnection.commit()
cursor.close()
AttributeError Traceback (most recent call last)
<ipython-input-36-19764dc050b1> in <module>
----> 1 for index, row in h2oframe.iterrows():
2 cursor.execute("INSERT INTO TempTable (COL_1,COL_2,COL_3,COL_4,COL_5,COL_6,COL_7,COL_8,COL_9) VALUES(?,?,?,?,?,?,?,?,?)",row.COL_1, row.COL_2, row.COL_3, row.COL_4, row.COL_5, row.COL_6, row.COL_7, row.COL_8, row.COL_9)
3 odbcConnection.commit()
4 cursor.close()
AttributeError: 'H2OFrame' object has no attribute 'iterrows'
据我所知(AFAIK),H2OFrame 不能直接保存到 SQL 表中。但是您可以将整个帧保存为一个新的 Hive 表。请参见此处。
# Load the CSV into an H2OFrame.
iris_hex = h2o.import_file("iris/iris_wheader.csv")

# Persist the whole frame as a new Hive table through the given JDBC URL.
iris_hex.save_to_hive(
    jdbc_url="jdbc:hive2://hive-server:10000/default",
    table_name="airlines",
    format="parquet",
    table_path="/user/bob/tables/iris",
)
您也可以使用 h2oframe.as_data_frame() 将其转换回 pandas DataFrame,然后使用 iterrows()(如错误信息所述,.iterrows() 不是 H2OFrame 的属性)。