Microsoft Purview支持Azure Database for MySQL类型的资源。我有一个用于MariaDB的Azure数据库(也是mySQL的一种风格(,但似乎我无法将其注册为源。有没有一种方法可以注册和扫描Azure数据库中的MariaDB资源?
不幸的是,Azure Purview不支持Azure Maria DB作为数据源(截至8/22(。希望他们很快会支持。在此之前,以下python代码扫描Maria DB服务器并返回数据库、表和列的Atlas实体集合可能很有用。
from mysql.connector import errorcode
from pyapacheatlas.auth import ServicePrincipalAuthentication
from pyapacheatlas.core import PurviewClient
from pyapacheatlas.core import AtlasEntity, AtlasProcess
from pyapacheatlas.core import AtlasAttributeDef, EntityTypeDef, RelationshipTypeDef
from pyapacheatlas.core.util import GuidTracker
from mysql.connector import errorcode
import mysql.connector
def createMariaDbEntities(gt, dbConnParams, serverUri, serverName):
mariaSeverEntity = AtlasEntity(
name=serverName,
typeName="azure_mariadb_server",
qualified_name=serverUri,
guid=gt.get_guid()
)
entities = []
entities.append(mariaSeverEntity)
try:
conn = mysql.connector.connect(**dbConnParams)
#print("Connection established")
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with the user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
else:
cursor = conn.cursor()
# find all databases
enumerateDatabasesQuery = "show databases"
cursor.execute(enumerateDatabasesQuery)
res = cursor.fetchall()
databases = [r[0] for r in res if r[0][0].isnumeric()]
for (db) in databases:
# find all the tables in the database
readTablesQuery = "SHOW TABLE STATUS FROM `{0}`;".format(db)
cursor.execute(readTablesQuery)
rows = cursor.fetchall()
tables = [row[0] for row in rows if int(row[4]) > 0]
if (len(tables) > 0):
#print("datebase:", db)
dbEntity = AtlasEntity(
name=db,
typeName="azure_mariadb_db",
qualified_name="{0}/{1}".format(serverUri, db),
guid=gt.get_guid(),
server=mariaSeverEntity
)
entities.append(dbEntity)
for table in tables:
# print("Table:",table)
tableEntity = AtlasEntity(
name=table,
typeName="azure_mariadb_table",
qualified_name="{0}/{1}/{2}".format(serverUri, db, table),
guid=gt.get_guid(),
db=dbEntity
)
entities.append(tableEntity)
# find all the columns in the table
columnsQuery = "SHOW COLUMNS FROM `{0}`.`{1}`;".format(db, table)
cursor.execute(columnsQuery)
rows = cursor.fetchall()
columns = [(row[0], row[1]) for row in rows]
for column in columns:
# print("Column:",column)
columnEntity = AtlasEntity(
name=column[0],
attributes={
"dataType": column[1]
},
typeName="azure_mariadb_table_column",
qualified_name="{0}/{1}/{2}/{3}".format(serverUri, db, table, column[0]),
guid=gt.get_guid()
)
columnEntity.addRelationship(table=tableEntity)
entities.append(columnEntity)
# Cleanup
conn.commit()
cursor.close()
conn.close()
return entities