我正在使用EF6作为ETL工具的负载层。我承认有更好的工具(例如SSI,直接SQL查询等),但是由于转换的复杂性,需要在代码中完成,并且目标数据库是通过EF模型构建的。批处理中插入的记录次数超过100,000 记录。这并不是令人难以置信的慢(使用DBContext.AddRange()
方法),但是内存使用率非常高(超过1GB
)
以示例为例,我有以下数据类(在内存中创建的数据类)
public class Foo
{
public long FooID { get; set; }
public string SomeProperty { get; set; }
public decimal AverageFlightSpeedOfUnladenSwallow { get; set; }
public IEnumerable<Bar> Bars { get; set; }
}
public class Bar
{
public long BarID { get; set; }
public Foo Foo { get; set; }
public long FooID { get; set; }
public string FavoriteColour { get; set; }
}
dbContext.Foos.AddRange(ListOfFoos); //Pre constructed list of Foos
dbContext.Bars.AddRange(ListOfBars); //Pre constructed list of Bars (parent Foo items populated, FooID is not)
dbContext.SaveChanges();
我正在考虑使用LINQ实体数据读取器启用IList<Foo>
转换为数据读取器,以便我可以使用sqlbulkcopy(sqlbulkcopy and entity框架,http://archive.msdn.microsoft.com/linqentitydatareader/release/projectReleases.aspx?releaseid=389)。
要求List<Bar>
将没有父级Foo
类的ID。实体框架可以很好地处理此问题,但是我不确定如何在SQLBULKCOPY中获得相同的功能。有什么方法可以完成吗?
不,没有直接的方法可以使用SQL Bulkcopy进行此操作。
SQL Bulkcopy非常接近数据库,因此非常快。ORM处理FK/PK关系,但缺点是慢。
根据您的datamodel,您可以在此问题中执行类似的操作:填充datatables
sql bulkcopy yyyymmdd问题
so,
提供您的EF用户可以在数据库上更改模式,您可以追求这种解决问题的方法:
- 将GUID列添加到表
- 通过GUID识别内存中的每个对象
- 通过包括识别GUID的批量插入插入值
- 选择它们并将其映射到插入的GUID
- 删除GUID列
这里有一些代码可以做到这一点。它有点脏,没有优化,但是将原始任务降至30MB的内存和1分钟的过程
public static class ForeignKeyBulkInsert
{
private const string GUID_COLUMN_NAME = "GUID_SURROGATE_KEY";
public static string GetTableName<T>(this ObjectContext context) where T : class
{
string sql = context.CreateObjectSet<T>().ToTraceString();
Regex regex = new Regex("FROM (?<table>.*) AS");
Match match = regex.Match(sql);
string table = match.Groups["table"].Value;
return table;
}
public static void AddRange<TEntity>(this DbContext db, IEnumerable<TEntity> range, bool importForeignKeyIDs = false)
where TEntity : class
{
Dictionary<Guid, TEntity> lookup = new Dictionary<Guid, TEntity>();
var objectContext = ((IObjectContextAdapter)db).ObjectContext;
var os = objectContext.CreateObjectSet<TEntity>();
bool hasAutoGeneratedKey = os.EntitySet.ElementType.KeyProperties.Any();
Type entityType = typeof(TEntity);
if (importForeignKeyIDs)
{
var foreignKeyProperties = os.EntitySet.ElementType.NavigationProperties.Where(x => x.ToEndMember.RelationshipMultiplicity == RelationshipMultiplicity.One);
foreach (var foreignKeyProperty in foreignKeyProperties)
{
var foreignKeyIdProperty = foreignKeyProperty.GetDependentProperties().First();
var parentKeyProperty = foreignKeyProperty.ToEndMember.GetEntityType().KeyMembers.First();
PropertyInfo foreignKeyPropertyInfo = null;
Type parentType = null;
PropertyInfo parentKeyPropertyInfo = null;
PropertyInfo foreignKeyIdPropertyInfo = null;
foreach (var item in range)
{
entityType.GetProperty(foreignKeyProperty.Name).GetValue(item);
if (foreignKeyPropertyInfo == null)
foreignKeyPropertyInfo = entityType.GetProperty(foreignKeyProperty.Name);
if (parentType == null)
parentType = foreignKeyPropertyInfo.GetValue(item).GetType();
if (parentKeyPropertyInfo == null)
parentKeyPropertyInfo = parentType.GetProperty(parentKeyProperty.Name);
if (foreignKeyIdPropertyInfo == null)
foreignKeyIdPropertyInfo = entityType.GetProperty(foreignKeyIdProperty.Name);
var foreignKey = foreignKeyPropertyInfo.GetValue(item);
if (foreignKey == null)
break;
var parentKey = parentKeyPropertyInfo.GetValue(foreignKey);
foreignKeyIdPropertyInfo.SetValue(item, parentKey);
}
}
}
string tableName = objectContext.GetTableName<TEntity>();
var entityReader = range.AsDataReader(GUID_COLUMN_NAME, lookup);
if (hasAutoGeneratedKey)
{
try
{
db.Database.ExecuteSqlCommand(string.Format("ALTER TABLE {0} ADD [{1}] uniqueidentifier null", tableName, GUID_COLUMN_NAME));
}
catch (Exception)
{
db.Database.ExecuteSqlCommand(string.Format("ALTER TABLE {0} DROP COLUMN [{1}]", tableName, GUID_COLUMN_NAME));
db.Database.ExecuteSqlCommand(string.Format("ALTER TABLE {0} ADD [{1}] uniqueidentifier null", tableName, GUID_COLUMN_NAME));
}
}
try
{
var connection = db.Database.Connection as SqlConnection;
connection.Open();
using (SqlBulkCopy cpy = new SqlBulkCopy(connection))
{
cpy.BulkCopyTimeout = 0;
cpy.DestinationTableName = tableName;
cpy.WriteToServer(entityReader);
connection.Close();
}
if (hasAutoGeneratedKey)
{
db.Database.Connection.Open();
var comm = db.Database.Connection.CreateCommand();
comm.CommandText = string.Format("SELECT * FROM {0} WHERE [{1}] is not null", tableName, GUID_COLUMN_NAME);
try
{
using (var reader = comm.ExecuteReader())
{
while (reader.Read())
{
Guid surrogateKey = Guid.Parse(reader[GUID_COLUMN_NAME].ToString());
TEntity entity = lookup[surrogateKey];
var keyProperty = entityType.GetProperty(os.EntitySet.ElementType.KeyMembers.First().Name);
keyProperty.SetValue(entity, reader[keyProperty.Name]);
}
}
}
catch (Exception)
{
throw;
}
finally
{
//This should never occur
db.Database.Connection.Close();
}
}
}
catch (Exception)
{
throw;
}
finally
{
if (hasAutoGeneratedKey)
db.Database.ExecuteSqlCommand(string.Format("ALTER TABLE {0} DROP COLUMN [{1}]", tableName, GUID_COLUMN_NAME));
}
}
}