C# 处理包含大数据的 Excel 文件



我正在将数据从不同 excel 文件的第一张复制到单个工作簿。我已经尝试过使用不同的替代方案,例如npoispire.xlsInterop效果很好,但它浪费了我太多的时间。如果有人能给我一个更好的建议,那将不胜感激。在网上浏览了许多表格,但找不到。

仅供参考:我的每个文件的大小都超过 50 MB。有些是 10 MB 或更小。

这是我尝试过的其中之一(使用尖塔.xls(:

workbook = new Workbook();
//laod first file
workbook.LoadFromFile(names[0]);
//load the remaining files starting with second file
for (int i = 1; i < cnt; i++)
{
LoadFIle(names[i]);
//merge the loaded file immediately and than load next file
MergeData();
}
private void LoadFIle(string filePath)
{
//load other workbooks starting with 2nd workbbook
tempbook = new Workbook();
tempbook.LoadFromFile(filePath);
}
private void MergeData()
{
try
{
int c1 = workbook.ActiveSheet.LastRow, c2 = tempbook.Worksheets[0].LastRow;
//check if you have exceeded 1st sheet limit
if ((c1 + c2) <= 1048575)
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
else if ((c1 >= 1048575 && c2 >= 1048575) || c1 >= 1048575 || c2 >= 1048575 || (c1 + c2) >= 1048575)
{
workbook.Worksheets.AddCopy(tempbook.Worksheets[0]);
indx = workbook.ActiveSheet.Index;
workbook.ActiveSheetIndex = ++indx;
}
else
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
}
catch (IndexOutOfRangeException)
{
}
}
}

嗯,这很好用,但如前所述需要很长时间。欢迎任何建议。提前谢谢。

这是我(我知道的最快的(使用Excel互操作实现的。尽管我仔细查看以释放所有(一定错过了一个(,但进程列表中仍保留了 2 个 Excel 实例,它们在程序结束后关闭。

关键是只有 2 个 Open Excel 实例,并使用Range.Value2将数据复制为块。

//Helper function to cleanup
public void ReleaseObject(object obj)
{
if (obj != null && Marshal.IsComObject(obj))
{
Marshal.ReleaseComObject(obj);
}
}

public void CopyIntoOne(List<string> pSourceFiles, string pDestinationFile)
{
var sourceExcelApp = new Microsoft.Office.Interop.Excel.Application();
var destinationExcelApp = new Microsoft.Office.Interop.Excel.Application();
// TODO: Check if it exists
destinationExcelApp.Workbooks.Open(pDestinationFile);
// for debug
//destinationExcelApp.Visible = true;
//sourceExcelApp.Visible = true;
int i = 0;
var sheets = destinationExcelApp.ActiveWorkbook.Sheets;
var lastsheet = destinationExcelApp.ActiveWorkbook.Sheets[sheets.Count];
ReleaseObject(sheets);
foreach (var srcFile in pSourceFiles)
{
sourceExcelApp.Workbooks.Open(srcFile);
// get extends
var lastRow = sourceExcelApp.ActiveSheet.Cells.Find("*", System.Reflection.Missing.Value,
System.Reflection.Missing.Value, System.Reflection.Missing.Value, XlSearchOrder.xlByRows,
XlSearchDirection.xlPrevious, false, System.Reflection.Missing.Value, System.Reflection.Missing.Value);
var lastCol = sourceExcelApp.ActiveSheet.Cells.Find("*", System.Reflection.Missing.Value, System.Reflection.Missing.Value,
System.Reflection.Missing.Value, XlSearchOrder.xlByColumns, XlSearchDirection.xlPrevious, false,
System.Reflection.Missing.Value, System.Reflection.Missing.Value);
var startCell = (Range) sourceExcelApp.ActiveWorkbook.ActiveSheet.Cells[1, 1];
var endCell = (Range) sourceExcelApp.ActiveWorkbook.ActiveSheet.Cells[lastRow.Row, lastCol.Column];
var myRange = sourceExcelApp.ActiveWorkbook.ActiveSheet.Range[startCell, endCell];
// copy the values
var value = myRange.Value2;
// create sheet in new Workbook at the end                
Worksheet newSheet = destinationExcelApp.ActiveWorkbook.Sheets.Add(After: lastsheet);
ReleaseObject(lastsheet);
lastsheet = newSheet;
//its even faster when adding it at the front
//Worksheet newSheet = destinationExcelApp.ActiveWorkbook.Sheets.Add();
// change that to a good name
newSheet.Name = ++i + "";
var dstStartCell = (Range) destinationExcelApp.ActiveWorkbook.ActiveSheet.Cells[1, 1];
var dstEndCell = (Range) destinationExcelApp.ActiveWorkbook.ActiveSheet.Cells[lastRow.Row, lastCol.Column];
var dstRange = destinationExcelApp.ActiveWorkbook.ActiveSheet.Range[dstStartCell, dstEndCell];
// this is the actual paste
dstRange.Value2 = value;
//cleanup
ReleaseObject(startCell);
ReleaseObject(endCell);
ReleaseObject(myRange);
ReleaseObject(value);// cannot hurt, but not necessary since its a simple array
ReleaseObject(dstStartCell);
ReleaseObject(dstEndCell);
ReleaseObject(dstRange);
ReleaseObject(newSheet);
ReleaseObject(lastRow);
ReleaseObject(lastCol);
sourceExcelApp.ActiveWorkbook.Close(false);
}
ReleaseObject(lastsheet);
sourceExcelApp.Quit();
ReleaseObject(sourceExcelApp);
destinationExcelApp.ActiveWorkbook.Save();
destinationExcelApp.Quit();
ReleaseObject(destinationExcelApp);
destinationExcelApp = null;
sourceExcelApp = null;
}

我已经在小的 excel 文件上测试过它,很好奇它在大文件中的表现如何。

最新更新