优化将 Excel 文件读取到数据表对象中



我使用以下代码将数据从 excel 文件读取到 DataTable 对象中以供进一步使用。由于它处理从 100k 到 500k 的条目,因此读取可能会有点慢。我可以在我的代码中更改一些内容以优化过程吗?代码如下。

public static DataTable ReadAsDataTable(string filePath)
{
DataTable dataTable = new DataTable();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(filePath, false))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
dataTable.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows)
{
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
dataRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
}
dataTable.Rows.Add(dataRow);
}
}
dataTable.Rows.RemoveAt(0);
return dataTable;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}

我不确定此 API 的编译器行为或该 API 的性能特征是什么,但是如果您只调用一次row.Descendants<Cell>()会有所帮助吗?这似乎是编译器可以优化的东西,但可能会涉及副作用,因此它不执行任何操作。

foreach (Row row in rows)
{
var cells = row.Descendants<Cell>().ToArray();
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < cells.Length; i++)
{
dataRow[i] = GetCellValue(spreadSheetDocument, cells[i]);
}
dataTable.Rows.Add(dataRow);
}

最新更新