使用CsvHelper读取含有多个标题行的CSV文件



我有一个很大的CSV文件,其中包含多个标题行,示例见下文。如何在C#中使用CsvHelper读取它?

如下所示,标题在CSV中周期性重复。还有很多行以"+"开头。

示例如下:

FAUF-Rückmeldungen aus SFC500:  4200 Sätze ausgegeben
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10|      410959107|              2|0800|26.07.2021    |00:01:24     |155164-B    |
|TR10|      411158037|             20|0900|26.07.2021    |00:02:33     |155217-A    |
|TR10|      410985740|             25|0900|26.07.2021    |00:02:39     |155196-A    |
|TR10|      410279717|             57|0900|26.07.2021    |00:02:40     |155196-A    |
|TR10|      410630007|              6|0900|26.07.2021    |00:02:41     |155196-B    |
|TR10|      411237292|             25|0900|26.07.2021    |00:02:41     |155196-A    |
|TR10|      410276088|             20|0900|26.07.2021    |00:06:56     |155217-A    |
|TR10|      410950998|              1|0900|26.07.2021    |00:06:57     |155217-A    |
|TR10|      411237292|             26|0900|26.07.2021    |00:06:57     |155196-A    |
|TR10|      410556669|              1|0900|26.07.2021    |00:06:58     |155217-A    |
|TR10|      411237292|             27|0900|26.07.2021    |00:06:58     |155196-A    |
|TR10|      410556669|              2|0900|26.07.2021    |00:06:59     |155217-A    |
|TR10|      410630007|              7|0900|26.07.2021    |00:07:00     |155196-B    |
|TR10|      411525402|              5|0900|26.07.2021    |00:07:00     |155114-A    |
|TR10|      411525402|              6|0900|26.07.2021    |00:07:01     |155114-A    |
|TR10|      411528024|              1|0900|26.07.2021    |00:07:02     |155114-A    |
|TR10|      411528024|              2|0900|26.07.2021    |00:07:03     |155114-A    |
|TR10|      411528929|             30|0900|26.07.2021    |00:07:04     |155114-A    |
|TR10|      411544500|              3|0900|26.07.2021    |00:07:05     |155114-A    |
|TR10|      411528928|              8|0905|26.07.2021    |00:10:19     |155123-C    |
|TR10|      410279717|             58|0900|26.07.2021    |00:11:48     |155196-A    |
|TR10|      411237292|             28|0900|26.07.2021    |00:11:49     |155196-A    |
|TR10|      410630007|              8|0900|26.07.2021    |00:11:50     |155196-B    |
|TR10|      411237293|              2|0990|26.07.2021    |00:14:14     |155164-A    |
|TR10|      410633488|              1|0600|26.07.2021    |00:14:52     |155163-0    |
|TR10|      410633212|              1|0600|26.07.2021    |00:14:59     |155163-0    |
|TR10|      411218828|              2|0600|26.07.2021    |00:15:08     |155163-0    |
|TR10|      411438190|              3|0910|26.07.2021    |00:15:14     |155163-E    |
|TR10|      411527748|              1|0910|26.07.2021    |00:15:19     |155163-B    |
|TR10|      411367433|              2|0910|26.07.2021    |00:16:17     |155163-D    |
|TR10|      411032464|              3|0910|26.07.2021    |00:16:26     |155163-D    |
|TR10|      411525402|              7|0900|26.07.2021    |00:16:49     |155114-A    |
|TR10|      411528024|              3|0900|26.07.2021    |00:16:50     |155114-A    |
|TR10|      411544500|              4|0900|26.07.2021    |00:16:51     |155114-A    |
|TR10|      410985740|             26|0900|26.07.2021    |00:16:55     |155196-A    |
|TR10|      410279717|             59|0900|26.07.2021    |00:16:56     |155196-A    |
|TR10|      411237292|             29|0900|26.07.2021    |00:16:57     |155196-A    |
|TR10|      410900407|              2|0040|26.07.2021    |00:17:46     |155135-D    |
|TR10|      409944144|              1|0910|26.07.2021    |00:18:47     |155163-C    |
|TR10|      411544499|              1|0905|26.07.2021    |00:19:42     |155123-C    |
|TR10|      411525401|              5|0905|26.07.2021    |00:19:56     |155123-C    |
|TR10|      410985740|             27|0900|26.07.2021    |00:21:47     |155196-A    |
|TR10|      410630007|              9|0900|26.07.2021    |00:21:48     |155196-B    |
|TR10|      411237292|             30|0900|26.07.2021    |00:21:48     |155196-A    |
|TR10|      411544437|              4|0900|26.07.2021    |00:22:22     |155114-A    |
|TR10|      411544436|              1|0905|26.07.2021    |00:22:41     |155123-C    |
|TR10|      411551402|              2|0005|26.07.2021    |00:24:00     |155115-B    |
|TR10|      411362459|              1|0005|26.07.2021    |00:24:52     |155115-B    |
|TR10|      411369893|              1|0060|26.07.2021    |00:25:25     |155112-G    |
|TR10|      411530629|              1|0005|26.07.2021    |00:25:37     |155115-B    |
|TR10|      411369897|              1|0063|26.07.2021    |00:25:40     |155112-F    |
|TR10|      411369894|              1|0070|26.07.2021    |00:25:54     |155518-0    |
|TR10|      411369897|              2|0063|26.07.2021    |00:26:02     |155112-F    |
|TR10|      411369894|              2|0070|26.07.2021    |00:26:10     |155518-0    |
|TR10|      411369897|              3|0063|26.07.2021    |00:26:21     |155112-F    |
|TR10|      411369894|              3|0070|26.07.2021    |00:26:28     |155518-0    |
|TR10|      411369897|              4|0063|26.07.2021    |00:26:37     |155112-F    |
|TR10|      411369894|              4|0070|26.07.2021    |00:26:43     |155518-0    |
|TR10|      410950998|              2|0900|26.07.2021    |00:26:45     |155217-A    |
+----+---------------+---------------+----+--------------+-------------+------------+
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10|      410279717|             60|0900|26.07.2021    |00:26:46     |155196-A    |
|TR10|      410950998|              3|0900|26.07.2021    |00:26:46     |155217-A    |
|TR10|      410630007|             10|0900|26.07.2021    |00:26:47     |155196-B    |
|TR10|      411369897|              5|0063|26.07.2021    |00:26:54     |155112-F    |
|TR10|      411369894|              5|0070|26.07.2021    |00:27:04     |155518-0    |
|TR10|      411369897|              6|0063|26.07.2021    |00:27:15     |155112-F    |
|TR10|      411369894|              6|0070|26.07.2021    |00:27:23     |155518-0    |
|TR10|      411086222|              1|0001|26.07.2021    |00:27:50     |155212-A    |
|TR10|      411086223|              1|0005|26.07.2021    |00:27:58     |155210-A    |
|TR10|      411520617|              7|0905|26.07.2021    |00:30:28     |155123-C    |
|TR10|      411872172|              1|0010|26.07.2021    |00:31:27     |155145-A    |
|TR10|      411872177|              1|0010|26.07.2021    |00:31:39     |155145-A    |
|TR10|      411528024|              4|0900|26.07.2021    |00:31:50     |155114-A    |
|TR10|      411872182|              1|0010|26.07.2021    |00:31:50     |155145-A    |
|TR10|      410985740|             28|0900|26.07.2021    |00:31:54     |155196-A    |
|TR10|      410279717|             61|0900|26.07.2021    |00:31:55     |155196-A    |
|TR10|      411872187|              1|0010|26.07.2021    |00:32:02     |155145-A    |
|TR10|      410699054|              1|0060|26.07.2021    |00:32:52     |155112-K    |
|TR10|      410699055|              1|0063|26.07.2021    |00:33:01     |155112-L    |
|TR10|      410699056|              1|0070|26.07.2021    |00:33:11     |155518-0    |
|TR10|      411434349|              2|0080|26.07.2021    |00:33:18     |155213-F    |
|TR10|      410850582|              1|0051|26.07.2021    |00:33:54     |155146-E    |
|TR10|      410850583|              1|0055|26.07.2021    |00:34:01     |155146-F    |
|TR10|      410850580|              1|0080|26.07.2021    |00:34:09     |155518-0    |
|TR10|      410774889|              1|0050|26.07.2021    |00:34:13     |155171-D    |
|TR10|      411243279|              2|0005|26.07.2021    |00:34:27     |155531-A    |
|TR10|      411243280|              3|0010|26.07.2021    |00:34:37     |155550-B    |
|TR10|      411243281|              1|0020|26.07.2021    |00:34:48     |155550-E    |
|TR10|      411228376|              1|0001|26.07.2021    |00:36:15     |155112-D    |
|TR10|      410985740|             29|0900|26.07.2021    |00:36:46     |155196-A    |
|TR10|      411525402|              8|0900|26.07.2021    |00:36:46     |155114-A    |
|TR10|      411237292|             31|0900|26.07.2021    |00:36:47     |155196-A    |
|TR10|      411533238|              1|0001|26.07.2021    |00:36:55     |155144-A    |
|TR10|      410898440|              2|0010|26.07.2021    |00:37:02     |155171-A    |
|TR10|      411533239|              1|0005|26.07.2021    |00:37:02     |155104-A    |
|TR10|      411874854|              1|0010|26.07.2021    |00:37:37     |FCM-E       |
|TR10|      411032291|              1|0060|26.07.2021    |00:40:09     |155112-G    |
|TR10|      411874855|              1|0010|26.07.2021    |00:40:21     |FCM-E       |
|TR10|      411032293|              1|0063|26.07.2021    |00:40:35     |155112-F    |
|TR10|      411032292|              1|0070|26.07.2021    |00:40:42     |155518-0    |
|TR10|      411032293|              2|0063|26.07.2021    |00:40:51     |155112-F    |
|TR10|      411032292|              2|0070|26.07.2021    |00:40:59     |155518-0    |
|TR10|      411032293|              3|0063|26.07.2021    |00:41:08     |155112-F    |
|TR10|      411032292|              3|0070|26.07.2021    |00:41:15     |155518-0    |
|TR10|      411032293|              4|0063|26.07.2021    |00:41:25     |155112-F    |
|TR10|      411032292|              4|0070|26.07.2021    |00:41:32     |155518-0    |
|TR10|      411032293|              5|0063|26.07.2021    |00:41:41     |155112-F    |
|TR10|      410556669|              3|0900|26.07.2021    |00:41:46     |155217-A    |
|TR10|      410279717|             62|0900|26.07.2021    |00:41:47     |155196-A    |
|TR10|      411237292|             32|0900|26.07.2021    |00:41:48     |155196-A    |
|TR10|      411032292|              5|0070|26.07.2021    |00:41:49     |155518-0    |
|TR10|      411032293|              6|0063|26.07.2021    |00:41:59     |155112-F    |
|TR10|      411032292|              6|0070|26.07.2021    |00:42:07     |155518-0    |
|TR10|      411535704|              1|0010|26.07.2021    |00:43:40     |155144-A    |
|TR10|      411875458|              1|0010|26.07.2021    |00:43:54     |155144-A    |
|TR10|      411528024|              5|0900|26.07.2021    |00:46:47     |155114-A    |
|TR10|      410985740|             30|0900|26.07.2021    |00:46:48     |155196-A    |
|TR10|      410279717|             63|0900|26.07.2021    |00:46:50     |155196-A    |
|TR10|      411525401|              6|0905|26.07.2021    |00:46:56     |155123-C    |
|TR10|      411528023|              1|0905|26.07.2021    |00:47:30     |155123-C    |
+----+---------------+---------------+----+--------------+-------------+------------+

我创建了如下的类:

// NOTE(review): this namespace has the same name as the CsvHelper library's root
// namespace - confirm that this is intended.
namespace CsvHelper
{
/// <summary>
/// Console program from the question: a first attempt at reading "file.csv" with CsvHelper.
/// </summary>
class Program
{
/// <summary>Entry point; only calls <see cref="ReadCsv"/>.</summary>
static void Main(string[] args)
{
ReadCsv();
}
/// <summary>
/// Opens "file.csv" with "|" as the field delimiter and requests <see cref="SFC"/> records.
/// The result of GetRecords is stored in a local that is never enumerated or returned.
/// </summary>
static void ReadCsv()
{
var config = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter="|"
};     
using (var reader = new StreamReader("file.csv"))
using (var csv = new CsvReader(reader, config))
{
// Nothing is done with the result inside the using block.
var records = csv.GetRecords<SFC>();
} 
}
/// <summary>
/// Row model; the property names match the column headers of the sample table.
/// </summary>
public class SFC
{
public string Werk { get; set; }
public string Rückmeldenummer { get; set; }
public int Rückmeldezähler { get; set; }
// Declared as int here; sample values such as 0800 carry leading zeros.
public int AVO { get; set; }
public DateTime Rückmeldedatum { get; set; }
public TimeSpan Rückmeldezeit { get; set; }
public string Arbeitsplatz { get; set; }
}
}
}

如何使用CsvHelper将这个文件读取为List<SFC>?

您的文本文件由按以下模式重复出现的行组成:

  • 0行或多行需要忽略的初始行。
  • 起始分隔线,如+----+---------------+
  • 标题行,如|Werk|Rückmeldenummer|
  • 另一条分隔线。
  • 数据行,如|TR10| 410959107|
  • 结束分隔线。

要读取这种格式的CSV文件,可以先跳过初始行,然后检查每一行的第一个字段是否"看起来像"分隔线,如下所示:

/// <summary>
/// Kind of row most recently consumed by <c>ReadCsv</c> while it walks the repeated
/// separator / header / separator / data / separator layout of the file.
/// </summary>
enum ReadState
{
    /// <summary>Outside any table: before the first separator row, or after a table's closing separator.</summary>
    Initial = 0,

    /// <summary>The separator row that opens a table was just read.</summary>
    InitialDelimiter = 1,

    /// <summary>The header row was just read.</summary>
    Header = 2,

    /// <summary>The separator row between the header and the data rows was just read.</summary>
    HeaderDataDelimiter = 3,

    /// <summary>A data row was just read.</summary>
    Data = 4,
}
/// <summary>
/// Reads all data rows from a "|"-delimited text file in which the same table layout
/// (separator row, header row, separator row, data rows, separator row) repeats.
/// </summary>
/// <typeparam name="TRecord">Record type produced for each data row.</typeparam>
/// <param name="filename">Path of the file to read.</param>
/// <param name="map">Class map registered with the reader before the first row is read.</param>
/// <returns>The records of every table, in file order.</returns>
/// <exception cref="ApplicationException">
/// A separator row was found where none is expected: directly after the opening
/// separator, or directly after the header/data separator.
/// </exception>
public static List<TRecord> ReadCsv<TRecord>(string filename, ClassMap<TRecord> map)
{
    List<TRecord> records = new ();
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = "|",
        // Header cells are padded to the column width (e.g. "AVO "), so trim before matching.
        PrepareHeaderForMatch = args => args.Header.Trim(),
        // Data cells are padded as well; trim the field values.
        TrimOptions = TrimOptions.Trim,
    };
    using (var reader = new StreamReader(filename))
    using (var csv = new CsvReader(reader, config))
    {
        csv.Context.RegisterClassMap(map);
        var state = ReadState.Initial;
        while (csv.Read())
        {
            // A separator row such as "+----+-----+" contains no '|', so it arrives as a
            // single field. Compare ordinally: this is a structural marker, not
            // linguistic text, so the current culture must not influence the result.
            var isDelimiter = csv.GetField(0)?.StartsWith("+-", StringComparison.Ordinal) == true;
            var newState = (isDelimiter, state) switch
            {
                (true, ReadState.Initial) => ReadState.InitialDelimiter,
                (true, ReadState.Header) => ReadState.HeaderDataDelimiter,
                //(true, ReadState.HeaderDataDelimiter) => ReadState.Initial, // Uncomment if your CSV file might contain empty tables with headers and delimiters but no data.
                (true, ReadState.Data) => ReadState.Initial,
                (false, ReadState.Initial) => ReadState.Initial,
                (false, ReadState.InitialDelimiter) => ReadState.Header,
                (false, ReadState.HeaderDataDelimiter) => ReadState.Data,
                (false, ReadState.Data) => ReadState.Data,
                _ => throw new ApplicationException($"Unexpected row on state {state}")
            };
            switch (newState)
            {
                // The header is re-read for every table, so each table is matched
                // against its own header row.
                case ReadState.Header: csv.ReadHeader(); break;
                case ReadState.Data: records.Add(csv.GetRecord<TRecord>()); break;
            }
            state = newState;
        }
    }
    return records;
}

然后为SFC定义一个classmap,如下所示:

/// <summary>
/// CsvHelper class map for <see cref="SFC"/>: auto-maps every property by name and
/// overrides how the <c>Rückmeldedatum</c> column is parsed.
/// </summary>
class SFCMap : ClassMap<SFC>
{
    /// <summary>Creates the map using an invariant-culture configuration.</summary>
    public SFCMap() : this(new CsvConfiguration(CultureInfo.InvariantCulture)) {}

    /// <summary>Creates the map using the supplied configuration for auto-mapping.</summary>
    /// <param name="config">Configuration passed to <c>AutoMap</c>.</param>
    public SFCMap(CsvConfiguration config)
    {
        AutoMap(config);
        // Dates look like "26.07.2021". The month specifier is upper-case "MM";
        // lower-case "mm" means minutes and would parse "07" as minutes, leaving
        // the month at its default of January without raising any error.
        Map(m => m.Rückmeldedatum).TypeConverterOption.Format("dd.MM.yyyy").TypeConverterOption.DateTimeStyles(DateTimeStyles.AllowWhiteSpaces);
    }
}
/// <summary>
/// One data row of the table. Property names match the column headers of the file.
/// </summary>
public class SFC
{
public string Werk { get; set; }
public string Rückmeldenummer { get; set; }
public int Rückmeldezähler { get; set; }
public string AVO { get; set; } // Changed from int to string so that leading zeros (e.g. "0800") are not lost.
public DateTime Rückmeldedatum { get; set; }
public TimeSpan Rückmeldezeit { get; set; } // Type name corrected from Timespan to TimeSpan.
public string Arbeitsplatz { get; set; }
}

您将能够将CSV文件读取为List<SFC>,如下所示:

// Parse the whole file into SFC records, using the SFC-specific class map.
List<SFC> records = ReadCsv<SFC>(filename, new SFCMap());

注意:

  • 您将AVO定义为int,但字段有前导零,例如0800。因此,我将其类型更改为string,以便保留这些。

  • 解析Rückmeldedatum时需要指定"dd.MM.yyyy"格式(月份必须使用大写的MM,小写的mm表示分钟)。我添加ClassMap<SFC>就是为了提供这个格式。

  • 你的"CSV"实际上是一个固定宽度的文件,而不是真正的CSV文件。我假设您希望去除字符串字段两侧用于对齐的空格。如果不需要,请删除TrimOptions = TrimOptions.Trim

  • 如果您的CSV文件可能包含带有头和分隔符的空表,但没有数据,如:

    +----+---------------+---------------+----+--------------+-------------+------------+
    |Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
    +----+---------------+---------------+----+--------------+-------------+------------+
    +----+---------------+---------------+----+--------------+-------------+------------+
    

    那么请取消下面这一行的注释:

    //(true, ReadState.HeaderDataDelimiter) => ReadState.Initial,
    

  • 另请参阅文档页读取多个数据集,其中讨论了类似的解析问题。

演示fiddle见此处

相关内容

  • 没有找到相关文章

最新更新