我有一个很大的CSV文件,其中包含多个标题行,下面是一个示例。我怎样才能在C#中用CsvHelper读取它?
如下所示,标题在CSV中周期性重复。还有很多行以"+"开头。
示例如下:
FAUF-Rückmeldungen aus SFC500: 4200 Sätze ausgegeben
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10| 410959107| 2|0800|26.07.2021 |00:01:24 |155164-B |
|TR10| 411158037| 20|0900|26.07.2021 |00:02:33 |155217-A |
|TR10| 410985740| 25|0900|26.07.2021 |00:02:39 |155196-A |
|TR10| 410279717| 57|0900|26.07.2021 |00:02:40 |155196-A |
|TR10| 410630007| 6|0900|26.07.2021 |00:02:41 |155196-B |
|TR10| 411237292| 25|0900|26.07.2021 |00:02:41 |155196-A |
|TR10| 410276088| 20|0900|26.07.2021 |00:06:56 |155217-A |
|TR10| 410950998| 1|0900|26.07.2021 |00:06:57 |155217-A |
|TR10| 411237292| 26|0900|26.07.2021 |00:06:57 |155196-A |
|TR10| 410556669| 1|0900|26.07.2021 |00:06:58 |155217-A |
|TR10| 411237292| 27|0900|26.07.2021 |00:06:58 |155196-A |
|TR10| 410556669| 2|0900|26.07.2021 |00:06:59 |155217-A |
|TR10| 410630007| 7|0900|26.07.2021 |00:07:00 |155196-B |
|TR10| 411525402| 5|0900|26.07.2021 |00:07:00 |155114-A |
|TR10| 411525402| 6|0900|26.07.2021 |00:07:01 |155114-A |
|TR10| 411528024| 1|0900|26.07.2021 |00:07:02 |155114-A |
|TR10| 411528024| 2|0900|26.07.2021 |00:07:03 |155114-A |
|TR10| 411528929| 30|0900|26.07.2021 |00:07:04 |155114-A |
|TR10| 411544500| 3|0900|26.07.2021 |00:07:05 |155114-A |
|TR10| 411528928| 8|0905|26.07.2021 |00:10:19 |155123-C |
|TR10| 410279717| 58|0900|26.07.2021 |00:11:48 |155196-A |
|TR10| 411237292| 28|0900|26.07.2021 |00:11:49 |155196-A |
|TR10| 410630007| 8|0900|26.07.2021 |00:11:50 |155196-B |
|TR10| 411237293| 2|0990|26.07.2021 |00:14:14 |155164-A |
|TR10| 410633488| 1|0600|26.07.2021 |00:14:52 |155163-0 |
|TR10| 410633212| 1|0600|26.07.2021 |00:14:59 |155163-0 |
|TR10| 411218828| 2|0600|26.07.2021 |00:15:08 |155163-0 |
|TR10| 411438190| 3|0910|26.07.2021 |00:15:14 |155163-E |
|TR10| 411527748| 1|0910|26.07.2021 |00:15:19 |155163-B |
|TR10| 411367433| 2|0910|26.07.2021 |00:16:17 |155163-D |
|TR10| 411032464| 3|0910|26.07.2021 |00:16:26 |155163-D |
|TR10| 411525402| 7|0900|26.07.2021 |00:16:49 |155114-A |
|TR10| 411528024| 3|0900|26.07.2021 |00:16:50 |155114-A |
|TR10| 411544500| 4|0900|26.07.2021 |00:16:51 |155114-A |
|TR10| 410985740| 26|0900|26.07.2021 |00:16:55 |155196-A |
|TR10| 410279717| 59|0900|26.07.2021 |00:16:56 |155196-A |
|TR10| 411237292| 29|0900|26.07.2021 |00:16:57 |155196-A |
|TR10| 410900407| 2|0040|26.07.2021 |00:17:46 |155135-D |
|TR10| 409944144| 1|0910|26.07.2021 |00:18:47 |155163-C |
|TR10| 411544499| 1|0905|26.07.2021 |00:19:42 |155123-C |
|TR10| 411525401| 5|0905|26.07.2021 |00:19:56 |155123-C |
|TR10| 410985740| 27|0900|26.07.2021 |00:21:47 |155196-A |
|TR10| 410630007| 9|0900|26.07.2021 |00:21:48 |155196-B |
|TR10| 411237292| 30|0900|26.07.2021 |00:21:48 |155196-A |
|TR10| 411544437| 4|0900|26.07.2021 |00:22:22 |155114-A |
|TR10| 411544436| 1|0905|26.07.2021 |00:22:41 |155123-C |
|TR10| 411551402| 2|0005|26.07.2021 |00:24:00 |155115-B |
|TR10| 411362459| 1|0005|26.07.2021 |00:24:52 |155115-B |
|TR10| 411369893| 1|0060|26.07.2021 |00:25:25 |155112-G |
|TR10| 411530629| 1|0005|26.07.2021 |00:25:37 |155115-B |
|TR10| 411369897| 1|0063|26.07.2021 |00:25:40 |155112-F |
|TR10| 411369894| 1|0070|26.07.2021 |00:25:54 |155518-0 |
|TR10| 411369897| 2|0063|26.07.2021 |00:26:02 |155112-F |
|TR10| 411369894| 2|0070|26.07.2021 |00:26:10 |155518-0 |
|TR10| 411369897| 3|0063|26.07.2021 |00:26:21 |155112-F |
|TR10| 411369894| 3|0070|26.07.2021 |00:26:28 |155518-0 |
|TR10| 411369897| 4|0063|26.07.2021 |00:26:37 |155112-F |
|TR10| 411369894| 4|0070|26.07.2021 |00:26:43 |155518-0 |
|TR10| 410950998| 2|0900|26.07.2021 |00:26:45 |155217-A |
+----+---------------+---------------+----+--------------+-------------+------------+
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
|TR10| 410279717| 60|0900|26.07.2021 |00:26:46 |155196-A |
|TR10| 410950998| 3|0900|26.07.2021 |00:26:46 |155217-A |
|TR10| 410630007| 10|0900|26.07.2021 |00:26:47 |155196-B |
|TR10| 411369897| 5|0063|26.07.2021 |00:26:54 |155112-F |
|TR10| 411369894| 5|0070|26.07.2021 |00:27:04 |155518-0 |
|TR10| 411369897| 6|0063|26.07.2021 |00:27:15 |155112-F |
|TR10| 411369894| 6|0070|26.07.2021 |00:27:23 |155518-0 |
|TR10| 411086222| 1|0001|26.07.2021 |00:27:50 |155212-A |
|TR10| 411086223| 1|0005|26.07.2021 |00:27:58 |155210-A |
|TR10| 411520617| 7|0905|26.07.2021 |00:30:28 |155123-C |
|TR10| 411872172| 1|0010|26.07.2021 |00:31:27 |155145-A |
|TR10| 411872177| 1|0010|26.07.2021 |00:31:39 |155145-A |
|TR10| 411528024| 4|0900|26.07.2021 |00:31:50 |155114-A |
|TR10| 411872182| 1|0010|26.07.2021 |00:31:50 |155145-A |
|TR10| 410985740| 28|0900|26.07.2021 |00:31:54 |155196-A |
|TR10| 410279717| 61|0900|26.07.2021 |00:31:55 |155196-A |
|TR10| 411872187| 1|0010|26.07.2021 |00:32:02 |155145-A |
|TR10| 410699054| 1|0060|26.07.2021 |00:32:52 |155112-K |
|TR10| 410699055| 1|0063|26.07.2021 |00:33:01 |155112-L |
|TR10| 410699056| 1|0070|26.07.2021 |00:33:11 |155518-0 |
|TR10| 411434349| 2|0080|26.07.2021 |00:33:18 |155213-F |
|TR10| 410850582| 1|0051|26.07.2021 |00:33:54 |155146-E |
|TR10| 410850583| 1|0055|26.07.2021 |00:34:01 |155146-F |
|TR10| 410850580| 1|0080|26.07.2021 |00:34:09 |155518-0 |
|TR10| 410774889| 1|0050|26.07.2021 |00:34:13 |155171-D |
|TR10| 411243279| 2|0005|26.07.2021 |00:34:27 |155531-A |
|TR10| 411243280| 3|0010|26.07.2021 |00:34:37 |155550-B |
|TR10| 411243281| 1|0020|26.07.2021 |00:34:48 |155550-E |
|TR10| 411228376| 1|0001|26.07.2021 |00:36:15 |155112-D |
|TR10| 410985740| 29|0900|26.07.2021 |00:36:46 |155196-A |
|TR10| 411525402| 8|0900|26.07.2021 |00:36:46 |155114-A |
|TR10| 411237292| 31|0900|26.07.2021 |00:36:47 |155196-A |
|TR10| 411533238| 1|0001|26.07.2021 |00:36:55 |155144-A |
|TR10| 410898440| 2|0010|26.07.2021 |00:37:02 |155171-A |
|TR10| 411533239| 1|0005|26.07.2021 |00:37:02 |155104-A |
|TR10| 411874854| 1|0010|26.07.2021 |00:37:37 |FCM-E |
|TR10| 411032291| 1|0060|26.07.2021 |00:40:09 |155112-G |
|TR10| 411874855| 1|0010|26.07.2021 |00:40:21 |FCM-E |
|TR10| 411032293| 1|0063|26.07.2021 |00:40:35 |155112-F |
|TR10| 411032292| 1|0070|26.07.2021 |00:40:42 |155518-0 |
|TR10| 411032293| 2|0063|26.07.2021 |00:40:51 |155112-F |
|TR10| 411032292| 2|0070|26.07.2021 |00:40:59 |155518-0 |
|TR10| 411032293| 3|0063|26.07.2021 |00:41:08 |155112-F |
|TR10| 411032292| 3|0070|26.07.2021 |00:41:15 |155518-0 |
|TR10| 411032293| 4|0063|26.07.2021 |00:41:25 |155112-F |
|TR10| 411032292| 4|0070|26.07.2021 |00:41:32 |155518-0 |
|TR10| 411032293| 5|0063|26.07.2021 |00:41:41 |155112-F |
|TR10| 410556669| 3|0900|26.07.2021 |00:41:46 |155217-A |
|TR10| 410279717| 62|0900|26.07.2021 |00:41:47 |155196-A |
|TR10| 411237292| 32|0900|26.07.2021 |00:41:48 |155196-A |
|TR10| 411032292| 5|0070|26.07.2021 |00:41:49 |155518-0 |
|TR10| 411032293| 6|0063|26.07.2021 |00:41:59 |155112-F |
|TR10| 411032292| 6|0070|26.07.2021 |00:42:07 |155518-0 |
|TR10| 411535704| 1|0010|26.07.2021 |00:43:40 |155144-A |
|TR10| 411875458| 1|0010|26.07.2021 |00:43:54 |155144-A |
|TR10| 411528024| 5|0900|26.07.2021 |00:46:47 |155114-A |
|TR10| 410985740| 30|0900|26.07.2021 |00:46:48 |155196-A |
|TR10| 410279717| 63|0900|26.07.2021 |00:46:50 |155196-A |
|TR10| 411525401| 6|0905|26.07.2021 |00:46:56 |155123-C |
|TR10| 411528023| 1|0905|26.07.2021 |00:47:30 |155123-C |
+----+---------------+---------------+----+--------------+-------------+------------+
我创建了如下的类:
// The asker's original attempt, kept exactly as posted.
// NOTE(review): a user namespace named CsvHelper shares its name with the CsvHelper library — confirm this does not cause name-resolution problems.
namespace CsvHelper
{
class Program
{
// Entry point: only calls ReadCsv().
static void Main(string[] args)
{
ReadCsv();
}
// Opens "file.csv" with "|" as the field delimiter and requests SFC records from the reader.
static void ReadCsv()
{
var config = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter="|"
};
using (var reader = new StreamReader("file.csv"))
using (var csv = new CsvReader(reader, config))
{
// NOTE(review): `records` is never enumerated or returned before the reader is disposed.
// GetRecords is presumably lazily evaluated (see the CsvHelper docs) — confirm; if so, no row is actually read here.
var records = csv.GetRecords<SFC>();
}
}
// Row model: one property per column name in the table header of the sample file.
public class SFC
{
public string Werk { get; set; }
public string Rückmeldenummer { get; set; }
public int Rückmeldezähler { get; set; }
// NOTE(review): sample values such as "0800" have leading zeros, which an int cannot preserve.
public int AVO { get; set; }
// Sample values look like "26.07.2021" (day.month.year).
public DateTime Rückmeldedatum { get; set; }
// Sample values look like "00:01:24".
public TimeSpan Rückmeldezeit { get; set; }
public string Arbeitsplatz { get; set; }
}
}
}
我怎样才能用CsvHelper把这个文件读取到List<SFC>中?
您的文本文件由按以下模式重复出现的行组成:
- 0 行或多行需要忽略的初始行。
- 一条起始分隔线,例如:
+----+---------------+
- 一个标题行,例如:
|Werk|Rückmeldenummer|
- 另一条分隔线。
- 若干数据行,例如:
|TR10| 410959107|
- 一条结束分隔线。
您可以先跳过初始行,然后检查每一行的第一个字段是否"看起来像"分隔线,以此来读取这种格式的CSV文件,如下所示:
// Parser state used by ReadCsv while walking the file row by row.
// Each value names the kind of row that was most recently consumed.
enum ReadState
{
// Outside any table: non-delimiter rows are skipped until a "+-" delimiter row appears.
Initial,
// The delimiter row that opens a table was just read; the next non-delimiter row is the header.
InitialDelimiter,
// The header row was just read.
Header,
// The delimiter row between the header and the data rows was just read.
HeaderDataDelimiter,
// A data row was just read; a following delimiter row closes the table and returns to Initial.
Data,
}
/// <summary>
/// Reads a "|"-delimited text report that consists of repeated tables, each framed by
/// "+-" delimiter rows (delimiter, header, delimiter, data rows, delimiter), and returns
/// the data rows of every table as records.
/// </summary>
/// <typeparam name="TRecord">Record type produced for each data row.</typeparam>
/// <param name="filename">Path of the file to read.</param>
/// <param name="map">Class map registered on the reader before any row is read.</param>
/// <returns>The records of all data rows, in the order they were read.</returns>
/// <exception cref="ApplicationException">
/// A row was encountered that is not allowed in the current state (for example a delimiter
/// row directly after another opening delimiter row).
/// </exception>
public static List<TRecord> ReadCsv<TRecord>(string filename, ClassMap<TRecord> map)
{
    List<TRecord> records = new ();
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = "|",                                     // Fixed Delimeter => Delimiter
        PrepareHeaderForMatch = args => args.Header.Trim(),  // Added: header cells are padded with spaces
        TrimOptions = TrimOptions.Trim,                      // Added: data cells are padded with spaces
    };
    using (var reader = new StreamReader(filename))
    using (var csv = new CsvReader(reader, config))
    {
        csv.Context.RegisterClassMap(map);
        var state = ReadState.Initial;
        while (csv.Read())
        {
            // "+-" is a structural marker, not linguistic text, so compare ordinally
            // rather than with the current culture's rules.
            var isDelimiter = csv.GetField(0).StartsWith("+-", StringComparison.Ordinal);

            // Transition table: (is the current row a delimiter?, previous state) => new state.
            var newState = (isDelimiter, state) switch
            {
                (true, ReadState.Initial) => ReadState.InitialDelimiter,
                (true, ReadState.Header) => ReadState.HeaderDataDelimiter,
                //(true, ReadState.HeaderDataDelimiter) => ReadState.Initial, // Uncomment if your CSV file might contain empty tables with headers and delimiters but no data.
                (true, ReadState.Data) => ReadState.Initial,
                (false, ReadState.Initial) => ReadState.Initial,
                (false, ReadState.InitialDelimiter) => ReadState.Header,
                (false, ReadState.HeaderDataDelimiter) => ReadState.Data,
                (false, ReadState.Data) => ReadState.Data,
                _ => throw new ApplicationException($"Unexpected row on state {state}")
            };

            switch (newState)
            {
                case ReadState.Header:
                    // The header is re-read for every table, since it repeats throughout the file.
                    csv.ReadHeader();
                    break;
                case ReadState.Data:
                    records.Add(csv.GetRecord<TRecord>());
                    break;
            }

            state = newState;
        }
    }
    return records;
}
然后为SFC
定义一个classmap,如下所示:
/// <summary>
/// Class map for <see cref="SFC"/>: auto-maps every property and then overrides how the
/// Rückmeldedatum column is parsed.
/// </summary>
class SFCMap : ClassMap<SFC>
{
    /// <summary>Creates the map using an invariant-culture configuration.</summary>
    public SFCMap() : this(new CsvConfiguration(CultureInfo.InvariantCulture)) {}

    /// <summary>Creates the map, auto-mapping members with the supplied configuration.</summary>
    /// <param name="config">Configuration passed to AutoMap.</param>
    public SFCMap(CsvConfiguration config)
    {
        AutoMap(config);
        // Dates look like "26.07.2021" (day.month.year). The month specifier is uppercase "MM";
        // lowercase "mm" means minutes and would parse "07" as minutes, leaving the month at its default.
        Map(m => m.Rückmeldedatum).TypeConverterOption.Format("dd.MM.yyyy").TypeConverterOption.DateTimeStyles(DateTimeStyles.AllowWhiteSpaces);
    }
}
// Row model for one data line of the report; property names match the column names in the table header.
public class SFC
{
public string Werk { get; set; }
public string Rückmeldenummer { get; set; }
public int Rückmeldezähler { get; set; }
public string AVO { get; set; } // Fixed int => string (so as to not lose leading zeros, e.g. "0800")
public DateTime Rückmeldedatum { get; set; } // Sample values look like "26.07.2021" (day.month.year)
public TimeSpan Rückmeldezeit { get; set; } // Fixed Timespan => TimeSpan
public string Arbeitsplatz { get; set; }
}
您将能够将CSV文件读取为List<SFC>
,如下所示:
// TRecord is inferred as SFC from the SFCMap argument, so records is a List<SFC>.
var records = ReadCsv(filename, new SFCMap());
注意事项:
您将
AVO
定义为int
,但字段有前导零,例如0800
。因此,我将其类型更改为string
,以便保留这些前导零。
解析
Rückmeldedatum
时需要指定"dd.MM.yyyy"格式(月份必须使用大写的 MM;小写的 mm 表示分钟)。我添加 ClassMap<SFC>
就是为了提供这一格式。
您的"CSV"实际上是一个固定宽度的文件,而不是真正的CSV文件。我假设您希望去除字符串字段两侧用于排版的空格。如果不需要,请删除
TrimOptions = TrimOptions.Trim
。如果您的CSV文件可能包含只有标题行和分隔线、但没有数据的空表,例如:
+----+---------------+---------------+----+--------------+-------------+------------+
|Werk|Rückmeldenummer|Rückmeldezähler|AVO |Rückmeldedatum|Rückmeldezeit|Arbeitsplatz|
+----+---------------+---------------+----+--------------+-------------+------------+
+----+---------------+---------------+----+--------------+-------------+------------+
则取消下面这一行的注释:
//(true, ReadState.HeaderDataDelimiter) => ReadState.Initial,
另请参阅 CsvHelper 文档页面"Reading Multiple Data Sets"(读取多个数据集),其中讨论了类似的解析问题。
在线演示(.NET Fiddle)见此处。