

Financial Institution   : LOREMIPSOM      - 019223
FX Settlement Date      : 10.02.2021
Reconciliation File ID  : 801-288881-0005543759-00001
Transaction Currency    : AZN
Reconciliation Currency : USD

!         Settlement Category          ! Transaction Amount ! Reconciliation Amnt !          Fee Amount !  Transaction Amount ! Reconciliation Amnt !          Fee Amount !   Count !           Net Value !
!                                      !             Credit !              Credit !              Credit !               Debit !               Debit !               Debit !   Total !                     !
! MC Acq Fin Detail ATM Out                           5.00                   3.57                 49.75                  0.00                  0.00                  0.00        31                  3.32 !
! MC Acq Fin Detail Retail Out                        5.40                 262.01                  0.00                  0.00                  0.00                 -3.96        10                258.05 !
Totals :                           10.40                 265.58                 49.75                  0.00                  0.00                 -3.96        41                261.37

Financial Institution   : LOREMIPSOM      - 019223
FX Settlement Date      : 10.02.2021
Reconciliation File ID  : 801-288881-0005543759-00002
Transaction Currency    : EUR
Reconciliation Currency : USD

!         Settlement Category          ! Transaction Amount ! Reconciliation Amnt !          Fee Amount !  Transaction Amount ! Reconciliation Amnt !          Fee Amount !   Count !           Net Value !
!                                      !             Credit !              Credit !              Credit !               Debit !               Debit !               Debit !   Total !                     !
! Fee Collection Inc                                  0.00                   0.00                  0.00                  0.00                  0.00                  0.00         0                  0.00 !
! Fee Collection Inc                                  0.00                   0.00                  0.00                  8.00                  0.00                  0.00         0                  0.00 !
! Fee Collection Inc                                  0.00                   0.00                  0.00                  0.00                  0.00                  0.00         0                  0.00 !
! Fee Collection Inc                                  0.00                   0.00                  0.00                 -1.00                  0.00                  0.00         0                  0.00 !
Totals :                            0.00                   0.00                  0.00                  7.00                  0.00                  0.00         0                  0.00

我原本打算手动解析这个文件,但想知道是否有更好的方法。我需要把每一项数据都正确地解析出来,也就是解析文件并取得除 + 和 - 符号之外的每个数据。此外,文件结构不会改变,所以各列始终存在(固定)。可以看出这是与银行相关的文件(交易数据)。其中"Financial Institution"(金融机构)相当于我映射中的键,其余内容是它对应的数据;例如,该"Financial Institution"下的一个"Settlement Category"(结算类别)是"MC Acq Fin Detail ATM Out"。解析这种文件的最佳方法是什么?

您可以使用正则表达式逐行解析来实现。针对一些已知的模式创建 Regex 对象后,把当前行传给 Regex.Match() 调用,它会返回由括号分组捕获的所有部分。这样就不必在整个过程中进行复杂的 IndexOf() 搜索等操作。


在编写正则表达式示例并测试它们如何工作时,可以借助正则表达式测试工具来测试您计划解析的内容;这类工具会说明表达式正在查找什么,并允许您进行调试和逐步执行。您可以把我代码中的模式粘贴进去,查看它们的说明,并放入示例文件中的一些示例文本进行调试。这个来自 StackOverflow 的链接也有助于了解如何在下一个"标记"(即标识下一节的分隔符)之前匹配多个单词。



/// <summary>
/// Demonstrates line-by-line parsing of the settlement report header with
/// regular expressions. Reads "TestingRegex.txt" and, for every line that
/// matches one of the known header patterns, shows the captured values in a
/// message box. Does nothing when the file does not exist.
/// </summary>
private void TryRegParse()
{
    // Nothing to parse when the sample file is missing.
    if (!File.Exists("TestingRegex.txt"))
    {
        return;
    }

    // read the text content into already parsed individual lines
    var txtLines = File.ReadAllLines("TestingRegex.txt");

    var patFinancial = @"^.*?Financial Institution.*?:.*?(?<FinInst>.+?-).*?(?<FinAccnt>.*)";
    // Explanation of the pattern:
    // ^                     = start of the string
    // .*?                   = lazily skip zero or more characters (of any kind, not only white space)
    // Financial Institution = find this exact string
    // .*?:                  = lazily skip up to the ":" character
    // (?<FinInst>.+?-)      = the outer (parens) capture the matched portion as a group;
    //                         ?<FinInst> lets the group be read back by the name "FinInst";
    //                         .+?- takes one or more characters up to AND INCLUDING the first
    //                         hyphen, so a name made of several words is captured as a whole
    // (?<FinAccnt>.*)       = a second named group holding the rest of the line after the hyphen
    var RegExFinInst = new Regex(patFinancial);

    // Dates: two-digit month, two-digit day, four-digit year separated by literal dots.
    // The dots are escaped (an unescaped "." matches ANY character) and the year uses
    // \d (an unescaped "d" would only match the letter d).
    // NOTE(review): the sample "10.02.2021" is ambiguous; if the file is actually
    // day.month.year, the sMonth and sDay groups must be swapped - confirm with the data owner.
    var patFXSettlement = @"^.*?FX Settlement Date.*?:.*?(?<sMonth>(0[1-9]|1[0-2]))\.(?<sDay>(0[1-9]|[12][0-9]|3[01]))\.(?<sYear>\d{4})";
    var RegSettle = new Regex(patFXSettlement);

    // Everything after the colon is the file id.
    var patReconFile = @"^.*?Reconciliation File ID.*?:.*?(?<FileId>.*)";
    var RegRecon = new Regex(patReconFile);

    // Three upper-case letters after the colon are the currency code.
    var patTxnCurr = @"^.*?Transaction Currency.*?:.*?(?<Currency>[A-Z]{3}).*";
    var RegTxnCurr = new Regex(patTxnCurr);

    // go through each line
    foreach (var s in txtLines)
    {
        // See if the current line matches the Financial Institution pattern.
        // Named groups are read by name, so their ordinal position in the
        // expression does not matter.
        var hasMatch = RegExFinInst.Match(s);
        if (hasMatch.Success)
        {
            MessageBox.Show("Financial Institution Group: " + hasMatch.Groups["FinInst"] + "\r\n"
                + "Account: " + hasMatch.Groups["FinAccnt"]);
            // done with this line
            continue;
        }

        // if not, try the next pattern, and the next, and the next
        hasMatch = RegSettle.Match(s);
        if (hasMatch.Success)
        {
            MessageBox.Show("FX Settlement Month: " + hasMatch.Groups["sMonth"]
                + "  Day: " + hasMatch.Groups["sDay"]
                + " Year: " + hasMatch.Groups["sYear"]);
            // done with this line
            continue;
        }

        hasMatch = RegRecon.Match(s);
        if (hasMatch.Success)
        {
            MessageBox.Show("Reconcilliation File: " + hasMatch.Groups["FileId"]);
            // done with this line
            continue;
        }

        hasMatch = RegTxnCurr.Match(s);
        if (hasMatch.Success)
        {
            MessageBox.Show("Transaction Currency: " + hasMatch.Groups["Currency"]);
            // done with this line
        }
    }
}


// Fragment of a hand-written (non-regex) parser for the same report.
// NOTE(review): this excerpt has lost its braces and several statements are
// cut off mid-expression (see the notes below), so it does not compile as shown.
// `text`, `Helper`, `IsLastIndex`, `IsSubStrEqualToSpecificStr`, `Transaction` and
// `SettlementDetail` are declared outside this excerpt.
IRuntimeServices runtimeServices = new RuntimeServices();
// Raw header sections and raw table sections, collected by the scan loop below.
List<string> transactionTitles = new();
List<string> transactionDetails = new();
// Marker text that the scan loop compares against while inside a table section.
string constText = "Financial Institution";
bool isTitleFinished = false;
// NOTE(review): counterTable is never incremented in the visible lines - confirm.
int counterTable = 0;
int counterTitle = 0;
// Character-by-character scan of `text`; j remembers where the current section started.
for (int i = 0, j = i; i < text.Length; i++)
// First '+' seen while still in the header: store the header section and switch to table mode.
if (text[i] == '+' && !isTitleFinished)
Helper.AddItem(transactionTitles, text, j, counterTitle);
isTitleFinished = true;
j = i;
counterTitle = 0;
// Still in the header: count its characters.
else if(!isTitleFinished && text[i] != '+')
counterTitle ++;
if (isTitleFinished)
// NOTE(review): `text.Length >= i + constText.Length` holds for every position except
// near the end of the text, which looks inverted for an end-of-input check - confirm.
if (text.Length >= i + constText.Length || text.IsLastIndex(i))
Helper.AddItem(transactionDetails, text, j,null);
// Marker found again: store the table section and go back to header mode.
else if (text.IsSubStrEqualToSpecificStr(i,constText))
Helper.AddItem(transactionDetails, text, j, counterTable);
isTitleFinished = false;
counterTable = 0;
j = i;
ICollection<Transaction> transactions = new List<Transaction>();
// One iteration per collected header section; the same index is used for transactionDetails.
for (int i = 0; i < transactionTitles.Count; i++)
// NOTE(review): 'n' and 'r' are presumably '\n' and '\r' with the backslashes lost in
// extraction, and the Split call is cut off after the comma - confirm against the original.
string[] titlePairs = transactionTitles[i]
.Split(new char[] { 'n', 'r' }, 

// Header lines are split on ":" into name/value pairs.
Dictionary<string, string> transactionTitlesDict = new ();
for (int j = 0; j < titlePairs.Length; j++)
string[] nameAndValue = titlePairs[j].Split(":");
// A line without ":" makes nameAndValue[1] throw; a repeated name makes Add throw.
transactionTitlesDict.Add(nameAndValue[0].Trim(), nameAndValue[1].Trim());
// NOTE(review): statement is truncated after `runtimeServices` in this excerpt.
Transaction transaction = runtimeServices

// NOTE(review): the three Split calls below are also cut off after the comma.
string[] detailPairs = transactionDetails[i]
.Split(new char[] { 'n', 'r' },
// Elements 1 and 2 of detailPairs are split on '!' as the two column-title rows.
string[] detailTitlesPart1 = detailPairs[1]
.Split(new char[] { 'n', 'r','!' },
string[] detailTitlesPart2 = detailPairs[2]
.Split(new char[] { 'n', 'r', '!' },
IList<string> transactionDetailsTitles = new List<string>();
// Both title rows must have the same number of columns.
if(detailTitlesPart1.Length != detailTitlesPart2.Length)
throw new Exception("Invalid format");
// Join the two title rows column by column ("<row 1 text> <row 2 text>").
// NOTE(review): the receiver of `.Add` is missing; presumably transactionDetailsTitles.
for (int p = 0; p < detailTitlesPart1.Length; p++)
.Add($"{detailTitlesPart1[p].Trim()} {detailTitlesPart2[p].Trim()}");
IList<string[]> transactionDetailsData = new List<string[]>();
// Starts at element 4 and stops two elements before the end; presumably this skips the
// title rows and the trailing totals - confirm.
for (int k = 4; k < detailPairs.Count() - 2; k++)
// Data cells are separated by two spaces or '!'; the call is truncated in this excerpt.
string[] data = detailPairs[k]
.Split(new[] { "  ","!" },
Dictionary<string, string> transactionDetailsDict = new();
foreach (string[] transactionDetailDataRow in transactionDetailsData)
for (int l = 0; l < transactionDetailsTitles.Count; l++)
// Each data row must have exactly one cell per column title.
if (transactionDetailDataRow.Count() != transactionDetailsTitles.Count)
throw new Exception("Invalid format");
// NOTE(review): the receiver of `.Add` is missing; presumably transactionDetailsDict.
.Add(transactionDetailsTitles[l].Trim(), transactionDetailDataRow[l].Trim());
// Don't pay attention to this part
SettlementDetail settlementDetail = runtimeServices
