所以我得到了这个文件:
A-4 09-20-2083 B/2
A/4 04/27/2048 C-2
A/2b 1/24/75 A/4b
A/4 05/07/2049 A/2b
A/2b 7/ 3/31 B/2
A/2b 5/11/53 B-4b
C-4 14-07-2051 A/2
C-2b 22-12-44 A/4
C-4b 11-11-2055 C.2
A/2b 5/11/16 C-2
C.4 08.01.2058 A-4b
B-4b 2010-11- 2 B/2b
B/2b 26/ 3/13 A-4b
C-2 18-06-06 B-4
C.2 08.08.09 C-2
A/2 01/20/99 B-2b
C.4 18.08.2077 B/2
A-4b 10-21-2086 B/4b
B/2b 50/ 2/ 5 A/2
A/4 03/08/2001 A-2b
A/4b 11/ 8/2085 B/4
A/4 03/19/2058 C.2b
A/4b 6/18/2051 B-2
A-4 03-01-2001 B-2b
A-4 02-14-2066 C-4
A/2 06/23/48 A-4
C-4b 18- 8-2065 B/2b
C-2 06-04-94 B/4
B/4b 2057/ 9/21 A-2b
C.2b 21. 8.31 A/2b
A/4b 11/13/2098 A/4
A/2 04/31/63 C-2
B/2b 22/ 5/ 7 A-2
C-4 11-09-2032 B/4b
C.4 08.05.2085 B-2
B-4 2038-10-22 C.4
A/2 03/09/37 C.2b
A/2b 4/ 9/21 A/4
C.2b 3.12.95 B-4
C-4b 26- 1-2004 C-2
正如你所看到的,有些日期(或者说有些行)中夹杂着不规则的空格……那么,如何使用正则表达式或其他读取方式正确读取这些输入呢?非常感谢!
读取文件中的每一行,然后使用正则表达式 "\s+"(匹配一个或多个连续的空白字符)对该行进行拆分。
编辑:抱歉,我没有注意到日期值中也有空格。您可以使用 "[ ]{2,}" 作为匹配 2 个或 2 个以上空格的正则表达式。
编辑 2:这是从文件中读取每一项数据的完整代码:
/**
 * Reads the given file line by line and prints the second column (the date) of each line.
 *
 * <p>Each line is trimmed and then split on runs of two or more spaces, so a single
 * space inside a value (for example the date {@code "7/ 3/31"}) does not split it.
 * Lines that yield fewer than two tokens print nothing.
 *
 * <p>If the file does not exist, a "not found" message is printed to standard output.
 * Any other I/O error has its stack trace printed. No exception is propagated to the caller.
 *
 * @param fileName path of the file to read
 */
public static void read(String fileName){
    File file = new File(fileName);
    // try-with-resources closes the readers on every path. The previous manual
    // finally block dereferenced a null reader (NullPointerException) whenever
    // the file could not be opened.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            // Split into values on two or more consecutive spaces.
            String[] dataArray = line.trim().split("[ ]{2,}");
            // Index 1 is the date column; print it when present.
            if (dataArray.length > 1) {
                System.out.println(dataArray[1]);
            }
        }
    } catch (FileNotFoundException e) {
        // Report the missing file by name.
        System.out.println("Sorry, " + file.getName() + " not found.");
    } catch (IOException e) {
        // Covers read failures as well as failures while closing the reader.
        e.printStackTrace();
    }
}
这里有一个快速而简陋的解决方案,只需使用 Scanner 即可。某些边缘情况下它可能会失败,但至少对于给定的输入及其一些变体,它似乎可以正常工作。
import java.util.Scanner;
import java.util.Arrays;
/**
 * Demo that parses the sample records with a {@link Scanner}.
 *
 * <p>Each record is one line of whitespace-separated tokens. The first token and the
 * last token are the two outer fields; every token in between belongs to the date,
 * which may itself contain spaces (for example {@code "7/ 3/31"}). The date tokens are
 * concatenated without separators, and each record is printed as
 * {@code first||date||third}.
 */
class Main
{
    /**
     * Parses the embedded sample input and prints one {@code first||date||third}
     * line per record to standard output.
     *
     * @param args unused
     */
    public static void main (String[] args)
    {
        // Records must be separated by real newline characters ("\n"); otherwise
        // the Scanner treats the whole input as a single line.
        String input =
            "A-4 09-20-2083 B/2\n"
            + "A/4 04/27/2048 C-2\n"
            + "A/2b 1/24/75 A/4b\n"
            + "A/4 05/07/2049 A/2b\n"
            + "A/2b 7/ 3/31 B/2\n"
            + "A/2b 5/11/53 B-4b\n"
            + "C-4 14-07-2051 A/2\n"
            + "C-2b 22-12-44 A/4\n"
            + "C-4b 11-11-2055 C.2\n"
            + "A/2b 5/11/16 C-2\n"
            + "C.4 08.01.2058 A-4b\n"
            + "B-4b 2010-11- 2 B/2b\n"
            + "B/2b 26/ 3/13 A-4b\n"
            + "C-2 18-06-06 B-4\n"
            + "C.2 08.08.09 C-2\n"
            + "A/2 01/20/99 B-2b\n"
            + "C.4 18.08.2077 B/2\n"
            + "A-4b 10-21-2086 B/4b\n"
            + "B/2b 50/ 2/ 5 A/2\n"
            + "A/4 03/08/2001 A-2b\n"
            + "A/4b 11/ 8/2085 B/4\n"
            + "A/4 03/19/2058 C.2b\n"
            + "A/4b 6/18/2051 B-2\n"
            + "A-4 03-01-2001 B-2b\n"
            + "A-4 02-14-2066 C-4\n"
            + "A/2 06/23/48 A-4\n"
            + "C-4b 18- 8-2065 B/2b\n"
            + "C-2 06-04-94 B/4\n"
            + "B/4b 2057/ 9/21 A-2b\n"
            + "C.2b 21. 8.31 A/2b\n"
            + "A/4b 11/13/2098 A/4\n"
            + "A/2 04/31/63 C-2\n"
            + "B/2b 22/ 5/ 7 A-2\n"
            + "C-4 11-09-2032 B/4b\n"
            + "C.4 08.05.2085 B-2\n"
            + "B-4 2038-10-22 C.4\n"
            + "A/2 03/09/37 C.2b\n"
            + "A/2b 4/ 9/21 A/4\n"
            + "C.2b 3.12.95 B-4\n"
            + "C-4b 26- 1-2004 C-2\n";

        try (Scanner scanner = new Scanner(input))
        {
            while (scanner.hasNextLine())
            {
                String line = scanner.nextLine().trim();
                // Splitting on a single space yields empty strings for runs of
                // spaces; drop them so only real tokens remain.
                String[] filtered = Arrays.stream(line.split(" "))
                    .filter(s -> !s.isEmpty())
                    .toArray(String[]::new);

                // A record needs the two outer fields plus at least one date token.
                // Skip blank or malformed lines instead of failing on them.
                if (filtered.length < 3)
                {
                    continue;
                }

                String first = filtered[0];
                String third = filtered[filtered.length - 1];
                // Everything between the outer fields is the date, possibly split
                // by embedded spaces; glue the pieces back together.
                String second = String.join("", Arrays.copyOfRange(filtered, 1, filtered.length - 1));

                System.out.println (first + "||" + second + "||" + third);
            }
        }
    }
}
如果您需要快速编写,可以直接用正则表达式匹配整个文件的内容……
// Read the whole file into memory. Backslashes in a Java string literal must be
// doubled, otherwise "\t" becomes a tab and "\y" does not compile.
String input = Files.readString(Path.of("path\\to\\your\\file.txt"));
//try-catch block omitted
// One record per line: <field> <date> <field>.
//  - (?m) makes ^ and $ match at every line boundary, so the last line matches
//    even without a trailing newline.
//  - The outer fields are \S+ (no whitespace), while the date is a lazy .+? so
//    it can contain single spaces such as "7/ 3/31".
//  - [ \t] is used instead of \s so that a separator can never span two lines.
Pattern.compile("(?m)^[ \\t]*(\\S+)[ \\t]+(.+?)[ \\t]+(\\S+)[ \\t]*$")
        .matcher(input)
        .results()
        .map(m -> {
            String firstGroupWithLetter = m.group(1); //"A-4" from first line
            String date = m.group(2); //"09-20-2083"
            String secondGroupWithLetter = m.group(3); //"B/2"
            // m.group(0) is the full text of the match (the whole record line)
            // Do whatever you want
            return null;
        })
        // further actions
        ;