在 Confluence wiki v6.0 中有两种不同的表类型,
所以我很难仅通过第一行来确定表类型。我用类似 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+")
的正则表达式检测换行,然后用 Matches
把表按行分割,但是
表行可能是以下几种形式:
如果是标头行:
||标题1||标题2||标题3||
如果是常规行:
|单元格A1|单元格A2|单元格A3|
如果是垂直表的行:
||标题|单元格B2|单元格B3|
我尝试使用类似 ^(\|\|.*?\|)
的表达式,但发现它也会匹配标头行。
之后,考虑到标头标记的特点,我又尝试了 ^(\|\|.*?\|\|),
但如果是常规行,它就不起作用了。
那么,是否可以通过使用正则表达式
来确定行类型,或者至少判断它是否为垂直表的行?
还是最好自己写一些代码来一步一步地处理行?
如果不使用正则表达式,而是用 JavaScript
来编写,它看起来像下面这样。
简单的字符串扫描器:
var Scanner = (function () {
    /**
     * Character-by-character cursor over a string that collects error
     * messages instead of throwing.
     *
     * @param {string} text - Text to scan; it is split into single characters.
     */
    function Scanner(text) {
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];
        return this;
    }

    /**
     * Returns the character at the current position without advancing.
     *
     * @returns {string|number} The current character, or -1 once the
     *     position is at or past the end of the text.
     */
    Scanner.prototype.getChar = function () {
        var chars = this.currentString;
        if (this.position < chars.length) {
            return chars[this.position];
        }
        return -1;
    };

    /**
     * Advances the cursor by one character. When the cursor is already at
     * the end of the text, the position is left unchanged and an
     * "EOL reached" error is recorded.
     */
    Scanner.prototype.nextChar = function () {
        if (this.position < this.currentString.length) {
            this.position++;
            return;
        }
        this.error("EOL reached");
    };

    /**
     * Appends a formatted error message, including the current position,
     * to errorList.
     *
     * @param {string} errorMsg - Description of the problem.
     */
    Scanner.prototype.error = function (errorMsg) {
        var message = "Error at position " + this.position +
            "\nMessage: " + errorMsg + ".\n";
        // push must be *called*; indexing it (push[message]) stores nothing.
        this.errorList.push(message);
    };

    return Scanner;
})();
简单解析器
/**
 * Row grammar handled by the parser:
 *
 *   LINE         ::= { CELL }
 *   CELL         ::= '|' CELL1
 *   CELL1        ::= HEADER_CELL | REGULAR_CELL
 *   HEADER_CELL  ::= '|' TEXT
 *   REGULAR_CELL ::= TEXT
 *
 * Creates a parser with an empty placeholder scanner, empty raw text and
 * an empty list of parsed cells.
 */
function RowParser() {
    var parser = this;

    parser.scanner = {};
    parser.rawText = "";
    parser.cellsData = [];

    return parser;
}
RowParser.prototype = {
    /**
     * Parses a raw row string. Resets the parser state (scanner, raw text,
     * collected cells) and then consumes the whole input.
     *
     * @param {string} row - Raw wiki-markup text to parse.
     */
    parseRow: function (row) {
        var me = this;
        me.scanner = new Scanner(row);
        me.rawText = row;
        me.cellsData = [];
        me.proceedNext();
    },

    /**
     * LINE ::= { CELL }. Consumes cells while the current character is '|'.
     * If anything other than end of input remains afterwards, an error is
     * reported to the scanner.
     */
    proceedNext: function () {
        var me = this,
            scanner = me.scanner;
        while (scanner.getChar() === '|') {
            me.proceedCell();
        }
        if (scanner.getChar() !== -1) {
            scanner.error("EOL expected, " + scanner.getChar() + " got");
        }
    },

    /**
     * CELL ::= '|' CELL1. Skips the leading '|' and dispatches on the cell
     * kind. Does nothing when the current character is not '|'.
     */
    proceedCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedHeaderCell();
        }
    },

    /**
     * CELL1 ::= HEADER_CELL | REGULAR_CELL. A second '|' right after the
     * first one marks a header cell; anything else is a regular cell.
     */
    proceedHeaderCell: function () {
        var me = this,
            scanner = me.scanner;
        if (scanner.getChar() === '|') {
            me.onHeaderCell();
        } else {
            me.onRegularCell();
        }
    },

    /**
     * HEADER_CELL ::= '|' TEXT. Skips the second '|' and reads the cell
     * text as a header cell. If the current character is not '|', an error
     * is reported to the scanner and nothing is consumed.
     */
    onHeaderCell: function () {
        var me = this,
            scanner = me.scanner,
            cellType = TableCellType.info;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            me.proceedInnerText(cellType.Header);
        } else {
            // Report the character actually found; the previous code read an
            // undeclared variable here and threw a ReferenceError instead.
            scanner.error("Expected '|' got " + scanner.getChar() + ".");
        }
    },

    /**
     * REGULAR_CELL ::= TEXT. Reads the cell text as a regular cell.
     */
    onRegularCell: function () {
        var me = this,
            cellType = TableCellType.info;
        me.proceedInnerText(cellType.Regular);
    },

    /**
     * Collects characters up to the next '|' or end of input, appends the
     * resulting cell to cellsData and continues with the rest of the line.
     *
     * @param {number} cellType - Cell type id, looked up through
     *     TableCellType.getValueById.
     */
    proceedInnerText: function (cellType) {
        var me = this,
            scanner = me.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];
        while (scanner.getChar() !== '|' && scanner.getChar() !== -1) {
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }
        me.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
        me.proceedNext();
    },

    /**
     * Returns the parsed cells whose raw text is non-empty, with their text
     * trimmed in place.
     *
     * A closing '|' produces an empty cell. If no cell is empty, the row is
     * treated as missing its closing tag: an error is reported to the
     * scanner and nothing is returned.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     *     The non-empty cells, or undefined when no closing tag was found.
     */
    getRowData: function () {
        var me = this,
            scanner = me.scanner,
            data = me.cellsData;
        var filteredData = data.filter(function (el) {
            return el.text.length !== 0;
        });
        // Nothing was filtered out, so there was no empty trailing cell.
        if (filteredData.length === data.length) {
            scanner.error("No close tag at row " + me.rawText + ".");
            return;
        }
        for (var i = 0; i < filteredData.length; i++) {
            filteredData[i].text = filteredData[i].text.trim();
        }
        return filteredData;
    }
};
上面用到的单元格类型枚举 TableCellType:
/**
 * Cell type enumeration: `info` maps symbolic names to numeric ids and
 * `data` holds the descriptor ({id, name}) for each id.
 */
var TableCellType = {
    info: { Regular: 10, Header: 20 },
    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],
    /**
     * Looks up a cell type descriptor by its numeric id (strict comparison).
     *
     * @param {number} id - Cell type id.
     * @returns {{id: number, name: string}|undefined} The first descriptor
     *     with that id, or undefined when none matches.
     */
    getValueById: function (id) {
        var descriptors = this.data;
        for (var index = 0; index < descriptors.length; index++) {
            if (descriptors[index].id === id) {
                return descriptors[index];
            }
        }
        return undefined;
    }
};
用法:
var rowParser = new RowParser();
// Sample input: a header row and a regular row separated by a newline.
var row = "||AAA||BBB||CCC||\n|Hi|all|people!|";
rowParser.parseRow(row);
// Cells with non-empty raw text, or undefined when no closing '|' was found.
var result = rowParser.getRowData();