Confluence wiki标记-使用Regex确定表行类型



在 Confluence wiki v6.0 中有两种不同的表类型。

所以我很难通过第一行来确定表类型(我用类似 new Regex(@"(\|(\r\n|\r|\n)(.*?)\|)+"); 的正则表达式检测换行,然后用 Matches 把表按行分割),但是

表行可能看起来像:

如果是标头行:

||标题1||标题2||标题3||

如果是常规行:

|单元格A1|单元格A2|单元格A3|

如果是垂直表的行:

||标题|单元格B2|单元格B3|

我尝试使用类似 ^(\|\|.*?\|) 的表达式,但发现它也会匹配标头行。

之后我又尝试了 ^(\|\|.*?\|\|),但由于标头的标记方式,它只能匹配标头行,对常规行不起作用。

那么,是否可以通过使用Regex来确定行类型,或者至少可以说是垂直行?

还是最好写一些东西来一步一步地处理行?

如果不使用正则表达式,用 JavaScript 来写的话,大致如下:

简单字符串扫描器

/**
 * Minimal forward-only character scanner over a single string.
 *
 * Instance state:
 *   currentString - the input split into single UTF-16 code units
 *   position      - index of the current character
 *   errorList     - messages recorded through error()
 */
var Scanner = (function () {
    /**
     * @constructor
     * @param {string} text - Text to scan.
     */
    function Scanner(text) {
        this.currentString = text.split('');
        this.position = 0;
        this.errorList = [];
    }

    /**
     * Returns the character at the current position without consuming it.
     * @returns {string|number} The current character, or -1 once the
     *     position is past the end of the input.
     */
    Scanner.prototype.getChar = function () {
        var chars = this.currentString;
        if (this.position < chars.length) {
            return chars[this.position];
        }
        return -1;
    };

    /**
     * Advances to the next character. When the scanner is already past the
     * end of the input, the position is left unchanged and an
     * "EOL reached" error is recorded instead.
     */
    Scanner.prototype.nextChar = function () {
        if (this.position < this.currentString.length) {
            this.position++;
            return;
        }
        this.error("EOL reached");
    };

    /**
     * Records an error message tagged with the current position.
     * @param {string} errorMsg - Description of the problem.
     */
    Scanner.prototype.error = function (errorMsg) {
        // push() must be *called*; indexing it (push[...]) silently records
        // nothing. The separators are real newlines.
        this.errorList.push(
            "Error at position " + this.position + "\nMessage: " + errorMsg + ".\n"
        );
    };

    return Scanner;
})();

简单解析器

/**
 * Parser for one line of Confluence-style table markup.
 *
 * Grammar:
 *   LINE         ::= { CELL }
 *   CELL         ::= '|' CELL1
 *   CELL1        ::= HEADER_CELL | REGULAR_CELL
 *   HEADER_CELL  ::= '|' TEXT
 *   REGULAR_CELL ::= TEXT
 *
 * Uses `Scanner` for character access and `TableCellType` for the cell-kind
 * ids and names; both must be in scope when the methods run.
 *
 * Instance state:
 *   scanner   - Scanner over the row being parsed (an empty object until
 *               parseRow() is called)
 *   rawText   - the row string last given to parseRow()
 *   cellsData - raw cells collected so far: { typeId, type, text }
 */
function RowParser() {
    this.scanner = {};
    this.rawText = "";
    this.cellsData = [];
}

RowParser.prototype = {
    constructor: RowParser,

    /**
     * Parses `row`, replacing any previously collected cells.
     * @param {string} row - Row markup, e.g. "||H1||H2||" or "|a|b|".
     */
    parseRow: function (row) {
        this.scanner = new Scanner(row);
        this.rawText = row;
        this.cellsData = [];
        this.proceedNext();
    },

    /**
     * LINE: consumes cells for as long as the current character is '|'.
     * Records a scanner error if anything other than end of input follows.
     */
    proceedNext: function () {
        var scanner = this.scanner;
        while (scanner.getChar() === '|') {
            this.proceedCell();
        }
        if (scanner.getChar() !== -1) {
            scanner.error("EOL expected, " + scanner.getChar() + " got");
        }
    },

    /**
     * CELL: consumes the leading '|' and parses what follows.
     * Does nothing when the current character is not '|'.
     */
    proceedCell: function () {
        var scanner = this.scanner;
        if (scanner.getChar() === '|') {
            scanner.nextChar();
            this.proceedHeaderCell();
        }
    },

    /**
     * CELL1: a second '|' marks a header cell, anything else a regular cell.
     */
    proceedHeaderCell: function () {
        if (this.scanner.getChar() === '|') {
            this.onHeaderCell();
        } else {
            this.onRegularCell();
        }
    },

    /**
     * HEADER_CELL: consumes the second '|' and reads the cell text.
     * Records a scanner error when the current character is not '|'.
     */
    onHeaderCell: function () {
        var scanner = this.scanner,
            currentChar = scanner.getChar();
        if (currentChar === '|') {
            scanner.nextChar();
            this.proceedInnerText(TableCellType.info.Header);
        } else {
            scanner.error("Expected '|' got " + currentChar + ".");
        }
    },

    /**
     * REGULAR_CELL: reads the cell text.
     */
    onRegularCell: function () {
        this.proceedInnerText(TableCellType.info.Regular);
    },

    /**
     * TEXT: collects characters up to the next '|' or end of input and
     * appends the resulting cell to `cellsData`.
     * @param {number} cellType - A `TableCellType.info` id.
     */
    proceedInnerText: function (cellType) {
        var scanner = this.scanner,
            typeData = TableCellType.getValueById(cellType),
            innerText = [];
        while (scanner.getChar() !== '|' && scanner.getChar() !== -1) {
            innerText.push(scanner.getChar());
            scanner.nextChar();
        }
        this.cellsData.push({
            typeId: typeData.id,
            type: typeData.name,
            text: innerText.join("")
        });
        // No call back into proceedNext() here: its loop already picks up
        // the next cell, which keeps stack depth constant for long rows.
    },

    /**
     * Returns the parsed cells of the last row.
     *
     * A closed row ends with '|', which the parser sees as a final cell
     * with empty text. If the last cell is not empty (or there are no
     * cells), a "No close tag" error is recorded on the scanner and
     * nothing is returned.
     *
     * Cells whose raw text is empty are dropped; the remaining cells are
     * returned as copies with trimmed text, so `cellsData` is left
     * untouched and repeated calls give the same result.
     *
     * @returns {Array<{typeId: number, type: string, text: string}>|undefined}
     */
    getRowData: function () {
        var cells = this.cellsData,
            lastCell = cells[cells.length - 1];
        if (!lastCell || lastCell.text.length !== 0) {
            this.scanner.error("No close tag at row " + this.rawText + ".");
            return undefined;
        }
        return cells
            .filter(function (cell) {
                return cell.text.length !== 0;
            })
            .map(function (cell) {
                return {
                    typeId: cell.typeId,
                    type: cell.type,
                    text: cell.text.trim()
                };
            });
    }
};

上面用到的单元格类型枚举 TableCellType

/**
 * Enum-like description of the kinds of table cell.
 *
 *   info - symbolic name -> numeric id
 *   data - one descriptor record ({ id, name }) per id
 */
var TableCellType = {
    info: {
        Regular: 10,
        Header: 20
    },
    data: [
        { id: 10, name: "regular" },
        { id: 20, name: "header" }
    ],
    /**
     * Finds the descriptor record for a cell-kind id.
     * @param {number} id - One of the values in `info`.
     * @returns {{id: number, name: string}|undefined} The first record in
     *     `data` whose id is strictly equal to `id`, or undefined if none.
     */
    getValueById: function (id) {
        var records = this.data;
        for (var index = 0; index < records.length; index++) {
            if (records[index].id === id) {
                return records[index];
            }
        }
        return undefined;
    }
};

用法:

// Usage example: parse a row string, then read back the collected cells.
var rowParser = new RowParser();
// NOTE(review): the lone `n` between the two rows looks like a "\n" escape
// that lost its backslash — confirm before relying on this input.
var row = "||AAA||BBB||CCC||n|Hi|all|people!|";
rowParser.parseRow(row);
// Holds whatever getRowData() returns for the row parsed above.
var result = rowParser.getRowData();

最新更新