将带有坐标的文档中的单词数组转换为句子



我在文档中有一个单词数组及其坐标,我想将它们变成句子。 我的数组输入:

[
{
"bounds": [
{
"x": 10,
"y": 10
},
{
"x": 15,
"y": 10
},
{
"x": 15,
"y": 15
},
{
"x": 10,
"y": 15
}
],
"desc": "Hey"
},
{
"bounds": [
{
"x": 18,
"y": 10
},
{
"x": 24,
"y": 10
},
{
"x": 24,
"y": 15
},
{
"x": 18,
"y": 15
}
],
"desc": "Name"
},
{
"bounds": [
{
"x": 18,
"y": 20
},
{
"x": 24,
"y": 20
},
{
"x": 24,
"y": 25
},
{
"x": 18,
"y": 25
}
],
"desc": "What"
},
{
"bounds": [
{
"x": 18,
"y": 20
},
{
"x": 24,
"y": 20
},
{
"x": 24,
"y": 25
},
{
"x": 18,
"y": 25
}
],
"desc": "Sup"
}
]

程序输出应为:

Hey Name
What Sup
  • 上面的坐标并不精确,仅作示例;算法还需要处理位于句子中间的单词以及其他边界情况。

实现这一点的最佳方法是什么(理想情况下用 JavaScript 实现)?

您可以使用哈希表并按行和位置对其进行排序,然后按此顺序获取文本。

// Sample input: each entry carries the word text in `desc` and four corner
// points in `bounds`. Only bounds[0] (used as the word's start position) and
// bounds[2] (stored as `end`) are read below.
const data = [
    { bounds: [{ x: 10, y: 10 }, { x: 15, y: 10 }, { x: 15, y: 15 }, { x: 10, y: 15 }], desc: "Hey" },
    { bounds: [{ x: 18, y: 10 }, { x: 24, y: 10 }, { x: 24, y: 15 }, { x: 18, y: 15 }], desc: "Name" },
    { bounds: [{ x: 18, y: 20 }, { x: 24, y: 20 }, { x: 24, y: 25 }, { x: 18, y: 25 }], desc: "What" },
    { bounds: [{ x: 18, y: 20 }, { x: 24, y: 20 }, { x: 24, y: 25 }, { x: 18, y: 25 }], desc: "Sup" },
];

// Two-level lookup: hash[y][x] is the list of words whose first corner is at
// (x, y). Words sharing the exact same start point keep their input order.
const hash = {};

data.forEach(function (word) {
    const { x, y } = word.bounds[0];
    hash[y] = hash[y] || {};
    hash[y][x] = hash[y][x] || [];
    hash[y][x].push({ desc: word.desc, end: word.bounds[2] });
});

// Object keys are strings, so both sorts rely on `a - b` coercing them to
// numbers; a default (lexicographic) sort would put "100" before "20".
const result = Object.keys(hash)
    .sort((a, b) => a - b)
    .map(y => Object.keys(hash[y])
        .sort((a, b) => a - b)
        .reduce((words, x) => [...words, ...hash[y][x].map(entry => entry.desc)], [])
        .join(' ')
    )
    // One output line per distinct y. This must be the newline escape '\n';
    // a bare 'n' would glue the lines together with the letter "n".
    .join('\n');

console.log(result);
console.log(hash);
/* Lets the element with class "as-console-wrapper" grow to full height and
   anchors it at the top (presumably the snippet runner's console panel;
   confirm against the hosting page). This is CSS, not part of the script. */
.as-console-wrapper { max-height: 100% !important; top: 0; }

最新更新