受iOS7 iMessage下一个单词预测的启发,我决定尝试编写一个脚本,根据用户输入,了解哪些单词/字母最有可能完成用户的当前单词,或者下一个最有可能需要哪个单词。
为了做到这一点,我将使用一个非常类似于Radix Tree(又名Patricia Trie)的数据结构。
以这个用户输入为例:
我喜欢冰淇淋
由此,我的目标是生成以下数据结构:
var speakData = {
"I": { //the key
value: "I", //the stored value for this unit of the combination
count: 1, //the number of times that this combination has occured
followables: { // the next level of the tree; all combinations
// that might follow this one
" ": {
value: " ",
count: 1,
followables: {
"l": {
value: "l",
count: 1,
followables: {
"i": {
value: "i",
count: 1,
followables: {
"k": {
value: "k",
count: 1,
followables: {
"e": {
value: "e",
count: 1,
followables: {
// and so on
}
}
}
}
}
}
}
}
}
}
}
}
}
这本质上是一个基数树,带有一些额外的信息,使我能够权衡用户下一步可能想要键入的习得可能性的概率。
根据上述极其有限的数据集,当用户键入"I"时,我们最好(也是唯一)的猜测是下一个字符将是"。
既然我已经解释了我的目标和方法,下面是我的问题:
如何根据任何给定的用户输入构建此数据结构?
function learn(message, brain){
for(var i = 0; i < message.length; i++){
brain[message[i]] = {};
brain[message[i]].value = message[i];
brain[message[i]].count++;
brain[message[i]].followables =
}
}
这是我所了解的,但我不知道如何递归地将下一个值插入正确的位置。
只需制作一个简单的递归函数,如下所示:
function learn(message, brain){
if(message.length == 0) return {}; // or do something else
var ch = message[0]; // get the first character
if(!brain[ch]) { // create new node when not exists
brain[ch] = {value:ch,count:1,followables:{}};
} else { // increment count when exist
brain[ch].count += 1;
}
var substr = message.substring(1); // remove first character
if(substr) { // do it for the remaining substring
brain[ch].followables = learn(substr,brain[ch].followables)
}
return brain;
}
当然,您也可以使它迭代,而不是递归。
// test code:
var brain = {};
brain = learn('test',brain);
brain = learn('testing',brain);
brain = learn('tes',brain);
brain = learn('yay',brain);
console.log(JSON.stringify(brain, null, 2));
会输出这样的东西:
{
"t": {
"value": "t",
"count": 3,
"followables": {
"e": {
"value": "e",
"count": 3,
"followables": {
"s": {
"value": "s",
"count": 3,
"followables": {
"t": {
"value": "t",
"count": 2,
"followables": {
"i": {
"value": "i",
"count": 1,
"followables": {
"n": {
"value": "n",
"count": 1,
"followables": {
"g": {
"value": "g",
"count": 1,
"followables": {}
}
}
}
}
}
}
}
}
}
}
}
}
},
"y": {
"value": "y",
"count": 1,
"followables": {
"a": {
"value": "a",
"count": 1,
"followables": {
"y": {
"value": "y",
"count": 1,
"followables": {}
}
}
}
}
}
}