我正在尝试从字符串中删除一组停用词(stop words)。停用词列表通过下面代码第 3 行的 $.get 加载。如果在 $.get 的回调函数内部执行 console.log(stop_words),可以看到数据已经取到;但这些数据并没有被保存到数组 stop_words 中,因此我无法在 $.get 函数之外使用它们。
注意:如果我直接在原型方法内部写死数组的值,下面的代码可以正常工作。
我该如何把数据赋给 stop_words 数组,才能在 $.get 函数之外使用它?
/**
 * Returns a copy of this string with every stop word removed.
 *
 * The stop-word list is requested from 'rsc/stopord.txt' via $.get and is
 * expected to contain one lower-case word per line.
 *
 * NOTE: the $.get callback is only invoked once the response has arrived, so
 * the loops below can run while stop_words is still empty. Waiting for the
 * list would require returning a promise (or accepting a callback), which
 * would change the string return value callers rely on, so the synchronous
 * signature is kept here.
 *
 * @returns {string} The string without the known stop words, trimmed of
 *     leading and trailing whitespace.
 */
String.prototype.removeStopWords = function() {
    var stop_words = [];
    $.get('rsc/stopord.txt', function(data) {
        // One stop word per line. Trimming drops the "\r" left over from
        // CRLF files; blank lines (e.g. a trailing newline) are skipped.
        stop_words = data.split('\n')
            .map(function(entry) { return entry.trim(); })
            .filter(function(entry) { return entry !== ''; });
    });
    var x;
    var y;
    var word;
    var stop_word;
    var regex_str;
    var regex;
    var cleansed_string = this.valueOf();
    // Split out all the individual words in the phrase. match() returns null
    // when nothing matches (e.g. for an empty string), hence the fallback.
    var words = cleansed_string.match(/[^\s]+|\s+[^\s+]$/g) || [];
    // Review all the words
    for (x = 0; x < words.length; x++) {
        // Trim the word, remove non-alpha characters and lower-case it once,
        // instead of once per stop word.
        word = words[x].replace(/\s+|[^a-z]+/ig, "").toLowerCase();
        // For each word, check all the stop words
        for (y = 0; y < stop_words.length; y++) {
            stop_word = stop_words[y];
            // If the word matches the stop word, remove it from the keywords.
            // A match implies stop_word consists of a-z only, so it is safe
            // to embed in the regular expression without escaping.
            if (word === stop_word) {
                regex_str = "^\\s*" + stop_word + "\\s*$";      // Only word
                regex_str += "|^\\s*" + stop_word + "\\s+";     // First word
                regex_str += "|\\s+" + stop_word + "\\s*$";     // Last word
                regex_str += "|\\s+" + stop_word + "\\s+";      // Word somewhere in the middle
                regex = new RegExp(regex_str, "ig");
                // Remove the word from the keywords
                cleansed_string = cleansed_string.replace(regex, " ");
            }
        }
    }
    return cleansed_string.replace(/^\s+|\s+$/g, "");
};
/**
 * Logs the most frequently used word in the active TinyMCE editor's text.
 *
 * The text is stripped of tags and basic punctuation, lower-cased and passed
 * through String.prototype.removeStopWords (expected to return a string
 * synchronously). The cleaned text, the list of matches for the most used
 * word, and a summary sentence are written to the console.
 * Nothing beyond the cleaned text is logged when it contains no words.
 */
function keywordDensity() {
    var input = tinyMCE.activeEditor.getContent({format : "text"});
    input = input.replace(/(<([^>]+)>)/ig, "") // drop anything that looks like a tag
        .replace(/[,\-"']/g, "")               // drop commas, hyphens and quotes
        .replace(/\./g, " ");                  // full stops become word separators
    input = input.toLowerCase();
    input = input.removeStopWords();
    console.log(input);

    var words = input.split(" ").filter(Boolean);
    if (words.length === 0) {
        return; // nothing to count
    }

    // Keys carry a "_" prefix so that words such as "constructor" cannot
    // collide with properties inherited from Object.prototype.
    var wordCounts = { };
    var i;
    var key;
    for (i = 0; i < words.length; i++) {
        key = "_" + words[i];
        wordCounts[key] = (wordCounts[key] || 0) + 1;
    }

    // Most frequent word first.
    var keysSorted = Object.keys(wordCounts).sort(function(a, b) {
        return wordCounts[b] - wordCounts[a];
    });
    for (i = 0; i < keysSorted.length; i++) {
        // Remove only the "_" prefix added above, not characters of the word.
        keysSorted[i] = keysSorted[i].slice(1);
    }

    var mostUsed = keysSorted[0];
    // Escape regex metacharacters so the word is matched literally.
    var escaped = mostUsed.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var regex = new RegExp("\\b" + escaped, 'g');
    // match() yields null when there is no match; treat that as zero hits.
    var countMostUsed = input.match(regex) || [];
    console.log(countMostUsed);
    console.log("You use the word " + mostUsed + " " + countMostUsed.length + " times");
}
正如 @mistergreen、@pointy 和 @phobia82 在评论中指出的,您需要在回调中进行处理。更好的做法是使用 then,在收到数据之后再执行后续处理。
/**
 * Asynchronously removes every stop word from this string.
 *
 * The stop-word list is loaded from 'stopword.txt' (one word per line) and
 * the removal runs only after the response has arrived.
 *
 * NOTE: the per-word outer loop and the word/stop-word equality check of the
 * earlier synchronous version are intentionally disabled here; each stop word
 * is removed directly with a regular expression.
 *
 * @returns {Object} The promise returned by $.get(...).then(...); it is
 *     fulfilled with the cleaned, trimmed string.
 */
String.prototype.removeStopWords = function() {
    // `this` does not refer to the string inside the then() callback.
    var self = this;
    return $.get('stopword.txt').then(function(data) {
        // Accept both CRLF and LF line endings.
        var stop_words = data.split(/\r?\n/);
        var cleansed_string = self.valueOf();
        var y;
        var stop_word;
        var regex_str;
        var regex;
        for (y = 0; y < stop_words.length; y++) {
            if (stop_words[y] === "") {
                continue; // blank line, e.g. from a trailing newline
            }
            // Escape regex metacharacters so the stop word matches literally
            // and cannot produce an invalid pattern.
            stop_word = stop_words[y].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            // Build the regex
            regex_str = "^\\s*" + stop_word + "\\s*$";      // Only word
            regex_str += "|^\\s*" + stop_word + "\\s+";     // First word
            regex_str += "|\\s+" + stop_word + "\\s*$";     // Last word
            regex_str += "|\\s+" + stop_word + "\\s+";      // Word somewhere in the middle
            regex = new RegExp(regex_str, "ig");
            // Remove the word from the keywords
            cleansed_string = cleansed_string.replace(regex, " ");
        }
        return cleansed_string.replace(/^\s+|\s+$/g, "");
    });
};
注意:我不确定您为什么需要遍历字符串(String)中的每个单词(words),所以我把那部分代码注释掉了。您可以在此处找到可运行的代码。
您还需要按如下方式修改调用该函数的代码:
// removeStopWords() hands back a promise, so the cleaned text is only
// available inside the then() callback.
"abc test1 test2 xyz"
    .removeStopWords()
    .then(function(cleaned) {
        alert(cleaned); // shows "abc xyz"
        // continue here with any processing that needs the result of `removeStopWords`
    });
不建议这样做,因为同步请求会阻塞脚本,直到请求返回结果为止;不过作为参考,您可以用下面这段代码代替 $.get():
// Synchronous variant: with `async: false` the request blocks the script
// until the response has been handled, so stop_words is assigned before the
// statement following this call runs.
$.ajax({
    url: 'rsc/stopord.txt',
    success: function(data) {
        // One stop word per line.
        stop_words = data.split('\n');
    },
    async: false
});
编辑:
您的成功回调是下面这个函数:
function(data) {
stop_words = data.split('n');
}
也就是您传给 $.get() 调用的那个函数。它只有在 $.get 请求收到响应时才会被调用,而这可能需要一些时间……$.get 之后的代码可能在回调被调用之前就已经运行,这正是您遇到问题的原因。