JavaScript 加载 txt 单词列表不会填充数组



我正在尝试从字符串中删除一组停用词。停用词列表通过 $.get 加载(见下面代码的第 3 行)。如果我在 $.get 的回调函数内部执行 console.log(stop_words),可以看到数据;但这些数据并没有被保存到 stop_words 数组中,因此我无法在 $.get 函数之外使用它们。

注意:如果我直接在原型方法内部声明数组的值,下面的代码可以正常工作。

如何将数据赋值给 stop_words 数组,以便我可以在 $.get 函数之外使用它?

/**
 * Returns a copy of this string with every stop word removed.
 *
 * The stop-word list is requested from 'rsc/stopord.txt' (one word per line).
 *
 * NOTE: $.get is asynchronous, so its callback normally runs only after this
 * method has already returned. `stop_words` is then still empty while the
 * loops below execute, and the text comes back merely trimmed. Curing that
 * means delivering the result asynchronously (callback or promise), which
 * would change this method's synchronous string return value.
 *
 * @returns {string} The trimmed text without the stop words known at call time.
 */
String.prototype.removeStopWords = function() {
    var stop_words = [];
    $.get('rsc/stopord.txt', function(data) {
        // One stop word per line; accept both LF and CRLF line endings
        stop_words = data.split(/\r?\n/);
    });
    var x;
    var y;
    var word;
    var stop_word;
    var regex_str;
    var regex;
    var cleansed_string = this.valueOf();
    // Split out all the individual words in the phrase
    // (match() returns null when nothing matches, e.g. for an empty string)
    var words = cleansed_string.match(/[^\s]+|\s+[^\s+]$/g) || [];
    // Review all the words
    for(x=0; x < words.length; x++) {
        // Trim the word and remove non-alpha; done once per word, not once per stop word
        word = words[x].replace(/\s+|[^a-z]+/ig, "").toLowerCase();
        // For each word, check all the stop words
        for(y=0; y < stop_words.length; y++) {
            // Get the stop word
            stop_word = stop_words[y];
            // Blank lines in the list would otherwise produce a regex that only eats whitespace
            if(!stop_word) {
                continue;
            }
            // If the word matches the stop word, remove it from the keywords
            if(word === stop_word) {
                // Build the regex (stop_word is purely alphabetic here, so it needs no escaping)
                regex_str = "^\\s*"+stop_word+"\\s*$";      // Only word
                regex_str += "|^\\s*"+stop_word+"\\s+";     // First word
                regex_str += "|\\s+"+stop_word+"\\s*$";     // Last word
                regex_str += "|\\s+"+stop_word+"\\s+";      // Word somewhere in the middle
                regex = new RegExp(regex_str, "ig");
                // Remove the word from the keywords
                cleansed_string = cleansed_string.replace(regex, " ");
            }
        }
    }
    return cleansed_string.replace(/^\s+|\s+$/g, "");
};
/**
 * Logs the most frequently used word in the active TinyMCE editor.
 *
 * The editor content is read as text, stripped of leftover markup and
 * punctuation, lower-cased and passed through String.prototype.removeStopWords
 * (which must return a string synchronously). Three things are written to the
 * console: the cleaned text, the matches for the top word, and a summary line.
 * If the cleaned text contains no words, a short notice is logged instead.
 *
 * @returns {undefined}
 */
function keywordDensity() {
    var input = tinyMCE.activeEditor.getContent({format : "text"});
    // Drop tags and , - " ' entirely; a period becomes a space so the words
    // on either side of it stay separate
    input = input.replace(/(<([^>]+)>)/ig, "").replace(/[,\-"']/g, "").replace(/\./g, " ");
    input = input.toLowerCase();
    input = input.removeStopWords();
    console.log(input);

    // Split on any whitespace so line breaks in the text also separate words
    var words = input.split(/\s+/).filter(Boolean);
    if (words.length === 0) {
        console.log("No words to analyse");
        return;
    }

    // Prototype-less map: words such as "constructor" cannot collide with inherited keys
    var wordCounts = Object.create(null);
    for (var i = 0; i < words.length; i++) {
        wordCounts[words[i]] = (wordCounts[words[i]] || 0) + 1;
    }
    var keysSorted = Object.keys(wordCounts).sort(function(a, b) {
        return wordCounts[b] - wordCounts[a];
    });

    var mostUsed = keysSorted[0];
    // Escape regex metacharacters so the word is matched literally.
    // \b anchors only the start, so longer words beginning with mostUsed are counted too.
    var regex = new RegExp("\\b" + mostUsed.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g");
    // match() returns null when nothing matches
    var countMostUsed = input.match(regex) || [];
    console.log(countMostUsed);
    console.log("You use the word " + mostUsed + " " + countMostUsed.length + " times");
}

正如 @mistergreen、@pointy 和 @phobia82 在评论中指出的,您需要在回调中进行处理。或者更好的做法是,使用 then 在收到数据之后再进行处理:

/**
 * Removes every stop word listed in 'stopword.txt' from this string.
 *
 * The list is fetched with $.get, so the result is delivered through the
 * returned promise rather than as a plain return value.
 *
 * @returns {Promise<string>} Whatever `$.get(...).then(...)` returns; it
 *     resolves to the trimmed text with all stop words removed.
 */
String.prototype.removeStopWords = function() {
    var self = this; // note: `this` is not the string inside the then-callback
    return $.get('stopword.txt').then(function(data) {
        // One stop word per line; accept both LF and CRLF line endings
        var stop_words = data.split(/\r?\n/);
        var y;
        var stop_word;
        var escaped;
        var regex_str;
        var regex;
        var previous;
        var cleansed_string = self.valueOf();
        // The question's outer per-word loop is not needed here: each stop
        // word is stripped from the whole string directly.
        for(y=0; y < stop_words.length; y++) {
            // Get the stop word, without stray surrounding whitespace
            stop_word = stop_words[y].replace(/^\s+|\s+$/g, "");
            // Blank lines (e.g. after a trailing newline) are not stop words
            if(!stop_word) {
                continue;
            }
            // Match the stop word literally even if it contains regex metacharacters
            escaped = stop_word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            // Build the regex
            regex_str = "^\\s*"+escaped+"\\s*$";      // Only word
            regex_str += "|^\\s*"+escaped+"\\s+";     // First word
            regex_str += "|\\s+"+escaped+"\\s*$";     // Last word
            regex_str += "|\\s+"+escaped+"\\s+";      // Word somewhere in the middle
            regex = new RegExp(regex_str, "ig");
            // Remove the word from the keywords. Repeat until stable: one pass
            // misses a stop word whose leading whitespace was consumed by the
            // match just before it ("a the the b").
            do {
                previous = cleansed_string;
                cleansed_string = cleansed_string.replace(regex, " ");
            } while(cleansed_string !== previous);
        }
        return cleansed_string.replace(/^\s+|\s+$/g, "");
    });
};

注意:我不确定为什么需要逐个遍历字符串中的单词,因此在上面的代码中停用了那部分逻辑。您可以在此处找到可运行的代码。

您还需要更改调用该函数的方式,如下所示:

// removeStopWords() hands back a promise, so the cleansed text is only
// available inside the then-callback.
"abc test1 test2 xyz".removeStopWords().then(function(cleansedText){
  alert(cleansedText); // shows "abc xyz"
  // anything else that depends on the result of `removeStopWords` belongs here
});

不建议这样做,因为同步请求会阻塞脚本,直到返回结果为止;不过作为参考,您可以用下面的代码代替 $.get():

// Synchronous variant of the $.get call: with async set to false, the code
// after this statement runs only once the success callback has filled stop_words.
$.ajax({
    url: 'rsc/stopord.txt',
    success: function(data) {
        // One stop word per line; accept both LF and CRLF line endings
        stop_words = data.split(/\r?\n/);
    },
    async: false
});

编辑:

您的成功回调是函数:

function(data) {
    stop_words = data.split('n');
}

也就是您传递给 $.get() 调用的那个函数。只有当 $.get 请求收到响应时它才会被调用,而这可能需要一些时间……$.get 之后的代码可能在回调被调用之前就已经运行,这正是您遇到问题的原因。

最新更新