JavaScript:当 if() 位于循环内,且其条件基于带有全局(g)或粘性(y)标志的正则表达式对象时,结果不可靠



在测试另一个问题时,我遇到了一个奇怪的现象。我不确定它是否与另外两个问题有关,但我认为无关。

我的脚本遍历一个图像 URL 列表(取自文本区域输入),并按预先定义的结构分析每个 URL,以找出某种类型的图像 ID。基本上,就是对输入的 URL 进行测试,看它是否匹配某些正则表达式或包含某些关键字。但是,当我对同一个 URL 进行多次迭代时,同一个 if 语句会产生不同的结果:有时它能找到 ID,有时却找不到。

我最好用一个小片段来说明这一点:

如果我把if-else语句拆分成单独的if语句,那也没什么区别。如果我使用for(i=0; i < urls.length; i++)循环而不是for of循环来遍历URL,也没有什么区别。

你知道这里发生了什么吗?

js代码片段

不工作(循环位于函数内部):

/**
 * Classifies every image URL in a newline-separated list and logs one verdict
 * per row to the console.
 *
 * NOTE: both regexes deliberately keep the global (`g`) flag, because this
 * snippet reproduces the reported problem. With `g`, `RegExp.prototype.test()`
 * is stateful: a successful match leaves `lastIndex` behind the match, and the
 * next `test()` call on the same regex object resumes from that index, even
 * when a different string is tested. Since the same two regex objects are
 * reused for every row, valid rows can fall through to the `else` branch.
 * Dropping the `g` flag makes every `test()` call start at index 0.
 *
 * @param {string} url - Image URLs, one per line (LF or CRLF separated).
 * @returns {void} Output goes to the console only.
 */
function preprocessImgURL(url) {
  const urls = url.trim().split(/\r?\n/);
  const aRegex = /alpha\d{9,11}z/gi;
  const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;

  for (let urli = 0; urli < urls.length; urli++) {
    const current = urls[urli];
    console.log('\n\nPROCESSING NEXT URL');
    console.log(current);

    if (current.includes('channelB-')) {
      console.log(`Row ${urli} is B Domain + B URL.`);
    } else if (aRegex.test(current)) {
      console.log(`Row ${urli} is A URL.`);
    } else if (cRegex.test(current)) {
      console.log(`Row ${urli} is C URL.`);
    } else {
      console.log(`Row ${urli} doesnt match any criteria. (else statement reached)`);
      console.log(`Row ${urli} matches b criteria: ${current.includes('channelB-')}`);
      // The failed test() calls above reset lastIndex to 0, so these repeated
      // calls search from the start again and may now report `true`.
      console.log(`Row ${urli} matches aRegex: ${aRegex.test(current)}`);
      console.log(`Row ${urli} matches cRegex: ${cRegex.test(current)}`);
    }
  }
}

// Example input: six URLs separated by line feeds, as a textarea would deliver them.
const exampleUrlString = [
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
  'https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600',
  'https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg',
  'https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg',
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
].join('\n');
preprocessImgURL(exampleUrlString);

预期输出:
Row 0 is C URL.
Row 1 is A URL.
Row 2 is C URL.
Row 3 is B URL.
Row 4 is C URL.
Row 5 is C URL.
实际输出:
Row 0 is C URL.
Row 1 is A URL.
Row 2 doesnt match any criteria. (else statement reached)
Row 3 is B URL.
Row 4 doesnt match any criteria. (else statement reached) *
Row 5 doesnt match any criteria. (else statement reached)
  • 如果我删除else语句,第4行将产生所需的输出:
    /**
     * Variant without a final `else` branch: classifies every image URL in a
     * newline-separated list and logs a verdict only for rows that match.
     *
     * NOTE: the global (`g`) flag is kept on purpose, because this snippet
     * reproduces the reported problem. With `g`, `test()` stores its position
     * in `lastIndex` on the regex object and resumes from there on the next
     * call, even for a different string. Here no extra `test()` calls run in
     * an `else` branch, so the regex state differs from the variant that has
     * one, and a different set of rows is (mis)classified.
     *
     * @param {string} url - Image URLs, one per line (LF or CRLF separated).
     * @returns {void} Output goes to the console only.
     */
    function preprocessImgURL(url) {
      const urls = url.trim().split(/\r?\n/);
      const aRegex = /alpha\d{9,11}z/gi;
      const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;

      for (let urli = 0; urli < urls.length; urli++) {
        const current = urls[urli];
        console.log('\n\nPROCESSING NEXT URL');
        console.log(current);

        if (current.includes('channelB-')) {
          console.log(`Row ${urli} is B Domain + B URL.`);
        } else if (aRegex.test(current)) {
          console.log(`Row ${urli} is A URL.`);
        } else if (cRegex.test(current)) {
          console.log(`Row ${urli} is C URL.`);
        }
      }
    }

    // Example input: six URLs separated by line feeds, as a textarea would deliver them.
    const exampleUrlString = [
      'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
      'https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600',
      'https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg',
      'https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg',
      'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
      'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
    ].join('\n');
    preprocessImgURL(exampleUrlString);
实际输出:
Row 0 is C URL.
Row 1 is A URL.
[no output as there is no else statement that could handle row 2]
Row 3 is B URL.
Row 4 is C URL. *
[no output as there is no else statement that could handle row 5]

正常工作(循环在函数外部,函数每次只处理一个 URL):

/**
 * Classifies a single image URL and logs the verdict to the console.
 *
 * The regexes still carry the global (`g`) flag, but it does no harm here:
 * both regex objects are created anew on every call, so each first `test()`
 * starts with `lastIndex === 0`. The repeated `test()` calls in the `else`
 * branch only run after the earlier ones failed, which resets `lastIndex`
 * to 0 as well.
 *
 * @param {string} url - The image URL to classify.
 * @param {number} imgrow - Row number used only in the log messages.
 * @returns {void} Output goes to the console only.
 */
function preprocessImgURL(url, imgrow) {
  const aRegex = /alpha\d{9,11}z/gi;
  const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
  const isChannelB =
    url.includes('channelB-') && (url.includes('domainB.de') || url.includes('domain-B.de'));

  console.log(url);
  if (isChannelB) {
    console.log(`Row ${imgrow} is B Domain + B URL.`);
  } else if (aRegex.test(url)) {
    console.log(`Row ${imgrow} is A URL.`);
  } else if (cRegex.test(url)) {
    console.log(`Row ${imgrow} is C URL.`);
  } else {
    console.log(`Row ${imgrow} doesnt match any criteria. (else statement reached)`);
    console.log(`Row ${imgrow} matches b criteria: ${isChannelB}`);
    console.log(`Row ${imgrow} matches aRegex: ${aRegex.test(url)}`);
    console.log(`Row ${imgrow} matches cRegex: ${cRegex.test(url)}`);
  }
}

// Example input: the loop lives outside the function, one call per URL.
const urls = [
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
  'https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600',
  'https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg',
  'https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg',
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
  'https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg',
];
urls.forEach((entry, row) => {
  console.log('\n\nPROCESSING NEXT URL');
  preprocessImgURL(entry, row);
});

完整的html代码再现问题

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Create Record</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.css">
<style type="text/css">
.wrapper {
max-width: 900px;
margin: auto;
}
</style>
</head>
<body>
<script>
// Running row counter; it keeps counting across calls of preprocessImgURL().
let imgrow = 0;

/**
 * Classifies every image URL in a newline-separated list, logging each step
 * to the console and mirroring it as plain text into the `#result` element.
 *
 * NOTE: the global (`g`) flag is kept on purpose, because this page reproduces
 * the reported problem. With `g`, `test()` stores its position in `lastIndex`
 * on the regex object and resumes from there on the next call, even for a
 * different string, so reusing the same regex objects across rows makes valid
 * rows fall through to the `else` branch. Dropping `g` avoids this.
 *
 * The URLs come straight from a textarea, so they are appended as text nodes
 * rather than concatenated into `innerHTML`; markup in the input is shown
 * literally instead of being parsed.
 *
 * @param {string} url - Image URLs, one per line (LF or CRLF separated).
 * @returns {void}
 */
function preprocessImgURL(url) {
  const urls = url.trim().split(/\r?\n/);
  const aRegex = /alpha\d{9,11}z/gi;
  const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
  const resultEl = document.getElementById('result');

  // Appends `value` as plain text followed by a line break.
  const show = (value) => {
    resultEl.append(String(value), document.createElement('br'));
  };

  for (const current of urls) {
    console.log(current);
    show(current);

    const isChannelB =
      current.includes('channelB-') &&
      (current.includes('domainB.de') || current.includes('domain-B.de'));

    let verdict = null;
    if (isChannelB) {
      verdict = `Row ${imgrow} is B Domain + B URL.`;
    } else if (aRegex.test(current)) {
      verdict = `Row ${imgrow} is A URL.`;
    } else if (cRegex.test(current)) {
      verdict = `Row ${imgrow} is C URL.`;
    }

    if (verdict !== null) {
      console.log(verdict);
      show(verdict);
    } else {
      console.log(cRegex);
      show(cRegex);
      // Two separate test() calls, as in the reproduction: with the `g` flag
      // the first one can succeed and move lastIndex, making the second fail.
      console.log(cRegex.test(current));
      show(cRegex.test(current));
    }

    console.log('NEXT URL');
    resultEl.append(document.createElement('br'), document.createElement('br'));
    ++imgrow;
  }
}
</script>
<div class="wrapper">
<div class="container-fluid">
<form name="myForm" id="myForm">
<div class="form-group">
<h4>example input</h4>
<p style="font-size: 80%;">
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600<br>
https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg<br>
https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg
</p>
<h4>relevant criteria (for now):</h4>
<p>A: not B && includes something like alpha[9-11digits]z <br>
B: includes channelB- && ( includes domainB.de || includes domain-B.de)<br>
C: not A && not B && includes something like [6digits]_[2-3letter]-[5-8digits]<br></p>
</div>
<div class="form-group">
<h4>Image URLs (1 per row)</h4>
<textarea rows="5" name="imgurls" class="form-control" required onchange="preprocessImgURL(this.value)" style="font-size: 80%;"></textarea>
</div>
</form>

<div  style="font-family: monospace;" id="result">
<h4>Result:</h4>
</div>

<div style="font-family: monospace;">
<h4>Expected/experienced Result for example input:</h4>
<p>Row 0 is C URL. / 🆗 <br>
Row 1 is A URL. / 🆗 <br>
Row 2 is C URL. / 🛑 <br>
Row 3 is B URL. / 🆗 <br>
Row 4 is C URL. / 🆗 <br>
Row 5 is C URL. / 🛑 <br></p>
</div>     
</div>
</div>
</body>
</html>

当多次使用同一个正则表达式时,就像你在循环中所做的那样,它可能会记住它处理了什么:

对设置了“全局”标志的正则表达式使用 test()

当正则表达式设置了全局标志时,test() 会推进该正则表达式的 lastIndex。(RegExp.prototype.exec() 同样会推进 lastIndex 属性。)

之后再次调用 test(str) 时,会从 lastIndex 处开始继续在 str 中搜索。只要 test() 返回 true,lastIndex 属性就会继续增大。

注意:只要 test() 返回 true,lastIndex 就不会重置——即使测试的是另一个字符串也是如此!

当 test() 返回 false 时,所调用正则表达式的 lastIndex 属性会被重置为 0。

以下示例演示了这种行为:

// Demonstrates that test() on a regex with the global flag reads and updates regex.lastIndex.
const regex = /foo/g; // the "global" flag is set
// regex.lastIndex is at 0
regex.test('foo')     // true
// regex.lastIndex is now at 3
regex.test('foo')     // false: the search resumed at index 3, past the only "foo"
// regex.lastIndex is at 0 (reset by the failed match)
regex.test('barfoo')  // true
// regex.lastIndex is at 6
regex.test('foobar')  // false: the search resumed at index 6, the end of the string
// regex.lastIndex is at 0 (reset by the failed match)
// (...and so on) 

在您的情况下,您不需要全局标志:test() 只需要判断是否存在匹配,去掉 g 即可(例如把 /alpha\d{9,11}z/gi 写成 /alpha\d{9,11}z/i),这样每次调用都会从字符串开头开始搜索。

最新更新