正则表达式在 JS 文件中查找第一个注释

我正在尝试编写一个正则表达式（在JavaScript中），它将匹配JS文件开头的多行注释。

到目前为止，我想出了这个：/^(\/\*[^*\/]*\*\/)/g

它适用于单行注释：https://regex101.com/r/ZS5PVI/1

但我的问题是它不适用于多行注释：https://regex101.com/r/ZS5PVI/2

你有什么想法如何解决吗？

与 HTML 一样，JavaScript 不能通过正则表达式解析。试图正确地这样做是徒劳的。

相反，您必须使用解析器，该解析器将 JavaScript 源代码正确转换为 AST，您可以通过编程方式检查该 AST 。幸运的是，有库可以为您解析。

下面是输出此代码的 AST 的工作示例：

/* this is a
multi-line
comment */
var test = "this is a string, /* and this is not a comment! */";
// ..but this is

这让我们：

[
  "toplevel",
  [
    [
      {
        "name": "var",
        "start": {
          "type": "keyword",
          "value": "var",
          "line": 5,
          "col": 4,
          "pos": 57,
          "endpos": 60,
          "nlb": true,
          "comments_before": [
            {
              "type": "comment2",
              "value": " this is a\n    multi-line\n    comment ",
              "line": 1,
              "col": 4,
              "pos": 5,
              "endpos": 47,
              "nlb": true
            }
          ]
        },
        "end": {
          "type": "punc",
          "value": ";",
          "line": 5,
          "col": 67,
          "pos": 120,
          "endpos": 121,
          "nlb": false,
          "comments_before": []
        }
      },
      [
        [
          "test",
          [
            {
              "name": "string",
              "start": {
                "type": "string",
                "value": "this is a string, /* and this is not a comment! */",
                "line": 5,
                "col": 15,
                "pos": 68,
                "endpos": 120,
                "nlb": false,
                "comments_before": []
              },
              "end": {
                "type": "string",
                "value": "this is a string, /* and this is not a comment! */",
                "line": 5,
                "col": 15,
                "pos": 68,
                "endpos": 120,
                "nlb": false,
                "comments_before": []
              }
            },
            "this is a string, /* and this is not a comment! */"
          ]
        ]
      ]
    ]
  ]
]

现在只需循环访问 AST 并提取您需要的内容即可。

在这个链接上有一个很好的讨论。这对你有帮助吗？

他的解决方案是：

/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/

您建议的正则表达式不起作用，因为注释中有*。此外，它只会查找文件开头的注释。

请尝试改用以下内容：

/\/\*[\s\S]*?\*\//

尝试

/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/

本页详细介绍了如何查找多行注释。

这是一个将匹配任何多行或单行注释：

/(\/\*.*?\*\/|\/\/[^\n]+)/

如果你只想匹配多行注释，去掉后半部分即可：

/\/\*.*?\*\//

对于这两种情况，请确保设置了 s 标志，以便 . 也能匹配换行符。

我不是JavaScript专家，但似乎必须考虑C/C++注释。
正确完成意味着在此过程中必须考虑引号（转义等等）。

以下是两种有效的正则表达式方法。正则表达式 1 直接查找第一个 C 样式的注释，一旦匹配成功即表示已找到。正则表达式 2 是通用情况：它匹配 C 样式注释、C++ 样式注释或非注释内容，带有全局标志，允许您在找到所需内容后跳出循环。

在这里测试 http://ideone.com/i1UWr

代码

// Demo: locate C-style (/* ... */) and C++-style (// ...) comments in a
// JavaScript source string while stepping over quoted string literals, so
// that comment-like text inside strings is not reported.

// The demo writes through a host-provided global `print`; fall back to
// console.log in environments that do not define it (e.g. Node).
var emit = typeof print === 'function' ? print : console.log;

// Sample input. The first line ends with a backslash, so the `//` comment
// continues onto the second line (both regexes treat backslash-newline as a
// continuation). Trailing spaces are part of the sample.
var js = [
  '// /* C++ comment  */      \\',
  '   /* C++ comment (cont) */  ',
  '/* t "h /* is"               ',
  ' is first C-style /*         ',
  '//  comment */               ',
  'and /*second C-style*/       ',
  'then /*last C-style*/        ',
  ''
].join('\n');

// Regex 1: anchored at the start of input. Skips any run of `//` comments,
// quoted strings and other non-comment text, then captures the first
// C-style comment in group 1.
var cmtrx1 = /^(?:\/\/(?:[^\\]|\\\n?)*?\n|(?:"(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[^\/"'\\]*))+(\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/)/;

// Regex 2: global tokenizer. Each match is one of: a C-style comment
// (group 1), a `//` comment without its terminating newline (group 2), or a
// quoted string / chunk of other text (no capture group set).
var cmtrx2 = /(\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/)|(\/\/(?:[^\\]|\\\n?)*?)\n|(?:"(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^\/"'\\]*)/g;
//
emit('Script\n===========\n' + js + '\n===========\n\n');
var match;
//
emit("Using Regex 1\n---------------\n");
if ((match = cmtrx1.exec(js)) !== null)
    emit("Found C style comment:\n'" + match[1] + "'\n\n");
//
emit("Using Regex 2\n---------------\n");
// cmtrx2 carries the `g` flag, so each exec() resumes from lastIndex; the
// loop ends when exec() returns null (which also resets lastIndex to 0).
while ((match = cmtrx2.exec(js)) !== null)
{
   if (match[1] !== undefined)
   {
        emit("- C style :\n'" + match[1] + "'\n");
        // break;     // uncomment to stop after first c-style match
   }
   // comment this to not print it
   if (match[2] !== undefined)
   {
        emit("- C++ style :\n'" + match[2] + "'\n");
   }
}

输出

Script
===========
// /* C++ comment  */      \
   /* C++ comment (cont) */  
/* t "h /* is"               
 is first C-style /*         
//  comment */               
and /*second C-style*/       
then /*last C-style*/        
===========

Using Regex 1
---------------
Found C style comment:
'/* t "h /* is"               
 is first C-style /*         
//  comment */'

Using Regex 2
---------------
- C++ style :
'// /* C++ comment  */      \
   /* C++ comment (cont) */  '
- C style :
'/* t "h /* is"               
 is first C-style /*         
//  comment */'
- C style :
'/*second C-style*/'
- C style :
'/*last C-style*/'

扩展的正则表达式

Regex 1:
/^(?:\/\/(?:[^\\]|\\\n?)*?\n|(?:"(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[^\/"'\\]*))+(\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/)/
     /^
     (?:
          \/\/
          (?: [^\\] | \\\n? )*?
          \n
       |
          (?:
               "
               (?: \\[\S\s] | [^"\\] )*
               "
            |  '
               (?: \\[\S\s] | [^'\\] )*
               '
            |  [^\/"'\\]*
          )
     )+
1    (
          \/\* [^*]* \*+
          (?: [^\/*] [^*]* \*+ )*
          \/
1    )
     /

Regex 2:
/(\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/)|(\/\/(?:[^\\]|\\\n?)*?)\n|(?:"(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^\/"'\\]*)/g
     /
1    (
          \/\* [^*]* \*+
          (?: [^\/*] [^*]* \*+ )*
          \/
1    )
  |
2    (
          \/\/
          (?: [^\\] | \\\n? )*?
2    )
     \n
  |
     (?:
          "
          (?: \\[\S\s] | [^"\\] )*
          "
       |  '
          (?: \\[\S\s] | [^'\\] )*
          '
       |  [\S\s][^\/"'\\]*
     )
     /g

相关内容

最新更新

热门标签：