删除SQL Server中的脚本form xml文档



我想在SQL Server 2014中删除xml或txt文档中的内容:

<div class="infotagtitle">
    <script type="text/javascript">
    var sc_project=9934926; 
    var sc_invisible=1; 
    var sc_security="00dd8003"; 
    var scJsHost = (("https:" == document.location.protocol) ?
    "https://secure." : "http://www.");
    document.write("<sc"+"ript type='text/javascript' src='" +
    scJsHost+
    "statcounter.com/counter/counter.js'></"+"script>");
    </script>
</div>
输出:

<div class="infotagtitle">
</div>

问题中的文本代码显然是HTML,而不是格式良好的XML。因此,不能简单地将文本强制转换或转换为XML类型。
这意味着XML类型函数,如f.e. SET @myXML.modify('delete //script');不是一个选项。

这个示例代码使用一个表变量来演示用包含HTML和脚本标记的NVARCHAR来更新一个表。它使用PATINDEX在HTML中查找脚本代码。

更新循环,直到html中没有带有script标签的记录。

declare @T table (Id int identity(1,1), html_text nvarchar(max));
insert into @T (html_text) values
(N'<html>
<body>
  <div class="infotagtitle">
    <script type="text/javascript">
    var sc_project=9934926; 
    var sc_invisible=1; 
    var sc_security="00dd8003";
    var scJsHost = (("https:" == document.location.protocol) ?
    "https://secure." : "http://www.");
    document.write("<sc"+"ript type=''text/javascript'' src=''" +
    scJsHost+
    "statcounter.com/counter/counter.js''></"+"script>");
    </script>
  </div>
  <div class="othertagtitle">
    <script type="text/javascript">
      document.write("<script type=''text/javascript'' src=''" +
    otherHost+
    "othercounter.com/counter/counter.js''></"+"script>");
    </script>
  </div>
</body>
<html>');
declare @N INT, @Counter INT = 0;
select @N = count(*) from @T where patindex('%<script %',html_text) > 0;
while (@N > 0 AND @Counter < 10)
begin
 set @Counter = @Counter + 1; -- just a safety measure to avoid an infinite loop
 update @T 
 set html_text = concat(substring(html_text,1,patindex('%<script %',html_text)-1),substring(html_text,patindex('%</script>%',html_text)+9,len(html_text)))
 where patindex('%<script %',html_text) > 0;
 select @N = count(*) from @T where patindex('%<script %',html_text) > 0;
end;
select * from @T;

最新更新