sql server语言 - sql用户定义的函数来剥离HTML标签并替换HTML实体



我正在尝试编写一个UDF(实际上我正在将我在网络上找到的一些代码改编成一个函数)来做标题中描述的事情。

代码如下:

declare @txt varchar(max), @start int, @end int, @len int
set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end
SET @txt = REPLACE(@txt,'&nbsp;',' ') --space
SET @txt = REPLACE(@txt,'&ldquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&rdquo;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&lsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&rsquo;',CHAR(39)) --'
SET @txt = REPLACE(@txt,'&ndash;',CHAR(150)) -- –
SET @txt = REPLACE(@txt,'&mdash;',CHAR(151)) -- —
SET @txt = REPLACE(@txt,'&ordm;',CHAR(186)) -- º
SET @txt = REPLACE(@txt,'&ordf;',CHAR(170)) -- ª
SET @txt = REPLACE(@txt,'&sect;',CHAR(167)) -- §
--------------------------------------------------------------
SET @txt = REPLACE(@txt,'&#34;',CHAR(34)) --"
SET @txt = REPLACE(@txt,'&#39;',CHAR(39)) --'
--------------------------------------------------------------
SET @txt = REPLACE(@txt,'&agrave;','à') --à
SET @txt = REPLACE(@txt,'&aacute;','á') --á
SET @txt = REPLACE(@txt,'&atilde;','ã') --ã
SET @txt = REPLACE(@txt,'&acirc;','â') --â
SET @txt = REPLACE(@txt,'&auml;','ä') --ä
SET @txt = REPLACE(@txt,'&eacute;','é') --é
SET @txt = REPLACE(@txt,'&ecirc;','ê') --ê
SET @txt = REPLACE(@txt,'&iacute;','í') --í
SET @txt = REPLACE(@txt,'&oacute;','ó') --ó
SET @txt = REPLACE(@txt,'&otilde;','õ') --õ     
SET @txt = REPLACE(@txt,'&oslash;','ø') --ø
SET @txt = REPLACE(@txt,'&uacute;','ú') --ú
SET @txt = REPLACE(@txt,'&uuml;','ü') --ü
SET @txt = REPLACE(@txt,'&ccedil;','ç') --ç
--------------------------------------------------------------
SET @txt = REPLACE(@txt,'&Agrave;',CHAR(192)) --À
SET @txt = REPLACE(@txt,'&Aacute;',CHAR(193)) --Á
SET @txt = REPLACE(@txt,'&Atilde;',CHAR(195)) --Ã
SET @txt = REPLACE(@txt,'&Acirc;',CHAR(194)) --Â
SET @txt = REPLACE(@txt,'&Auml;',CHAR(196)) --Ä
SET @txt = REPLACE(@txt,'&Eacute;',CHAR(201)) --É
SET @txt = REPLACE(@txt,'&Ecirc;',CHAR(202)) --Ê
SET @txt = REPLACE(@txt,'&Iacute;',CHAR(205)) --Í
SET @txt = REPLACE(@txt,'&Oacute;',CHAR(211)) --Ó
SET @txt = REPLACE(@txt,'&Otilde;',CHAR(213)) --Õ
SET @txt = REPLACE(@txt,'&Oslash;',CHAR(216)) --Ø
SET @txt = REPLACE(@txt,'&Uacute;',CHAR(218)) --Ú
SET @txt = REPLACE(@txt,'&Uuml;',CHAR(220)) --Ü
SET @txt = REPLACE(@txt,'&Ccedil;',CHAR(199)) --Ç
select LTRIM(RTRIM(@txt))

它剥离了HTML标签,只转换小写的HTML实体,当在单词AG &Ecirc; NCIA (AGÊNCIA)中找到大写的&Ecirc;时不工作,打印AGêNCIA代替。

有什么帮助使它正常工作吗?

编辑: PS:我不能改变我的数据库排序,建议@dzomba

如果你想这样做,你需要将你的db排序设置为区分大小写。

更新!这是一个变通办法,但我认为这将完成工作。我没有一个SQL服务器来测试它,但我几乎可以肯定,它工作得很好。

declare @txt varchar(max)
declare @start int
declare @end int
declare @len int
set @txt = '<p class=&#34;answer&#34;>Informamos que a documenta&ccedil;&atilde;o <strong>deve ser impressa e enviada fisicamente pela AG&Ecirc;NCIA</strong>, contendo confere com oringinal por funcion&aacute;rio CAIXA.</p>'
set @start = charindex('<',@txt)
set @end = charindex('>',@txt,@start)
set @len = (@end - @start) + 1
while @start > 0 and @end > 0 and @len > 0
begin
    set @txt = stuff(@txt,@start,@len,'')
    set @start = charindex('<',@txt)
    set @end = charindex('>',@txt,@start)
    set @len = (@end - @start) + 1
end
DECLARE @table (txtColumn varchar(max) COLLATE SQL_Latin1_General_CP1_CS_AS ) --make the column case sensitive

INSERT INTO @table (txtColumn)
SELECT @txt
UPDATE @table set txtColumn = REPLACE(txtColumn,'&nbsp;',' ') --space
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ldquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rdquo;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&lsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&rsquo;',CHAR(39)) --'
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ndash;',CHAR(150)) -- –
UPDATE @table set txtColumn = REPLACE(txtColumn,'&mdash;',CHAR(151)) -- —
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordm;',CHAR(186)) -- º
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ordf;',CHAR(170)) -- ª
UPDATE @table set txtColumn = REPLACE(txtColumn,'&sect;',CHAR(167)) -- §
    -------------------------------------------------------------
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#34;',CHAR(34)) --"
UPDATE @table set txtColumn = REPLACE(txtColumn,'&#39;',CHAR(39)) --'
    --------------------------------------------------------------
UPDATE @table set txtColumn = REPLACE(txtColumn,'&agrave;','à') --à
UPDATE @table set txtColumn = REPLACE(txtColumn,'&aacute;','á') --á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&atilde;','ã') --ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&acirc;','â') --â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&auml;','ä') --ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&eacute;','é') --é
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ecirc;','ê') --ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&iacute;','í') --í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oacute;','ó') --ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&otilde;','õ') --õ     
UPDATE @table set txtColumn = REPLACE(txtColumn,'&oslash;','ø') --ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uacute;','ú') --ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&uuml;','ü') --ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&ccedil;','ç') --ç
    -------------------------------------------------------------
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Agrave;',CHAR(192)) --À
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Aacute;',CHAR(193)) --Á
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Atilde;',CHAR(195)) --Ã
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Acirc;',CHAR(194)) --Â
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Auml;',CHAR(196)) --Ä
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Eacute;',CHAR(201)) --É
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ecirc;',CHAR(202)) --Ê
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Iacute;',CHAR(205)) --Í
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oacute;',CHAR(211)) --Ó
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Otilde;',CHAR(213)) --Õ
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Oslash;',CHAR(216)) --Ø
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uacute;',CHAR(218)) --Ú
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Uuml;',CHAR(220)) --Ü
UPDATE @table set txtColumn = REPLACE(txtColumn,'&Ccedil;',CHAR(199)) --Ç
    SELECT LTRIM(RTRIM(txtColumn)) FROM @table
--- IN THE END DROP THE TABLE
    DROP TABLE @table

最新更新