我有一个用例:在一个自由文本字段中,格式固定的用户ID(例如 ab12345,即两个字母加五位数字)和内容不固定的人名,可能出现在字符串中的任何位置。
现在我需要把类似 ab12345 的用户ID替换为 xxxxxxx,并且无论人名出现在字符串中的什么位置,都要把它替换为 XXXX。
我目前使用下面的查询:
-- Return every row of dbo.TEST whose DESCRIPTION contains a user ID,
-- i.e. two letters immediately followed by five digits (e.g. ab12345).
-- Both letter classes must read [a-zA-Z]: a range written as A-z also spans
-- the punctuation characters that sit between 'Z' and 'a' in code-point
-- order ([ \ ] ^ _ `), which matters under binary collations.
-- NOTE(review): SELECT * is kept because the columns of dbo.TEST are not
-- visible here; list them explicitly once the schema is known.
SELECT *
FROM dbo.TEST
WHERE DESCRIPTION LIKE '%[a-zA-Z][a-zA-Z][0-9][0-9][0-9][0-9][0-9]%';
来找出包含用户ID(例如 ab12345)的行,但由于匹配到的具体值并不固定,我不知道该如何为它编写替换逻辑。
人名也存在同样的问题。
以下脚本可能有助于屏蔽(redact)用户ID:
-- Demo: locate and redact a user ID (two letters + five digits) that is
-- embedded somewhere in a free-text column.
USE tempdb
GO
-- Scratch table holding the sample notes.
CREATE TABLE #CustComments
( CustomerID INT
, CustomerNotes VARCHAR(8000)
)
GO
INSERT INTO #CustComments
( CustomerID
, CustomerNotes
)
VALUES
( 1, 'An infraction was raised on user id ab12345, and the name of the complainant is John')
, ( 2, 'The customer was not happy with person CD45678 and is going to ask William Jones to speak with George Hillman about this matter' )
, ( 3, 'A customer called and repeatedly mentioned the name of employee ZX98765 and assumes their name was Janet which is not correct');

-- For each note return:
--   start_pos     : 1-based position of the first user ID, 0 when none is found
--   extractstring : the 7-character ID found at start_pos, NULL when none is found
--   redacted      : the note with every occurrence of that ID replaced by 'XXXXXXX'
-- Only the ID found first is redacted; a different second ID in the same
-- note is left as is.
SELECT
    c.CustomerID,
    c.CustomerNotes,
    p.start_pos,
    -- Guard on start_pos: SUBSTRING(x, 0, 7) would return the first six
    -- characters of the note instead of "nothing".
    CASE
        WHEN p.start_pos > 0 THEN SUBSTRING(c.CustomerNotes, p.start_pos, 7)
    END AS extractstring,
    -- Without the guard, REPLACE would overwrite the beginning of any note
    -- that does not contain a user ID.
    CASE
        WHEN p.start_pos > 0
            THEN REPLACE(c.CustomerNotes, SUBSTRING(c.CustomerNotes, p.start_pos, 7), 'XXXXXXX')
        ELSE c.CustomerNotes
    END AS redacted
FROM #CustComments AS c
-- Compute the match position once per row and reuse it above.
CROSS APPLY (
    SELECT PATINDEX('%[a-zA-Z][a-zA-Z][0-9][0-9][0-9][0-9][0-9]%', c.CustomerNotes) AS start_pos
) AS p;
--TIDY UP
DROP TABLE #CustComments;
如果您已经有一张"名称"表,或者可以创建这样一张表,那么还可以按下面的方式把人名一并屏蔽:
-- Second demo: build the scratch table of customer notes in tempdb.
USE tempdb
GO
-- One row per customer note; the note is free text.
CREATE TABLE #CustComments
(
      CustomerID    INT
    , CustomerNotes VARCHAR(8000)
)
GO
-- Three sample notes, each containing one user ID and one or more names.
INSERT INTO #CustComments (CustomerID, CustomerNotes)
VALUES
      (1, 'An infraction was raised on user id ab12345 , and the name of the complainant is Ann')
    , (2, 'The customer was not happy with person CD45678 and is going to ask Richard Jones to speak with Todd Hillman about this matter')
    , (3, 'A customer called and repeatedly mentioned the name of employee ZX98765 and assumes their name was Shana which is not correct');
-- Lookup table of names (first names and surnames) that must be redacted.
-- The primary key keeps every name unique, so a join or lookup against this
-- table can never match the same word more than once.
CREATE TABLE #empname (
ename varchar(255) NOT NULL PRIMARY KEY
)
GO
INSERT INTO #empname ([ename])
VALUES ('Zeph'), ('Ebony'), ('Felicia'), ('Benedict'), ('Ahmed'), ('Ira'), ('Julie'), ('Levi'),
('Sebastian'), ('Fiona'), ('Lamar'), ('Russell'), ('Abdul'), ('Lev'), ('Isaiah'), ('Charlotte'),
('Rowan'), ('Ivory'), ('Quinn'), ('Jordan'), ('Xantha'), ('Shana'), ('Mufutau'), ('Jessamine'),
('Desirae'), ('Yvette'), ('Odessa'), ('Ray'), ('Ori'), ('Zenaida'), ('Allegra'), ('Allistair'),
('Raymond'), ('Martena'), ('Cameron'), ('Ila'), ('Nigel'), ('Dale'), ('Emerald'), ('Guinevere'),
('Boris'), ('Dolan'), ('Ainsley'), ('Madeson'), ('Kadeem'), ('Ciaran'), ('Hop'), ('Louis'),
('Maia'), ('Hiroko'), ('Hakeem'), ('Cole'), ('Tyrone'), ('Amy'), ('Doris'), ('Keaton'),
('Carlos'), ('Richard'), ('Lysandra'), ('Beverly'), ('Hamish'), ('Demetria'), ('Eric'), ('Nayda'),
('Sydney'), ('Fritz'), ('Blaze'), ('Regina'), ('Ciara'), ('Ina'), ('Joan'), ('Risa'),
('Alea'), ('Denton'), ('Daryl'), ('Mollie'), ('Keane'), ('Jarrod'), ('Ann'), ('Juliet'),
('Germaine'), ('Alexa'), ('Zane'), ('Kiona'), ('Armand'), ('Jin'), ('Geraldine'), ('Natalie'),
('Nomlanga'), ('Todd'), ('Rajah'),('Lucian'), ('Idona'), ('Autumn'), ('Briar'),
-- add surname
('Hillman');
-- Redact user IDs and known names in #CustComments.CustomerNotes and show the
-- result next to the original text.
--
-- Steps:
--   1. id_redacted   : replace the first user ID found (two letters + five
--                      digits) with 'XXXXXXX'; notes without an ID pass
--                      through unchanged.
--   2. tokens        : split the note on single spaces, keeping each token's
--                      position so the sentence can be rebuilt in order.
--   3. name_redacted : replace every token that equals a row in #empname with
--                      'XXXXXXX'.
--   4. results       : join the tokens back together in their original order.
--
-- Limitations visible in this logic:
--   * a name only matches when the whole space-delimited token equals it, so
--     a name directly followed by punctuation (e.g. 'Ann,') is not redacted;
--   * only the ID found first in each note is redacted;
--   * tokens are regrouped by CustomerID, so CustomerID is assumed to be
--     unique in #CustComments.
WITH id_redacted AS (
    SELECT
        c.CustomerID,
        -- Guard on start_pos: with no match PATINDEX returns 0, and
        -- SUBSTRING(x, 0, 7) would hand the first six characters to REPLACE.
        CASE
            WHEN p.start_pos > 0
                THEN REPLACE(c.CustomerNotes, SUBSTRING(c.CustomerNotes, p.start_pos, 7), 'XXXXXXX')
            ELSE c.CustomerNotes
        END AS ID_redacted
    FROM #CustComments AS c
    CROSS APPLY (
        SELECT PATINDEX('%[a-zA-Z][a-zA-Z][0-9][0-9][0-9][0-9][0-9]%', c.CustomerNotes) AS start_pos
    ) AS p
),
tokens AS (
    -- Ordered split: the note is turned into a JSON array of strings and read
    -- back with OPENJSON, whose [key] column is the array index. STRING_SPLIT
    -- without an ordinal does not guarantee the order of its output rows.
    SELECT
        r.CustomerID,
        CAST(j.[key] AS INT)             AS ordinal,
        CAST(j.[value] AS VARCHAR(8000)) AS token
    FROM id_redacted AS r
    CROSS APPLY OPENJSON(
        '["'
        + REPLACE(STRING_ESCAPE(CAST(r.ID_redacted AS NVARCHAR(MAX)), 'json'), ' ', '","')
        + '"]'
    ) AS j
),
name_redacted AS (
    -- EXISTS instead of an outer join: a token is kept exactly once however
    -- many rows of #empname it matches.
    SELECT
        t.CustomerID,
        t.ordinal,
        CASE
            WHEN EXISTS (SELECT 1 FROM #empname AS e WHERE e.ename = t.token) THEN 'XXXXXXX'
            ELSE t.token
        END AS name_redact
    FROM tokens AS t
),
results AS (
    -- Aggregate over VARCHAR(MAX) so that a note which grows during redaction
    -- cannot run into the 8000-byte limit of a non-MAX STRING_AGG result.
    SELECT
        n.CustomerID,
        STRING_AGG(CAST(n.name_redact AS VARCHAR(MAX)), ' ')
            WITHIN GROUP (ORDER BY n.ordinal) AS full_redact
    FROM name_redacted AS n
    GROUP BY n.CustomerID
)
-- RESULTS WITH COMPARISON
SELECT
    C.CustomerID,
    C.CustomerNotes AS Original,
    R.full_redact AS Redacted
FROM #CustComments AS C
INNER JOIN results AS R
    ON C.CustomerID = R.CustomerID
ORDER BY C.CustomerID;
--TIDY UP
DROP TABLE #CustComments;
DROP TABLE #empname;