屏蔽包含多个数据的字符串中的数据



我有一个列值:

输入:

  1. This is ssn12345 string dob 12-09-1998 string
  2. This is ssn12345786 stringabc string
  3. 输出:

  1. This is ******** string ************** string
  2. This is *********** stringabc string

当我们发现SSN或dob时,值应该被屏蔽,如上面的例子所示。动态长度字符串。

我正在尝试下面的查询-

SELECT CASE 
       WHEN start_pos > 0 
       THEN SUBSTR( 'This is ssn12345 string dob 12-09-1998 string', 1, start_pos - 1) 
            || RPAD('*', end_pos - start_pos, '*') 
            || SUBSTR('This is ssn12345 string dob 12-09-1998 string', end_pos) 
       ELSE 'This is ssn12345 string dob 12-09-1998 string' 
       END AS masked_value 
FROM   ( 
  SELECT 'This is ssn12345 string dob 12-09-1998 string', 
         REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -]+' || 'dob[0-9. -]+', 1, 1, 0, NULL) AS start_pos, 
REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -]+' || 'dob[0-9. -]+', 1, 1, 1, NULL) AS end_pos 
  FROM   dual 
)  

我正在尝试上面的查询没有得到如上所述的预期结果它只有在我们有——的时候才会起作用输入像-This is ssn12345 dob 12-09-1998 string然后它屏蔽输出为-This is ******** ************** string

从你之前的问题,只需改变你正在匹配的正则表达式:

WITH data ( value, masked_value, end_pos ) AS (
SELECT value,
REGEXP_REPLACE(
value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(value, 'ssn([ #]?d+)+|dob ?(-?d+)+', 1, 1)
),
'*'
),
1,
1
),
REGEXP_INSTR(
value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
1,
1,
1
)
FROM   table_name
UNION ALL
SELECT value,
REGEXP_REPLACE(
masked_value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(masked_value, 'ssn([ #]?d+)+|dob ?(-?d+)+', end_pos, 1)
),
'*'
),
end_pos,
1
),
REGEXP_INSTR(
masked_value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
end_pos,
1,
1
)
FROM   data
WHERE  end_pos > 0
)
SEARCH DEPTH FIRST BY value SET value_order
SELECT value, masked_value
FROM   data
WHERE  end_pos = 0;

对于样本数据:

CREATE TABLE table_name ( value ) AS
SELECT 'This is ssn1 string' FROM DUAL UNION ALL
SELECT 'This is ssn12 string' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string' FROM DUAL UNION ALL
SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL
SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT 'This is dob 12-09-1998 string' FROM DUAL;

输出:

<表类>价值MASKED_VALUEtbody><<tr>这是捐助12-09-1998字符串************** 字符串这是ssn # 12345弦ssn 5678 9765********* 字符串 *************这是ssn1字符串这是****字符串这是ssn12字符串这是*****字符串这是ssn12345字符串******** 字符串这是ssn12345字符串捐助12-09-1998字符串******** 字符串 ************** 字符串这是ssn123456789字符串************ 字符串

如果你想替换固定次数的模式,那么你可以使用:

SELECT value,
CASE
WHEN start_pos2 > 0
THEN SUBSTR( value, 1, start_pos1 - 1) 
|| RPAD('*', end_pos1 - start_pos1 - 1, '*') 
|| SUBSTR(value, end_pos1, start_pos2 - end_pos1) 
|| RPAD('*', end_pos2 - start_pos2, '*')
|| SUBSTR(value, end_pos2) 
WHEN start_pos1 > 0 
THEN SUBSTR( value, 1, start_pos1 - 1)
|| RPAD('*', end_pos1 - start_pos1, '*')
|| SUBSTR(value, end_pos1)
ELSE value
END AS masked_value 
FROM   ( 
SELECT value, 
REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 1, 0, NULL) AS start_pos1, 
REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 1, 1, NULL) AS end_pos1,
REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 2, 0, NULL) AS start_pos2, 
REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 2, 1, NULL) AS end_pos2 
FROM   table_name
);

注意:这只适用于字符串中的前两次出现。如果你需要匹配一个动态的出现次数,那么你需要使用递归查询。

对于样本数据:

CREATE TABLE table_name ( value ) AS
SELECT 'This is ssn1 string' FROM DUAL UNION ALL
SELECT 'This is ssn12 string' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string' FROM DUAL UNION ALL
SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL
SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT 'This is dob 12-09-1998 string' FROM DUAL;

输出:

<表类>价值MASKED_VALUEtbody><<tr>这是ssn1字符串这是* * * *的字符串这是ssn12字符串这是*****字符串这是ssn12345字符串******** 字符串这是ssn123456789字符串************ 字符串这是ssn # 12345弦ssn 5678 9765******** 字符串 *************这是ssn12345字符串捐助12-09-1998字符串这是* * * * * * *的字符串 ************** 字符串这是捐助12-09-1998字符串************** 字符串

这可以工作,但它将任何匹配的字符串替换为固定数量的星号。

WITH sample_data (c) AS
(
SELECT 'This is ssn12. 34-5 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT '5 Strings: ssn12. 34-5,  dob 12-09-1998, nr 3: ssn18898, nr 4: dob 01-01-2000, nr 5:ssn1663' FROM DUAL UNION ALL
SELECT 'This is ssn12345786 stringabc  string' FROM DUAL
)
SELECT regexp_replace(c,'((ssn[0-9. -]+)|(dob [0-9. -]+))[0-9.-]','*******') FROM sample_data;

This is ******* string ******* string
5 Strings: *******,  *******, nr 3: *******, nr 4: *******, nr 5:*******
This is ******* stringabc  string

最新更新