我有一个列值:
输入:
This is ssn12345 string dob 12-09-1998 string
This is ssn12345786 stringabc string
输出:
This is ******** string ************** string
This is *********** stringabc string
当我们发现SSN或dob时,值应该被屏蔽,如上面的例子所示。动态长度字符串。
我正在尝试下面的查询-
SELECT CASE
WHEN start_pos > 0
THEN SUBSTR( 'This is ssn12345 string dob 12-09-1998 string', 1, start_pos - 1)
|| RPAD('*', end_pos - start_pos, '*')
|| SUBSTR('This is ssn12345 string dob 12-09-1998 string', end_pos)
ELSE 'This is ssn12345 string dob 12-09-1998 string'
END AS masked_value
FROM (
SELECT 'This is ssn12345 string dob 12-09-1998 string',
REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -]+' || 'dob[0-9. -]+', 1, 1, 0, NULL) AS start_pos,
REGEXP_INSTR( 'This is ssn12345 string dob 12-09-1998 string', 'ssn[0-9. -]+' || 'dob[0-9. -]+', 1, 1, 1, NULL) AS end_pos
FROM dual
)
我正在尝试上面的查询没有得到如上所述的预期结果它只有在我们有——的时候才会起作用输入像-This is ssn12345 dob 12-09-1998 string
然后它屏蔽输出为-This is ******** ************** string
从你之前的问题,只需改变你正在匹配的正则表达式:
WITH data ( value, masked_value, end_pos ) AS (
SELECT value,
REGEXP_REPLACE(
value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(value, 'ssn([ #]?d+)+|dob ?(-?d+)+', 1, 1)
),
'*'
),
1,
1
),
REGEXP_INSTR(
value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
1,
1,
1
)
FROM table_name
UNION ALL
SELECT value,
REGEXP_REPLACE(
masked_value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
RPAD(
'*',
LENGTH(
REGEXP_SUBSTR(masked_value, 'ssn([ #]?d+)+|dob ?(-?d+)+', end_pos, 1)
),
'*'
),
end_pos,
1
),
REGEXP_INSTR(
masked_value,
'ssn([ #]?d+)+|dob ?(-?d+)+',
end_pos,
1,
1
)
FROM data
WHERE end_pos > 0
)
SEARCH DEPTH FIRST BY value SET value_order
SELECT value, masked_value
FROM data
WHERE end_pos = 0;
对于样本数据:
CREATE TABLE table_name ( value ) AS
SELECT 'This is ssn1 string' FROM DUAL UNION ALL
SELECT 'This is ssn12 string' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string' FROM DUAL UNION ALL
SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL
SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL
SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL
SELECT 'This is dob 12-09-1998 string' FROM DUAL;
输出:
<表类>tbody><<tr> 价值 MASKED_VALUE 这是捐助12-09-1998字符串 ************** 字符串 这是ssn # 12345弦ssn 5678 9765 ********* 字符串 ************* 这是ssn1字符串 这是****字符串 这是ssn12字符串 这是*****字符串 这是ssn12345字符串 ******** 字符串 这是ssn12345字符串捐助12-09-1998字符串 ******** 字符串 ************** 字符串 表类> 这是ssn123456789字符串 ************ 字符串 如果你想替换固定次数的模式,那么你可以使用:
SELECT value, CASE WHEN start_pos2 > 0 THEN SUBSTR( value, 1, start_pos1 - 1) || RPAD('*', end_pos1 - start_pos1 - 1, '*') || SUBSTR(value, end_pos1, start_pos2 - end_pos1) || RPAD('*', end_pos2 - start_pos2, '*') || SUBSTR(value, end_pos2) WHEN start_pos1 > 0 THEN SUBSTR( value, 1, start_pos1 - 1) || RPAD('*', end_pos1 - start_pos1, '*') || SUBSTR(value, end_pos1) ELSE value END AS masked_value FROM ( SELECT value, REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 1, 0, NULL) AS start_pos1, REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 1, 1, NULL) AS end_pos1, REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 2, 0, NULL) AS start_pos2, REGEXP_INSTR( value, 'ssn([ #]*d+)+|dob([. -]*d+)+', 1, 2, 1, NULL) AS end_pos2 FROM table_name );
注意:这只适用于字符串中的前两次出现。如果你需要匹配一个动态的出现次数,那么你需要使用递归查询。
对于样本数据:
CREATE TABLE table_name ( value ) AS SELECT 'This is ssn1 string' FROM DUAL UNION ALL SELECT 'This is ssn12 string' FROM DUAL UNION ALL SELECT 'This is ssn12345 string' FROM DUAL UNION ALL SELECT 'This is ssn123456789 string' FROM DUAL UNION ALL SELECT 'This is ssn#12345 string ssn 5678 9765' FROM DUAL UNION ALL SELECT 'This is ssn12345 string dob 12-09-1998 string' FROM DUAL UNION ALL SELECT 'This is dob 12-09-1998 string' FROM DUAL;
输出:
<表类>tbody><<tr> 价值 MASKED_VALUE 这是ssn1字符串 这是* * * *的字符串 这是ssn12字符串 这是*****字符串 这是ssn12345字符串 ******** 字符串 这是ssn123456789字符串 ************ 字符串 这是ssn # 12345弦ssn 5678 9765 ******** 字符串 ************* 这是ssn12345字符串捐助12-09-1998字符串 这是* * * * * * *的字符串 ************** 字符串 表类> 这是捐助12-09-1998字符串 ************** 字符串 这可以工作,但它将任何匹配的字符串替换为固定数量的星号。
WITH sample_data (c) AS ( SELECT 'This is ssn12. 34-5 string dob 12-09-1998 string' FROM DUAL UNION ALL SELECT '5 Strings: ssn12. 34-5, dob 12-09-1998, nr 3: ssn18898, nr 4: dob 01-01-2000, nr 5:ssn1663' FROM DUAL UNION ALL SELECT 'This is ssn12345786 stringabc string' FROM DUAL ) SELECT regexp_replace(c,'((ssn[0-9. -]+)|(dob [0-9. -]+))[0-9.-]','*******') FROM sample_data; This is ******* string ******* string 5 Strings: *******, *******, nr 3: *******, nr 4: *******, nr 5:******* This is ******* stringabc string