如何将以特定str开头的字符串的特定部分解析到Oracle中的第一个空格



我有这样的数据集:

SELECT 
1 as text_id,
'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text
FROM DUAL
UNION
SELECT 
2,
'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
FROM DUAL

我应该解析所有以"开头的短语;是";以每行的第一个空格字符结尾。

因此,我想从full_text中获得的结果是:

text_idparsed_part
1A.ACCOUNT_ID
1B.IDENTITY_NO
1板材编号
2ARC。PREV_RECORD
2ARC.NEXT_RECORD

根据您发布的内容,看看这是否有帮助。

样本数据

SQL> with test as
2  (SELECT
3      1 as text_id,
4      'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text
5  FROM DUAL
6  UNION
7  SELECT
8      2,
9      'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
10  FROM DUAL
11  )

查询自身

12  select text_id,
13    ltrim(regexp_substr(full_text, 'is S+', 1, column_value), 'is ') parsed_part
14  from test cross join
15    table(cast(multiset(select level from dual
16                        connect by level <= regexp_count(full_text, ' is ')
17                       ) as sys.odcinumberlist))
18  order by text_id, column_value;
TEXT_ID PARSED_PART
---------- --------------------
1 A.ACCOUNT_ID
1 B.IDENTITY_NO
1 plate_number
2 ARC.PREV_RECORD
2 ARC.NEXT_RECORD
SQL>
  • regexp_substr搜索is字符串,后面跟两个单词,用一个点分隔
  • ltrim删除前导is

通过connect by对虚线单词的展开进行横向连接。

WITH DATA AS (
SELECT 1 as textid, 'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text FROM DUAL
UNION ALL SELECT 2, 'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .' FROM DUAL
) 
SELECT t.textid, w.word
FROM DATA t
CROSS JOIN LATERAL (
SELECT level AS lvl, REGEXP_SUBSTR(full_text, ' is ([A-Z._]+)',1, LEVEL, 'i', 1) AS word 
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(full_text, ' is ([A-Z._]+)', 1, 'i')
) w
ORDER BY t.textid;
TEXTID|WORD-----:|:--------------1|A.ACCOUNT_ID1|B.IDENTITY_NO1|板材编号2|ARC.PREV_RECORD2|ARC.NEXT_RECORD

db<gt;小提琴这里

虽然Littlefoot打败了我,但我的方法是:

WITH tbl(text_id, full_text) AS (
SELECT 1, 'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' FROM DUAL UNION ALL
SELECT 2, 'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .' FROM DUAL
)
SELECT text_id,  
REGEXP_SUBSTR(full_text, ' is (.*?)( |$)', 1, LEVEL, NULL, 1) parsed_part
FROM tbl
CONNECT BY LEVEL <= REGEXP_COUNT(full_text, ' is .*?( |$)')
AND PRIOR text_id = text_id
AND PRIOR SYS_GUID() IS NOT NULL; 

TEXT_ID PARSED_PART         
---------- --------------------
1 A.ACCOUNT_ID        
1 B.IDENTITY_NO       
1 plate_number        
2 ARC.PREV_RECORD     
2 ARC.NEXT_RECORD     
5 rows selected.

您不需要使用(慢速(正则表达式,可以在递归子查询中使用简单的字符串函数:

WITH bounds (text_id, full_text, start_pos, end_pos) AS (
SELECT text_id,
full_text,
INSTR(full_text, ' is ', 1),
INSTR(full_text, ' ', INSTR(full_text, ' is ', 1) + 4 )
FROM   table_name
WHERE  INSTR(full_text, ' is ', 1) > 0
UNION ALL
SELECT text_id,
full_text,
INSTR(full_text, ' is ', end_pos),
INSTR(full_text, ' ', INSTR(full_text, ' is ', end_pos) + 4 )
FROM   bounds
WHERE  start_pos > 0
AND    end_pos > 0
)
SEARCH DEPTH FIRST BY text_id SET order_rn
SELECT text_id,
SUBSTR(full_text, start_pos + 4, end_pos - start_pos - 4) AS parsed_part
FROM   bounds
WHERE  start_pos > 0
AND    end_pos > 0;

对于样本数据:

CREATE TABLE table_name (text_id, full_text) AS
SELECT 1,
'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .'
FROM   DUAL
UNION ALL
SELECT 2,
'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
FROM   DUAL

输出:

text_IDPARSED_PART
1A.ACCOUNT_ID
1B.IDENTITY_NO
1plate_number
2ARC.PREV_RECORD
2ARC.NEXT_RECORD

最新更新