我有一个数据集,如下
表名:Data_Table_1
Customer_ID | Cus_activity | Cus_Mount|
---|---|---|
12985 | CURAC | 201|
20917 | PINT$@CURAC$@CINT$@CURCM | 567$@291$@358$@434 |
548357 | CINT$@CURCM$@CURAC | 300$@934$@450 |
80315 | CURCM$@PINT$@CURAC$@CINT | 809$@345$@420$@567 |
234561 | CURAC$@CURCM$@PINT$@CINT | 500$@359$@200$@400 |
这里有一个选项;基本上,您可以将cus_activity
和cus_amount
拆分为行,然后只返回属于CURAC
的值。
SQL> with
2 temp as
3 (select
4 customer_id, cus_activity, cus_amount,
5 --
6 rtrim(regexp_substr(cus_activity, '[^@]+', 1, column_value), '$') act,
7 rtrim(regexp_substr(cus_amount, '[^@]+', 1, column_value), '$') amt
8 from test cross join
9 table(cast(multiset(select level from dual
10 connect by level <= regexp_count(cus_activity, '@') +1
11 ) as sys.odcinumberlist))
12 )
13 select
14 customer_id, cus_activity, cus_amount,
15 max(case when act = 'CURAC' then amt end) curac_amount
16 from temp
17 group by customer_id, cus_activity, cus_amount
18 order by customer_id;
CUSTOMER_ID CUS_ACTIVITY CUS_AMOUNT CURAC_AMOUNT
----------- ------------------------ ------------------ ------------------
12985 CURAC 201 201
20917 PINT$@CURAC$@CINT$@CURCM 567$@291$@358$@434 291
80315 CURCM$@PINT$@CURAC$@CINT 809$@345$@420$@567 420
234561 CURAC$@CURCM$@PINT$@CINT 500$@359$@200$@400 500
548357 CINT$@CURCM$@CURAC 300$@934$@450 450
SQL>
您不需要分割行(因为这会很慢(,可以使用简单的字符串函数找到位置:
SELECT customer_id,
cus_activity,
cus_amount,
SUBSTR(
amt,
INSTR(amt, '$@', 1, position) + 2,
INSTR(amt, '$@', 1, position + 1) - INSTR(amt, '$@', 1, position) - 2
) AS curac_amount
FROM (
SELECT d.*,
COALESCE((LENGTH(act) - LENGTH(REPLACE(act, '$@')))/2, 0) + 1
AS position
FROM (
SELECT d.*,
SUBSTR(
'$@' || cus_activity || '$@',
1,
INSTR('$@' || cus_activity || '$@', '$@CURAC$@') - 1
) AS act,
CASE
WHEN INSTR('$@' || cus_activity || '$@', '$@CURAC$@') > 0
THEN '$@' || cus_amount || '$@'
END AS amt
FROM data_table_1 d
) d
)
如果您确实想使用较慢的方法将数据拆分成行,那么从Oracle12,您可以使用:
SELECT *
FROM data_table_1 d
LEFT OUTER JOIN LATERAL(
SELECT REGEXP_SUBSTR(cus_amount || '$@', '(.*?)$@', 1, LEVEL, NULL, 1)
AS curac_amount
FROM DUAL
WHERE REGEXP_SUBSTR(cus_activity || '$@', '(.*?)$@', 1, LEVEL, NULL, 1)
= 'CURAC'
CONNECT BY
LEVEL <= REGEXP_COUNT(cus_activity || '$@', '(.*?)$@')
)
ON (1 = 1)
对于样本数据:
CREATE TABLE data_table_1 (Customer_ID, Cus_activity, Cus_Amount) AS
SELECT 12985, 'CURAC', '201' FROM DUAL UNION ALL
SELECT 20917, 'PINT$@CURAC$@CINT$@CURCM', '567$@291$@358$@434' FROM DUAL UNION ALL
SELECT 548357, 'CINT$@CURCM$@CURAC', '300$@934$@450' FROM DUAL UNION ALL
SELECT 80315, 'CURCM$@PINT$@CURAC$@CINT', '809$@345$@420$@567' FROM DUAL UNION ALL
SELECT 234561, 'CURAC$@CURCM$@PINT$@CINT', '500$@359$@200$@400' FROM DUAL UNION ALL
SELECT 234562, 'NOCURAC$@CURCM$@PINT$@CINT', '500$@359$@200$@400' FROM DUAL;
两种输出:
CUSTOMER_ID CUS_ACTIVITY 12985 CURAC 201 20917 PINT$@CURAC$@CINT$@CURCM >td style="text-align:left;">567$@291$@358$@434291 548357 80315 CURCM$@PINT$@CURAC$@CINT >td style="text-align:left234561 CURAC$@CURCM$@PINT$@CINT 500$@359$@200$@400 234562 NOCURAC$@CURCM$@PINT$@CINT 500$@359$@200$@400 em>null//tr>