我有三列如下-
Dated status count
01-02-13 1A + 2B + 73PLO 76
01-02-13 29A + 17ACB 46
01-02-13 9PLO + 11B + 5TY 25
02-02-13 18FGH + 23B + 4ACB 45
02-02-13 8ACB + 12A + 2FGH 22
02-02-13 6A + 2B + 42ACB 50
03-02-13 ..... ...
............
因此,我的最终结果应该为特定的"日期"添加三个字符串("状态"),但请记住特定日期的三个字符串的公共子字符串如下-
dated status count
01-02-13 30A + 13B + 82PLO + 17ACB + 5TY 147
02-02-13 20FGH + 25B + 73PLO + 54ACB + 18A 117
03-02-13 ......(and similarly) ....
在这里,我实际上已经通过添加计数和合并"状态"在"dated"上的表达式创建了一个组,但实际上我无法达到这一点。。。。需要一些帮助。
没有彻底测试,但我认为这是您所需要的,我没有考虑所有的测试用例,在某些情况下可能总数不同,但状态分组是正确的。
SELECT dated
,LISTAGG(num||val, ' + ') WITHIN GROUP (ORDER BY val) status
,MAX(cnt) "count"
FROM
--middle section start
(
SELECT dated
,SUM(regexp_substr(sub_status,'[0-9]+')) num
,regexp_substr(sub_status,'[A-Z]+')val,MAX(cnt) cnt
FROM
--inner section start
(
SELECT DISTINCT dated
,TRIM(REGEXP_SUBSTR( status, '[^+]+', 1, LEVEL)) sub_status
,SUM(DISTINCT "count") OVER(PARTITION BY dated) cnt
FROM DATA
CONNECT BY LEVEL <= LENGTH(REGEXP_REPLACE(status, '[^+]+')) + 1
)
--inner section end
GROUP BY dated
,regexp_substr(sub_status,'[A-Z]+')
)
--middle section end
GROUP BY dated;
- 内部部分:首先,我尝试在由
+
分隔的行中分隔所有不同的值 - 然后我进一步分离数值,去掉数字部分和字母部分
- 中间部分:我试着根据日期栏和字母部分的分组来计算数字部分的总和
- 外部部分:我根据日期列组合中间部分的结果,用
+
分隔
*注意*我还没有测试过,但这应该是解决这个问题的方法,不考虑在同一组中具有相同值的计数
SQLFIDDLE
SQL Fiddle
Oracle 11g R2架构设置:
CREATE TABLE Data ( Dated, status, "count" ) AS
SELECT TO_DATE( '01-02-13', 'DD-MM-YY' ), '1A + 2B + 73PLO', 76 FROM DUAL
UNION ALL SELECT TO_DATE( '01-02-13', 'DD-MM-YY' ), '29A + 17ACB', 46 FROM DUAL
UNION ALL SELECT TO_DATE( '01-02-13', 'DD-MM-YY' ), '0A + 1ACB', 46 FROM DUAL
UNION ALL SELECT TO_DATE( '01-02-13', 'DD-MM-YY' ), '9PLO + 11B + 5TY', 25 FROM DUAL
UNION ALL SELECT TO_DATE( '02-02-13', 'DD-MM-YY' ), '18FGH + 23B + 4ACB', 45 FROM DUAL
UNION ALL SELECT TO_DATE( '02-02-13', 'DD-MM-YY' ), '8ACB + 12A + 2FGH', 22 FROM DUAL
UNION ALL SELECT TO_DATE( '02-02-13', 'DD-MM-YY' ), '6A + 2B + 42ACB', 50 FROM DUAL;
查询1:
一个对Oracle 11g R1有效的SQL解决方案(尽管如果用LENGTH(REGEXP_REPLACE(status,'d+[A-Z]+( + |$)', 'X'))
替换REGEXP_COUNT
,那么我认为您应该能够在Oracle 9i中使用它):
WITH counts AS (
SELECT Dated,
SUM( "count" ) AS total_count
FROM Data
GROUP BY Dated
),
split_statuses AS (
SELECT DISTINCT
Dated,
TO_NUMBER( REGEXP_SUBSTR( status, '(d+)([A-Z]+)', 1, LEVEL, NULL, 1) ) AS num_status,
REGEXP_SUBSTR( status, '(d+)([A-Z]+)', 1, LEVEL, NULL, 2) AS val_status
FROM Data
CONNECT BY LEVEL <= REGEXP_COUNT(status, '(d+)([A-Z]+)')
),
summed_statuses AS (
SELECT Dated,
SUM( num_status ) || val_status AS totaled_status,
val_status
FROM split_statuses
GROUP BY Dated, val_status
),
indexed_summed_statuses AS (
SELECT Dated,
totaled_status,
ROW_NUMBER() OVER ( PARTITION BY Dated ORDER BY val_status ) AS idx
FROM summed_statuses
),
grouped_statuses AS (
SELECT Dated,
SYS_CONNECT_BY_PATH( totaled_status, ' + ' ) AS status,
idx
FROM indexed_summed_statuses
START WITH idx = 1
CONNECT BY PRIOR idx + 1 = idx AND prior Dated = Dated
),
max_grouped_statuses AS (
SELECT Dated,
SUBSTR( MAX( status ) KEEP ( DENSE_RANK LAST ORDER BY idx ), 4 ) AS status
FROM grouped_statuses
GROUP BY Dated
)
SELECT m.Dated,
status,
total_count
FROM max_grouped_statuses m
INNER JOIN
counts c
ON ( m.Dated = c.Dated )
ORDER BY Dated
结果:
| DATED | STATUS | TOTAL_COUNT |
|---------------------------------|---------------------------------|-------------|
| February, 01 2013 00:00:00+0000 | 30A + 18ACB + 13B + 82PLO + 5TY | 193 |
| February, 02 2013 00:00:00+0000 | 18A + 54ACB + 25B + 20FGH | 117 |
查询2:
Oracle 11g R2:提供了一个更简单的解决方案
WITH counts AS (
SELECT Dated,
SUM( "count" ) AS total_count
FROM Data
GROUP BY Dated
),
split_statuses AS (
SELECT DISTINCT
Dated,
TO_NUMBER( REGEXP_SUBSTR( status, '(d+)([A-Z]+)', 1, LEVEL, NULL, 1) ) AS num_status,
REGEXP_SUBSTR( status, '(d+)([A-Z]+)', 1, LEVEL, NULL, 2) AS val_status
FROM Data
CONNECT BY LEVEL <= REGEXP_COUNT(status, '(d+)([A-Z]+)')
),
summed_statuses AS (
SELECT Dated,
SUM( num_status ) || val_status AS totaled_status,
val_status
FROM split_statuses
GROUP BY Dated, val_status
),
grouped_statuses AS (
SELECT Dated,
LISTAGG( totaled_status, ' + ') WITHIN GROUP (ORDER BY val_status) AS status
FROM summed_statuses
GROUP BY Dated
)
SELECT g.Dated,
status,
total_count
FROM grouped_statuses g
INNER JOIN
counts c
ON ( g.Dated = c.Dated )
ORDER BY Dated
结果:
| DATED | STATUS | TOTAL_COUNT |
|---------------------------------|---------------------------------|-------------|
| February, 01 2013 00:00:00+0000 | 30A + 18ACB + 13B + 82PLO + 5TY | 193 |
| February, 02 2013 00:00:00+0000 | 18A + 54ACB + 25B + 20FGH | 117 |