SQL:如何根据行的值透视行



我想转换这个数据:

id  value
---------
1   v1=10
1   v2=20
1   v3=30
2   v1=40

看起来像这样:

id  v1     v2     v3
---------------------------
1   10     20     30
2   40     null   null

有什么解决方案吗?我想解决它 bigquery 的标准 sql。

以下几个选项 - 全部用于 BigQuery Standrad SQL

选项 1 - 硬编码透视


假设您事先知道要创建的
列的数量和名称 在这种情况下,下面做你需要的

#standardSQL
SELECT 
id, 
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`, 
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id   

您可以使用问题中的虚拟数据进行测试,玩上面,如下所示

#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40' 
)
SELECT 
id, 
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`, 
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id
-- ORDER BY id   

结果符合预期:

Row     id      v1      v2      v3   
1       1       10      20      30   
2       2       40      null    null       

选项 2 - 动态枢轴


如果您不知道列的数量和名称 - 您将首先需要生成类似于上述选项 #1 中的脚本

你应该在下面运行以获得它

#standardSQL
SELECT CONCAT('SELECT id, ', 
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
) 
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key 
FROM `project.dataset.table`
GROUP BY key
)   

例如,如果您将针对同一个假人运行它

#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40' 
)
SELECT CONCAT('SELECT id, ', 
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
) 
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key 
FROM `project.dataset.table`
GROUP BY key
) 

你会得到下面的字符串

SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id

所以,现在 - 如果你对你的虚拟数据运行这个脚本

#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40' 
)
SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id   

您将获得与选项 1 中相同的结果 - 但不是 - 现在最终查询是为您动态生成的

选项 3 - 展平键 - 值与透视


旋转很酷,但对于下面的许多实际情况,发现简单的方法非常有用,更适合处理

#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40' 
)
SELECT 
id, 
SPLIT(value,"=")[OFFSET(0)] key, 
SPLIT(value, "=")[OFFSET(1)] val
FROM `project.dataset.table`

这给出了普通的原版键值扁平化结构

Row     id      key     val  
1       1       v1      10   
2       1       v2      20   
3       1       v3      30   
4       2       v1      40   
SELECT
id,
MAX(IF(name = 'v1', value, NULL)) AS v1,
MAX(IF(name = 'v2', value, NULL)) AS v2,
MAX(IF(name = 'v3', value, NULL)) AS v3
FROM (
SELECT
SPLIT(value, '=')[OFFSET(0)] AS name,
SPLIT(value, '=')[OFFSET(1)] AS value
FROM dataset.table
)
GROUP BY id

最新更新