我在MSSQL数据库中有一个巨大的表(超过95 000 000条记录(
id | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
此查询对95M条记录的性能可能并不理想,但这应该可以做到。
查找包含多行的组的精确匹配
DROP TABLE IF EXISTS #Config
CREATE TABLE #Config
(id int
,configuration_id int
,equipment_group_id int
,name VARCHAR(100)
,price INT
)
INSERT INTO #Config
VALUES
(1,1,100,'item1',10)
,(2,1,100,'item2',20)
,(3,1,100,'item3',30)
,(4,2,100,'item1',10)
,(5,2,100,'item2',20)
,(6,2,100,'item3',30)
,(7,3,100,'item1',10)
,(8,3,100,'item2',20)
,(9,3,100,'item3',31)
;WITH cte_ConfigCount AS (
SELECT *,ConfigTotalRowCnt = COUNT(*) OVER (PARTITION BY A.configuration_id) /*Counts how many rows in each config*/
FROM #Config AS A
)
SELECT
A.configuration_id
,B.configuration_id
,TextDescription = CONCAT('Config #',A.configuration_id,' matches Config #',B.configuration_id)
,A.ConfigTotalRowCnt
,RowsMatch = COUNT(*)
FROM cte_ConfigCount AS A
INNER JOIN cte_ConfigCount AS B
ON A.configuration_id < B.configuration_id /*Don't join to self and only join 1 way (so don't have one row with A-B and another row with B-A)*/
AND a.equipment_group_id = B.equipment_group_id
AND A.name = B.name
AND A.price = B.price
GROUP BY A.configuration_id,A.ConfigTotalRowCnt,B.configuration_id
HAVING A.ConfigTotalRowCnt = COUNT(*) /*Only return where the total row for the config matches the rows where the configs match*/