一个有趣的SQL Server 层级汇总数据问题

2022-12-01,,,,

    看SQL Server大V宋大侠的博客文章,发现了一个有趣的sql server层级汇总数据问题。
    
    具体的问题如下:
    parent_id emp_id emp_name total_amout 
    NULL 2 Andrew 200 
    2 1 Nancy 100 
    2 3 Janet 120 
    3 4 Michael 80 
    1 5 Robert 50 
    每个员工的总销售额=自己的销售额+其下级员工的总销售额, 
    比如: 
    Andrew = 200_100_120_80_50=550 
    Nancy = 100+50=150 
    Janet = 120+80=200 
    Michael = 80 
    Robert = 50 
    这个用SQL怎样可以查询得到,请教一下大家???
 
    从数据表中的数据以及问题阐述来看可以确定该数据表是个父子层级类型数据表,这个在纬度类型中是一种比较常见:父子纬度。 从名字解释来看就是一种自引用的数据表,大家最熟悉的组织机构就是具有这种层级结构,其中不同级别的机构具有共同的特性。这种层级结构如下图所示:

以上图来自百度查询得到。
         
     看来宋大侠针对该问题的解决方案(CTE递归查询+游标),还有其他的博友的评论(有的支出数据表设计的不完善,还有通过虚拟出层级层级字符串列来实现的,还有获取当前层级以下所有层级的汇总封装成存储等等)。
    
    为了实现该问题,我使用的是CTE递归查询+APPLY,具体的实现思路如下:
    1、通过CTE递归查询虚拟出若干列,其中就有层级索引字符串列(该列表示具有层级的标识ID的字符串格式,便于查找)。
    2、使用APPLY来实现汇总数据(当然也可以使用SELECT + SUBQUERY)。
   
具体演示实现代码如下:

 IF OBJECT_ID(N'dbo.MyEmp', N'U') IS NOT NULL
BEGIN
DROP TABLE dbo.MyEmp;
END
GO CREATE TABLE dbo.MyEmp (
MyEmpID INT NOT NULL,
ParentID INT NULL,
MyEmpName NVARCHAR(20) NOT NULL,
HoursSalary INT NOT NULL
);
GO IF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PK') IS NULL
BEGIN
ALTER TABLE [dbo].[MyEmp] ADD CONSTRAINT [PK_U_CL_MyEmp_MyEmpID] PRIMARY KEY CLUSTERED
(
[MyEmpID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
ON [PRIMARY];
END
GO IF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NULL
BEGIN
ALTER TABLE dbo.MyEmp ADD CONSTRAINT FK_MyEmp_MyEmp_ParentID FOREIGN KEY (ParentID) REFERENCES dbo.MyEmp (MyEmpID);
END
GO -- Insert Test Data
INSERT INTO dbo.MyEmp (MyEmpID, ParentID, MyEmpName, HoursSalary) VALUES
(1, NULL, N'Andrew', 200),
(2, 1, N'Nancy', 100),
(3, 1, N'Janet', 120),
(4, 3, N'Michael', 80),
(5, 2, N'Robert', 50)
GO ;WITH tData (MyEmpID, MyEmpName, ParentID, ParentName, HoursSalary, ParentHierarchyIndex, HierarchyIndex, LevelID, HierarchyName, HierarchyName2) AS (
-- 基准点查询
SELECT MyEmpID /*雇员ID*/
,MyEmpName /*雇员名称*/
,ISNULL(ParentID, 0) AS ParentID /*父雇员ID*/
,CAST(N'' AS NVARCHAR(20)) AS ParentName /*父雇员名称*/
,HoursSalary /*小时薪水*/
,CAST(CONCAT(',', ISNULL(ParentID, 0), ',') AS VARCHAR(300)) AS ParentHierarchyIndex /*父层级索引字符串*/
,CAST(CONCAT(',', ISNULL(ParentID, 0), ',', MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex /*层级索引字符串串,包含当前层级*/
,CAST(1 AS INT) AS LevelID /*层级ID,根层级为1,层级越深则数字越大*/
,CAST(MyEmpName AS NVARCHAR(800)) AS HierarchyName /*层级名称,树形结构显示*/
,CAST(MyEmpName AS NVARCHAR(80)) AS HierarchyName2 /*层级名称2,水平结构显示*/
FROM dbo.MyEmp
WHERE ParentID IS NULL
-- 递归查询
UNION ALL
SELECT T.MyEmpID
,T.MyEmpName
,T.ParentID
,T2.MyEmpName
,T.HoursSalary
,CAST(CONCAT(T2.ParentHierarchyIndex, T.ParentID, ',') AS VARCHAR(300)) AS ParentHierarchyIndex
,CAST(CONCAT(T2.HierarchyIndex, T.MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex
,T2.LevelID + 1 AS LevelID
,CAST(CONCAT(REPLICATE(N'| ', T2.LevelID), T.MyEmpName) AS NVARCHAR(800)) AS HierarchyName
,CAST(CONCAT(T2.HierarchyName2, '->', T.MyEmpName) AS NVARCHAR(80)) AS HierarchyName2
FROM dbo.MyEmp AS T
INNER JOIN tData AS T2
ON T.ParentID = T2.MyEmpID
) -- 使用HierarchyIndex来实现 -- CROSS APPLY
SELECT T.*, T2.TotalSalary AS TotalSalary
FROM tData AS T
CROSS APPLY (SELECT SUM(tData.HoursSalary) AS TotalSalary
FROM tData
WHERE HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%')) AS T2
ORDER BY T.HierarchyIndex ASC; -- SELECT + 子查询
--SELECT T.*, TotalSalary = ( SELECT SUM(tData.HoursSalary) FROM tData WHERE tData.HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%'))
--FROM tData AS T
--ORDER BY T.HierarchyIndex ASC; -- 使用ParentHierarchyIndex
-- CROSS APPLY
--SELECT T.*, T.HoursSalary + T2.DownMemberTotalHoursSalary AS TotalSalary
--FROM tData AS T
-- CROSS APPLY (
-- SELECT ISNULL(SUM(tData.HoursSalary), 0) AS DownMemberTotalHoursSalary
-- FROM tData
-- WHERE tData.ParentHierarchyIndex LIKE CONCAT(T.ParentHierarchyIndex, T.MyEmpID, '%')
-- ) AS T2
--ORDER BY T.HierarchyIndex ASC; -- SELECT + 子查询
--SELECT T.*, T.HoursSalary + (SELECT ISNULL(SUM(tData.HoursSalary), 0) AS DownMemberTotalHoursSalary
-- FROM tData
-- WHERE tData.ParentHierarchyIndex LIKE CONCAT(T.ParentHierarchyIndex, T.MyEmpID, '%')) AS TotalSalary
--FROM tData AS T
--ORDER BY T.HierarchyIndex ASC;
GO

以上解决方案是在不修改数据结构的情况下来实现的,从以上解决方案中,我们可以从数据表的设计入手,将虚拟出来的父层级索引字符串列增加到数据表中,将该列创建为聚集索引, 便于提高查询性能。

 增加新列的T-SQL脚本如下:

 IF NOT EXISTS (SELECT 1 FROM sys.columns WHERE OBJECT_ID = OBJECT_ID(N'dbo.MyEmp', N'U') AND name = N'HierarchyIndex')
BEGIN
ALTER TABLE dbo.MyEmp ADD HierarchyIndex VARCHAR(800) NOT NULL CONSTRAINT DF_MyEmp_HierarchyIndex DEFAULT '';
END
GO

如果该列创建为聚集且唯一,则相应的T-SQL脚本如下:

 -- 删除外键
IF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NOT NULL
BEGIN
ALTER TABLE dbo.MyEmp DROP CONSTRAINT FK_MyEmp_MyEmp_ParentID;
END
GO -- 删除主键
IF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PF') IS NULL
BEGIN
ALTER TABLE dbo.MyEmp DROP CONSTRAINT PK_U_CL_MyEmp_MyEmpID;
END
GO -- 创建(唯一:语义分析得到的,没有使用创建UNIQUE关键字)聚集索引
IF NOT EXISTS (SELECT 1 FROM sys.indexes WHERE OBJECT_ID = OBJECT_ID(N'dbo.MyEmp', N'U') AND name = N'IX_U_CL_MyEmp_HierarchyIndex')
BEGIN
CREATE CLUSTERED INDEX IX_U_CL_MyEmp_HierarchyIndex ON dbo.MyEmp
(
HierarchyIndex ASC
) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
END
GO -- 创建主键且非聚集索引
IF OBJECT_ID(N'PK_U_CL_MyEmp_MyEmpID', N'PK') IS NULL
BEGIN
ALTER TABLE [dbo].[MyEmp] ADD CONSTRAINT [PK_U_NCL_MyEmp_MyEmpID] PRIMARY KEY NONCLUSTERED
(
[MyEmpID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
ON [PRIMARY];
END
GO -- 创建外键
IF OBJECT_ID(N'FK_MyEmp_MyEmp_ParentID', N'F') IS NULL
BEGIN
ALTER TABLE dbo.MyEmp ADD CONSTRAINT FK_MyEmp_MyEmp_ParentID FOREIGN KEY (ParentID) REFERENCES dbo.MyEmp (MyEmpID);
END
GO

同步HierarchyIndex字段列值的T-SQL如下:

 ;WITH tData (MyEmpID, HierarchyIndex) AS (
-- 基准点查询
SELECT MyEmpID, CAST(CONCAT(',', ISNULL(ParentID, 0), ',', MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex
FROM dbo.MyEmp
WHERE ParentID IS NULL
-- 递归查询
UNION ALL
SELECT T.MyEmpID, CAST(CONCAT(T2.HierarchyIndex, T.MyEmpID, ',') AS VARCHAR(300)) AS HierarchyIndex
FROM dbo.MyEmp AS T /*子表*/
INNER JOIN tData AS T2 /*父表*/
ON T.ParentID = T2.MyEmpID
) --SELECT T.*
UPDATE T2
SET T2.HierarchyIndex = T.HierarchyIndex
FROM tData AS T
INNER JOIN dbo.MyEmp AS T2
ON T.MyEmpID = T2.MyEmpID
WHERE T2.HierarchyIndex = '';
GO

使用以下T-SQL验证数据是否已经修改:

1 SELECT MyEmpID, ParentID, MyEmpName, HoursSalary, HierarchyIndex
2 FROM dbo.MyEmp;
3 GO
解决该问题的解决方案的T-SQL语句如下:
 SELECT T.MyEmpID, T.ParentID, T.MyEmpName, T.HoursSalary, T.HierarchyIndex, T2.TotalSalary AS TotalSalary
FROM dbo.MyEmp AS T
CROSS APPLY (SELECT SUM(HoursSalary) AS TotalSalary FROM dbo.MyEmp WHERE HierarchyIndex LIKE CONCAT(T.HierarchyIndex, '%')) AS T2;
GO
 

 

一个有趣的SQL Server 层级汇总数据问题的相关教程结束。

《一个有趣的SQL Server 层级汇总数据问题.doc》

下载本文的Word格式文档,以方便收藏与打印。