【SQL】窗口函数:求数据组内累计值和累计百分比

2023-02-12,,,,

〇、概述

1、所需资料

窗口函数实现组内百分比累计值、累计百分比:https://blog.csdn.net/weixin_39751959/article/details/88828922

2、背景

需求:不同场景不同规则下各区间内基线值的计算和MQ发送

计算位于场景列表内的各场景组合(scene),满足不同规则(rule)某区间dataRange(如20%-80%)的基线平均值

其他场景,计算平均数作为基线值

一、概述

1、输入信息

传入参数:

{"rules":
[{"dataRange":[20,80],"ruleTypeName":"标准基线","duration":30,"ruleType":"1","ruleId":"123"},
{"dataRange":[0,20],"ruleTypeName":"管理基线","duration":60,"ruleType":"2","ruleId":"234"},
{"dataRange":[80,100],"ruleTypeName":"异常基线","duration":90,"ruleType":"3","ruleId":"123"}],
"modules":
[{"moduleNumber":"ltc_contract_basic_info","moduleName":"合同基本信息","calField":"create_time","nextFields":["ltc_contract_basic_info_id"]},
{"moduleNumber":"ltc_contract_assess_basic_info","moduleName":"合同评审信息","preFields":["ltc_contract_basic_info_id"],"nextFields":["ltc_contract_assess_basic_info_id"]},
{"moduleNumber":"ltc_contract_assess_record","moduleName":"合同评审记录","preFields":["ltc_contract_assess_basic_info_id"],"calField":"contract_assess_end_date"}],"
scenes":
{"sceneKeys":
[{"values":["010101","010102","0102","0103","0103"],"key":"sales_scenario"},
{"values":["01","02","03","04","05"],"key":"contract_register_type_code"}],
"sceneGroups":
[{"contract_register_type_code":"03","sales_scenario":"0101"},
{"contract_register_type_code":"03","sales_scenario":"020102"}]},
"definition":
{"definitionName":"从注册到评审的时间基线","version":1,"definitionId":"123"}}

2、SQL查询结果

二、实现过程

1、初始信息

根据json可以按照模板生成下列SQL

SELECT
sales_scenario,
contract_register_type_code,
-- ltc_contract_basic_info.create_time,
-- ltc_contract_assess_record.contract_assess_end_date,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
1 num_every
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))

设置根据时间差排序,得到如下结果

下一步思路:计算subtime值的百分比

2、计算组内累计值

sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine

整体:

select
sales_scenario,
contract_register_type_code,
subtime,
sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
from (
SELECT
sales_scenario,
contract_register_type_code,
-- ltc_contract_basic_info.create_time,
-- ltc_contract_assess_record.contract_assess_end_date,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
1 num_every
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
) res_start

3、获得组内最大值

max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine

整体:

select
sales_scenario,
contract_register_type_code,
subtime,
rk_combine,
max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
from
( select
sales_scenario,
contract_register_type_code,
subtime,
sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
from
(
SELECT
sales_scenario,
contract_register_type_code,
-- ltc_contract_basic_info.create_time,
-- ltc_contract_assess_record.contract_assess_end_date,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
1 num_every
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
) res_start
) res_middle

4、获得百分比

round(rk_combine/max_rk_combine,2)*100 percent

整体:

(select
sales_scenario,
contract_register_type_code,
subtime,
round(rk_combine/max_rk_combine,2)*100 percent,
null default_value
from
(
select
sales_scenario,
contract_register_type_code,
subtime,
rk_combine,
max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
from
(
select
sales_scenario,
contract_register_type_code,
subtime,
sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
from
(
SELECT
sales_scenario,
contract_register_type_code,
-- ltc_contract_basic_info.create_time,
-- ltc_contract_assess_record.contract_assess_end_date,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
1 num_every
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
) res_start
) res_middle
) res_end)

获得位于场景组合的基线均值结果

5、获得默认基线值

(
SELECT
sales_scenario,
contract_register_type_code,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
0 percent,
avg(DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time)) over() default_value
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(
sales_scenario is null
or
contract_register_type_code is null
or
(sales_scenario,contract_register_type_code) not in (('0101','03'),('0102','01'),('020102','03')
)
)
)

6、结果组合

(select
sales_scenario,
contract_register_type_code,
subtime,
round(rk_combine/max_rk_combine,2)*100 percent,
null default_value
from
(
select
sales_scenario,
contract_register_type_code,
subtime,
rk_combine,
max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
from
(
select
sales_scenario,
contract_register_type_code,
subtime,
sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
from
(
SELECT
sales_scenario,
contract_register_type_code,
-- ltc_contract_basic_info.create_time,
-- ltc_contract_assess_record.contract_assess_end_date,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
1 num_every
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
) res_start
) res_middle
) res_end)
union all
(
SELECT
sales_scenario,
contract_register_type_code,
DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
0 percent,
avg(DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time)) over() default_value
FROM
ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id
WHERE
ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
and
(
sales_scenario is null
or
contract_register_type_code is null
or
(sales_scenario,contract_register_type_code) not in (('0101','03'),('0102','01'),('020102','03')
)
)
)

三、步骤总结

1、计算组内累计值

sum(saleroom) over(partition by area order by date) ---求组内累计值

2、计算组内总计值/最大值

sum(saleroom) over(partition by area order by area)    ---求组内总计值

3、累计值/总计值

组内百分比= saleroom / total_value

累计百分比 = aggregate_value/total_value

四、总结

1、过程

用1表示每项的值

分组计算,按照subtime排序,得到累计值

求出最大的累计值,作为和

用各项累计值除以每一项的和,得到百分比

2、结果计算与返回

package com.boulderai.baseline.cal.service.impl;

import com.boulderai.baseline.cal.mq.MessageProducer;
import com.boulderai.baseline.cal.service.BaseLineCalService;
import cn.hutool.json.JSONUtil;
import com.boulderai.timeline.api.bigdata.BaseLineCalRequest;
import com.boulderai.timeline.api.bigdata.BaseLineMessage;
import org.apache.commons.lang3.StringUtils;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import lombok.extern.slf4j.Slf4j;
import javax.annotation.Resource;
import java.math.BigDecimal;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors; /**
* @Title:BaseLineCalServiceImpl
* @Descript:
* @author: yanwei (yanwei@yanxxcloud.cn)
* @date:2022/8/3
**/
@Service
@Slf4j
public class BaseLineCalServiceImpl implements BaseLineCalService { @Resource
private MessageProducer messageProducer; @Resource
private JdbcTemplate jdbcTemplate; /**
* @author 刘金辉
* @param request
* @return 发送MQ是否成功
*/
@Override
public Boolean calculate(BaseLineCalRequest request) {
List<BaseLineMessage> messageList = new ArrayList<>();
request.getRules().forEach(rule -> {
String sql = request.computeSql(rule.getRuleType());
String[] union_sql_list = sql.split("union all");
String sql_in_scene_list = union_sql_list[0];
Integer[] dataRange = rule.getDataRange();
List<Map<String, Object>> resListMiddle = jdbcTemplate.queryForList(sql_in_scene_list);
//sceneKeysList为场景的集合,如[delivery_way_code,contract_register_type_code]
List<String> sceneKeysList = request.getScenes()
.getSceneKeys().stream()
.map(x -> x.getKey()).collect(Collectors.toList());
List<Map<String, Object>> resListUltimate = new ArrayList<>();
//inSceneListRes为查询到的拆分场景的结果集
List<Map<String, Object>> inSceneListRes = jdbcTemplate.queryForList(sql_in_scene_list).stream()
.filter(x -> Math.round(Double.valueOf(x.get("percent").toString())) >= dataRange[0]
&& Math.round(Double.valueOf(x.get("percent").toString())) <= dataRange[1])
.collect(Collectors.toList());
request.getScenes().getSceneGroups().forEach(sceneValueMap -> {
//sceneValueMap为每一个场景组合,如[delivery_way_code -> 01,contract_register_type_code -> 01,02]
Map<String, Object> resInMap = new HashMap<String, Object>(); //构建每个要插入的map
sceneValueMap.entrySet().forEach(everySceneCombine->{
resInMap.put(everySceneCombine.getKey(),everySceneCombine.getValue());
});
Double baselineValue = computeResult2New(sceneValueMap, sceneKeysList, inSceneListRes);
resInMap.put("value", baselineValue);
resListUltimate.add(resInMap);
});
int default_value = 0;
String sql_not_in_scene_list = union_sql_list[1];
List<Map<String, Object>> result = jdbcTemplate.queryForList(sql_not_in_scene_list);
if (!CollectionUtils.isEmpty(result)) {
default_value = Math.round(Float.parseFloat(result.get(0).get("default_value").toString()));
}
BaseLineMessage ansMsg = new BaseLineMessage();
ansMsg.setDefinition(request.getDefinition());
ansMsg.setRule(rule);
ansMsg.setValues(resListUltimate);
ansMsg.setDefaultValue(default_value);// 其他场景的平均值,如何确定
messageList.add(ansMsg);
});
log.info("cal:{}", JSONUtil.toJsonStr(request));
log.info("cal.return:{}", JSONUtil.toJsonStr(messageList));
messageProducer.SendCalMessageList(messageList);
return true;
} /**
* 场景组合拆分为子场景,去数据库查询
* sceneValueMap为每一个场景组合,如[delivery_way_code -> 01,contract_register_type_code -> 01,02, value=10]
* sceneKeysList为场景的集合,如[delivery_way_code,contract_register_type_code]
* inSceneListRes为查询到的拆分场景的结果集
* @param group
* @param sceneKeys
* @param result
* @return
*/
private Double computeResult2New(Map<String, String> sceneValueMap, List<String> sceneKeysList, List<Map<String, Object>> inSceneListResList) {
Double avgOfCombineSceneVal = 0.0;
List<String> splitSceneValList = Arrays.asList("");
String[] sceneKeysArray = String.join(",", sceneKeysList).split(",");
for (String sceneKey : sceneKeysArray) {
String assignKeyValue = String.valueOf(sceneValueMap.get(sceneKey));
String[] splitValueArray = assignKeyValue.split(","); //assignKeyValue=01,02 | splitValueArray=[01][02]
splitSceneValList = calgroup(splitValueArray, splitSceneValList); //splitSceneValList=[01][02]=>splitSceneValList=[01,01][01,02]
}
for (String everySceneCombineStr : splitSceneValList) { //splitSceneValList=[“01,01”,"01,02"] everySceneCombineStr="01,02"
List<Map<String, Object>> filterEverySceneCombineResList = inSceneListResList.stream().filter(qryResMap -> {
boolean fetched = true;
String[] everySceneStr = everySceneCombineStr.split(","); //["01", "02"]
for (int i = 0; i < everySceneStr.length; i++) {
fetched = everySceneStr[i].equals(String.valueOf(qryResMap.get(sceneKeysArray[i]))); //找到满足条件的子数据
if (!fetched) {
return false;
}
}
return fetched;
}).collect(Collectors.toList());//过滤得到满足每个场景组合的数据
Double avgRes = filterEverySceneCombineResList.stream().map(p -> Math.round(Double.valueOf(p.get("subtime").toString())))
.collect(Collectors.averagingLong(Long::longValue));
avgOfCombineSceneVal += avgRes;
}
return avgOfCombineSceneVal/splitSceneValList.size();
} /**
* @param group
* @param sceneKeys
* @param result
* @return
*/
private BigDecimal calResultBefore(Map<String, String> group, List<String> sceneKeys, List<Map<String, Object>> result) {
List<String> rarry = Arrays.asList("");
String[] arrKeys = String.join(",", sceneKeys).split(",");
for (String key : arrKeys) {
String v = String.valueOf(group.get(key));
String[] varr = v.split(",");
rarry = calgroup(varr, rarry);
}
//fetch data
List<Map<String, Object>> results = new ArrayList<>();
for (String r : rarry) { //,020102,03
List<Map<String, Object>> subList = result.stream().filter(rmap -> {
boolean fetched = true;
//String[] var = (String[]) Arrays.stream(r.split(",")).skip(0).collect(Collectors.toList()).toArray();
String[] var = r.split(","); //["", "020102", "03"]
for (int i = 0; i < var.length; i++) {
fetched = var[i].equals(String.valueOf(rmap.get(arrKeys[i])));
if (!fetched) {
return false;
}
}
return fetched;
}).collect(Collectors.toList());
results.addAll(subList);
}
//计算结果
return BigDecimal.ZERO;
} /**
* @param vs
* @param sub
* @return
*/
private List<String> calgroup(String[] vs, List<String> sub) {
List<String> ans = new ArrayList<>(); for (String v : vs) {
sub.stream().forEach(s -> {
if (StringUtils.isBlank(s)) {
ans.add(v);
} else {
ans.add(s + "," + v);
}
});
}
return ans;
} }

【SQL】窗口函数:求数据组内累计值和累计百分比的相关教程结束。

《【SQL】窗口函数:求数据组内累计值和累计百分比.doc》

下载本文的Word格式文档,以方便收藏与打印。