Doris 有序窗口漏斗 window_funnel

基于 Doris 1.0.1 版本的有序窗口漏斗 window_funnel 测试 DEMO

1.0.1 版本 window_funnel 测试

DEMO 1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
CREATE TABLE test.`dev_windowfunnel_test2` (
`xwho` varchar(50) NULL COMMENT 'xwho',
`xwhen` datetime COMMENT 'xwhen',
`xwhat` int NULL COMMENT 'xwhat'
)
DUPLICATE KEY(xwho)
DISTRIBUTED BY HASH(xwho) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
);

truncate table test.`dev_windowfunnel_test2`;

INSERT into test.dev_windowfunnel_test2 (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 10:41:00', 1);
INSERT INTO test.dev_windowfunnel_test2 (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 13:28:02', 2);
INSERT INTO test.dev_windowfunnel_test2 (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 16:15:01', 3);
INSERT INTO test.dev_windowfunnel_test2 (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 19:05:04', 4);

select * from test.dev_windowfunnel_test2;

select
window_funnel(
86400,
'default',
t.xwhen,
t.xwhat = 1,
t.xwhat = 2,
t.xwhat = 3
) AS level
from test.`dev_windowfunnel_test2` t
;

DEMO 2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
-- 建库、建表
create database test;

CREATE TABLE test.`dev_windowfunnel_test` (
`xwho` varchar(50) NULL COMMENT 'xwho',
`xwhen` datetime COMMENT 'xwhen',
`xwhat` varchar(50) NULL COMMENT 'xwhat'
)
DUPLICATE KEY(xwho)
DISTRIBUTED BY HASH(xwho) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
);

truncate table test.`dev_windowfunnel_test`;

-- Doris 造数
INSERT into test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 10:41:00', '加入购物车');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 13:28:02', '下一步');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 16:15:01', '收货地址');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('1', '2022-03-12 19:05:04', '确认兑换');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('2', '2022-03-12 13:27:40', '加入购物车');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('2', '2022-03-12 13:28:02', '下一步');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('2', '2022-03-12 13:28:21', '收货地址');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('2', '2022-03-12 16:18:24', '确认兑换');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('3', '2022-03-12 16:14:20', '加入购物车');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('3', '2022-03-12 16:14:42', '下一步');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('3', '2022-03-12 16:15:01', '收货地址');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('3', '2022-03-12 16:35:04', '确认兑换');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('4', '2022-03-12 07:54:20', '加入购物车');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('4', '2022-03-12 07:54:42', '下一步');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('4', '2022-03-12 16:15:01', '收货地址');
INSERT INTO test.dev_windowfunnel_test (xwho, xwhen, xwhat) VALUES('4', '2022-03-12 22:08:24', '确认兑换');

select * from test.dev_windowfunnel_test;


-- 计算 (加入购物车 -> 下一步 ->收货地址 -> 确认兑换) 完成这一系列动作所需要的平均时间
with t0 as (
select xwho, xwhat, xwhen
from test.`dev_windowfunnel_test`
where DATE_FORMAT(xwhen,'%Y-%m-%d') >= '2022-03-12' and DATE_FORMAT(xwhen,'%Y-%m-%d') <= '2022-03-12' and
xwhat = '加入购物车'
),
t1 as (
select xwho, xwhat, xwhen from
t0
union all
(select t2.xwho, t2.xwhat, t2.xwhen
from
t0
INNER JOIN
(select xwho, xwhat, xwhen
from test.`dev_windowfunnel_test`
where DATE_FORMAT(xwhen,'%Y-%m-%d') >= '2022-03-12' and DATE_FORMAT(xwhen,'%Y-%m-%d') <= '2022-03-13' and xwhat = '下一步') t2
on t0.xwho = t2.xwho)
union all
(select t2.xwho, t2.xwhat, t2.xwhen
from
t0
INNER JOIN
(select xwho, xwhat, xwhen
from test.`dev_windowfunnel_test`
where DATE_FORMAT(xwhen,'%Y-%m-%d') >= '2022-03-12' and DATE_FORMAT(xwhen,'%Y-%m-%d') <= '2022-03-13' and xwhat = '收货地址') t2
on t0.xwho = t2.xwho)
union all
(select t2.xwho, t2.xwhat, t2.xwhen
from
t0
INNER JOIN
(select xwho, xwhat, xwhen
from test.`dev_windowfunnel_test`
where DATE_FORMAT(xwhen,'%Y-%m-%d') >= '2022-03-12' and DATE_FORMAT(xwhen,'%Y-%m-%d') <= '2022-03-13' and xwhat = '确认兑换') t2
on t0.xwho = t2.xwho)
),
t2 as (
SELECT
t2.xwho,
min(t2.xwhen) AS start_time,
max(t2.xwhen) AS end_time,
max(t2.xwhen)-min(t2.xwhen) as duration,
window_funnel(
86400, -- 转化周期 1 天
'default',
xwhen,
t2.xwhat = '加入购物车',
t2.xwhat = '下一步',
t2.xwhat = '收货地址',
t2.xwhat = '确认兑换'
) AS level
FROM (
SELECT
xwho,
xwhat,
xwhen
FROM t1
) t2
GROUP BY xwho
)
select * from t2
union all
select '平均' as xwho, null as start_time ,null as end_time,sum(duration)/COUNT(1) as duration,0 as level from t2;

0.15.3 增加 window_funnel

0.15.3 增加了以下 Patch 后,编译缺少依赖…

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
be/src/exprs/aggregate_functions.cpp
be/src/exprs/aggregate_functions.h
be/src/vec/CMakeLists.txt
be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp
be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
be/test/exprs/CMakeLists.txt
be/test/exprs/window_funnel_test.cpp
be/test/vec/aggregate_functions/CMakeLists.txt
be/test/vec/aggregate_functions/vec_window_funnel_test.cpp

fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java