增量"bigger than"性能与单独"between"运算符的查询性能



我有一个查询,看起来像这样:

select         
CASE
WHEN col BETWEEN 0 AND 20 THEN 0
WHEN col BETWEEN 20 AND 50 THEN 20
WHEN col BETWEEN 50 AND 100 THEN 40
WHEN col BETWEEN 100 AND 200 THEN 75
WHEN col BETWEEN 200 AND 10000 THEN 86
ELSE 0 END AS t_col
from TAB1;

我想获得更好的性能,并将其重写为:

select case  
WHEN col < 20 then 0
WHEN col < 50 then 20
WHEN col < 100 then 40
WHEN col < 200 then 75
WHEN col < 1000 then 86
END AS t_col
from TAB1;

我认为第二个查询可能会更快,因为不会创建间隔,而只会将column的值与一个数字进行比较。Explain Plan为两个查询提供了相同的结果。我想知道他们中哪一个表现更好?

它们实际上是相同的。CPU周期的任何微小差异都会被执行查询所需的I/O,网络开销等所掩盖。

我认为between版本允许您将最频繁出现的值放在前面,如果很大一部分值落在一个或两个范围内,理论上可以提高效率。然后,<版本只需要对每个案例进行一次比较,而不是两次。

我仍然给<版本更多的风格点:)

我在PL/SQL游标循环中测试了两个版本,以消除网络问题,并多次运行测试。在1000万次迭代中,结果相差不到半秒,这与多次运行的变化大致相同(因为在任何服务器或笔记本电脑上总是有其他东西在运行)。

create table t1 (c1, c2, c3) pctfree 0 nologging
as
select round(dbms_random.value(1,100))
, cast(dbms_random.string('X',40) as varchar2(40))
, cast(dbms_random.string('X',40) as varchar2(40))
from   xmltable('1 to 1000000');
declare
type test_rectype is record(num number);
totalTime1 simple_integer := 0;
totalTime2 simple_integer := 0;
time1Percentage simple_integer := 0;
time2Percentage simple_integer := 0;
function testCursor
( cursorNum in integer )
return sys_refcursor
is
testCursor sys_refcursor;
begin
if cursorNum = 1 then
open testCursor for
select case
when c1 < 20 then 0
when c1 < 50 then 20
when c1 < 100 then 40
when c1 < 200 then 75
when c1 < 1000 then 86
end as t_c1
from   t1;
elsif cursorNum = 2 then
open testCursor for
select case
when c1 < 20 then 0
when c1 < 50 then 20
when c1 < 100 then 40
when c1 < 200 then 75
when c1 < 1000 then 86
end as t_c1
from   t1;
end if;
return testCursor;
end testCursor;

-- Fetch all rows from a cursor and return time in hundredths of a second:
procedure time_cursor
( inCursor in sys_refcursor
, outTime  in out nocopy simple_integer )
is
startTime simple_integer := dbms_utility.get_time;
begin
-- 21c new iterator syntax
for r test_rectype in values of inCursor loop
null;  -- Could also compare rowcounts here
end loop;
outTime := dbms_utility.get_time - startTime;
close inCursor;
end time_cursor;
-- Report timing difference:
procedure print_comparison
( time1 simple_integer
, time2 simple_integer )
is
begin
time1Percentage := 100 * time1 / (time1 + time2);
time2Percentage := 100 * time2 / (time2 + time2);
dbms_output.put_line('Between:  '||to_char(time1/100,'900d00')|| rpad(' |',time1Percentage,'|'));
dbms_output.put_line('LessThan: '||to_char(time2/100,'900d00')|| rpad(' |',time2Percentage,'|'));
end print_comparison;
procedure compare_cursors
( runningTime1 in out nocopy simple_integer
, runningTime2 in out nocopy simple_integer )
is
testCursor1 sys_refcursor := testCursor(1);
testCursor2 sys_refcursor := testCursor(2);
time1 simple_integer := 0;
time2 simple_integer := 0;
time1Percentage simple_integer := 0;
time2Percentage simple_integer := 0;
begin
time_cursor(testCursor1, time1);
time_cursor(testCursor2, time2);
print_comparison(time1, time2);
-- Update running totals:
runningTime1 := nvl(runningTime1,0) + nvl(time1,0);
runningTime2 := nvl(runningTime2,0) + nvl(time2,0);
dbms_output.new_line;
end compare_cursors;
begin
for i in 1..10 loop
compare_cursors(totalTime1, totalTime2);
end loop;
dbms_output.put_line('Total:'||chr(10));
print_comparison(totalTime1, totalTime2);
end;
Between:    02.18 ||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.25 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.03 |||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.03 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.13 |||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.13 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.13 ||||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.07 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.16 ||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.27 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.28 |||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.28 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.17 ||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.27 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.24 ||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.30 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.24 |||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.27 |||||||||||||||||||||||||||||||||||||||||||||||||
Between:    02.25 ||||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   02.18 |||||||||||||||||||||||||||||||||||||||||||||||||
Total:
Between:    21.81 |||||||||||||||||||||||||||||||||||||||||||||||||
LessThan:   22.05 |||||||||||||||||||||||||||||||||||||||||||||||||

我来做一个PoC,看看会发生什么

SQL> create table t1 ( c1 number, c2 varchar2(40) , c3 varchar2(40) ) ;
Table created.
SQL> declare
2  begin
3  for i in 1 .. 1000000
4  loop
5   insert into t1 values ( round(dbms_random.value(1,100)) , dbms_random.string('X',40) , dbms_random.string('X',40) );
6  end loop;
7  end;
8  /
PL/SQL procedure successfully completed.
SQL> exec dbms_stats.gather_table_stats('TESTUSER','T1');
PL/SQL procedure successfully completed.
SQL> select count(*) from t1 ;
COUNT(*)
----------
1000000

SQL> set autotrace traceonly
SQL> select
CASE
WHEN c1 BETWEEN 0 AND 20 THEN 0
WHEN c1 BETWEEN 20 AND 50 THEN 20
WHEN c1 BETWEEN 50 AND 100 THEN 40
WHEN c1 BETWEEN 100 AND 200 THEN 75
WHEN c1 BETWEEN 200 AND 10000 THEN 86
ELSE 0 END AS t_c1
from t1;  2    3    4    5    6    7    8    9
1000000 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1000K|  2929K|  2322   (1)| 00:00:01 |
|   1 |  TABLE ACCESS FULL| T1   |  1000K|  2929K|  2322   (1)| 00:00:01 |
--------------------------------------------------------------------------
SQL> select case
WHEN c1 < 20 then 0
WHEN c1 < 50 then 20
WHEN c1 < 100 then 40
WHEN c1 < 200 then 75
WHEN c1 < 1000 then 86
END AS t_c1
from t1;  2    3    4    5    6    7    8
1000000 rows selected.

Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1000K|  2929K|  2322   (1)| 00:00:01 |
|   1 |  TABLE ACCESS FULL| T1   |  1000K|  2929K|  2322   (1)| 00:00:01 |
--------------------------------------------------------------------------

它们的行为相同,对吧?不完全是,虽然计划看起来是一样的,但让我们查一下统计数据。为此,我将在每次测试后刷新缓冲区缓存和共享池。

SQL> set autotrace traceonly timing on
SQL> select
CASE
WHEN c1 BETWEEN 0 AND 20 THEN 0
WHEN c1 BETWEEN 20 AND 50 THEN 20
WHEN c1 BETWEEN 50 AND 100 THEN 40
WHEN c1 BETWEEN 100 AND 200 THEN 75
WHEN c1 BETWEEN 200 AND 10000 THEN 86
ELSE 0 END AS t_c1
from t1;  2    3    4    5    6    7    8    9
1000000 rows selected.
Elapsed: 00:00:02.92
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1000K|  2929K|  2322   (1)| 00:00:01 |
|   1 |  TABLE ACCESS FULL| T1   |  1000K|  2929K|  2322   (1)| 00:00:01 |
--------------------------------------------------------------------------

Statistics
----------------------------------------------------------
12  recursive calls
0  db block gets
72870  consistent gets
6180  physical reads
0  redo size
19435128  bytes sent via SQL*Net to client
733901  bytes received via SQL*Net from client
66668  SQL*Net roundtrips to/from client
3  sorts (memory)
0  sorts (disk)
1000000  rows processed
SQL> alter system flush shared_pool ;
System altered.
Elapsed: 00:00:00.08
SQL> alter system flush buffer_cache ;
System altered.
Elapsed: 00:00:00.04
SQL> select t1.* , case
WHEN c1 < 20 then 0
WHEN c1 < 50 then 20
WHEN c1 < 100 then 40
WHEN c1 < 200 then 75
WHEN c1 < 1000 then 86
END AS t_c1
from t1;  2    3    4    5    6    7    8
1000000 rows selected.
Elapsed: 00:00:03.49
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1000K|    81M|  2323   (1)| 00:00:01 |
|   1 |  TABLE ACCESS FULL| T1   |  1000K|    81M|  2323   (1)| 00:00:01 |
--------------------------------------------------------------------------

Statistics
----------------------------------------------------------
18  recursive calls
0  db block gets
72878  consistent gets
6180  physical reads
0  redo size
101747627  bytes sent via SQL*Net to client
733834  bytes received via SQL*Net from client
66668  SQL*Net roundtrips to/from client
3  sorts (memory)
0  sorts (disk)
1000000  rows processed

第二个模型比第一个模型慢,可能是因为第二个模型(18)比第一个模型(12)做了recursive calls。如果你开始放更多的字段,数据是巨大的,我很确定你会得到更好的性能与between查询比在另一个。

但显然,这只是一种感觉,您必须在自己的数据库中进行测试。我很确定最后的查询包含其他字段,其中条件等…因此,这个答案只涵盖您在原始问题中输入的内容,而不包括在许多其他字段的实际场景中会发生的情况,其中条件,索引等…

最新更新