Query Execution: Intro To Database Systems Andy Pavlo
Query Execution: Intro To Database Systems Andy Pavlo
Query Execution: Intro To Database Systems Andy Pavlo
12 Part I
ADMINISTRIVIA
QUERY PL AN
SELECT R.id, S.cdate
The operators are arranged in a tree. FROM R JOIN S
ON R.id = S.id
Data flows from the leaves of the tree WHERE S.value > 100
up towards the root.
p R.id, S.value
s value>100
R S
CMU 15-445/645 (Fall 2019)
4
T O D AY ' S A G E N D A
Processing Models
Access Methods
Expression Evaluation
PROCESSING MODEL
I T E R AT O R M O D E L
I T E R AT O R M O D E L
s
if evalPred(t): emit(t)
value>100
Next() for t in R: Next() for t in S:
R S
emit(t) emit(t)
I T E R AT O R M O D E L
SELECT R.id, S.cdate
1 for t in child.Next():
emit(projection(t)) FROM R JOIN S
ON R.id = S.id
for t1 in left.Next(): WHERE S.value > 100
buildHashTable(t1)
for t2 in right.Next():
if probe(t2): emit(t1⨝t2) p R.id, S.value
s
if evalPred(t): emit(t)
value>100
for t in R: for t in S:
R S
emit(t) emit(t)
I T E R AT O R M O D E L
SELECT R.id, S.cdate
1 for t in child.Next():
emit(projection(t)) FROM R JOIN S
ON R.id = S.id
WHERE S.value > 100
2 for t1 in left.Next():
buildHashTable(t1)
for t2 in right.Next():
if probe(t2): emit(t1⨝t2) p R.id, S.value
s
if evalPred(t): emit(t)
value>100
for t in R: for t in S:
R S
emit(t) emit(t)
I T E R AT O R M O D E L
SELECT R.id, S.cdate
1 for t in child.Next():
emit(projection(t)) FROM R JOIN S
ON R.id = S.id
WHERE S.value > 100
2 for t1 in left.Next():
buildHashTable(t1)
for t2 in right.Next():
if probe(t2): emit(t1⨝t2) p R.id, S.value
Single Tuple
for t in child.Next(): ⨝ R.id=S.id
s
if evalPred(t): emit(t)
value>100
3 for t in R: for t in S:
R S
emit(t) emit(t)
I T E R AT O R M O D E L
SELECT R.id, S.cdate
1 for t in child.Next():
emit(projection(t)) FROM R JOIN S
ON R.id = S.id
WHERE S.value > 100
2 for t1 in left.Next():
buildHashTable(t1)
for t2 in right.Next():
if probe(t2): emit(t1⨝t2) p R.id, S.value
for t in child.Next():
4 ⨝ R.id=S.id
s
if evalPred(t): emit(t)
value>100
3 for t in R: for t in S:
5
R S
emit(t) emit(t)
I T E R AT O R M O D E L
SELECT R.id, S.cdate
1 for t in child.Next():
emit(projection(t)) FROM R JOIN S
ON R.id = S.id
WHERE S.value > 100
2 for t1 in left.Next():
buildHashTable(t1)
for t2 in right.Next():
if probe(t2): emit(t1⨝t2) p R.id, S.value
for t in child.Next():
4 ⨝ R.id=S.id
s
if evalPred(t): emit(t)
value>100
3 for t in R: for t in S:
5
R S
emit(t) emit(t)
I T E R AT O R M O D E L
M AT E R I A L I Z AT I O N M O D E L
M AT E R I A L I Z AT I O N M O D E L
out = [ ]
1 for t in child.Output():
out.add(projection(t)) SELECT R.id, S.cdate
return out FROM R JOIN S
ON R.id = S.id
out = [ ]
for t1 in left.Output(): WHERE S.value > 100
p
buildHashTable(t1)
for t2 in right.Output(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
return out
out = [ ] ⨝ R.id=S.id
s
for t in child.Output():
if evalPred(t): out.add(t)
return out value>100
R S
out = [ ] out = [ ]
for t in R: for t in S:
out.add(t) out.add(t)
return out return out
CMU 15-445/645 (Fall 2019)
10
M AT E R I A L I Z AT I O N M O D E L
out = [ ]
1 for t in child.Output():
out.add(projection(t)) SELECT R.id, S.cdate
return out FROM R JOIN S
ON R.id = S.id
out = [ ]
WHERE S.value > 100
2
for t1 in left.Output():
p
buildHashTable(t1)
for t2 in right.Output(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
return out
out = [ ] ⨝ R.id=S.id
s
for t in child.Output():
if evalPred(t): out.add(t)
All Tuples return out value>100
3
R S
out = [ ] out = [ ]
for t in R: for t in S:
out.add(t) out.add(t)
return out return out
CMU 15-445/645 (Fall 2019)
10
M AT E R I A L I Z AT I O N M O D E L
out = [ ]
1 for t in child.Output():
out.add(projection(t)) SELECT R.id, S.cdate
return out FROM R JOIN S
ON R.id = S.id
out = [ ]
WHERE S.value > 100
2
for t1 in left.Output():
p
buildHashTable(t1)
for t2 in right.Output(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
return out
out = [ ] ⨝ R.id=S.id
4
s
for t in child.Output():
if evalPred(t): out.add(t)
return out value>100
3 5
R S
out = [ ] out = [ ]
for t in R: for t in S:
out.add(t) out.add(t)
return out return out
CMU 15-445/645 (Fall 2019)
10
M AT E R I A L I Z AT I O N M O D E L
out = [ ]
1 for t in child.Output():
out.add(projection(t)) SELECT R.id, S.cdate
return out FROM R JOIN S
ON R.id = S.id
out = [ ]
WHERE S.value > 100
2
for t1 in left.Output():
p
buildHashTable(t1)
for t2 in right.Output(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
return out
out = [ ] ⨝ R.id=S.id
4
s
for t in child.Output():
if evalPred(t): out.add(t)
return out value>100
3 5
R S
out = [ ] out = [ ]
for t in R: for t in S:
out.add(t) out.add(t)
return out return out
CMU 15-445/645 (Fall 2019)
11
M AT E R I A L I Z AT I O N M O D E L
V E C T O R I Z AT I O N M O D E L
V E C T O R I Z AT I O N M O D E L
out = [ ]
for t in child.Next(): 1 SELECT R.id, S.cdate
out.add(projection(t))
if |out|>n: emit(out) FROM R JOIN S
ON R.id = S.id
2 out = [ ]
for t1 in left.Next(): WHERE S.value > 100
p
buildHashTable(t1)
for t2 in right.Next(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
if |out|>n: emit(out)
out = [ ]
for t in child.Next():
⨝ R.id=S.id
if evalPred(t): out.add(t)
if |out|>n: emit(out) s value>100
V E C T O R I Z AT I O N M O D E L
out = [ ]
for t in child.Next(): 1 SELECT R.id, S.cdate
out.add(projection(t))
if |out|>n: emit(out) FROM R JOIN S
ON R.id = S.id
2 out = [ ]
for t1 in left.Next(): WHERE S.value > 100
p
buildHashTable(t1)
for t2 in right.Next(): R.id, S.value
if probe(t2): out.add(t1⨝t2)
if |out|>n: emit(out)
out = [ ]
for t in child.Next(): 4 ⨝ R.id=S.id
if evalPred(t): out.add(t)
if |out|>n: emit(out) s value>100
V E C T O R I Z AT I O N M O D E L
ACCESS METHODS
SELECT R.id, S.cdate
An access method is a way that the FROM R JOIN S
DBMS can access the data stored in a ON R.id = S.id
table. WHERE S.value > 100
→ Not defined in relational algebra.
p R.id, S.value
→ Index Scan
→ Multi-Index / "Bitmap" Scan s value>100
R S
CMU 15-445/645 (Fall 2019)
17
SEQUENTIAL SCAN
S E Q U E N T I A L S C A N : O P T I M I Z AT I O N S
ZONE MAPS
L AT E M AT E R I A L I Z AT I O N
s
a b c
a>100 0
1
bar foo 2
3
CMU 15-445/645 (Fall 2019)
20
L AT E M AT E R I A L I Z AT I O N
s
a b c
a>100 Offsets 0
1
bar foo 2
3
CMU 15-445/645 (Fall 2019)
20
L AT E M AT E R I A L I Z AT I O N
s
a b c
a>100 Offsets 0
1
bar foo 2
3
CMU 15-445/645 (Fall 2019)
20
L AT E M AT E R I A L I Z AT I O N
s
a b c
a>100 Offsets 0
1
bar foo 2
3
CMU 15-445/645 (Fall 2019)
21
HEAP CLUSTERING
If the query accesses tuples using the 101 102 103 104
clustering index's attributes, then the
DBMS can jump directly to the pages
that it needs.
INDEX SCAN
INDEX SCAN
SELECT * FROM students
Suppose that we a single table with WHERE age < 30
100 tuples and two indexes: AND dept = 'CS'
→ Index #1: age AND country = 'US'
→ Index #2: dept
Scenario #1 Scenario #2
There are 99 people There are 99 people in
under the age of 30 but the CS department but
only 2 people in the CS only 2 people under the
department. age of 30.
CMU 15-445/645 (Fall 2019)
24
M U LT I - I N D E X S C A N
M U LT I - I N D E X S C A N
SELECT * FROM students
With an index on age and an index WHERE age < 30
on dept, AND dept = 'CS'
→ We can retrieve the record ids satisfying AND country = 'US'
age<30 using the first,
→ Then retrieve the record ids satisfying
dept='CS' using the second,
→ Take their intersection
→ Retrieve records and check
country='US'.
M U LT I - I N D E X S C A N
SELECT * FROM students
Set intersection can be done with WHERE age < 30
bitmaps, hash tables, or Bloom filters. AND dept = 'CS'
AND country = 'US'
age<30 dept='CS'
record ids
M U LT I - I N D E X S C A N
SELECT * FROM students
Set intersection can be done with WHERE age < 30
bitmaps, hash tables, or Bloom filters. AND dept = 'CS'
AND country = 'US'
age<30 dept='CS'
record ids record ids
M U LT I - I N D E X S C A N
SELECT * FROM students
Set intersection can be done with WHERE age < 30
bitmaps, hash tables, or Bloom filters. AND dept = 'CS'
AND country = 'US'
age<30 dept='CS'
record ids record ids
I N D E X S C A N PA G E S O R T I N G
Scan Direction
Retrieving tuples in the order that
appear in an unclustered index is
inefficient.
101 102 103 104
The DBMS can first figure out all the
tuples that it needs and then sort them
based on their page id.
I N D E X S C A N PA G E S O R T I N G
Scan Direction
Retrieving tuples in the order that
appear in an unclustered index is
inefficient.
101 102 103 104
The DBMS can first figure out all the
tuples that it needs and then sort them
based on their page id.
I N D E X S C A N PA G E S O R T I N G
Scan Direction
Retrieving tuples in the order that
appear in an unclustered index is
inefficient.
101 102 103 104
The DBMS can first figure out all the Page 102
tuples that it needs and then sort them Page
Page
103
104
based on their page id. Page
Page
104
102
Page 103
Page 102
Page 102
Page 101
Page 103
Page 104
Page 103
I N D E X S C A N PA G E S O R T I N G
Scan Direction
Retrieving tuples in the order that
appear in an unclustered index is
inefficient.
101 102 103 104
The DBMS can first figure out all the Page 102 Page 101
tuples that it needs and then sort them Page
Page
103
104
Page
Page
101
102
based on their page id. Page
Page
104
102
Page
Page
102
102
Page 103 Page 102
Page 102 Page 103
Page 102 Page 103
Page 101 Page 103
Page 103 Page 104
Page 104 Page 104
Page 103 Page 104
E X P R E S S I O N E VA L U AT I O N
SELECT R.id, S.cdate
The DBMS represents a WHERE clause FROM R JOIN S
as an expression tree. ON R.id = S.id
WHERE S.value > 100
The nodes in the tree represent
different expression types: AND
→ Comparisons (=, <, >, !=)
→ Conjunction (AND), Disjunction (OR)
→ Arithmetic Operators (+, -, *, /, %) = >
→ Constant Values
→ Tuple Attribute References
Attribute(R.id) Attribute(S.id) Attribute(value) Constant(100)
E X P R E S S I O N E VA L U AT I O N
SELECT * FROM S
WHERE B.value = ? + 1
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
Parameter(0) Constant(1)
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
Parameter(0) Constant(1)
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
1000
Parameter(0) Constant(1)
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
1000
Parameter(0) Constant(1)
999
CMU 15-445/645 (Fall 2019)
29
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
1000
Parameter(0) Constant(1)
999 1
CMU 15-445/645 (Fall 2019)
29
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
Attribute(S.value) +
1000 1000
Parameter(0) Constant(1)
999 1
CMU 15-445/645 (Fall 2019)
29
E X P R E S S I O N E VA L U AT I O N
Execution Context
SELECT * FROM S Current Tuple Query Parameters Table Schema
WHERE B.value = ? + 1 (123, 1000) (int:999) S→(int:id, int:value)
=
true
Attribute(S.value) +
1000 1000
Parameter(0) Constant(1)
999 1
CMU 15-445/645 (Fall 2019)
30
E X P R E S S I O N E VA L U AT I O N
CONCLUSION
NEXT CLASS