Tips For SQL Optimization ?

Download as pdf or txt
Download as pdf or txt
You are on page 1of 10

SQL

Optimization
#1
Use ‘regexp_like’ to replace ‘LIKE’ clauses

SELECT * SELECT *
FROM FROM
table1 table1
WHERE WHERE
lower(item_name) LIKE'%samsung%'OR REGEXP_LIKE(lower(item_name),
lower(item_name) LIKE'%xiaomi%'OR 'samsung|xiaomi|iphone|huawei')
lower(item_name) LIKE'%iphone%'OR
lower(item_name) LIKE'%huawei%'
--and soon
#2
Use ‘regexp_extract’ to replace ‘Case-when Like’

SELECT
CASE
WHEN concat(' ',item_name,' ') LIKE'%acer%'then 'Acer'
WHEN concat(' ',item_name,' ') LIKE'%advance%'then 'Advance'
WHEN concat(' ',item_name,' ') LIKE'%alfalink%'then 'Alfalink'

AS brand
FROM item_list

SELECT
regexp_extract(item_name,'(asus|lenovo|hp|acer|dell|zyrex|...)')
AS brand
FROM item_list
#3
Convert long list of IN clause into a temporary table

SELECT * SELECT *
FROMTable1 ast1 FROMTable1 ast1
WHERE JOIN (
itemidin(3363134, SELECT
5189076, …, 4062349) itemid
FROM (
SELECT
split('3363134, 5189076, …,', ', ')
as bar
)
CROSS JOIN
UNNEST(bar) ASt(itemid)
) ASTable2 as t2
ON
t1.itemid = t2.itemid
#4
Always order your JOINs from largest tables to
smallest tables

SELECT SELECT
* *
FROM FROM
small_table large_table
JOIN JOIN
large_table small_table
ON small_table.id = large_table.id ON small_table.id = large_table.id
#5
Use simple equi-joins
Two tables with date string e.g., ‘2020-09-01’, but one of the tables only has columns for year,
month, day values

SELECT * SELECT *
FROM FROM
table1 a table1 a
JOIN JOIN (
table2 b select
ONa.date=CONCAT(b.year, '-', name, CONCAT(b.year, '-', b.month, '-', b.day) as date
b.month, '-', b.day) from
table2 b
) new
ON a.date = new.date
#6
Always "GROUP BY" by the attribute/column with the
largest number of unique entities/values

select select
main_category, main_category,
sub_category, sub_category,
itemid, itemid,
sum(price) sum(price)
from from
table1 table1
group by group by
main_category, sub_category, itemid itemid, sub_category, main_category
#7
Avoid subqueries in WHERE clause

select with t2 as(


sum(price) select itemid
from fromtable2
table1 )
where
itemid in ( select
select itemid sum(price)
fromtable2 from
) table1 as t1
join
t2
on t1.itemid = t2.itemid
#8
Use Max instead of Rank

SELECT * SELECT userid,


from ( max(prdate) from table1
select groupby 1
userid,
rank() over (order by prdatedesc) asrank
from table1
)
whereranking =1
#9
Other Tips

● Use approx_distinct() instead of count(distinct) for very


large datasets
● Use approx_percentile(metric, 0.5) for median
● Avoid UNIONs where possible
● Use WITH statements vs. nested subqueries

You might also like