MYSQL - Notes - Colab

Download as pdf or txt
Download as pdf or txt
You are on page 1of 10

04/08/2024, 22:06 MYSQL_Notes - Colab

Notes the below two code shows how to create database and upload it in colab
keyboard_arrow_down
[ ] ↳ 6 cells hidden

keyboard_arrow_down SQL_Queries
# import sqlite3

# def query_database(query):
# db_path = '/content/airbnb.db'
# conn = sqlite3.connect(db_path)
# cursor = conn.cursor()
# cursor.execute(query)
# results = cursor.fetchall()
# cursor.close()
# conn.close()
# return results

# for handling edge case as well

def query_database(query):
db_path = '/content/airbnb.db'
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(query)
results = cursor.fetchall()
except sqlite3.Error as e:
print(f"An error occurred: {e}")
results = []
finally:
cursor.close()
conn.close()
return results

# Retrieve all records from the airbnb_listings table


all_listings = query_database("SELECT * FROM airbnb_listings")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)


(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)

# Find listings in Paris


paris_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris'")
for listing in paris_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)

import sqlite3
import pandas as pd

def load_data_to_df(query):
db_path = '/content/airbnb.db'
conn = sqlite3.connect(db_path)
df = pd.read_sql_query(query, conn)
conn.close()
return df

# Load and display data from airbnb_listings


df = load_data_to_df("SELECT * FROM airbnb_listings")
print("Data in airbnb_listings:")
display(df) # In a notebook environment, use display() to show DataFrame

Data in airbnb_listings:
id city country number_of_rooms year_listed

0 1 Paris France 5 2018

1 2 Tokyo Japan 2 2017

2 3 New York USA 2 2022

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 1/10
04/08/2024, 22:06 MYSQL_Notes - Colab
result = query_database("SELECT id,city FROM airbnb_listings ORDER BY number_of_rooms ASC ")
for listing in result:
print(listing)

(2, 'Tokyo')
(3, 'New York')
(1, 'Paris')

all_listings = query_database("SELECT id, city FROM airbnb_listings ORDER BY number_of_rooms DESC")


for listing in all_listings:
print(listing)

(1, 'Paris')
(2, 'Tokyo')
(3, 'New York')

all_listings = query_database("SELECT * FROM airbnb_listings LIMIT 5;")


for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)


(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)

all_listings = query_database("SELECT DISTINCT city FROM airbnb_listings ")


for listing in all_listings:
print(listing)

('Paris',)
('Tokyo',)
('New York',)

keyboard_arrow_down Filtering_DATA
all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms >= 3;")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)

all_listings = query_database(" SELECT * FROM airbnb_listings WHERE number_of_rooms BETWEEN 3 AND 6")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)

all_listings = query_database("SELECT country, SUM(number_of_rooms) FROM airbnb_listings GROUP BY country;")


for listing in all_listings:
print(listing)

('France', 5)
('Japan', 2)
('USA', 2)

all_listings = query_database("SELECT country, AVG (number_of_rooms)FROM airbnb_listings GROUP BY country;")


for listing in all_listings:
print(listing)

('France', 5.0)
('Japan', 2.0)
('USA', 2.0)

all_listings = query_database("SELECT country, MAX(number_of_rooms) FROM airbnb_listings GROUP BY country;")


for listing in all_listings:
print(listing)

('France', 5)
('Japan', 2)
('USA', 2)

all_listings = query_database("SELECT country, MIN(number_of_rooms) FROM airbnb_listings GROUP BY country;")


for listing in all_listings:
print(listing)

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 2/10
04/08/2024, 22:06 MYSQL_Notes - Colab
('France', 5)
('Japan', 2)
('USA', 2)

all_listings = query_database("SELECT country, AVG(number_of_rooms) AS avg_rooms FROM airbnb_listings GROUP BY country ORDER
for listing in all_listings:
print(listing)

('Japan', 2.0)
('USA', 2.0)
('France', 5.0)

all_listings = query_database("SELECT country, MAX(number_of_rooms) FROM airbnb_listings WHERE country IN ('USA', 'Japan') G
for listing in all_listings:
print(listing)

('Japan', 2)
('USA', 2)

all_listings = query_database("SELECT country, COUNT(city) AS number_of_cities FROM airbnb_listings GROUP BY country;")


for listing in all_listings:
print(listing)

('France', 1)
('Japan', 1)
('USA', 1)

all_listings = query_database("SELECT year_listed FROM airbnb_listings GROUP BY year_listed HAVING COUNT (id) > 100;")
for listing in all_listings:
print(listing)

all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris';")


for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)

# 8. Get the listings based in the 'USA' and in 'France'


all_listings = query_database("SELECT * FROM airbnb_listings WHERE country IN ('USA', 'France');")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)


(3, 'New York', 'USA', 2, 2022)

# 9. Get all the listings where the city starts with 'j' and where the city does not end in 't'
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city LIKE 'j%' AND city NOT LIKE '%t';")
for listing in all_listings:
print(listing)

# 10. Get all the listings in 'Paris' where number_of_rooms is bigger than 3
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris' AND number_of_rooms > 3;")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)

# 11. Get all the listings in 'Paris' OR the ones that were listed after 2012
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris' OR year_listed > 2012;")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)


(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)

# 12. Return the listings where number_of_rooms is missing


all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms IS NULL;")
for listing in all_listings:
print(listing)

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 3/10
04/08/2024, 22:06 MYSQL_Notes - Colab
# 13. Return the listings where number_of_rooms is not missing
all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms IS NOT NULL;")
for listing in all_listings:
print(listing)

(1, 'Paris', 'France', 5, 2018)


(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)

keyboard_arrow_down Aggregating_Data
# 1. Get the total number of rooms available across all listings
all_listings = query_database("SELECT SUM(number_of_rooms) FROM airbnb_listings;")
for listing in all_listings:
print(listing)

(9,)

# 2. Get the average number of rooms per listing across all listings
all_listings = query_database("SELECT AVG (number_of_rooms) FROM airbnb_listings;")
for listing in all_listings:
print(listing)

(3.0,)

# 3. Get the listing with the highest number of rooms across all listings
all_listings = query_database("SELECT MAX(number_of_rooms) FROM airbnb_listings;")
for listing in all_listings:
print(listing)

(5,)

# 4. Get the listing with the lowest number of rooms across all listings
all_listings = query_database("SELECT MIN(number_of_rooms) FROM airbnb_listings;")
for listing in all_listings:
print(listing)

(2,)

keyboard_arrow_down New Database

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 4/10
04/08/2024, 22:06 MYSQL_Notes - Colab
import sqlite3

# Define the path to your SQLite database file


db_path = '/content/music.db'

# Connect to the SQLite database


conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Create the 'Artist' and 'Album' tables again


cursor.execute('''
CREATE TABLE IF NOT EXISTS Artist (
artist_id INTEGER PRIMARY KEY,
name TEXT
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS Album (
album_id INTEGER PRIMARY KEY,
title TEXT,
artist_id INTEGER,
FOREIGN KEY (artist_id) REFERENCES Artist (artist_id)
)
''')

# Clear any existing data


cursor.execute('DELETE FROM Artist')
cursor.execute('DELETE FROM Album')

# Insert data into the 'Artist' table


artists = [
(123, 'AC/DC'),
(124, 'Aerosmith'),
(125, 'Alanis Morissette')
]
cursor.executemany('''
INSERT INTO Artist (artist_id, name) VALUES (?, ?)
''', artists)

# Insert data into the 'Album' table


albums = [
(12345, 'For Those Who Rock', 123),
(12346, 'Dream On', 124),
(12347, 'Restless and Wild', 125),
(12348, 'Let There Be Rock', 123),
(12349, 'Rumours', 124)
]
cursor.executemany('''
INSERT INTO Album (album_id, title, artist_id) VALUES (?, ?, ?)
''', albums)

# Commit changes and close the connection


conn.commit()
cursor.close()
conn.close()

from google.colab import files


files.download('music.db')

from google.colab import files


uploaded = files.upload()

Choose files music.db


music.db(n/a) - 12288 bytes, last modified: 04/08/2024 - 100% done
Saving music.db to music (1).db

import os

# Check if the file exists and its size


if os.path.isfile('music.db'):
print("File uploaded successfully.")
print("File size:", os.path.getsize('music.db'), "bytes")
else:
print("File not found.")

File uploaded successfully.


File size: 12288 bytes

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 5/10
04/08/2024, 22:06 MYSQL_Notes - Colab

def query_database(query):
db_path = '/content/music.db'
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(query)
results = cursor.fetchall()
except sqlite3.Error as e:
print(f"An error occurred: {e}")
results = []
finally:
cursor.close()
conn.close()
return results

# verifying whether data exist or not

import sqlite3

def query_database(query):
db_path = '/content/music.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(query)
results = cursor.fetchall()
cursor.close()
conn.close()
return results

# Query to select all data from the Album table


all_listings = query_database("SELECT * FROM Album")

print("Data in Album table:")


for listing in all_listings:
print(listing)

Data in Album table:


(12345, 'For Those Who Rock', 123)
(12346, 'Dream On', 124)
(12347, 'Restless and Wild', 125)
(12348, 'Let There Be Rock', 123)
(12349, 'Rumours', 124)

import sqlite3
import pandas as pd

def load_data_to_df(query):
db_path = '/content/music.db'
conn = sqlite3.connect(db_path)
df = pd.read_sql_query(query, conn)
conn.close()
return df

# Load and display data from Table


df = load_data_to_df("SELECT * FROM Album")
print("Data in Album:")
display(df) # In a notebook environment, use display() to show DataFrame

# Load and display data from Table


df = load_data_to_df("SELECT * FROM Artist")
print("Data in Artist:")
display(df) # In a notebook environment, use display() to show DataFrame

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 6/10
04/08/2024, 22:06 MYSQL_Notes - Colab
Data in Album:
album_id title artist_id

0 12345 For Those Who Rock 123

1 12346 Dream On 124

2 12347 Restless and Wild 125

3 12348 Let There Be Rock 123

4 12349 Rumours 124


Data in Artist:
artist_id name

0 123 AC/DC

1 124 Aerosmith

2 125 Alanis Morissette

toggle_off toggle_off
View View
Next Generate New interactive Generate New interactive
df recommended df recommended
steps: code with sheet code with sheet
l t l t

keyboard_arrow_down Join Operations


# A left join keeps all of the original records in the left table and
# returns missing values for any columns from the right table where
# the joining field did not find a match.

df = load_data_to_df(" SELECT * FROM artist AS art LEFT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)

artist_id name album_id title artist_id

0 123 AC/DC 12345 For Those Who Rock 123

1 123 AC/DC 12348 Let There Be Rock 123

2 124 Aerosmith 12346 Dream On 124

3 124 Aerosmith 12349 Rumours 124

4 125 Alanis Morissette 12347 Restless and Wild 125

Next steps: Generate code with df toggle_off View recommended plots New interactive sheet

# INNER JOIN with USING

df = load_data_to_df("SELECT * FROM artist AS art INNER JOIN album AS alb USING (artist_id);")
display(df)

artist_id name album_id title

0 123 AC/DC 12345 For Those Who Rock

1 124 Aerosmith 12346 Dream On

2 125 Alanis Morissette 12347 Restless and Wild

3 123 AC/DC 12348 Let There Be Rock

4 124 Aerosmith 12349 Rumours

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# An inner join between two tables will return only records where a
# joining field, such as a key, finds a match in both tables.

# INNER JOIN join ON one field

df = load_data_to_df("SELECT * FROM artist AS art INNER JOIN album AS alb ON art.artist_id = alb.artist_id")
display(df)

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 7/10
04/08/2024, 22:06 MYSQL_Notes - Colab

artist_id name album_id title artist_id

0 123 AC/DC 12345 For Those Who Rock 123

1 124 Aerosmith 12346 Dream On 124

2 125 Alanis Morissette 12347 Restless and Wild 125

3 123 AC/DC 12348 Let There Be Rock 123

4 124 Aerosmith 12349 Rumours 124

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# A left join keeps all of the original records in the left table
# and returns missing values for any columns from the right table
# where the joining field did not find a match.

df = load_data_to_df("SELECT * FROM artist AS art LEFT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)

artist_id name album_id title artist_id

0 123 AC/DC 12345 For Those Who Rock 123

1 123 AC/DC 12348 Let There Be Rock 123

2 124 Aerosmith 12346 Dream On 124

3 124 Aerosmith 12349 Rumours 124

4 125 Alanis Morissette 12347 Restless and Wild 125

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# Self-joins are used to compare values in a table to other


# values of the same table by joining different parts of a table together.

df = load_data_to_df("SELECT alb1.artist_id, alb1.title AS albi_title, alb2.title AS alb2_title FROM album AS alb1 INNER JO
display(df)

artist_id albi_title alb2_title

0 123 For Those Who Rock Let There Be Rock

1 124 Dream On Rumours

2 123 Let There Be Rock For Those Who Rock

3 124 Rumours Dream On

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# A right join keeps all of the original records in the right table
# and returns missing values for any columns from the left table
# where the joining field did not find a match. Right joins are far
# less common than left joins, because right joins can always be
# re-written as left joins.

# RIGHT JOIN on one field

df = load_data_to_df("SELECT * FROM artist as art RIGHT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)

# Note Right join is not supported by sqlite which we are currently using

# A full join combines a left join and right join. A full join
# will return all records from a table, irrespective of whether
# there is a match on the joining field in the other table,
# returning null values accordingly.
# FULL JOIN on one field

df = load_data_to_df(" SELECT * FROM artist as art FULL OUTER JOIN album AS alb ON art.artist_id = alb.artist_id; ")
display(df)

# Note Full outer join is not supported by sqlite in colab

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 8/10
04/08/2024, 22:06 MYSQL_Notes - Colab
# The UNION operator is used to vertically combine the results of two
# SELECT statements. For UNION to work without errors, all SELECT
# statements must have the same number of columns and corresponding
# columns must have the same data type. UNION does not return duplicates.

df = load_data_to_df(" SELECT artist_id FROM artist UNION SELECT artist_id FROM album; ")
display(df)

artist_id

0 123

1 124

2 125

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# The anti join chooses records in the first table where a condition is
# NOT met in the second table. It makes use of a WHERE clause to use
# exclude values from the second table.

df = load_data_to_df("SELECT * FROM album WHERE artist_id NOT IN (SELECT artist_id FROM artist);")
display(df)

album_id title artist_id

# CROSS JOIN creates all possible combinations of two tables. CROSS JOIN
# does not require a field to join ON.

df = load_data_to_df("SELECT name, title FROM artist CROSS JOIN album;")


display(df)

name title

0 AC/DC For Those Who Rock

1 AC/DC Dream On

2 AC/DC Restless and Wild

3 AC/DC Let There Be Rock

4 AC/DC Rumours

5 Aerosmith For Those Who Rock

6 Aerosmith Dream On

7 Aerosmith Restless and Wild

8 Aerosmith Let There Be Rock

9 Aerosmith Rumours

10 Alanis Morissette For Those Who Rock

11 Alanis Morissette Dream On

12 Alanis Morissette Restless and Wild

13 Alanis Morissette Let There Be Rock

14 Alanis Morissette Rumours

Next steps: Generate code with df


toggle_off View recommended plots New interactive sheet

# The UNION ALL operator works just like UNION, but it returns
# duplicate values. The same restrictions of UNION hold true for UNION ALL

df = load_data_to_df("SELECT artist_id FROM artist UNION ALL SELECT artist_id FROM album;")
display(df)

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 9/10
04/08/2024, 22:06 MYSQL_Notes - Colab

artist_id

0 123

1 124

toggle_off
2 125
Next steps:
3 Generate
123 code with df View recommended plots New interactive sheet

4 124
# The INTERSECT operator returns only identical rows from two tables.
5 125

https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 10/10

You might also like