MYSQL - Notes - Colab

04/08/2024, 22:06 MYSQL_Notes - Colab
Notes the below two code shows how to create database and upload it in colab
keyboard_arrow_down
[ ] ↳ 6 cells hidden
keyboard_arrow_down SQL_Queries
# import sqlite3
# def query_database(query):
# db_path = '/content/airbnb.db'
# conn = sqlite3.connect(db_path)
# cursor = conn.cursor()
# cursor.execute(query)
# results = cursor.fetchall()
# cursor.close()
# conn.close()
# return results
# for handling edge case as well
def query_database(query):
db_path = '/content/airbnb.db'
try:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute(query)
results = cursor.fetchall()
except sqlite3.Error as e:
print(f"An error occurred: {e}")
results = []
finally:
cursor.close()
conn.close()
return results
# Retrieve all records from the airbnb_listings table

all_listings = query_database("SELECT * FROM airbnb_listings")
for listing in all_listings:
print(listing)
(1, 'Paris', 'France', 5, 2018)

(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)
# Find listings in Paris

paris_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris'")
for listing in paris_listings:
print(listing)
(1, 'Paris', 'France', 5, 2018)
import sqlite3
import pandas as pd
def load_data_to_df(query):
db_path = '/content/airbnb.db'
df = pd.read_sql_query(query, conn)
conn.close()
return df
# Load and display data from airbnb_listings

df = load_data_to_df("SELECT * FROM airbnb_listings")
print("Data in airbnb_listings:")
display(df) # In a notebook environment, use display() to show DataFrame
Data in airbnb_listings:
id city country number_of_rooms year_listed
0 1 Paris France 5 2018
1 2 Tokyo Japan 2 2017
2 3 New York USA 2 2022
https://colab.research.google.com/drive/1P3yDvKI2XlFPwQ5y5bbUAVLqBkdAgCfM#scrollTo=gKITIhybsOHL&printMode=true 1/10
result = query_database("SELECT id,city FROM airbnb_listings ORDER BY number_of_rooms ASC ")
for listing in result:
print(listing)
(2, 'Tokyo')
(3, 'New York')
(1, 'Paris')
all_listings = query_database("SELECT id, city FROM airbnb_listings ORDER BY number_of_rooms DESC")

print(listing)
(1, 'Paris')
(2, 'Tokyo')
(3, 'New York')
all_listings = query_database("SELECT * FROM airbnb_listings LIMIT 5;")

print(listing)
(1, 'Paris', 'France', 5, 2018)

(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)
all_listings = query_database("SELECT DISTINCT city FROM airbnb_listings ")

print(listing)
('Paris',)
('Tokyo',)
('New York',)
keyboard_arrow_down Filtering_DATA
all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms >= 3;")
print(listing)
(1, 'Paris', 'France', 5, 2018)
all_listings = query_database(" SELECT * FROM airbnb_listings WHERE number_of_rooms BETWEEN 3 AND 6")
print(listing)
(1, 'Paris', 'France', 5, 2018)
all_listings = query_database("SELECT country, SUM(number_of_rooms) FROM airbnb_listings GROUP BY country;")

print(listing)
('France', 5)
('Japan', 2)
('USA', 2)
all_listings = query_database("SELECT country, AVG (number_of_rooms)FROM airbnb_listings GROUP BY country;")

print(listing)
('France', 5.0)
('Japan', 2.0)
('USA', 2.0)
all_listings = query_database("SELECT country, MAX(number_of_rooms) FROM airbnb_listings GROUP BY country;")

print(listing)
('France', 5)
('Japan', 2)
('USA', 2)
all_listings = query_database("SELECT country, MIN(number_of_rooms) FROM airbnb_listings GROUP BY country;")

print(listing)
('France', 5)
('Japan', 2)
('USA', 2)
all_listings = query_database("SELECT country, AVG(number_of_rooms) AS avg_rooms FROM airbnb_listings GROUP BY country ORDER
print(listing)
('Japan', 2.0)
('USA', 2.0)
('France', 5.0)
all_listings = query_database("SELECT country, MAX(number_of_rooms) FROM airbnb_listings WHERE country IN ('USA', 'Japan') G
print(listing)
('Japan', 2)
('USA', 2)
all_listings = query_database("SELECT country, COUNT(city) AS number_of_cities FROM airbnb_listings GROUP BY country;")

print(listing)
('France', 1)
('Japan', 1)
('USA', 1)
all_listings = query_database("SELECT year_listed FROM airbnb_listings GROUP BY year_listed HAVING COUNT (id) > 100;")
print(listing)
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris';")

print(listing)
(1, 'Paris', 'France', 5, 2018)
# 8. Get the listings based in the 'USA' and in 'France'

all_listings = query_database("SELECT * FROM airbnb_listings WHERE country IN ('USA', 'France');")
print(listing)
(1, 'Paris', 'France', 5, 2018)

(3, 'New York', 'USA', 2, 2022)
# 9. Get all the listings where the city starts with 'j' and where the city does not end in 't'
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city LIKE 'j%' AND city NOT LIKE '%t';")
print(listing)
# 10. Get all the listings in 'Paris' where number_of_rooms is bigger than 3
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris' AND number_of_rooms > 3;")
print(listing)
(1, 'Paris', 'France', 5, 2018)
# 11. Get all the listings in 'Paris' OR the ones that were listed after 2012
all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris' OR year_listed > 2012;")
print(listing)
(1, 'Paris', 'France', 5, 2018)

(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)
# 12. Return the listings where number_of_rooms is missing

all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms IS NULL;")
print(listing)
# 13. Return the listings where number_of_rooms is not missing
all_listings = query_database("SELECT * FROM airbnb_listings WHERE number_of_rooms IS NOT NULL;")
print(listing)
(1, 'Paris', 'France', 5, 2018)

(2, 'Tokyo', 'Japan', 2, 2017)
(3, 'New York', 'USA', 2, 2022)
keyboard_arrow_down Aggregating_Data
# 1. Get the total number of rooms available across all listings
all_listings = query_database("SELECT SUM(number_of_rooms) FROM airbnb_listings;")
print(listing)
(9,)
# 2. Get the average number of rooms per listing across all listings
all_listings = query_database("SELECT AVG (number_of_rooms) FROM airbnb_listings;")
print(listing)
(3.0,)
# 3. Get the listing with the highest number of rooms across all listings
all_listings = query_database("SELECT MAX(number_of_rooms) FROM airbnb_listings;")
print(listing)
(5,)
# 4. Get the listing with the lowest number of rooms across all listings
all_listings = query_database("SELECT MIN(number_of_rooms) FROM airbnb_listings;")
print(listing)
(2,)
keyboard_arrow_down New Database
import sqlite3
# Define the path to your SQLite database file

db_path = '/content/music.db'
# Connect to the SQLite database

# Create the 'Artist' and 'Album' tables again

cursor.execute('''
CREATE TABLE IF NOT EXISTS Artist (
artist_id INTEGER PRIMARY KEY,
name TEXT
)
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS Album (
album_id INTEGER PRIMARY KEY,
title TEXT,
artist_id INTEGER,
FOREIGN KEY (artist_id) REFERENCES Artist (artist_id)
)
''')
# Clear any existing data

cursor.execute('DELETE FROM Artist')
cursor.execute('DELETE FROM Album')
# Insert data into the 'Artist' table

artists = [
(123, 'AC/DC'),
(124, 'Aerosmith'),
(125, 'Alanis Morissette')
]
cursor.executemany('''
INSERT INTO Artist (artist_id, name) VALUES (?, ?)
''', artists)
# Insert data into the 'Album' table

albums = [
(12345, 'For Those Who Rock', 123),
(12346, 'Dream On', 124),
(12347, 'Restless and Wild', 125),
(12348, 'Let There Be Rock', 123),
(12349, 'Rumours', 124)
]
cursor.executemany('''
INSERT INTO Album (album_id, title, artist_id) VALUES (?, ?, ?)
''', albums)
# Commit changes and close the connection

conn.commit()
cursor.close()
conn.close()
from google.colab import files

files.download('music.db')
from google.colab import files

uploaded = files.upload()
Choose files music.db

music.db(n/a) - 12288 bytes, last modified: 04/08/2024 - 100% done
Saving music.db to music (1).db
import os
# Check if the file exists and its size

if os.path.isfile('music.db'):
print("File uploaded successfully.")
print("File size:", os.path.getsize('music.db'), "bytes")
else:
print("File not found.")
File uploaded successfully.

File size: 12288 bytes
try:
except sqlite3.Error as e:
print(f"An error occurred: {e}")
results = []
finally:
cursor.close()
conn.close()
return results
# verifying whether data exist or not
import sqlite3
cursor.close()
conn.close()
return results
# Query to select all data from the Album table

all_listings = query_database("SELECT * FROM Album")
print("Data in Album table:")

print(listing)
Data in Album table:

(12345, 'For Those Who Rock', 123)
(12346, 'Dream On', 124)
(12347, 'Restless and Wild', 125)
(12348, 'Let There Be Rock', 123)
(12349, 'Rumours', 124)
import sqlite3
import pandas as pd
def load_data_to_df(query):
df = pd.read_sql_query(query, conn)
conn.close()
return df
# Load and display data from Table

df = load_data_to_df("SELECT * FROM Album")
print("Data in Album:")
# Load and display data from Table

df = load_data_to_df("SELECT * FROM Artist")
print("Data in Artist:")
Data in Album:
album_id title artist_id
0 12345 For Those Who Rock 123
1 12346 Dream On 124
2 12347 Restless and Wild 125
3 12348 Let There Be Rock 123
4 12349 Rumours 124

Data in Artist:
artist_id name
0 123 AC/DC
1 124 Aerosmith
2 125 Alanis Morissette
toggle_off toggle_off
View View
Next Generate New interactive Generate New interactive
df recommended df recommended
steps: code with sheet code with sheet
l t l t
keyboard_arrow_down Join Operations

# A left join keeps all of the original records in the left table and
# returns missing values for any columns from the right table where
# the joining field did not find a match.
df = load_data_to_df(" SELECT * FROM artist AS art LEFT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)
artist_id name album_id title artist_id
0 123 AC/DC 12345 For Those Who Rock 123
1 123 AC/DC 12348 Let There Be Rock 123
2 124 Aerosmith 12346 Dream On 124
3 124 Aerosmith 12349 Rumours 124
4 125 Alanis Morissette 12347 Restless and Wild 125
Next steps: Generate code with df toggle_off View recommended plots New interactive sheet
# INNER JOIN with USING
df = load_data_to_df("SELECT * FROM artist AS art INNER JOIN album AS alb USING (artist_id);")
display(df)
artist_id name album_id title
0 123 AC/DC 12345 For Those Who Rock
1 124 Aerosmith 12346 Dream On
2 125 Alanis Morissette 12347 Restless and Wild
3 123 AC/DC 12348 Let There Be Rock
4 124 Aerosmith 12349 Rumours
Next steps: Generate code with df

toggle_off View recommended plots New interactive sheet
# An inner join between two tables will return only records where a
# joining field, such as a key, finds a match in both tables.
# INNER JOIN join ON one field
df = load_data_to_df("SELECT * FROM artist AS art INNER JOIN album AS alb ON art.artist_id = alb.artist_id")
display(df)

# A left join keeps all of the original records in the left table
# and returns missing values for any columns from the right table
# where the joining field did not find a match.
df = load_data_to_df("SELECT * FROM artist AS art LEFT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)

# Self-joins are used to compare values in a table to other

# values of the same table by joining different parts of a table together.
df = load_data_to_df("SELECT alb1.artist_id, alb1.title AS albi_title, alb2.title AS alb2_title FROM album AS alb1 INNER JO
display(df)
artist_id albi_title alb2_title
0 123 For Those Who Rock Let There Be Rock
1 124 Dream On Rumours
2 123 Let There Be Rock For Those Who Rock
3 124 Rumours Dream On

# A right join keeps all of the original records in the right table
# and returns missing values for any columns from the left table
# where the joining field did not find a match. Right joins are far
# less common than left joins, because right joins can always be
# re-written as left joins.
# RIGHT JOIN on one field
df = load_data_to_df("SELECT * FROM artist as art RIGHT JOIN album AS alb ON art.artist_id = alb.artist_id;")
display(df)
# Note Right join is not supported by sqlite which we are currently using
# A full join combines a left join and right join. A full join
# will return all records from a table, irrespective of whether
# there is a match on the joining field in the other table,
# returning null values accordingly.
# FULL JOIN on one field
df = load_data_to_df(" SELECT * FROM artist as art FULL OUTER JOIN album AS alb ON art.artist_id = alb.artist_id; ")
display(df)
# Note Full outer join is not supported by sqlite in colab
# The UNION operator is used to vertically combine the results of two
# SELECT statements. For UNION to work without errors, all SELECT
# statements must have the same number of columns and corresponding
# columns must have the same data type. UNION does not return duplicates.
df = load_data_to_df(" SELECT artist_id FROM artist UNION SELECT artist_id FROM album; ")
display(df)
artist_id
0 123
1 124
2 125

# The anti join chooses records in the first table where a condition is
# NOT met in the second table. It makes use of a WHERE clause to use
# exclude values from the second table.
df = load_data_to_df("SELECT * FROM album WHERE artist_id NOT IN (SELECT artist_id FROM artist);")
display(df)
album_id title artist_id
# CROSS JOIN creates all possible combinations of two tables. CROSS JOIN
# does not require a field to join ON.
df = load_data_to_df("SELECT name, title FROM artist CROSS JOIN album;")

display(df)
name title
0 AC/DC For Those Who Rock
1 AC/DC Dream On
2 AC/DC Restless and Wild
3 AC/DC Let There Be Rock
4 AC/DC Rumours
5 Aerosmith For Those Who Rock
6 Aerosmith Dream On
7 Aerosmith Restless and Wild
8 Aerosmith Let There Be Rock
9 Aerosmith Rumours
10 Alanis Morissette For Those Who Rock
11 Alanis Morissette Dream On
12 Alanis Morissette Restless and Wild
13 Alanis Morissette Let There Be Rock
14 Alanis Morissette Rumours

# The UNION ALL operator works just like UNION, but it returns
# duplicate values. The same restrictions of UNION hold true for UNION ALL
df = load_data_to_df("SELECT artist_id FROM artist UNION ALL SELECT artist_id FROM album;")
display(df)
artist_id
0 123
1 124
toggle_off
2 125
Next steps:
3 Generate
123 code with df View recommended plots New interactive sheet
4 124
# The INTERSECT operator returns only identical rows from two tables.
5 125

MYSQL - Notes - Colab

Uploaded by

Copyright:

Available Formats

MYSQL - Notes - Colab

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

MYSQL - Notes - Colab

Uploaded by

Copyright:

Available Formats

04/08/2024, 22:06 MYSQL_Notes - Colab

# for handling edge case as well

# Retrieve all records from the airbnb_listings table

(1, 'Paris', 'France', 5, 2018)

# Find listings in Paris

(1, 'Paris', 'France', 5, 2018)

# Load and display data from airbnb_listings

0 1 Paris France 5 2018

1 2 Tokyo Japan 2 2017

2 3 New York USA 2 2022

all_listings = query_database("SELECT id, city FROM airbnb_listings ORDER BY number_of_rooms DESC")

all_listings = query_database("SELECT * FROM airbnb_listings LIMIT 5;")

(1, 'Paris', 'France', 5, 2018)

all_listings = query_database("SELECT DISTINCT city FROM airbnb_listings ")

(1, 'Paris', 'France', 5, 2018)

(1, 'Paris', 'France', 5, 2018)

all_listings = query_database("SELECT country, SUM(number_of_rooms) FROM airbnb_listings GROUP BY country;")

all_listings = query_database("SELECT country, AVG (number_of_rooms)FROM airbnb_listings GROUP BY country;")

all_listings = query_database("SELECT country, MAX(number_of_rooms) FROM airbnb_listings GROUP BY country;")

all_listings = query_database("SELECT country, MIN(number_of_rooms) FROM airbnb_listings GROUP BY country;")

all_listings = query_database("SELECT country, COUNT(city) AS number_of_cities FROM airbnb_listings GROUP BY country;")

all_listings = query_database("SELECT * FROM airbnb_listings WHERE city = 'Paris';")

(1, 'Paris', 'France', 5, 2018)

# 8. Get the listings based in the 'USA' and in 'France'

(1, 'Paris', 'France', 5, 2018)

(1, 'Paris', 'France', 5, 2018)

(1, 'Paris', 'France', 5, 2018)

# 12. Return the listings where number_of_rooms is missing

(1, 'Paris', 'France', 5, 2018)

keyboard_arrow_down New Database

# Define the path to your SQLite database file

# Connect to the SQLite database

# Create the 'Artist' and 'Album' tables again

# Clear any existing data

# Insert data into the 'Artist' table

# Insert data into the 'Album' table

# Commit changes and close the connection

from google.colab import files

from google.colab import files

Choose files music.db

# Check if the file exists and its size

File uploaded successfully.

# verifying whether data exist or not

# Query to select all data from the Album table

print("Data in Album table:")

Data in Album table:

# Load and display data from Table

# Load and display data from Table

0 12345 For Those Who Rock 123

1 12346 Dream On 124

2 12347 Restless and Wild 125

3 12348 Let There Be Rock 123

4 12349 Rumours 124

2 125 Alanis Morissette

keyboard_arrow_down Join Operations

artist_id name album_id title artist_id

0 123 AC/DC 12345 For Those Who Rock 123

1 123 AC/DC 12348 Let There Be Rock 123

2 124 Aerosmith 12346 Dream On 124