Data Science Assignment 1

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 5

FIN42110: Data Science for Trading and Risk Management

Homework 1
Harsh Desai (23205088)
February 2024

1 Yahoo Finance

import yfinance as yf
import mplfinance as mpf
from datetime import datetime, timedelta

# Plot a one-year daily candlestick chart for a single stock, with 20- and
# 50-period moving averages overlaid.

# Define the stock symbol (Tesla)
stock_symbol = "TSLA"

# Download historical data for the last year.
# The clock is read once so both ends of the window come from the same instant
# (two separate today() calls could straddle midnight).
today = datetime.today()
end_date = today.strftime('%Y-%m-%d')
start_date = (today - timedelta(days=365)).strftime('%Y-%m-%d')
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Fail early with a readable message instead of an opaque plotting error when
# the download returned nothing (bad symbol, network problem, ...).
if stock_data.empty:
    raise ValueError(
        f"No price data returned for {stock_symbol} "
        f"between {start_date} and {end_date}"
    )

# NOTE(review): newer yfinance releases may return two-level (Price, Ticker)
# columns even for a single ticker; mplfinance expects flat
# Open/High/Low/Close/Volume columns, so drop the ticker level when present.
if stock_data.columns.nlevels > 1:
    stock_data = stock_data.droplevel(1, axis=1)

# Plot the candlestick chart
mpf.plot(
    stock_data,
    type='candle',
    title=f'Candlestick Chart for {stock_symbol} (Last Year)',
    ylabel='Price (USD)',
    xlabel='Date',
    style='yahoo',
    mav=(20, 50),
)

# Display the plot
mpf.show()

1
2 PDF Scraping

pip install tabula-py

import tabula as tb
import pandas as pd
from IPython.display import display

# Extract the table(s) on page 1 of the GDP PDF and write each one to a
# LaTeX file.

# Import the PDF file
file = 'GDP12.pdf'

# Column names applied to every extracted table that has exactly this many
# columns. The last column is assumed to hold the footnote markers
# (a, b, c, ...) seen next to a few economies -- TODO confirm against the PDF.
column_names = ['Country', 'Ranking', 'Economy', 'Millions of Dollars', 'Note']

# Convert the file into DataFrames using tabula.
# header=None keeps the first data row as data; without it the first row was
# consumed as the column header and the extra column showed up as "Unnamed: 0".
data = tb.read_pdf(file, pages='1', pandas_options={'header': None})
display(data)

# Convert each extracted table to LaTeX, one .tex file per table (numbered from 1)
for idx, df in enumerate(data, start=1):
    if df.shape[1] == len(column_names):
        df.columns = column_names
    latex_output_file = f"table_output_page_{idx}.tex"
    # na_rep='' leaves empty cells blank instead of printing "NaN" in the table
    df.to_latex(latex_output_file, index=False, na_rep='')

Table 1: Gross Domestic Product 2020.

Country Ranking Economy Millions of Dollars


USA 1 United States 20,936,600 Unnamed: 0
CHN 2 China 14,722,731 NaN
JPN 3 Japan 5,064,873 NaN
DEU 4 Germany 3,806,060 NaN
GBR 5 United Kingdom 2,707,744 NaN
IND 6 India 2,622,984 NaN
FRA 7 France 2,603,004 NaN
ITA 8 Italy 1,886,445 NaN
CAN 9 Canada 1,643,408 NaN
KOR 10 Korea, Rep. 1,630,525 NaN
RUS 11 Russian Federation 1,483,498 a
BRA 12 Brazil 1,444,733 NaN
AUS 13 Australia 1,330,901 NaN
ESP 14 Spain 1,281,199 NaN
MEX 15 Mexico 1,076,163 NaN
IDN 16 Indonesia 1,058,424 NaN
NLD 17 Netherlands 912,242 NaN
CHE 18 Switzerland 747,969 NaN
TUR 19 Turkey 720,101 NaN
SAU 20 Saudi Arabia 700,118 NaN
POL 21 Poland 594,165 NaN
SWE 22 Sweden 537,610 NaN
BEL 23 Belgium 515,332 NaN
THA 24 Thailand 501,795 NaN
NGA 25 Nigeria 432,294 NaN
AUT 26 Austria 428,965 NaN
ARE 27 United Arab Emirates 421,142 NaN
IRL 28 Ireland 418,622 NaN
ISR 29 Israel 401,954 NaN
ARG 30 Argentina 383,067 b
EGY 31 Egypt, Arab Rep. 363,069 NaN
NOR 32 Norway 362,009 NaN
PHL 33 Philippines 361,489 NaN
DNK 34 Denmark 355,184 NaN
HKG 35 Hong Kong SAR, China 346,586 NaN

2
SGP 36 Singapore 339,998 NaN
MYS 37 Malaysia 336,664 NaN
BGD 38 Bangladesh 324,239 NaN
ZAF 39 South Africa 301,924 NaN
COL 40 Colombia 271,347 NaN
FIN 41 Finland 271,234 NaN
VNM 42 Vietnam 271,158 NaN
PAK 43 Pakistan 263,687 NaN
CHL 44 Chile 252,940 NaN
ROU 45 Romania 248,716 NaN
CZE 46 Czech Republic 243,530 NaN
PRT 47 Portugal 231,256 NaN
NZL 48 New Zealand 212,482 NaN
PER 49 Peru 202,014 NaN
IRN 50 Iran, Islamic Rep. 191,718 NaN
GRC 51 Greece 189,410 NaN
KAZ 52 Kazakhstan 169,835 NaN
IRQ 53 Iraq 167,224 NaN
UKR 54 Ukraine 155,582 a
HUN 55 Hungary 155,013 NaN
QAT 56 Qatar 146,374 NaN
DZA 57 Algeria 145,164 NaN
KWT 58 Kuwait 136,197 NaN
MAR 59 Morocco 112,871 c
ETH 60 Ethiopia 107,645 NaN
SVK 61 Slovak Republic 104,574 NaN
PRI 62 Puerto Rico 103,138 NaN
CUB 63 Cuba 103,131 NaN
KEN 64 Kenya 98,843 NaN
ECU 65 Ecuador 98,808 NaN

3
3 Crypto Punk Web Scraping

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# Scrape one CryptoPunk detail page: print its attributes and market status,
# download its image, and write a small LaTeX report to output.tex.

# URL of the page to scrape
url = 'https://cryptopunks.app/cryptopunks/details/9'

# Base used to resolve the (possibly relative) image path found in the page
base_url = 'https://cryptopunks.app'

# Seconds to wait for each HTTP request before giving up, so a stalled
# connection cannot hang the script forever
request_timeout = 30

# Characters that have special meaning in LaTeX, mapped to their escaped form.
# Applied in a single pass by str.translate, so replacements are never
# re-escaped.
_LATEX_SPECIALS = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def latex_escape(text):
    """Return *text* with LaTeX special characters escaped.

    Scraped page text can contain characters such as ``$``, ``#`` or ``%``
    that would otherwise break compilation of the generated document.
    """
    return str(text).translate(_LATEX_SPECIALS)


def build_latex_document(attributes, market_status):
    """Build the LaTeX report as a single string.

    Args:
        attributes: iterable of plain-text attribute descriptions.
        market_status: plain-text market status, or None/empty when the page
            did not contain one.

    Returns:
        A complete LaTeX document with an "Attributes" section and a
        "Current Market Status" section.
    """
    lines = [
        r"\documentclass{article}",
        r"\usepackage{enumitem}",
        "",
        r"\begin{document}",
        "",
        r"\section*{Attributes}",
    ]

    items = [rf"  \item {latex_escape(attribute)}" for attribute in attributes]
    if items:
        lines.append(r"\begin{itemize}[leftmargin=*]")
        lines.extend(items)
        lines.append(r"\end{itemize}")
    else:
        # An itemize environment without any \item does not compile
        lines.append("No attributes found.")

    lines.extend([
        "",
        r"\section*{Current Market Status}",
        "",
        latex_escape(market_status) if market_status else "Market status not found.",
        "",
        r"\end{document}",
        "",
    ])
    return "\n".join(lines)


def main():
    """Fetch the page, report what was found, save the image and the report."""
    # Send a GET request to the page; stop on HTTP errors rather than
    # scraping an error page
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the attributes as plain text (not raw HTML tags).
    # NOTE(review): this assumes each attribute is in a list item and takes
    # every <li> on the page; narrow the selector once the markup is confirmed.
    attributes = [item.get_text(' ', strip=True) for item in soup.find_all('li')]

    print('Attributes:')
    for attribute in attributes:
        print(f' - {attribute}')

    # Extract current market status; stays None when the section is missing
    market_status = None
    market_status_section = soup.find('div', class_='col-md-10 col-md-offset-1')
    if market_status_section is not None:
        market_status = market_status_section.text.strip()
        print('\nCurrent Market Status:')
        print(market_status)
    else:
        print("Market status not found.")

    # Find the image element and extract its URL, tolerating a missing tag
    image_element = soup.find('img', class_='img-responsive pixelated center-block')
    image_url = image_element.get('src') if image_element is not None else None

    if image_url:
        # Send a GET request to download the image
        absolute_image_url = urljoin(base_url, image_url)
        image_response = requests.get(absolute_image_url, timeout=request_timeout)

        # Check if the image request was successful
        if image_response.status_code == 200:
            # Save the image to a file
            with open('crypto_punk_image.png', 'wb') as f:
                f.write(image_response.content)
            print("CryptoPunk image saved as 'crypto_punk_image.png'")
        else:
            print("Failed to fetch the CryptoPunk image. Status code:", image_response.status_code)
    else:
        print("CryptoPunk image not found on the page.")

    # Write the LaTeX-formatted output to a file
    with open('output.tex', 'w', encoding='utf-8') as f:
        f.write(build_latex_document(attributes, market_status))


# In a notebook cell or when run as a script, __name__ is "__main__", so the
# scrape still runs immediately; importing the helpers does not hit the network.
if __name__ == "__main__":
    main()

Cryptopunk 9
Attributes: This Punk has 3 attributes, one of 4501 with that many.

1. Clown Nose- 212 Punks have this.

2. Police Cap- 203 Punks have this.

3. Big Beard- 146 Punks have this.

You might also like