Prasun Kumar - Assignment1

Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 9

Assignment #1

NumPy and Pandas Documentation


NumPy:
NumPy is a library for the Python programming language, adding support for large, multi-
dimensional arrays and matrices, along with a large collection of high-level mathematical
functions to operate on these arrays.
(i) NumPy Array: -
A NumPy array holds elements of a single data type (for example integer or float).
import numpy as np

# Two plain Python lists that become the rows of a 2-D array.
my_list1 = [1, 2, 3, 4]
my_list2 = [5, 6, 7, 8]

# Combine the lists row-wise into a single 2 x 4 array.
my_array = np.array((my_list1, my_list2))

# Show the array itself, then its shape and its element type.
for described in (my_array, my_array.shape, my_array.dtype):
    print(described)

new_array1 = np.zeros(5)          # 1-D array of five zeros
new_array2 = np.ones((5, 5))      # 5 x 5 array filled with ones
new_array3 = np.eye(5)            # 5 x 5 identity matrix
new_array4 = np.arange(5, 50, 3)  # 5, 8, 11, ... up to (not including) 50

for created in (new_array1, new_array2, new_array3, new_array4):
    print(created)

In the above code we first load the NumPy library. After loading the library, we create two
lists, my_list1 and my_list2.
Next, we create a NumPy array from the two lists.
#creating 2d array
my_array=np. array([my_list1,my_list2])
[[1 2 3 4]
[5 6 7 8]]
#shape of the array
my_array. shape
(2, 4)
#datatype of the array
my_array. dtype
int32
#creating an array of zeros
np.zeros(5)
[0. 0. 0. 0. 0.]
#creating a 5 x 5 matrix of ones
np.ones([5,5])
[[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]]
#creating an identity matrix
np.eye(5)
[[1. 0. 0. 0. 0.]
[0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[0. 0. 0. 1. 0.]
[0. 0. 0. 0. 1.]]
#creating a sequence of evenly spaced values
np.arange(5,50,3)
[ 5 8 11 14 17 20 23 26 29 32 35 38 41 44 47]

(ii) Scalar Operation on array: -


import numpy as np

# A 2 x 4 integer array used by every operation below.
array1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(array1)

# Multiplication: `*` multiplies element by element (not a matrix product).
array2 = array1 * array1

# Exponentiation: every element raised to the third power.
array3 = array1 ** 3
print(array3)

# Subtraction: an array minus itself is all zeros.
array4 = array1 - array1
print(array4)

# array6 is a second name for the same array object as array5.
array6 = array5 = array2 - array1
print(array6)

# Reciprocal: true division, so the result is floating point.
print(1 / array1)

#Multiplication of two NumPy arrays


array2=array1*array1
[[ 1  4  9 16]
 [25 36 49 64]]

#Reciprocal
1/array1
[[1. 0.5 0.33333333 0.25 ]
[0.2 0.16666667 0.14285714 0.125 ]]

#Exponential multiplication
array3=array1 **3
[[ 1 8 27 64]
[125 216 343 512]]
(iii)Indexing NumPy array
import numpy as np

# Twelve consecutive integers, 0 through 11.
arr = np.arange(12)
print(arr)

# Single elements are read by position.
for position in (0, 2):
    print(arr[position])

# Assigning a scalar to a slice overwrites every element in that slice.
arr[:5] = 20
print(arr)

# interesting thing: a slice is a view onto the same data, not a copy,
# so filling arr2 also changes the first six elements of arr.
arr2 = arr[:6]
arr2[:] = 29
print(arr)

# copy() produces an independent array with its own data.
arrcopy = arr.copy()
print(arrcopy)

#access the value of 0 index


arr[0]
#slicing of array from 0th index to 4th index
arr[0:5]
#indexing using loop
# A 3 x 3 array; indexing with a single integer selects a whole row.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr_len = arr2d.shape[0]  # number of rows

# Overwrite every row with its own row index.
for i in range(arr_len):
    arr2d[i] = i

# Printed once, after the loop has finished.
print(arr2d)

[[0 0 0]
[1 1 1]
[2 2 2]]
(iv) Saving and loading array from external memory
import numpy as np
#saving single arrays
arr = np.arange(10)
print(arr)

# np.save appends the ".npy" extension, so this writes saved_array.npy.
np.save('saved_array', arr)

new_array = np.load('saved_array.npy')
print(new_array)

# saving multiple arrays: each keyword becomes the name the array is
# stored under inside the .npz archive.
array_1 = np.arange(25)
array_2 = np.arange(30)
np.savez('saved_archive.npz', x=array_1, y=array_2)

# Loading a .npz returns an archive object that keeps the file open;
# the with-block closes it once both arrays have been read.
with np.load('saved_archive.npz') as load_archive:
    print('load archive of x')
    print(load_archive['x'])
    print('load archive of y')
    print(load_archive['y'])

# save to text file
np.savetxt('notepadfile.txt', array_1, delimiter=',')

# loading txt file: values come back as floats
load_txt_file = np.loadtxt('notepadfile.txt', delimiter=',')
print('load_txt_file is')
print(load_txt_file)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
load archive of x
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24]
load archive of y
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29]
load_txt_file is
[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16. 17.
18. 19. 20. 21. 22. 23. 24.]

Pandas: -
In computer programming, pandas is a software library written for the Python programming
language for data manipulation and analysis. In particular, it offers data structures and operations
for manipulating numerical tables and time series.
First of all load the pandas library
import pandas as pd
#DataFrames
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

#example - Revenue of companies

# Builds the frame from whatever table is currently on the system clipboard.
# NOTE(review): the rest of this section assumes that table has at least six
# rows and the columns 'Rank ', 'Name ' and 'Industry ' (with the trailing
# space) - confirm against the data being pasted.
revenue_df = pd.read_clipboard()
print(revenue_df)

#index and columns
print(revenue_df.columns)
print(revenue_df['Rank '])

#multiple columns
print(DataFrame(revenue_df, columns=['Rank ', 'Name ', 'Industry ']))

#NaN values: requesting a column the source frame does not have
#('Profit' here, presumably) yields a column filled with NaN.
revenue_df2 = DataFrame(revenue_df, columns=['Rank ', 'Name ', 'Industry ', 'Profit'])
print(revenue_df2)

#head and tail
print(revenue_df.head(2))
print(revenue_df.tail(2))

#access rows in df
# .ix was removed in pandas 1.0; .iloc selects rows by integer position.
print(revenue_df.iloc[0])  # row 1
print(revenue_df.iloc[5])  # row 6

#assign values to df
#numpy: the array length must equal the number of rows in the frame.
array1 = np.array([1, 2, 3, 4, 5, 6])
revenue_df2['Profit'] = array1
print(revenue_df2)

#series: values are aligned on the index labels, so only the rows labelled
#3 and 5 receive a value and every other row becomes NaN.
profits = Series([900, 1000], index=[3, 5])
revenue_df2['Profit'] = profits
print(revenue_df2)

#deletion
del revenue_df2['Profit']
print(revenue_df2)

# Dictionary to DataFrame: each key becomes a column label and each list
# supplies that column's values.
sample = dict(company=['A', 'B'], Profit=[1000, 5000])
print(sample)

sample_df = DataFrame(data=sample)
print(sample_df)

dx:
[[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]
[-100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30
40 50 60 70 80 90]]

#Indexing in pandas
import pandas as pd
from pandas import Series, DataFrame

series1 = Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
print(series1)

# The labels of a Series are held in an Index object.
index1 = series1.index
print(index1)

# Slicing an Index works like slicing a list.
print(index1[2:])

#negative indexes
print(index1[-2:])
print(index1[:-2])

print(index1[2:4])

#interesting
# An Index is immutable: assigning to an element raises TypeError.
# The error is caught and shown so the script keeps running, and the
# final print confirms the labels are unchanged.
try:
    index1[0] = 'e'
except TypeError as error:
    print(error)
print(index1)

#Merging and Joining


# Two frames with the same columns and values, indexed by different years.
df1 = pd.DataFrame(
    {
        "HPI": [80, 90, 70, 60],
        "Int_Rate": [2, 1, 2, 3],
        "IND_GDP": [50, 45, 45, 67],
    },
    index=[2001, 2002, 2003, 2004],
)
df2 = pd.DataFrame(
    {
        "HPI": [80, 90, 70, 60],
        "Int_Rate": [2, 1, 2, 3],
        "IND_GDP": [50, 45, 45, 67],
    },
    index=[2005, 2006, 2007, 2008],
)

# With no `on=` argument, merge joins on every column the two frames share
# (an inner join by default); the year index is not carried into the result.
merged = pd.merge(df1, df2)

print(merged)

You might also like