Pandas Dataframe1
Pandas Dataframe1
COLUMNS
A B C D E
1 25 88 99 87 54
ROWS
2 66 54 45 75 84
3 84 85 86 95 89
4 74 75 78 87 65
CREATING DATAFRAME AND DISPLAY
1.CREATING DATAFRAME FROM LISTS.
>>> import pandas as pd
>>> a=[10,20,30,40,50]
>>> df=pd.DataFrame(a)
>>> print(a)
[10, 20, 30, 40, 50]
>>> print(df)
0
0 10
1 20
2 30
3 40
4 50
>>> import pandas as pd
>>> a=[["ram",22],["mohan",15],["sonam",16],
["kirti",21]]
>>> df=pd.DataFrame(a)
>>> print(a)
[['ram', 22], ['mohan', 15], ['sonam', 16], ['kirti',
21]]
>>> print(df)
0 1
0 ram 22
1 mohan 15
2 sonam 16
3 kirti 21
2.CREATING DATAFRAME FROM SERIES
>>> import pandas as pd
>>>
s=pd.Series(data=["ram","mohan","kapil"],index=[
'a','b','c'])
>>> df=pd.DataFrame(s)
>>> print(df)
0
a ram
b mohan
c kapil
>>> import pandas as pd
>>> dic={"jan":31,"feb":28,"mar":31}
>>> s=pd.Series(dic)
>>> df=pd.DataFrame(s)
>>> print(df)
0
jan 31
feb 28
mar 31
>>> import pandas as pd
>>>
sm=pd.Series({"vijaya":80,"rahul":73,"soni":94})
>>>
sa=pd.Series({"vijaya":22,"rahul":24,"soni":21})
>>> df=pd.DataFrame({"marks":sm,"age":sa})
>>> print(df)
marks age
vijaya 80 22
rahul 73 24
soni 94 21
>>>
3.CREATING DATAFRAME FROM DICTIONARY
>>> import pandas as pd
>>> dic={"roll":[1,2,3,4,5],"name":
["ram","mohan","kapil","sunil","jyotsana"]}
>>> df=pd.DataFrame(dic)
>>> print(df)
roll name
0 1 ram
1 2 mohan
2 3 kapil
3 4 sunil
4 5 jyotsana
>>> import pandas as pd
>>>
nm=pd.Series(["ram","mohan","sohan","kapil","so
nu"])
>>> eng=pd.Series([55,56,58,59,56])
>>> math=pd.Series([75,78,79,88,98])
>>> ip=pd.Series([89,88,98,87,89])
>>>
std={"Name":nm,"English":eng,"Maths":math,"Inf
ormatics Practices":ip}
>>> df=pd.DataFrame(std)
>>> print(df)
Name English Maths Informatics Practices
0 ram 55 75 89
1 mohan 56 78 88
2 sohan 58 79 98
3 kapil 59 88 87
4 sonu 56 98 89
>>> import pandas as pd
>>> dic={"Name:":["ram","mohan","kapil"],"Eng":
[85,95,78],"Hin":[88,77,85]}
>>> df=pd.DataFrame(dic)
>>> print(df)
Name: Eng Hin
0 ram 85 88
1 mohan 95 77
2 kapil 78 85
4.CREATING DATAFRAME USING ARRAY
>>> import pandas as pd
>>> import numpy as np
>>> a=np.array([[54,55,56,57],[65,66,67,68],
[87,88,89,85]])
>>> df=pd.DataFrame(a)
>>> print(a)
[[54 55 56 57]
[65 66 67 68]
[87 88 89 85]]
>>> print(df)
0 1 2 3
0 54 55 56 57
1 65 66 67 68
2 87 88 89 85
5.CREATING DATAFRAME USING LIST OF
DICTIONARY
>>> import pandas as pd
>>> a=[{"ram":55,"sunil":75,"kapil":75},
{"ram":65,"sunil":78,"kapil":77},
{"ram":55,"sunil":88,"kapil":87}]
>>> df=pd.DataFrame(a)
>>> print(a)
[{'ram': 55, 'sunil': 75, 'kapil': 75}, {'ram': 65,
'sunil': 78, 'kapil': 77}, {'ram': 55, 'sunil': 88, 'kapil':
87}]
>>> print(df)
ram sunil kapil
0 55 75 75
1 65 78 77
2 55 88 87
/////////////////////////////////////////////////////
/////////////////////////////
ATTRIBUTES OF DATAFRAME OBJECT
/////////////////////////////////////////////////////
////////////////////////////
index : Returns the index (row labels) of the
DataFrame.
>>> import pandas as pd
>>> a={"ram":[55,57,98],"mohan":
[75,95,85],"kapil":[57,85,78]}
>>> b=pd.DataFrame(a)
>>> print(b)
ram mohan kapil
0 55 75 57
1 57 95 85
2 98 85 78
>>> b.index
RangeIndex(start=0, stop=3, step=1)
columns : Returns the column labels of the
DataFrame.
>>> b.columns
Index(['ram', 'mohan', 'kapil'], dtype='object')
axes : Returns a list representing both axes (row index first, then column labels).
>>> b.axes
[RangeIndex(start=0, stop=3, step=1),
Index(['ram', 'mohan', 'kapil'], dtype='object')]
dtypes : Returns the data type (dtype) of each column of the
DataFrame.
>>> b.dtypes
ram int64
mohan int64
kapil int64
dtype: object
size : Returns the number of elements in the
DataFrame (rows x columns).
>>> b.size
9
shape : Returns a tuple representing the
dimensionality of the DataFrame, i.e. (rows, columns).
>>> b.shape
(3, 3)
values : Returns a NumPy representation of the
DataFrame.
>>> b.values
array([[55, 75, 57],
[57, 95, 85],
[98, 85, 78]], dtype=int64)
empty : Indicates whether the DataFrame is empty or
not. It returns True if it is empty, otherwise it
returns False.
>>> b.empty
False
ndim : Returns the number of axes/array
dimensions.
>>> b.ndim
2
T : Transposes the DataFrame, i.e. rows become
columns and vice versa.
>>> b.T
0 1 2
ram 55 57 98
mohan 75 95 85
kapil 57 85 78
count() : Counts the non-NaN values along an axis.
count(0) counts down the rows and gives one count per column;
count(1) counts across the columns and gives one count per row.
The default is axis 0.
>>> b.count(0)
ram 3
mohan 3
kapil 3
dtype: int64
or
>>> b.count(axis=0)
ram 3
mohan 3
kapil 3
dtype: int64
/////////////
>>> b.count(1)
0 3
1 3
2 3
dtype: int64
>>> b.count(axis=1)
0 3
1 3
2 3
dtype: int64
>>> df.NAME[0]="KAMAL"
>>> df
NAME ENG ECO IP ACCT MAT
0 KAMAL 67 85 75 65 88
1 PANKAJ 88 77 87 85 88
2 ADITYA 57 75 84 75 88
3 RITU 68 87 49 87 88
4 ram ram ram ram ram ram
5 rahim rahim rahim rahim rahim rahim
>>> df.at[3,"ENG"]=88
>>> df
NAME ENG ECO IP ACCT MAT
0 KAMAL 67 85 75 65 88
1 PANKAJ 88 77 87 85 88
2 ADITYA 57 75 84 75 88
3 RITU 88 87 49 87 88
4 ram ram ram ram ram ram
5 rahim rahim rahim rahim rahim rahim
//////////////////////////////////////////////
DELETING COLUMNS
///////////////////////////////////////////
>>> df
NAME ENG ECO IP ACCT
0 KAMAL 67 85 75 65
1 PANKAJ 88 77 87 85
2 ADITYA 57 75 84 75
3 RITU 88 87 49 87
4 ram ram ram ram ram
5 rahim rahim rahim rahim Rahim
del df["column name"] # it deletes the
DataFrame column and all of its values
>>> del df["ACCT"]
>>> df
NAME ENG ECO IP
0 RINKU 67 85 75
1 PANKAJ 88 77 87
2 ADITYA 57 75 84
3 RAMAN 88 87 65
4 ram ram ram ram
5 rahim rahim rahim Rahim
df.drop("col/row", axis=0/1) # axis=0 drops a row, axis=1 drops a column;
it returns a new DataFrame and leaves df itself unchanged
OR
df.drop("col/row", axis=0/1, inplace=True) # it
deletes the data from df itself (permanently)
>>> df.drop(5,axis=0,inplace=True)
>>> df
NAME ENG ECO IP
0 RINKU 67 85 75
1 PANKAJ 88 77 87
2 ADITYA 57 75 84
3 RAMAN 88 87 65
4 ram ram ram ram
df.pop("col name") # it deletes the given column and
all of its data
>>> df.pop("IP") # it deletes the column and returns the deleted values
0 75
1 87
2 84
3 65
4 ram
Name: IP, dtype: object
>>> df
NAME ENG ECO
0 RINKU 67 85
1 PANKAJ 88 77
2 ADITYA 57 75
3 RAMAN 88 87
4 ram ram ram
////////////////////////////////////////////////////
Iterrows( ) and iteritems( ) function in
DataFrame
///////////////////////////////////////////////////
df.iterrows( ) # it iterates over the horizontal
subsets (rows) as pairs, i.e. (row index, row Series)
>>> df
NAME ENG ECO
0 RINKU 67 85
1 PANKAJ 88 77
2 ADITYA 57 75
3 RAMAN 88 87
4 ram ram ram
>>> for (ri,rs) in df.iterrows():
print("Row Index = ",ri)
print("Row Series = ",rs)
Row Index = 0
Row Series = NAME RINKU
ENG 67
ECO 85
Name: 0, dtype: object
Row Index = 1
Row Series = NAME PANKAJ
ENG 88
ECO 77
Name: 1, dtype: object
Row Index = 2
Row Series = NAME ADITYA
ENG 57
ECO 75
Name: 2, dtype: object
Row Index = 3
Row Series = NAME RAMAN
ENG 88
ECO 87
Name: 3, dtype: object
Row Index = 4
Row Series = NAME ram
ENG ram
ECO ram
Name: 4, dtype: object
/////////////////////////////////////////////////////
///////
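df.iteritems( ) # it iterates over the vertical subsets (columns) in
the form of (column index, column Series) pairs.
Note: iteritems( ) was deprecated in pandas 1.5 and removed in pandas 2.0;
use df.items( ) in newer versions.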
/////////////////////////////////////////////////////
///////
>>> df
NAME ENG ECO
0 RINKU 67 85
1 PANKAJ 88 77
2 ADITYA 57 75
3 RAMAN 88 87
4 ram ram ram
>>> for (ci,cs) in df.iteritems():
print("Column Index=",ci)
print("Column Series=\n",cs)
>>> df1.fillna(method='ffill')
0 1 2 3
0 1 2.0 3.0 4.0
1 8 2.0 3.0 4.0
2 10 4.0 3.0 4.0
/////////////////////////////////////////////////////
//////////////////
dropna()
It is a DataFrame method which drops the rows
that contain NaN values.
>>> import pandas as pd
>>> a=[[1,2,3,4],[8],[10,4]]
>>> df1=pd.DataFrame(a)
>>> print(df1)
0 1 2 3
0 1 2.0 3.0 4.0
1 8 NaN NaN NaN
2 10 4.0 NaN NaN
>>> df1.dropna()
0 1 2 3
0 1 2.0 3.0 4.0
CHECKING NaN value in DATAFRAME
The isnull() function is used to check for NaN values in a
DataFrame; it returns True wherever a value is NaN.
>>> import pandas as pd
>>> a=[[1,2,3,4],[8],[10,4]]
>>> df1=pd.DataFrame(a)
>>> print(df1)
0 1 2 3
0 1 2.0 3.0 4.0
1 8 NaN NaN NaN
2 10 4.0 NaN NaN
>>> df1.isnull()
0 1 2 3
0 False False False False
1 False True True True
2 False False True True
/////////////////////////////////////////////////////
///
CONCATENATING THE DATAFRAME
/////////////////////////////////////////////////////
/
pd.concat([df1,df2],axis= 0/1) : it
concatenates/appends two DataFrames along the given axis,
i.e. row-wise (axis=0) or column-wise (axis=1).
>>> import pandas as pd
>>> r1={"roll":[1,2,3,4],"name":
["mohan","kapil","danish","rahul"]}
>>> r2={"roll":[5,6,7,8],"name":
["kavita","gita","sita","dipika"]}
>>> df1=pd.DataFrame(r1)
>>> df2=pd.DataFrame(r2)
>>> df1
roll name
0 1 mohan
1 2 kapil
2 3 danish
3 4 rahul
>>> df2
roll name
0 5 kavita
1 6 gita
2 7 sita
3 8 dipika
>>> df3=pd.concat([df1,df2],axis=0)
>>> df3
roll name
0 1 mohan
1 2 kapil
2 3 danish
3 4 rahul
0 5 kavita
1 6 gita
2 7 sita
3 8 dipika
>>> df4=pd.concat([df1,df2],axis=1)
>>> df4
roll name roll name
0 1 mohan 5 kavita
1 2 kapil 6 gita
2 3 danish 7 sita
3 4 rahul 8 dipika
>>>
df5=pd.concat([df1,df2],axis=0,ignore_index=Tru
e)
>>> df5
roll name
0 1 mohan
1 2 kapil
2 3 danish
3 4 rahul
4 5 kavita
5 6 gita
6 7 sita
7 8 dipika
///////////////////////////////////////////////
////
MERGE OPERATION IN DATAFRAME
///////////////////////////////////////////////
///
pd.merge(df1,df2,on="fieldname")
It lets the user merge two DataFrames on a common
field, keeping the rows whose values in that field match in both.
>>> import pandas as pd
>>> a={"roll":[1,2,3],"name":
["ram","rama","mangal"]}
>>> b={"roll":[3,4,5],"name":
["sohan","kapil","ram"]}
>>> df1=pd.DataFrame(a)
>>> df2=pd.DataFrame(b)
>>> df3=pd.merge(df1,df2,on="roll")
>>> df3
roll name_x name_y
0 3 mangal sohan
>>> df4=pd.merge(df1,df2,on="name")
>>> df4
roll_x name roll_y
0 1 ram 5
//////////////////////////////////////////////////////////////
BOOLEAN INDEXING
////////////////////////////////////////////////////////////
A DataFrame can be indexed with Boolean values,
i.e. True/False.
>>> import pandas as pd
>>> a={"roll":[1,2,3,4],"name":
["abhishek","balkishore","chandan","danish"]}
>>>
df1=pd.DataFrame(a,index=[True,False,True,False
])
>>>print(df1)
roll name
True 1 abhishek
False 2 balkishore
True 3 chandan
False 4 danish
/////////////////////////////////////////////////////////////////
BOOLEAN REDUCTION
///////////////////////////////////////////////////////////////
The empty attribute and the any(), all() functions of a DataFrame
provide a way to summarise a Boolean result;
this is termed Boolean Reduction.
df.empty
It returns True if the DataFrame is empty, otherwise False
>>> import pandas as pd
>>> a=pd.DataFrame({"x":[]})
>>> a
Empty DataFrame
Columns: [x]
Index: []
>>> a.empty
True
df.all()
It returns True for a column if all of its values are True or non-zero
>>> import pandas as pd
>>> a=pd.DataFrame({"x":[True,True],"y":
[True,False],"z":[False,False]})
>>> a
x y z
0 True True False
1 True False False
>>> a.all()
x True
y False
z False
dtype: bool
df.any()
It returns True for a column if any one of its values is True
>>> import pandas as pd
>>> a=pd.DataFrame({"x":[True,True],"y":
[True,False],"z":[False,False]})
>>> a
x y z
0 True True False
1 True False False
>>> a.any()
x True
y True
z False
dtype: bool