DSP_Lec6

Download as pdf or txt
Download as pdf or txt
You are on page 1of 10

Pandas with Python

In [1]: Series equivalent to List,


DataFrame is equivalent to a multidimensional array.

Cell In[1], line 1


Series equivalent to List,
^
SyntaxError: invalid syntax

Series
In [2]: from pandas import Series,DataFrame
import pandas as pd
obj = Series([4,7,-3,5])
obj

Out[2]: 0 4
1 7
2 -3
3 5
dtype: int64

In [4]: obj.values

Out[4]: array([ 4, 7, -3, 5], dtype=int64)

In [5]: obj.index

Out[5]: RangeIndex(start=0, stop=4, step=1)

In [6]: obj[1]

Out[6]: 7

In [7]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])


obj2

Out[7]: a 4
b 7
c -3
d 5
dtype: int64

In [8]: obj2 = Series([4,7,-3,5],index=['d','c','b','a'])


obj2
Out[8]: d 4
c 7
b -3
a 5
dtype: int64

In [9]: obj[obj>0]

Out[9]: 0 4
1 7
3 5
dtype: int64

In [10]: sdata = {'dac':30000000,'sy':500000,'cht':10000000,'raj':700000}


obj3 = Series(sdata)

In [11]: obj3

Out[11]: dac 30000000


sy 500000
cht 10000000
raj 700000
dtype: int64

In [13]: div = ['sy','khu','jes','raj']


obj4 = Series(sdata,index=div)
obj4

Out[13]: sy 500000.0
khu NaN
jes NaN
raj 700000.0
dtype: float64

In [14]: obj4.isnull()

Out[14]: sy False
khu True
jes True
raj False
dtype: bool

In [17]: pd.isnull(obj4)

Out[17]: sy False
khu True
jes True
raj False
dtype: bool

In [19]: obj4.hasnans

Out[19]: True

In [24]: obj4.isnull().sum()
Out[24]: 2

In [25]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])


obj2

Out[25]: a 4
b 7
c -3
d 5
dtype: int64

In [26]: obj2.index=('Samia','Kamal','Salman','Ragib')
obj2

Out[26]: Samia 4
Kamal 7
Salman -3
Ragib 5
dtype: int64

Introduction to DataFrame
In [27]: data = {'div':['syl','raj','com','kulna'],
'year':[2020,2021,2022,2023],
'pop':[4,6,12,7]}
frame = DataFrame(data)
frame

Out[27]: div year pop

0 syl 2020 4

1 raj 2021 6

2 com 2022 12

3 kulna 2023 7

In [28]: frame2 = DataFrame(data,columns=['div','year','pop','area'])


frame2

Out[28]: div year pop area

0 syl 2020 4 NaN

1 raj 2021 6 NaN

2 com 2022 12 NaN

3 kulna 2023 7 NaN

In [29]: frame2['area'] = [5,23,7,12]


frame2
Out[29]: div year pop area

0 syl 2020 4 5

1 raj 2021 6 23

2 com 2022 12 7

3 kulna 2023 7 12

In [32]: del frame2['area']


frame2

Out[32]: div year pop

0 syl 2020 4

1 raj 2021 6

2 com 2022 12

3 kulna 2023 7

In [34]: obj

Out[34]: 0 4
1 7
2 -3
3 5
dtype: int64

In [36]: obj5 = obj.reindex([3,2,1,0,7])


obj5

Out[36]: 3 5.0
2 -3.0
1 7.0
0 4.0
7 NaN
dtype: float64

Fill NaN
In [37]: obj5 = obj.reindex([3,2,1,0,7],method='ffill')
obj5

Out[37]: 3 5
2 -3
1 7
0 4
7 5
dtype: int64
In [49]: obj5 = obj.reindex([3,2,1,0,7],fill_value=8)
obj5

Out[49]: 3 5
2 -3
1 7
0 4
7 8
dtype: int64

In [51]: obj5 = obj.reindex([3,2,1,7,0],method = 'bfill')


obj5

Out[51]: 3 5.0
2 -3.0
1 7.0
7 NaN
0 4.0
dtype: float64

In [52]: ob = Series([12,7,8,9],index=[0,2,4,6])
ob

Out[52]: 0 12
2 7
4 8
6 9
dtype: int64

In [55]: ob2 = ob.reindex([0,1,2,3,4,5,6],method = 'ffill')


ob2

Out[55]: 0 12
1 12
2 7
3 7
4 8
5 8
6 9
dtype: int64

In [56]: ob3 = ob.reindex([0,1,2,3,4,5,6],method = 'bfill')


ob3

Out[56]: 0 12
1 7
2 7
3 8
4 8
5 9
6 9
dtype: int64

In [58]: ob4 = ob.reindex([0,1,2,3,4,5,6],fill_value=10)


ob4
Out[58]: 0 12
1 10
2 7
3 10
4 8
5 10
6 9
dtype: int64

Delete NaN row


In [59]: ob3 = Series([4,7,-3,5],index=['a','b','c','d'])
ob3

Out[59]: a 4
b 7
c -3
d 5
dtype: int64

In [61]: ob4 = ob3.reindex(['a','b','c','d','e'])


ob4

Out[61]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
dtype: float64

In [63]: del ob4['e']


ob4

Out[63]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64

In [64]: del ob4['c']


ob4

Out[64]: a 4.0
b 7.0
d 5.0
dtype: float64

In [68]: ob4.drop('a')

Out[68]: b 7.0
d 5.0
dtype: float64
Delete all NaN values with dropna()
In [70]: ob4 = ob3.reindex(['a','b','c','d','e','f'])
ob4

Out[70]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
f NaN
dtype: float64

In [71]: clean_ob = ob4.dropna()


clean_ob

Out[71]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64

In [73]: clean_ob = ob4.dropna(axis=0)


clean_ob

Out[73]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64

Sorting index
In [74]: ob = Series([3,4,5,62,3],index=[5,3,1,0,2])
ob

Out[74]: 5 3
3 4
1 5
0 62
2 3
dtype: int64

In [75]: ob.sort_index()

Out[75]: 0 62
1 5
2 3
3 4
5 3
dtype: int64
In [76]: import numpy as np
ob = DataFrame(np.arange(20).reshape((4,5)))
ob

Out[76]: 0 1 2 3 4

0 0 1 2 3 4

1 5 6 7 8 9

2 10 11 12 13 14

3 15 16 17 18 19

CSV
In [4]: df = pd.read_csv("C:\\Users\\husna\\.anaconda\\data1.csv",header=None)
df

Out[4]: 0 1 2 3 4

0 1 2 3 4 hello

1 5 6 7 8 world

2 9 10 11 12 foo

In [5]: df = pd.read_table("C:\\Users\\husna\\.anaconda\\data1.csv",sep=',')
df

Out[5]: 1 2 3 4 hello

0 5 6 7 8 world

1 9 10 11 12 foo

Text File
In [3]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt")
df2
Out[3]: A B C Unnamed: 3

0 aaa -0.264438 -1.026059 -0.619500

1 bbb 0.927272 0.302904 -0.032399

2 ccc -0.264273 -0.386314 -0.217601

3 ddd -0.871858 -0.348382 1.100491

In [4]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')


df2

Out[4]: A B C

aaa -0.264438 -1.026059 -0.619500

bbb 0.927272 0.302904 -0.032399

ccc -0.264273 -0.386314 -0.217601

ddd -0.871858 -0.348382 1.100491

In [5]: df3 = pd.read_csv("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')


df3

Out[5]: A B C

aaa -0.264438 -1.026059 -0.619500

bbb 0.927272 0.302904 -0.032399

ccc -0.264273 -0.386314 -0.217601

ddd -0.871858 -0.348382 1.100491

JSON
In [6]: import json
obj = """
{"name": "Wes",
"cities_lived": ["Akron", "Nashville", "New York", "San Francisco"],
"pet": null,
"siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars", "soccer"]},
{"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]
}
"""
result = json.loads(obj)
result
Out[6]: {'name': 'Wes',
'cities_lived': ['Akron', 'Nashville', 'New York', 'San Francisco'],
'pet': None,
'siblings': [{'name': 'Scott', 'age': 34, 'hobbies': ['guitars', 'soccer']},
{'name': 'Katie', 'age': 42, 'hobbies': ['diving', 'art']}]}

In [7]: asjson = json.dumps(result)


asjson

Out[7]: '{"name": "Wes", "cities_lived": ["Akron", "Nashville", "New York", "San Francisco"], "pet": null, "siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars", "soccer"]}, {"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]}'

In [8]: siblings = pd.DataFrame(result["siblings"], columns=["name", "age"])


siblings

Out[8]: name age

0 Scott 34

1 Katie 42

Excel File
In [ ]: f = pd.ExcelFile('data.xls')
t = f.parse('sheet1') # parse one particular sheet of the Excel file

In [ ]: df = pd.read_csv('file name(location)')
df

In [ ]: df.to_csv('file name(location)\\sampleout')

In [ ]: import sys
df.to_csv(sys.stdout, sep='|')

You might also like