DSP_Lec6

Pandas with Python
In [1]: Series equivalent to List,

DataFrame equivalent to multidimensional array.
Cell In[1], line 1

Series equivalent to List,
^
SyntaxError: invalid syntax
Series
In [2]: from pandas import Series,DataFrame
import pandas as pd
obj = Series([4,7,-3,5])
obj
Out[2]: 0 4
1 7
2 -3
3 5
dtype: int64
In [4]: obj.values
Out[4]: array([ 4, 7, -3, 5], dtype=int64)
In [5]: obj.index
Out[5]: RangeIndex(start=0, stop=4, step=1)
In [6]: obj[1]
Out[6]: 7
In [7]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])

obj2
Out[7]: a 4
b 7
c -3
d 5
dtype: int64
In [8]: obj2 = Series([4,7,-3,5],index=['d','c','b','a'])

obj2
Out[8]: d 4
c 7
b -3
a 5
dtype: int64
In [9]: obj[obj>0]
Out[9]: 0 4
1 7
3 5
dtype: int64
In [10]: sdata = {'dac':30000000,'sy':500000,'cht':10000000,'raj':700000}

obj3 = Series(sdata)
In [11]: obj3
Out[11]: dac 30000000

sy 500000
cht 10000000
raj 700000
dtype: int64
In [13]: div = ['sy','khu','jes','raj']

obj4 = Series(sdata,index=div)
obj4
Out[13]: sy 500000.0
khu NaN
jes NaN
raj 700000.0
dtype: float64
In [14]: obj4.isnull()
Out[14]: sy False
khu True
jes True
raj False
dtype: bool
In [17]: pd.isnull(obj4)
Out[17]: sy False
khu True
jes True
raj False
dtype: bool
In [19]: obj4.hasnans
Out[19]: True
In [24]: obj4.isnull().sum()
Out[24]: 2
In [25]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])

obj2
Out[25]: a 4
b 7
c -3
d 5
dtype: int64
In [26]: obj2.index=('Samia','Kamal','Salman','Ragib')
obj2
Out[26]: Samia 4
Kamal 7
Salman -3
Ragib 5
dtype: int64
Introduction to DataFrame
In [27]: data = {'div':['syl','raj','com','kulna'],
'year':[2020,2021,2022,2023],
'pop':[4,6,12,7]}
frame = DataFrame(data)
frame
Out[27]: div year pop
0 syl 2020 4
1 raj 2021 6
2 com 2022 12
3 kulna 2023 7
In [28]: frame2 = DataFrame(data,columns=['div','year','pop','area'])

frame2
Out[28]: div year pop area
0 syl 2020 4 NaN
1 raj 2021 6 NaN
2 com 2022 12 NaN
3 kulna 2023 7 NaN
In [29]: frame2['area'] = [5,23,7,12]

frame2
Out[29]: div year pop area
0 syl 2020 4 5
1 raj 2021 6 23
2 com 2022 12 7
3 kulna 2023 7 12
In [32]: del frame2['area']

frame2
Out[32]: div year pop
0 syl 2020 4
1 raj 2021 6
2 com 2022 12
3 kulna 2023 7
In [34]: obj
Out[34]: 0 4
1 7
2 -3
3 5
dtype: int64
In [36]: obj5 = obj.reindex([3,2,1,0,7])

obj5
Out[36]: 3 5.0
2 -3.0
1 7.0
0 4.0
7 NaN
dtype: float64
Fill NaN
In [37]: obj5 = obj.reindex([3,2,1,0,7],method='ffill')
obj5
Out[37]: 3 5
2 -3
1 7
0 4
7 5
dtype: int64
In [49]: obj5 = obj.reindex([3,2,1,0,7],fill_value=8)
obj5
Out[49]: 3 5
2 -3
1 7
0 4
7 8
dtype: int64
In [51]: obj5 = obj.reindex([3,2,1,7,0],method = 'bfill')

obj5
Out[51]: 3 5.0
2 -3.0
1 7.0
7 NaN
0 4.0
dtype: float64
In [52]: ob = Series([12,7,8,9],index=[0,2,4,6])
ob
Out[52]: 0 12
2 7
4 8
6 9
dtype: int64
In [55]: ob2 = ob.reindex([0,1,2,3,4,5,6],method = 'ffill')

ob2
Out[55]: 0 12
1 12
2 7
3 7
4 8
5 8
6 9
dtype: int64
In [56]: ob3 = ob.reindex([0,1,2,3,4,5,6],method = 'bfill')

ob3
Out[56]: 0 12
1 7
2 7
3 8
4 8
5 9
6 9
dtype: int64
In [58]: ob4 = ob.reindex([0,1,2,3,4,5,6],fill_value=10)

ob4
Out[58]: 0 12
1 10
2 7
3 10
4 8
5 10
6 9
dtype: int64
Delete NaN row

In [59]: ob3 = Series([4,7,-3,5],index=['a','b','c','d'])
ob3
Out[59]: a 4
b 7
c -3
d 5
dtype: int64
In [61]: ob4 = ob3.reindex(['a','b','c','d','e'])

ob4
Out[61]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
dtype: float64
In [63]: del ob4['e']

ob4
Out[63]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
In [64]: del ob4['c']

ob4
Out[64]: a 4.0
b 7.0
d 5.0
dtype: float64
In [68]: ob4.drop('a')
Out[68]: b 7.0
d 5.0
dtype: float64
Delete all NaN by dropna()
In [70]: ob4 = ob3.reindex(['a','b','c','d','e','f'])
ob4
Out[70]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
f NaN
dtype: float64
In [71]: clean_ob = ob4.dropna()

clean_ob
Out[71]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
In [73]: clean_ob = ob4.dropna(axis=0)

clean_ob
Out[73]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
Sorting index
In [74]: ob = Series([3,4,5,62,3],index=[5,3,1,0,2])
ob
Out[74]: 5 3
3 4
1 5
0 62
2 3
dtype: int64
In [75]: ob.sort_index()
Out[75]: 0 62
1 5
2 3
3 4
5 3
dtype: int64
In [76]: import numpy as np
ob = DataFrame(np.arange(20).reshape((4,5)))
ob
Out[76]: 0 1 2 3 4
0 0 1 2 3 4
1 5 6 7 8 9
2 10 11 12 13 14
3 15 16 17 18 19
CSV
In [4]: df = pd.read_csv("C:\\Users\\husna\\.anaconda\\data1.csv",header=None)
df
Out[4]: 0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [5]: df = pd.read_table("C:\\Users\\husna\\.anaconda\\data1.csv",sep=',')
df
Out[5]: 1 2 3 4 hello
0 5 6 7 8 world
1 9 10 11 12 foo
Text File
In [3]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt")
df2
Out[3]: A B C Unnamed: 3
0 aaa -0.264438 -1.026059 -0.619500
1 bbb 0.927272 0.302904 -0.032399
2 ccc -0.264273 -0.386314 -0.217601
3 ddd -0.871858 -0.348382 1.100491
In [4]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')

df2
Out[4]: A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491
In [5]: df3 = pd.read_csv("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')

df3
Out[5]: A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491
JSON
In [6]: import json
obj = obj = """
{"name": "Wes",
"cities_lived": ["Akron", "Nashville", "New York", "San Francisco"],
"pet": null,
"siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars", "soccer"]},
{"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]
}
"""
result = json.loads(obj)
result
Out[6]: {'name': 'Wes',
'cities_lived': ['Akron', 'Nashville', 'New York', 'San Francisco'],
'pet': None,
'siblings': [{'name': 'Scott', 'age': 34, 'hobbies': ['guitars', 'soccer']},
{'name': 'Katie', 'age': 42, 'hobbies': ['diving', 'art']}]}
In [7]: asjson = json.dumps(result)

asjson
Out[7]: '{"name": "Wes", "cities_lived": ["Akron", "Nashville", "New York", "San Francisc
o"], "pet": null, "siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars",
"soccer"]}, {"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]}'
In [8]: siblings = pd.DataFrame(result["siblings"], columns=["name", "age"])

siblings
Out[8]: name age
0 Scott 34
1 Katie 42
Excel File
In [ ]: f = pd.ExcelFile('data.xls')
t = f.parse('sheet1') #Particular excel sheet file
In [ ]: df = pd.read_csv('file name(location)')
df
In [ ]: df.to_csv('file name(location)\\sampleout')
In [ ]: import sys
df.to_csv(sys.stdout,'|')

DSP_Lec6

Uploaded by

Copyright:

Available Formats

DSP_Lec6

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

DSP_Lec6

Uploaded by

Copyright:

Available Formats

Pandas with Python

In [1]: Series equivalent to List,

Cell In[1], line 1

Out[4]: array([ 4, 7, -3, 5], dtype=int64)

Out[5]: RangeIndex(start=0, stop=4, step=1)

In [7]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])

In [8]: obj2 = Series([4,7,-3,5],index=['d','c','b','a'])

In [10]: sdata = {'dac':30000000,'sy':500000,'cht':10000000,'raj':700000}

Out[11]: dac 30000000

In [13]: div = ['sy','khu','jes','raj']

In [25]: obj2 = Series([4,7,-3,5],index=['a','b','c','d'])

Out[27]: div year pop

In [28]: frame2 = DataFrame(data,columns=['div','year','pop','area'])

Out[28]: div year pop area

0 syl 2020 4 NaN

1 raj 2021 6 NaN

2 com 2022 12 NaN

3 kulna 2023 7 NaN

In [29]: frame2['area'] = [5,23,7,12]

In [32]: del frame2['area']

Out[32]: div year pop

In [36]: obj5 = obj.reindex([3,2,1,0,7])

In [51]: obj5 = obj.reindex([3,2,1,7,0],method = 'bfill')

In [55]: ob2 = ob.reindex([0,1,2,3,4,5,6],method = 'ffill')

In [56]: ob3 = ob.reindex([0,1,2,3,4,5,6],method = 'bfill')

In [58]: ob4 = ob.reindex([0,1,2,3,4,5,6],fill_value=10)

Delete NaN row

In [61]: ob4 = ob3.reindex(['a','b','c','d','e'])

In [63]: del ob4['e']

In [64]: del ob4['c']

In [71]: clean_ob = ob4.dropna()

In [73]: clean_ob = ob4.dropna(axis=0)

0 aaa -0.264438 -1.026059 -0.619500

1 bbb 0.927272 0.302904 -0.032399

2 ccc -0.264273 -0.386314 -0.217601

3 ddd -0.871858 -0.348382 1.100491

In [4]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')

aaa -0.264438 -1.026059 -0.619500

bbb 0.927272 0.302904 -0.032399

ccc -0.264273 -0.386314 -0.217601

ddd -0.871858 -0.348382 1.100491

In [5]: df3 = pd.read_csv("C:\\Users\\husna\\.anaconda\\Book2.txt",sep='\\s+')

aaa -0.264438 -1.026059 -0.619500

bbb 0.927272 0.302904 -0.032399

ccc -0.264273 -0.386314 -0.217601

ddd -0.871858 -0.348382 1.100491

In [7]: asjson = json.dumps(result)

In [8]: siblings = pd.DataFrame(result["siblings"], columns=["name", "age"])

Out[8]: name age

You might also like