DSP_Lec6
DSP_Lec6
DSP_Lec6
Series
In [2]: from pandas import Series,DataFrame
import pandas as pd
# No index is supplied, so pandas labels the four values 0..3 by default.
obj = Series(data=[4, 7, -3, 5])
obj
Out[2]: 0 4
1 7
2 -3
3 5
dtype: int64
In [4]: obj.values
In [5]: obj.index
In [6]: obj[1]
Out[6]: 7
Out[7]: a 4
b 7
c -3
d 5
dtype: int64
In [9]: obj[obj>0]
Out[9]: 0 4
1 7
3 5
dtype: int64
In [11]: obj3
Out[13]: sy 500000.0
khu NaN
jes NaN
raj 700000.0
dtype: float64
In [14]: obj4.isnull()
Out[14]: sy False
khu True
jes True
raj False
dtype: bool
In [17]: pd.isnull(obj4)
Out[17]: sy False
khu True
jes True
raj False
dtype: bool
In [19]: obj4.hasnans
Out[19]: True
In [24]: obj4.isnull().sum()
Out[24]: 2
Out[25]: a 4
b 7
c -3
d 5
dtype: int64
In [26]: obj2.index=('Samia','Kamal','Salman','Ragib')
obj2
Out[26]: Samia 4
Kamal 7
Salman -3
Ragib 5
dtype: int64
Introduction to DataFrame
In [27]: data = {'div':['syl','raj','com','kulna'],
'year':[2020,2021,2022,2023],
'pop':[4,6,12,7]}
frame = DataFrame(data)
frame
0 syl 2020 4
1 raj 2021 6
2 com 2022 12
3 kulna 2023 7
0 syl 2020 4 5
1 raj 2021 6 23
2 com 2022 12 7
3 kulna 2023 7 12
0 syl 2020 4
1 raj 2021 6
2 com 2022 12
3 kulna 2023 7
In [34]: obj
Out[34]: 0 4
1 7
2 -3
3 5
dtype: int64
Out[36]: 3 5.0
2 -3.0
1 7.0
0 4.0
7 NaN
dtype: float64
Fill NaN
In [37]: obj5 = obj.reindex([3,2,1,0,7],method='ffill')
obj5
Out[37]: 3 5
2 -3
1 7
0 4
7 5
dtype: int64
In [49]: obj5 = obj.reindex([3,2,1,0,7],fill_value=8)
obj5
Out[49]: 3 5
2 -3
1 7
0 4
7 8
dtype: int64
Out[51]: 3 5.0
2 -3.0
1 7.0
7 NaN
0 4.0
dtype: float64
In [52]: ob = Series([12,7,8,9],index=[0,2,4,6])
ob
Out[52]: 0 12
2 7
4 8
6 9
dtype: int64
Out[55]: 0 12
1 12
2 7
3 7
4 8
5 8
6 9
dtype: int64
Out[56]: 0 12
1 7
2 7
3 8
4 8
5 9
6 9
dtype: int64
Out[59]: a 4
b 7
c -3
d 5
dtype: int64
Out[61]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
dtype: float64
Out[63]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
Out[64]: a 4.0
b 7.0
d 5.0
dtype: float64
In [68]: ob4.drop('a')
Out[68]: b 7.0
d 5.0
dtype: float64
Delete all NaN entries with dropna()
In [70]: ob4 = ob3.reindex(['a','b','c','d','e','f'])
ob4
Out[70]: a 4.0
b 7.0
c -3.0
d 5.0
e NaN
f NaN
dtype: float64
Out[71]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
Out[73]: a 4.0
b 7.0
c -3.0
d 5.0
dtype: float64
Sorting index
In [74]: ob = Series([3,4,5,62,3],index=[5,3,1,0,2])
ob
Out[74]: 5 3
3 4
1 5
0 62
2 3
dtype: int64
In [75]: ob.sort_index()
Out[75]: 0 62
1 5
2 3
3 4
5 3
dtype: int64
In [76]: import numpy as np
# Lay the integers 0..19 out row by row in a 4x5 frame; with no labels given,
# both the rows and the columns get the default 0..n-1 integer labels.
ob = DataFrame(np.arange(20).reshape(4, 5))
ob
Out[76]: 0 1 2 3 4
0 0 1 2 3 4
1 5 6 7 8 9
2 10 11 12 13 14
3 15 16 17 18 19
CSV
In [4]: df = pd.read_csv("C:\\Users\\husna\\.anaconda\\data1.csv",header=None)  # header=None: no row is treated as a header, so the columns are labelled 0..4
# NOTE(review): the path above is absolute and user-specific; it will not resolve on another machine.
df
Out[4]: 0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [5]: df = pd.read_table("C:\\Users\\husna\\.anaconda\\data1.csv",sep=',')  # read_table splits on tabs by default, hence the explicit sep=','
# Unlike the read_csv call with header=None, the first row of the file (1,2,3,4,hello) is used as the column labels here.
df
Out[5]: 1 2 3 4 hello
0 5 6 7 8 world
1 9 10 11 12 foo
Text File
In [3]: df2 = pd.read_table("C:\\Users\\husna\\.anaconda\\Book2.txt")  # no sep given, so the default tab separator is used
# NOTE(review): the output shows an extra "Unnamed: 3" column - presumably the header line of Book2.txt ends with a trailing tab; confirm against the file.
df2
Out[3]: A B C Unnamed: 3
Out[4]: A B C
Out[5]: A B C
JSON
In [6]: import json
# Raw JSON text to parse. The original cell wrote `obj = obj = ...`, a duplicated
# assignment target; a single assignment binds the same string.
obj = """
{"name": "Wes",
"cities_lived": ["Akron", "Nashville", "New York", "San Francisco"],
"pet": null,
"siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars", "soccer"]},
{"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]
}
"""
# json.loads turns the JSON text into plain Python objects: objects become dicts,
# arrays become lists, and JSON null becomes None.
result = json.loads(obj)
result
Out[6]: {'name': 'Wes',
'cities_lived': ['Akron', 'Nashville', 'New York', 'San Francisco'],
'pet': None,
'siblings': [{'name': 'Scott', 'age': 34, 'hobbies': ['guitars', 'soccer']},
{'name': 'Katie', 'age': 42, 'hobbies': ['diving', 'art']}]}
Out[7]: '{"name": "Wes", "cities_lived": ["Akron", "Nashville", "New York", "San Francisco"], "pet": null, "siblings": [{"name": "Scott", "age": 34, "hobbies": ["guitars", "soccer"]}, {"name": "Katie", "age": 42, "hobbies": ["diving", "art"]}]}'
0 Scott 34
1 Katie 42
Excel File
In [ ]: f = pd.ExcelFile('data.xls')
t = f.parse('sheet1') #Particular excel sheet file
In [ ]: df = pd.read_csv('file name(location)')
df
In [ ]: df.to_csv('file name(location)\\sampleout')
In [ ]: import sys
# Write the frame to standard output using '|' as the field delimiter.
# The separator is passed by keyword: positional arguments after the target
# are deprecated in pandas 2.x and become keyword-only in pandas 3.0.
df.to_csv(sys.stdout, sep='|')