Untitled 21

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 6

import pandas as pd

import numpy as np
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

import numpy as np # linear algebra


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter)
# will list all files under the input directory.

import os
# Walk the input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Read the CSV file into a DataFrame and preview its first rows.
df = pd.read_csv("/content/auto1.csv")

df.head()

symboling normalized-losses make fuel-type aspiration \


0 3 NaN alfa-romero gas std
1 3 NaN alfa-romero gas std
2 1 NaN alfa-romero gas std
3 2 164.0 audi gas std
4 2 164.0 audi gas std

num-of-doors body-style drive-wheels engine-location wheel-


base ... \
0 two convertible rwd front
88.6 ...
1 two convertible rwd front
88.6 ...
2 two hatchback rwd front
94.5 ...
3 four sedan fwd front
99.8 ...
4 four sedan 4wd front
99.4 ...

engine-size fuel-system bore stroke compression-ratio horsepower


\
0 130 mpfi 3.47 2.68 9.0 111.0

1 130 mpfi 3.47 2.68 9.0 111.0

2 152 mpfi 2.68 3.47 9.0 154.0


3 109 mpfi 3.19 3.40 10.0 102.0

4 136 mpfi 3.19 3.40 8.0 115.0

peak-rpm city-mpg highway-mpg price


0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0

[5 rows x 26 columns]

# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 symboling 205 non-null int64
1 normalized-losses 164 non-null float64
2 make 205 non-null object
3 fuel-type 205 non-null object
4 aspiration 205 non-null object
5 num-of-doors 203 non-null object
6 body-style 205 non-null object
7 drive-wheels 205 non-null object
8 engine-location 205 non-null object
9 wheel-base 205 non-null float64
10 length 205 non-null float64
11 width 205 non-null float64
12 height 205 non-null float64
13 curb-weight 205 non-null int64
14 engine-type 205 non-null object
15 num-of-cylinders 205 non-null object
16 engine-size 205 non-null int64
17 fuel-system 205 non-null object
18 bore 201 non-null float64
19 stroke 201 non-null float64
20 compression-ratio 205 non-null float64
21 horsepower 203 non-null float64
22 peak-rpm 203 non-null float64
23 city-mpg 205 non-null int64
24 highway-mpg 205 non-null int64
25 price 201 non-null float64
dtypes: float64(11), int64(5), object(10)
memory usage: 41.8+ KB
# Turn every "?" placeholder into NaN (in place), then write the frame to
# "auto1.csv" without the index column.
df.replace(to_replace="?", value=np.nan, inplace=True)
df.to_csv("auto1.csv", index=False)

# Reload the file just written into a second frame.
# NOTE(review): presumably done so pandas re-infers column dtypes now that
# the "?" markers are gone -- confirm.
df2 = pd.read_csv("auto1.csv")

df2.head(10)

symboling normalized-losses make fuel-type aspiration \


0 3 NaN alfa-romero gas std
1 3 NaN alfa-romero gas std
2 1 NaN alfa-romero gas std
3 2 164.0 audi gas std
4 2 164.0 audi gas std
5 2 NaN audi gas std
6 1 158.0 audi gas std
7 1 NaN audi gas std
8 1 158.0 audi gas turbo
9 0 NaN audi gas turbo

num-of-doors body-style drive-wheels engine-location wheel-


base ... \
0 two convertible rwd front
88.6 ...
1 two convertible rwd front
88.6 ...
2 two hatchback rwd front
94.5 ...
3 four sedan fwd front
99.8 ...
4 four sedan 4wd front
99.4 ...
5 two sedan fwd front
99.8 ...
6 four sedan fwd front
105.8 ...
7 four wagon fwd front
105.8 ...
8 four sedan fwd front
105.8 ...
9 two hatchback 4wd front
99.5 ...

engine-size fuel-system bore stroke compression-ratio horsepower


\
0 130 mpfi 3.47 2.68 9.0 111.0

1 130 mpfi 3.47 2.68 9.0 111.0

2 152 mpfi 2.68 3.47 9.0 154.0


3 109 mpfi 3.19 3.40 10.0 102.0

4 136 mpfi 3.19 3.40 8.0 115.0

5 136 mpfi 3.19 3.40 8.5 110.0

6 136 mpfi 3.19 3.40 8.5 110.0

7 136 mpfi 3.19 3.40 8.5 110.0

8 131 mpfi 3.13 3.40 8.3 140.0

9 131 mpfi 3.13 3.40 7.0 160.0

peak-rpm city-mpg highway-mpg price


0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0
5 5500.0 19 25 15250.0
6 5500.0 19 25 17710.0
7 5500.0 19 25 18920.0
8 5500.0 17 20 23875.0
9 5500.0 16 22 NaN

[10 rows x 26 columns]

# Count the missing (NaN) entries in each column of df2.
df2.isnull().sum()

symboling 0
normalized-losses 41
make 0
fuel-type 0
aspiration 0
num-of-doors 2
body-style 0
drive-wheels 0
engine-location 0
wheel-base 0
length 0
width 0
height 0
curb-weight 0
engine-type 0
num-of-cylinders 0
engine-size 0
fuel-system 0
bore 4
stroke 4
compression-ratio 0
horsepower 2
peak-rpm 2
city-mpg 0
highway-mpg 0
price 4
dtype: int64

# Compute the median of each numeric column that contains missing values.
# NOTE: the printed labels say "Average", but the statistic is the median.
a_n = df2["normalized-losses"].astype(float).median()
print("Average of normalized-losses: ", a_n)

a_b = df2["bore"].astype(float).median()
print("Average of bore: ", a_b)

a_s = df2["stroke"].astype(float).median()
print("Average of stroke:", a_s)

a_h = df2["horsepower"].astype(float).median()
print("Average horsepower:", a_h)

a_p = df2["peak-rpm"].astype(float).median()
print("Average peak rpm:", a_p)

Average of normalized-losses: 115.0


Average of bore: 3.31
Average of stroke: 3.29
Average horsepower: 95.0
Average peak rpm: 5200.0

# Fill the missing values of each numeric column with the median computed
# for it (a_n, a_s, a_b, a_h, a_p).
#
# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on `df2[col]`: an in-place method on a column
# selection is chained assignment, which raises a FutureWarning on pandas 2.x
# and does not modify `df2` at all once Copy-on-Write is enabled.
df2["normalized-losses"] = df2["normalized-losses"].fillna(a_n)
df2["stroke"] = df2["stroke"].fillna(a_s)
df2["bore"] = df2["bore"].fillna(a_b)
df2["horsepower"] = df2["horsepower"].fillna(a_h)
df2["peak-rpm"] = df2["peak-rpm"].fillna(a_p)

# Show the rows whose 'num-of-doors' value is still missing.
df2[df2['num-of-doors'].isna()]

symboling normalized-losses make fuel-type aspiration num-of-


doors \
27 1 148.0 dodge gas turbo
NaN
63 0 115.0 mazda diesel std
NaN

body-style drive-wheels engine-location wheel-base ... engine-


size \
27 sedan fwd front 93.7 ...
98
63 sedan fwd front 98.8 ...
122
fuel-system bore stroke compression-ratio horsepower peak-rpm
city-mpg \
27 mpfi 3.03 3.39 7.6 102.0 5500.0
24
63 idi 3.39 3.39 22.7 64.0 4650.0
36

highway-mpg price
27 30 8558.0
63 42 10795.0

[2 rows x 26 columns]

You might also like