Untitled 21

import pandas as pd
import numpy as np
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np # linear algebra

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter)
# will list all files under the input directory.
import os

# Walk the Kaggle input directory tree and print the full path of every file
# found. os.walk yields (directory, subdirectories, filenames) triples; the
# subdirectory list is not needed here, hence the `_` placeholder.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Load the automobile dataset from /content/auto1.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/auto1.csv")

# Preview the first five rows (five is the default for head()).
df.head(n=5)
symboling normalized-losses make fuel-type aspiration \

0 3 NaN alfa-romero gas std
3 2 164.0 audi gas std
num-of-doors body-style drive-wheels engine-location wheel-

base ... \
0 two convertible rwd front
88.6 ...
88.6 ...
2 two hatchback rwd front
94.5 ...
3 four sedan fwd front
99.8 ...
4 four sedan 4wd front
99.4 ...
engine-size fuel-system bore stroke compression-ratio horsepower

\
0 130 mpfi 3.47 2.68 9.0 111.0
1 130 mpfi 3.47 2.68 9.0 111.0
2 152 mpfi 2.68 3.47 9.0 154.0

3 109 mpfi 3.19 3.40 10.0 102.0
4 136 mpfi 3.19 3.40 8.0 115.0
peak-rpm city-mpg highway-mpg price

0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0
[5 rows x 26 columns]
# Print a structural summary of the DataFrame: index range, column names,
# non-null counts per column, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 symboling 205 non-null int64
1 normalized-losses 164 non-null float64
2 make 205 non-null object
3 fuel-type 205 non-null object
4 aspiration 205 non-null object
5 num-of-doors 203 non-null object
6 body-style 205 non-null object
7 drive-wheels 205 non-null object
8 engine-location 205 non-null object
9 wheel-base 205 non-null float64
10 length 205 non-null float64
11 width 205 non-null float64
12 height 205 non-null float64
13 curb-weight 205 non-null int64
14 engine-type 205 non-null object
15 num-of-cylinders 205 non-null object
16 engine-size 205 non-null int64
17 fuel-system 205 non-null object
18 bore 201 non-null float64
19 stroke 201 non-null float64
20 compression-ratio 205 non-null float64
21 horsepower 203 non-null float64
22 peak-rpm 203 non-null float64
23 city-mpg 205 non-null int64
24 highway-mpg 205 non-null int64
25 price 201 non-null float64
dtypes: float64(11), int64(5), object(10)
memory usage: 41.8+ KB
# Turn the "?" placeholder into a real missing value (NaN) in every column.
df.replace(to_replace="?", value=np.nan, inplace=True)

# Write the cleaned frame to auto1.csv (without the index column) and read it
# back into a second DataFrame.
# NOTE(review): the round trip presumably exists so pandas re-infers column
# dtypes after the replacement -- confirm that is the intent.
df.to_csv(path_or_buf="auto1.csv", index=False)
df2 = pd.read_csv(filepath_or_buffer="auto1.csv")

# Preview the first ten rows of the reloaded data.
df2.head(n=10)
symboling normalized-losses make fuel-type aspiration \

5 2 NaN audi gas std
7 1 NaN audi gas std
8 1 158.0 audi gas turbo
9 0 NaN audi gas turbo
num-of-doors body-style drive-wheels engine-location wheel-

base ... \
88.6 ...
88.6 ...
2 two hatchback rwd front
94.5 ...
99.8 ...
4 four sedan 4wd front
99.4 ...
5 two sedan fwd front
99.8 ...
105.8 ...
7 four wagon fwd front
105.8 ...
105.8 ...
9 two hatchback 4wd front
99.5 ...
engine-size fuel-system bore stroke compression-ratio horsepower

\
0 130 mpfi 3.47 2.68 9.0 111.0
1 130 mpfi 3.47 2.68 9.0 111.0
2 152 mpfi 2.68 3.47 9.0 154.0

3 109 mpfi 3.19 3.40 10.0 102.0
4 136 mpfi 3.19 3.40 8.0 115.0
5 136 mpfi 3.19 3.40 8.5 110.0
6 136 mpfi 3.19 3.40 8.5 110.0
7 136 mpfi 3.19 3.40 8.5 110.0
8 131 mpfi 3.13 3.40 8.3 140.0
9 131 mpfi 3.13 3.40 7.0 160.0
peak-rpm city-mpg highway-mpg price

0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0
5 5500.0 19 25 15250.0
6 5500.0 19 25 17710.0
7 5500.0 19 25 18920.0
8 5500.0 17 20 23875.0
9 5500.0 16 22 NaN
# Count the missing (NaN) entries in each column.
df2.isna().sum()
symboling 0
normalized-losses 41
make 0
fuel-type 0
aspiration 0
num-of-doors 2
body-style 0
drive-wheels 0
engine-location 0
wheel-base 0
length 0
width 0
height 0
curb-weight 0
engine-type 0
num-of-cylinders 0
engine-size 0
fuel-system 0
bore 4
stroke 4
compression-ratio 0
horsepower 2
peak-rpm 2
city-mpg 0
highway-mpg 0
price 4
dtype: int64
# Compute one replacement value per numeric column that contains NaNs.
# NOTE: the printed labels say "Average", but every value below is the
# column *median* (Series.median), not the arithmetic mean.
a_n = df2["normalized-losses"].astype(float).median()
a_b = df2["bore"].astype(float).median()
a_s = df2["stroke"].astype(float).median()
a_h = df2["horsepower"].astype(float).median()
a_p = df2["peak-rpm"].astype(float).median()

# Report the values in the same order they were computed.
print("Average of normalized-losses: ", a_n)
print("Average of bore: ", a_b)
print("Average of stroke:", a_s)
print("Average horsepower:", a_h)
print("Average peak rpm:", a_p)
Average of normalized-losses: 115.0

Average of bore: 3.31
Average of stroke: 3.29
Average horsepower: 95.0
Average peak rpm: 5200.0
# Fill the missing numeric values with the per-column values a_n, a_s, a_b,
# a_h and a_p computed earlier.
#
# The filled column is assigned back to df2 instead of calling
# df2[col].replace(np.nan, value, inplace=True): that form is a chained
# in-place operation on a column selection, which emits a FutureWarning in
# pandas 2.x and, under Copy-on-Write (pandas 3), modifies only a temporary
# Series and leaves df2 unchanged.
df2["normalized-losses"] = df2["normalized-losses"].fillna(a_n)
df2["stroke"] = df2["stroke"].fillna(a_s)
df2["bore"] = df2["bore"].fillna(a_b)
df2["horsepower"] = df2["horsepower"].fillna(a_h)
df2["peak-rpm"] = df2["peak-rpm"].fillna(a_p)

# Show the rows whose num-of-doors value is missing; this column is not
# filled by the statements above.
df2[df2['num-of-doors'].isna()]
symboling normalized-losses make fuel-type aspiration num-of-

doors \
27 1 148.0 dodge gas turbo
NaN
63 0 115.0 mazda diesel std
NaN
body-style drive-wheels engine-location wheel-base ... engine-

size \
27 sedan fwd front 93.7 ...
98
63 sedan fwd front 98.8 ...
122
fuel-system bore stroke compression-ratio horsepower peak-rpm
city-mpg \
27 mpfi 3.03 3.39 7.6 102.0 5500.0
24
63 idi 3.39 3.39 22.7 64.0 4650.0
36
highway-mpg price
27 30 8558.0
63 42 10795.0

Untitled 21

Uploaded by

Copyright:

Available Formats

Untitled 21

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Untitled 21

Uploaded by

Copyright:

Available Formats

import pandas as pd

import numpy as np # linear algebra

# Input data files are available in the read-only "../input/"

symboling normalized-losses make fuel-type aspiration \

num-of-doors body-style drive-wheels engine-location wheel-

engine-size fuel-system bore stroke compression-ratio horsepower

1 130 mpfi 3.47 2.68 9.0 111.0

2 152 mpfi 2.68 3.47 9.0 154.0

4 136 mpfi 3.19 3.40 8.0 115.0

peak-rpm city-mpg highway-mpg price

symboling normalized-losses make fuel-type aspiration \

num-of-doors body-style drive-wheels engine-location wheel-

engine-size fuel-system bore stroke compression-ratio horsepower

1 130 mpfi 3.47 2.68 9.0 111.0

2 152 mpfi 2.68 3.47 9.0 154.0

4 136 mpfi 3.19 3.40 8.0 115.0

5 136 mpfi 3.19 3.40 8.5 110.0

6 136 mpfi 3.19 3.40 8.5 110.0

7 136 mpfi 3.19 3.40 8.5 110.0

8 131 mpfi 3.13 3.40 8.3 140.0

9 131 mpfi 3.13 3.40 7.0 160.0

peak-rpm city-mpg highway-mpg price

[10 rows x 26 columns]

Average of normalized-losses: 115.0

df2["normalized-losses"].replace(np.nan, a_n, inplace=True)

symboling normalized-losses make fuel-type aspiration num-of-

body-style drive-wheels engine-location wheel-base ... engine-

You might also like