Automobile dataset: loading the data and inspecting missing values
import os

import numpy as np
import pandas as pd
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Print the full path of every file found under /kaggle/input.
# os.walk ignores listing errors by default, so when that directory does not
# exist (any environment other than Kaggle) the loop simply prints nothing.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Path of the raw automobile CSV. Kept as the original absolute path; change it
# here (one place) when running outside the environment it was written for.
RAW_CSV_PATH = "/content/auto1.csv"

# The file marks missing entries with "?" (the notebook replaces that marker
# with NaN further down). Parsing it as NA at load time lets pandas infer
# numeric dtypes straight away, so the dtype / non-null summary of `df` is the
# same on a fresh kernel as on a re-run.
df = pd.read_csv(RAW_CSV_PATH, na_values="?")
df.head()
[5 rows x 26 columns]
# Print the column names, non-null counts and dtypes of the loaded frame, to
# see which columns contain missing values and how each column was parsed.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 symboling 205 non-null int64
1 normalized-losses 164 non-null float64
2 make 205 non-null object
3 fuel-type 205 non-null object
4 aspiration 205 non-null object
5 num-of-doors 203 non-null object
6 body-style 205 non-null object
7 drive-wheels 205 non-null object
8 engine-location 205 non-null object
9 wheel-base 205 non-null float64
10 length 205 non-null float64
11 width 205 non-null float64
12 height 205 non-null float64
13 curb-weight 205 non-null int64
14 engine-type 205 non-null object
15 num-of-cylinders 205 non-null object
16 engine-size 205 non-null int64
17 fuel-system 205 non-null object
18 bore 201 non-null float64
19 stroke 201 non-null float64
20 compression-ratio 205 non-null float64
21 horsepower 203 non-null float64
22 peak-rpm 203 non-null float64
23 city-mpg 205 non-null int64
24 highway-mpg 205 non-null int64
25 price 201 non-null float64
dtypes: float64(11), int64(5), object(10)
memory usage: 41.8+ KB
# The cleaned data goes to its own file. The original wrote to "auto1.csv",
# which has the same name as the raw input; with the working directory set to
# the input's folder that silently overwrites the raw data and makes the
# notebook behave differently on a second run.
CLEAN_CSV_PATH = "auto1_clean.csv"

# Turn the "?" placeholder into a real missing value. Rebinding `df` (instead
# of inplace=True) gives the same resulting frame and keeps the cell re-runnable.
df = df.replace("?", np.nan)

# Round-trip through CSV so pandas re-infers dtypes for the columns that
# contained "?"; `df2` is the re-parsed frame.
df.to_csv(CLEAN_CSV_PATH, index=False)
df2 = pd.read_csv(CLEAN_CSV_PATH)
df2.head(10)

# Number of missing values per column.
df2.isnull().sum()
symboling 0
normalized-losses 41
make 0
fuel-type 0
aspiration 0
num-of-doors 2
body-style 0
drive-wheels 0
engine-location 0
wheel-base 0
length 0
width 0
height 0
curb-weight 0
engine-type 0
num-of-cylinders 0
engine-size 0
fuel-system 0
bore 4
stroke 4
compression-ratio 0
horsepower 2
peak-rpm 2
city-mpg 0
highway-mpg 0
price 4
dtype: int64
# Median of each numeric column that has missing values in `df2`.
# The statistic computed is the median, so the printed labels say "Median"
# (the original labels said "Average", which misdescribed the values).
# NOTE(review): presumably these values are used later to fill the missing
# entries; confirm against the cells that follow.
_median_by_column = (
    df2[["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm"]]
    .astype("float")
    .median()
)

# Keep the original variable names so later cells that reference them still work.
a_n = _median_by_column["normalized-losses"]
a_b = _median_by_column["bore"]
a_s = _median_by_column["stroke"]
a_h = _median_by_column["horsepower"]
a_p = _median_by_column["peak-rpm"]

for _column, _value in _median_by_column.items():
    print(f"Median of {_column}: {_value}")
highway-mpg price
27 30 8558.0
63 42 10795.0
[2 rows x 26 columns]