exp10
exp10
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
# Load the housing dataset. The path is relative, so Housing.csv must be in
# the notebook's working directory.
housing = pd.read_csv("Housing.csv")

# A notebook cell only renders its final bare expression, so the earlier
# inspection results are shown explicitly instead of being silently discarded.
# `display` is the built-in provided by IPython/Jupyter kernels.
display(housing.head())  # first rows of the dataset
print(housing.shape)     # (rows, columns)
housing.info()           # prints its report itself
housing.describe()       # last expression -> rendered as the cell output
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
price 545 non-null int64
area 545 non-null int64
bedrooms 545 non-null int64
bathrooms 545 non-null int64
stories 545 non-null int64
mainroad 545 non-null object
guestroom 545 non-null object
basement 545 non-null object
hotwaterheating 545 non-null object
airconditioning 545 non-null object
parking 545 non-null int64
prefarea 545 non-null object
furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
Out[1]:
price area bedrooms bathrooms stories parking
In [3]: sns.pairplot(housing)  # seaborn pair plot built from the `housing` frame
# NOTE(review): the imports binding `sns` and `plt` are not visible in this excerpt — confirm they run before this cell.
plt.show()  # render the figure
Out[6]:
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea furnishingstatus
In [8]: # Get the dummy variables for the feature 'furnishingstatus' and store it in a new variable - 'status'
# One indicator column per furnishing category. dtype=int pins the 0/1
# encoding: pandas >= 2.0 returns booleans by default, which would not match
# the 0/1 table recorded as this cell's output.
status = pd.get_dummies(housing['furnishingstatus'], dtype=int)
# Check what the dataset 'status' looks like
status.head()
Out[8]:
furnished semi-furnished unfurnished
0 1 0 0
1 1 0 0
2 0 1 0
3 1 0 0
4 1 0 0
In [9]: # Let's drop the first column from status df using 'drop_first = True'
# drop_first=True removes the first category's column so the remaining
# indicators are not redundant. dtype=int keeps them as 0/1 integers on every
# pandas version (pandas >= 2.0 would otherwise produce booleans).
status = pd.get_dummies(housing['furnishingstatus'], drop_first=True, dtype=int)
# Add the results to the original housing dataframe.
# Any indicator columns left over from a previous run of this cell are dropped
# first, so re-running the cell does not append duplicate columns.
housing = pd.concat(
    [housing.drop(columns=status.columns, errors='ignore'), status],
    axis=1,
)
# Now let's see the head of our dataframe.
housing.head()
Out[9]:
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea semi-furnished unfurnished
0 13300000 7420 4 2 3 1 0 0 0 1 2 1 0 0
1 12250000 8960 4 4 4 1 0 0 0 1 3 0 0 0
2 12250000 9960 3 2 2 1 0 1 0 0 2 1 1 0
3 12215000 7500 4 2 2 1 0 1 0 1 3 1 0 0
4 11410000 7420 4 1 2 1 1 1 0 1 2 0 0 0
# Annotated heatmap of the pairwise correlation coefficients in df_train,
# used to see which variables are highly correlated.
fig, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(df_train.corr(), annot=True, cmap="YlGnBu", ax=ax)
plt.show()
In [19]: plt.figure(figsize=[6,6])  # start a new square figure for the scatter below
# NOTE(review): `df_train` is not defined in the visible excerpt — confirm the cell that creates it runs first.
plt.scatter(df_train.area, df_train.price)  # area on the x-axis, price on the y-axis
plt.show()  # render the figure
# Scatter the training data and overlay the fitted regression line in red.
# The intercept and slope are the hard-coded values from the original cell.
x_values = X_train_lm.iloc[:, 1]
intercept, slope = 0.127, 0.462
plt.scatter(x_values, y_train)
plt.plot(x_values, intercept + slope * x_values, 'r')
plt.show()
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Print the text summary of the model object `lr_1`.
# NOTE(review): `lr_1` is not defined in the visible excerpt — confirm it is fitted in an earlier cell.
print(lr_1.summary())
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Out[51]:
Features VIF
1 bedrooms 7.33
4 mainroad 6.02
0 area 4.67
3 stories 2.70
11 semi-furnished 2.19
9 parking 2.12
6 basement 2.02
12 unfurnished 1.82
8 airconditioning 1.77
2 bathrooms 1.67
10 prefarea 1.51
5 guestroom 1.47
7 hotwaterheating 1.14
In [ ]:
In [ ]: