import numpy as np
import numpy as np
import os
import numpy as np
import pandas as pd
# Load the two CSV tables from the current working directory.
test = pd.read_csv("Testing.csv")
train = pd.read_csv("Training.csv")
train.sample(n=1)

# NOTE(review): every statement below refers to `data`, which was never
# assigned, so the script stopped with NameError. It is bound to the training
# frame here -- presumably what was intended; confirm against the notebook
# this script was exported from.
data = train

# Quick look at the table: random rows, then the first and last five rows.
data.sample(10)
data.head(5)
data.tail(5)

# Column labels, (rows, columns) shape, and per-column dtype / non-null summary.
data.columns
data.shape
data.info()

# Plot three selected columns, first on shared axes and then one subplot each.
data1 = data.loc[:, ["itching", "skin_rash", "coma"]]
data1.plot()
data1.plot(subplots=True)

# Number of missing values in each column.
data.isnull().sum()
# Predictors are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# NOTE(review): X_train, y_train, X_test, y_test, RandomForestClassifier and
# metrics are not defined in this part of the file. Presumably a train/test
# split of X and y and the scikit-learn imports are missing -- confirm and
# restore them before running.
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)

# Score the fitted model on the held-out rows.
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
# Label the fitted model's importances with the predictor column names (every
# column of `data` except the last), sort them highest first and keep at most
# the top 50.
feature_imp = (
    pd.Series(clf.feature_importances_, index=list(data.columns[:-1]))
    .sort_values(ascending=False)
    .head(50)
)
# Same series in reverse order (lowest of the kept importances first).
feature_imp[::-1]
# Importances of the second model, labelled with the columns of X_reduced and
# sorted highest first.
# NOTE(review): clf2 and X_reduced are not defined in this part of the file;
# presumably a second classifier fitted on a reduced feature set -- confirm.
feature_imp2 = pd.Series(
    clf2.feature_importances_,
    index=list(X_reduced.columns),
).sort_values(ascending=False)
# Same series in reverse order (lowest importance first).
feature_imp2[::-1]