# First code, labelled "SVM" (note: the script below actually fits a LinearRegression model)
# Simple linear regression: fit on a train split, report errors on the test
# split, then plot the data together with the predicted line.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# NOTE(review): 'your file' is a placeholder path -- point it at the real CSV.
# The plot at the bottom expects columns named 'Hours' and 'Scores'.
dataset = pd.read_csv('your file')

# Features are every column except the last one; the target is the last
# column.  (The target used to be picked with the fixed position 1, which is
# only the same column when the file has exactly two columns; with more
# columns the target would also have been used as a feature.)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 40% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.4)
print(x_train)

regressor = LinearRegression()
regressor.fit(x_train, y_train)
print("Intercept : ", regressor.intercept_)
print("Coef : ", regressor.coef_)

# Compare predictions against the held-out targets side by side.
y_pred = regressor.predict(x_test)
df = pd.DataFrame({'Actual': y_test, 'Predict': y_pred})
print(df)

# Compute the MSE once and reuse it for the RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print("Mean Absoulte Error : ", metrics.mean_absolute_error(y_test, y_pred))
print("Mean squared Error : ", mse)
print("Root Mean squared Error : ", np.sqrt(mse))

# Scatter of the raw data with the predictions on the test split drawn on top.
dataset.plot(x='Hours', y='Scores', style='o')
plt.title('Hours vs precentage')
plt.xlabel('Hours Studied')
plt.ylabel('Precentage Score')
plt.plot(x_test, y_pred, color='blue', linewidth=3)
plt.show()
# And this one is for SMOTE
# SMOTE demo: split the data, then oversample the training split only and
# print the count of labels 1 and 0 before and after resampling.
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# NOTE(review): the path is an empty placeholder -- point it at the real CSV.
data = pd.read_csv("")
print(data.shape)

# All columns but the last are features; the last column is the label.
x, y = data.iloc[:, :-1], data.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

print('#############################')
for label in (1, 0):
    print(f"Before overSampling # {label} = ", int((y_train == label).sum()))

# Resample the training split; the test split is left untouched.
sm = SMOTE()
x_res, y_res = sm.fit_resample(x_train, y_train)

print('#############################')
for label in (1, 0):
    print(f"After overSampling # {label} = ", int((y_res == label).sum()))
# And this one is for Decision Tree
################# To transform data from male and female to 0 and 1
# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# data.sex=label_encoder.fit_transform(data.sex)
# Decision-tree classification on pima-indians-diabetes.csv: fit a default
# tree, report its accuracy, save/plot the tree, then compare split criteria.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

pima = pd.read_csv("pima-indians-diabetes.csv")

# The 'class' column is the label; every other column is a feature.
x = pima.drop("class", axis=1)
y = pima['class']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Baseline tree with default settings.
dtree = DecisionTreeClassifier()
clf = dtree.fit(x_train, y_train)
pred = clf.predict(x_test)
# The score used to be a bare expression whose value was discarded when run
# as a script; print it, with arguments in (y_true, y_pred) order.
print("Accuracy : ", accuracy_score(y_test, pred))

# Draw the fitted baseline tree and save it next to the script.
plt.figure(figsize=(12, 12))
tree.plot_tree(clf, fontsize=6)
plt.savefig('tree.png', dpi=100)
plt.show()

# Refit on the same split with each split criterion and compare accuracy.
for criterion in ('gini', 'entropy'):
    dtree = DecisionTreeClassifier(criterion=criterion)
    dtree.fit(x_train, y_train)
    pred = dtree.predict(x_test)
    print(f"Criterion = {criterion} : ", accuracy_score(y_test, pred))
# First code, labelled "SVM" (note: the script below actually fits a LinearRegression model)
# Simple linear regression: fit on a train split, report errors on the test
# split, then plot the data together with the predicted line.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# NOTE(review): 'your file' is a placeholder path -- point it at the real CSV.
# The plot at the bottom expects columns named 'Hours' and 'Scores'.
dataset = pd.read_csv('your file')

# Features are every column except the last one; the target is the last
# column.  (The target used to be picked with the fixed position 1, which is
# only the same column when the file has exactly two columns; with more
# columns the target would also have been used as a feature.)
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 40% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.4)
print(x_train)

regressor = LinearRegression()
regressor.fit(x_train, y_train)
print("Intercept : ", regressor.intercept_)
print("Coef : ", regressor.coef_)

# Compare predictions against the held-out targets side by side.
y_pred = regressor.predict(x_test)
df = pd.DataFrame({'Actual': y_test, 'Predict': y_pred})
print(df)

# Compute the MSE once and reuse it for the RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print("Mean Absoulte Error : ", metrics.mean_absolute_error(y_test, y_pred))
print("Mean squared Error : ", mse)
print("Root Mean squared Error : ", np.sqrt(mse))

# Scatter of the raw data with the predictions on the test split drawn on top.
dataset.plot(x='Hours', y='Scores', style='o')
plt.title('Hours vs precentage')
plt.xlabel('Hours Studied')
# A stray `enter code here` paste artifact after this call was removed; it
# made the line a syntax error.
plt.ylabel('Precentage Score')
plt.plot(x_test, y_pred, color='blue', linewidth=3)
plt.show()
# And this one is for SMOTE
# SMOTE demo: split the data, then oversample the training split only and
# print the count of labels 1 and 0 before and after resampling.
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# NOTE(review): the path is an empty placeholder -- point it at the real CSV.
data = pd.read_csv("")
print(data.shape)

# All columns but the last are features; the last column is the label.
x, y = data.iloc[:, :-1], data.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

print('#############################')
for label in (1, 0):
    print(f"Before overSampling # {label} = ", int((y_train == label).sum()))

# Resample the training split; the test split is left untouched.
sm = SMOTE()
x_res, y_res = sm.fit_resample(x_train, y_train)

print('#############################')
for label in (1, 0):
    print(f"After overSampling # {label} = ", int((y_res == label).sum()))
# And this one is for Decision Tree
################# To transform data from male and female to 0 and 1
# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# data.sex=label_encoder.fit_transform(data.sex)
# Decision-tree classification on pima-indians-diabetes.csv: fit a default
# tree, report its accuracy, save/plot the tree, then compare split criteria.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

pima = pd.read_csv("pima-indians-diabetes.csv")

# The 'class' column is the label; every other column is a feature.
x = pima.drop("class", axis=1)
y = pima['class']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Baseline tree with default settings.
dtree = DecisionTreeClassifier()
clf = dtree.fit(x_train, y_train)
pred = clf.predict(x_test)
# The score used to be a bare expression whose value was discarded when run
# as a script; print it, with arguments in (y_true, y_pred) order.
print("Accuracy : ", accuracy_score(y_test, pred))

# Draw the fitted baseline tree and save it next to the script.
plt.figure(figsize=(12, 12))
tree.plot_tree(clf, fontsize=6)
plt.savefig('tree.png', dpi=100)
plt.show()

# Refit on the same split with each split criterion and compare accuracy.
for criterion in ('gini', 'entropy'):
    dtree = DecisionTreeClassifier(criterion=criterion)
    dtree.fit(x_train, y_train)
    pred = dtree.predict(x_test)
    print(f"Criterion = {criterion} : ", accuracy_score(y_test, pred))