MACHINE LEARNING ASSIGNMENT PROGRAMS
ASSIGNMENT 1
DATA PREPARATION
# Assignment 1: data preparation and exploration on the Heart dataset.
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset from disk.
csv_file = 'Heart.csv'
heart = pd.read_csv(csv_file)

# First five rows as a quick summary.
print("The summary of data")
print(heart.head())
print("")

# Row and column counts.
print('\nShape of dataset\t:', heart.shape)
print("")

# Per-column null-value counts.
print("Null values present in each column")
print(heart.isnull().sum())
print("")

# Data type of every column.
print("Data types of the columns are")
print(heart.dtypes)
print("")

# Cells equal to zero (all other cells become NaN), then a per-column
# count of how many zeros each column contains.
zeros = heart[heart == 0]
print(zeros)
print(zeros.count())
print("")

# Average patient age.
print("Mean age of the patient is")
print(heart['Age'].mean())
print("")

# A reduced frame holding only the columns of interest.
subset = heart[['Age', 'Sex', 'ChestPain', 'RestBP', 'Chol']]
print(subset)
print("")

# 75% / 25% train-test split.
train, test = train_test_split(heart, random_state=0, test_size=0.25)
print(train.shape)
print(test.shape)
# Confusion-matrix demo on a synthetic label set.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
)

# Ground truth: 45 positives followed by 55 negatives (100 samples).
actual = list(np.ones(45)) + list(np.zeros(55))
# Predictions: 40 ones, then 52 zeros, then 8 ones (100 samples).
predicted = list(np.ones(40)) + list(np.zeros(52)) + list(np.ones(8))

# from_predictions draws the matrix onto a matplotlib figure; printing the
# returned display object only shows its repr, so render the figure instead.
ConfusionMatrixDisplay.from_predictions(actual, predicted)
plt.show()

# Per-class precision/recall/F1 and overall accuracy.
print(classification_report(actual, predicted))
print(accuracy_score(actual, predicted))
ASSIGNMENT 2
REGRESSION TECHNIQUE
# Assignment 2: simple linear regression on yearly December temperatures.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the CSV file.
filename = "temperatures.csv"
data = pd.read_csv(filename)

# Summary of the data.  NOTE: data.head is a method -- it must be called;
# the original printed the bound method object instead of the rows.
print(data.head())

# Input (feature) column and output (target) column.
x = data['YEAR']
y = data['DEC']

# Reshape the feature into the 2-D column vector sklearn expects.
# reshape(-1, 1) infers the row count, so this works for any dataset size
# (the original hard-coded 117 rows).
print(x.shape)
x = x.values.reshape(-1, 1)
print(x.shape)

# Fit the linear regression model.
regress = LinearRegression()  # creating the model object
regress.fit(x, y)  # training the model

# Predict the temperature for every year in the dataset.
predicted = regress.predict(x)
print(predicted)

# Evaluation metrics: mean absolute error, mean squared error, R-squared.
mae = np.mean(abs(y - predicted))
print("Mean Absolute Error is : ", mae)
mse = np.mean((y - predicted) ** 2)
print("Mean squared Error is : ", mse)
rsm = r2_score(y, predicted)
print("R-Square Metrics : ", rsm)

# Visualization: scatter plot with the fitted regression line.
sns.regplot(x='YEAR', y='DEC', data=data)
plt.show()
ASSIGNMENT 3
CLASSIFICATION TECHNIQUE
# Assignment 3: decision-tree classification on the admission dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import Binarizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    confusion_matrix,
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
)

# Read the CSV file.
filename = 'Admission_Predict.csv'
data = pd.read_csv(filename)

# Summary of the data.
print(data.head())
print(data.columns)

# Pre-processing: binarize the continuous admission probability into 0/1
# with 0.75 as the cut-off.  NOTE: the column name genuinely contains a
# trailing space in this dataset.
bi = Binarizer(threshold=0.75)
data['Chance of Admit '] = bi.fit_transform(data[['Chance of Admit ']])
print(data.head())

# Features (everything except the target) and binary target.
x = data.drop('Chance of Admit ', axis=1)
y = data['Chance of Admit ']
print(y.value_counts())

# Train/test split: 75% train, 25% test.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.25)

# Decision tree classifier.
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# Evaluation: confusion matrix, accuracy and per-class report.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix \n: ", cm)
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
# The original computed the accuracy but discarded the result -- print it.
print("Accuracy : ", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
plt.show()
ASSIGNMENT 4
CLUSTERING TECHNIQUE
# Assignment 4: clustering mall customers with K-Means and
# agglomerative (hierarchical) clustering.
import matplotlib.pyplot as plt
import pandas as pd
import scipy.cluster.hierarchy as sch
from sklearn.cluster import KMeans, AgglomerativeClustering

# Read the data file.
# NOTE(review): the filename has an .xls extension but is parsed with
# read_csv -- confirm the file really is comma-separated text.
filename = "Mall_Customers.xls"
data = pd.read_csv(filename)

# Cluster on the columns from index 3 onward
# (annual income and spending score).
x = data.iloc[:, 3:]
print(data.head())

# Elbow method: run K-Means for k = 1..10 and record the SSE (inertia).
# The loop body was unindented in the original (a syntax error).
# random_state=0 keeps the curve reproducible and consistent with the
# final model below.
sse = []
for i in range(1, 11):
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(x)
    sse.append(km.inertia_)

# Plot the elbow curve.
plt.title('Elbow Method')
plt.xlabel('Value of K')
plt.ylabel('SSE')
plt.plot(range(1, 11), sse)
plt.show()

# K-Means clustering with the k chosen from the elbow (k = 5).
km = KMeans(n_clusters=5, random_state=0)
labels = km.fit_predict(x)
cent = km.cluster_centers_  # cluster centroids

plt.title('Clustered Data Using K- Means Algorithm')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.scatter(x['Annual Income (k$)'], x['Spending Score (1-100)'], c=labels)
plt.scatter(cent[:, 0], cent[:, 1], s=100, color='k')
plt.show()

# Agglomerative clustering with the same number of clusters.
agl = AgglomerativeClustering(n_clusters=5)
alabels = agl.fit_predict(x)

plt.title('Clustered Data Using Agglomerative Clustering algorithm')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.scatter(x['Annual Income (k$)'], x['Spending Score (1-100)'], c=alabels)
# The K-Means centroids are overlaid for reference; agglomerative
# clustering itself does not produce centroids.
plt.scatter(cent[:, 0], cent[:, 1], s=100, color='k')
plt.show()

# Optional: dendrogram of the hierarchical clustering (Ward linkage).
dendrogram = sch.dendrogram(sch.linkage(x, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()
0 Comments