MACHINE LEARNING





MACHINE LEARNING ASSIGNMENT PROGRAMS

ASSIGNMENT 1

DATA PREPARATION


import pandas as pd

from sklearn.model_selection import train_test_split


# to read the csv file

filename = 'Heart.csv'

data = pd.read_csv(filename)


# Printing a summary of the dataset, i.e. the first five rows

print("The summary of data")

print(data.head())

print("")


# Finding the shape of the dataset (no. of rows and columns)

print('\nShape of dataset\t:', data.shape)

print("")


# Finding the null values in the dataset

print("Null values present in each column")

print(data.isnull().sum())

print("")


# Finding the datatypes

print("Data types of the columns are")

print(data.dtypes)

print("")


# Finding entries equal to zero (non-zero entries appear as NaN below)

zero = data[data == 0]

print(zero)

print(zero.count())

print("")


# finding the mean age of patients

print("Mean age of the patients is")

print(data['Age'].mean())

print("")


# Extracting specific columns

newdata = data[['Age', 'Sex', 'ChestPain', 'RestBP', 'Chol']]

print(newdata)

print("")


# dividing data in training and testing

train, test = train_test_split(data, random_state=0, test_size=0.25)

print(train.shape)

print(test.shape)
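# Optional sketch (not part of the original assignment): passing stratify keeps the
# proportions of a categorical column the same in both splits; 'Sex' is used here only
# because it is known to exist in this dataset, but the target column could be used instead.
train_s, test_s = train_test_split(data, random_state=0, test_size=0.25, stratify=data['Sex'])
print(train_s['Sex'].value_counts())
print(test_s['Sex'].value_counts())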


# confusion matrix

import numpy as np


# creating a synthetic set of actual and predicted labels

actual = list(np.ones(45)) + list(np.zeros(55))

# print(np.array(actual))


predicted = list(np.ones(40)) + list(np.zeros(52)) + list(np.ones(8))

# print(np.array(predicted))


from sklearn.metrics import ConfusionMatrixDisplay

import matplotlib.pyplot as plt

ConfusionMatrixDisplay.from_predictions(actual, predicted)

plt.show()


from sklearn.metrics import classification_report

print(classification_report(actual, predicted))


from sklearn.metrics import accuracy_score

print(accuracy_score(actual, predicted))
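# Optional sketch (a manual cross-check, not part of the original assignment): the
# headline numbers above follow directly from the four confusion-matrix counts.
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(actual, predicted).ravel()
print("TP:", tp, "FP:", fp, "FN:", fn, "TN:", tn)
print("Accuracy :", (tp + tn) / (tp + tn + fp + fn))
print("Precision:", tp / (tp + fp))
print("Recall   :", tp / (tp + fn))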

ASSIGNMENT 2

REGRESSION TECHNIQUE

import pandas as pd

import numpy as np

from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt

# reading csv file

filename = "temperatures.csv"

data = pd.read_csv(filename)


# summary of data

print(data.head())


#input data

x = data['YEAR']

#output data

y= data['DEC']


# reshaping the data for regression

print(x.shape)

x = x.values.reshape(-1, 1)   # -1 lets NumPy infer the number of rows

print(x.shape)


# Linear regression

regress = LinearRegression()   # creating the regression model object

regress.fit(x, y)              # training the model

# predicting the temperature

predicted = regress.predict(x)

print(predicted)
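# Optional sketch (not in the original listing): the fitted line itself can be
# inspected through the learned slope and intercept.
print("Slope     :", regress.coef_)
print("Intercept :", regress.intercept_)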


# Mean Absolute Error, Mean Squared Error, R-squared metrics

mae = np.mean(abs(y-predicted))

print("Mean Absolute Error is : ",mae)

mse = np.mean((y - predicted)**2)

print("Mean squared Error is : ", mse)

from sklearn.metrics import r2_score

rsm = r2_score(y, predicted)

print("R-Square Metrics : ", rsm)


# visualization

import seaborn as sns

sns.regplot(x='YEAR', y='DEC', data=data)

plt.show()
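# Optional usage sketch (not part of the original assignment): the trained model can
# also be used to extrapolate the December temperature for a year outside the data.
future_year = np.array([[2025]])
print("Predicted DEC temperature for 2025:", regress.predict(future_year))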

ASSIGNMENT 3

CLASSIFICATION TECHNIQUE

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# reading csv file

filename = 'Admission_Predict.csv'

data = pd.read_csv(filename)


# summary

print(data.head())

print(data.columns)

# converting the continuous 'Chance of Admit' values to 0 and 1, i.e. data pre-processing

from sklearn.preprocessing import Binarizer

bi = Binarizer(threshold=0.75)

data['Chance of Admit '] = bi.fit_transform(data[['Chance of Admit ']])

print(data.head())
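# Equivalent check (a sketch, not in the original listing): the same 0/1 labels can be
# produced with a plain pandas comparison; the trailing space in the column name
# matches the CSV header, and both approaches should agree.
alt_labels = (data['Chance of Admit '] > 0.75).astype(int)
print("Binarizer and pandas comparison agree:", (alt_labels == data['Chance of Admit ']).all())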


x = data.drop('Chance of Admit ', axis=1)

y = data['Chance of Admit ']

print(y.value_counts())


# data preparation train test split

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.25)


# decision tree

from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier(random_state=0)

classifier.fit(x_train, y_train)


y_pred = classifier.predict(x_test)


# confusion matrix

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, classification_report

cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix \n: ",cm)

ConfusionMatrixDisplay.from_predictions(y_test, y_pred)

print("Accuracy:", accuracy_score(y_test, y_pred))

print(classification_report(y_test, y_pred))

plt.show()
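# Optional sketch (not part of the original assignment): the trained tree can be
# inspected through its feature importances and a shallow plot of the tree itself.
from sklearn.tree import plot_tree
print(pd.Series(classifier.feature_importances_, index=x.columns).sort_values(ascending=False))
plt.figure(figsize=(12, 6))
plot_tree(classifier, feature_names=list(x.columns), filled=True, max_depth=2)
plt.show()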

ASSIGNMENT 4

CLUSTERING TECHNIQUE

import matplotlib.pyplot as plt

import pandas as pd


# Reading the csv file

filename = "Mall_Customers.xls"

data = pd.read_csv(filename)

x = data.iloc[:, 3:]   # selecting the 'Annual Income (k$)' and 'Spending Score (1-100)' columns

print(data.head())


from sklearn.cluster import KMeans, AgglomerativeClustering

# kmeans algorithm and elbow method

sse = []

for i in range(1, 11):

    km = KMeans(n_clusters=i)

    km.fit(x)

    sse.append(km.inertia_)


# diagram for the elbow method

plt.title('Elbow Method')

plt.xlabel('Value of K')

plt.ylabel('SSE')

plt.plot(range(1, 11), sse)

plt.show()
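# Optional sketch (not part of the original assignment): the silhouette score offers a
# complementary way to judge the number of clusters; higher is generally better.
from sklearn.metrics import silhouette_score
for i in range(2, 11):
    km = KMeans(n_clusters=i, random_state=0)
    labels_i = km.fit_predict(x)
    print("k =", i, "silhouette score =", silhouette_score(x, labels_i))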


# K-Means clustering

km = KMeans(n_clusters=5, random_state=0)

labels = km.fit_predict(x)

cent = km.cluster_centers_  # identifying the centroids


plt.title('Clustered Data Using K-Means Algorithm')

plt.xlabel('Annual Income')

plt.ylabel('Spending Score')

plt.scatter(x['Annual Income (k$)'], x['Spending Score (1-100)'], c= labels)

plt.scatter(cent[:,0], cent[:,1], s=100, color='k')

plt.show()


# Agglomerative clustering

agl = AgglomerativeClustering(n_clusters=5)

alabels= agl.fit_predict(x)



plt.title('Clustered Data Using Agglomerative Clustering algorithm')

plt.xlabel('Annual Income')

plt.ylabel('Spending Score')

plt.scatter(x['Annual Income (k$)'], x['Spending Score (1-100)'], c= alabels)

# (note: AgglomerativeClustering has no centroids; the K-Means centroids are shown here only for reference)
plt.scatter(cent[:, 0], cent[:, 1], s=100, color='k')

plt.show()


# dendrogram (optional)

import scipy.cluster.hierarchy as sch

dendrogram = sch.dendrogram(sch.linkage(x, method = 'ward'))

plt.title('Dendrogram')

plt.xlabel('Customers')

plt.ylabel('Euclidean distances')

plt.show()
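# Optional sketch (not part of the original assignment): how closely the two clusterings
# agree can be quantified with the adjusted Rand index (1.0 means identical groupings).
from sklearn.metrics import adjusted_rand_score
print("Agreement between K-Means and Agglomerative labels:", adjusted_rand_score(labels, alabels))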

