#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 3 22:08:15 2019

@author: zenzen

FIND-S: compute the maximally specific hypothesis consistent with the
positive examples in finds.csv (last column is the 'Yes'/'No' target).
"""
import csv

with open('finds.csv') as csv_file:
    data = list(csv.reader(csv_file))

# '0' means "no value accepted yet" (most specific); '?' matches anything.
h = ['0', '0', '0', '0', '0', '0']
for row in data:
    if row[-1] == 'Yes':                 # generalise only on positive examples
        # iterate attributes only (row[:-1] drops the target column, so an
        # attribute that happens to equal 'Yes' is no longer skipped)
        for j, col in enumerate(row[:-1]):
            if h[j] == '0':              # first positive example: copy value
                h[j] = col
            elif col != h[j]:            # conflicting values: generalise
                h[j] = '?'
print("The maximum hypothesis is:", h)
Microsoft Word – 1. ISE71 – ML List of lab programs.docx
DEPARTMENT OF INFORMATION SCIENCE &
ENGINEERING
COURSE NAME: FUNDAMENTALS OF MACHINE LEARNING
COURSE CODE:ISE71
1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file.
2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.
3. Develop a program to demonstrate the prediction of values of a given dataset using Linear regression.
4. Develop a program to demonstrate the prediction of values of a given dataset using logistic regression techniques.
5. Develop a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
6. Develop a program to implement the naïve Bayesian Classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
7. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Calculate the accuracy, precision, and recall for your data set.
8. Develop a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set.
9. Develop a program to construct Support Vector Machine considering a Sample Dataset.
10. Apply K-Means algorithm to cluster a set of data stored in a .CSV file. You can add Python ML library classes/API in the program.
11. Develop a program to implement K-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions.
12. Develop a program to Implement ANN for a sample dataset
1. Find – S
# 1. FIND-S
import csv

# '%' is the placeholder for "no value accepted yet" (most specific).
hypo = ['%', '%', '%', '%', '%', '%']

with open('finds.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print(readcsv)
    data = []
    print("\n The given training examples are :")
    for row in readcsv:
        print(row)
        if row[len(row) - 1].upper() == "YES":   # keep positive examples only
            data.append(row)

print("\n The positive examples are:")
for x in data:
    print(x)
print("\n")

total_examples = len(data)
print("the steps of Find-S algorithm are\n", hypo)

# Initialise the hypothesis with the first positive example.
# (Original had typos: `p=o`, and the -1 inside the len() call.)
d = len(data[0]) - 1          # number of attributes; last column is the target
hypo = [data[0][j] for j in range(d)]

# Generalise the hypothesis against every positive example.
for i in range(total_examples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'     # conflicting attribute value -> generalise
    print(hypo)

print("\n The maximally specific Find-S hypothesis:")
print([hypo[k] for k in range(d)])
Data Set (finds.csv)

Sky     Airtemp  Humidity  Wind    Water  Forecast  WaterSport
Sunny   Warm     Normal    Strong  Warm   Same      Yes
Sunny   Warm     High      Strong  Warm   Same      Yes
Cloudy  Cold     High      Strong  Warm   Change    No
Sunny   Cold     High      Strong  Cool   Change    Yes
# 2. Candidate Elimination
import numpy as np
import pandas as pd

# Loading data from a CSV file
data = pd.DataFrame(data=pd.read_csv('candidate.csv'))
# Separating concept features (all but last column) from the target
concepts = np.array(data.iloc[:, 0:-1])
# Isolating the target into a separate array
target = np.array(data.iloc[:, -1])
def learn(concepts, target):
    """Candidate-Elimination over `concepts` with labels `target`.

    Arguments:
        concepts -- 2-D array of attribute values, one row per example
        target   -- 1-D array of "Yes"/"No" labels

    Returns (specific_h, general_h): the final specific boundary and the
    list of remaining (non-trivial) general hypotheses.
    """
    # S0: copy of the first instance (.copy() so `concepts` is not mutated)
    specific_h = concepts[0].copy()
    # G0: one maximally general row per attribute
    n_attrs = len(specific_h)
    general_h = [["?" for _ in range(n_attrs)] for _ in range(n_attrs)]

    # The learning iterations
    for i, h in enumerate(concepts):
        if target[i] == "Yes":
            # Positive example: generalise S where it disagrees, relax G.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "No":
            # Negative example: specialise G against the current S.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

    # Remove rows that stayed fully general (unchanged) from G.
    # (Generalised from the original hard-coded 6-attribute row.)
    trivial = ['?'] * n_attrs
    indices = [i for i, val in enumerate(general_h) if val == trivial]
    for _ in indices:
        general_h.remove(trivial)
    return specific_h, general_h
# Run Candidate-Elimination and report both boundaries.
s_final, g_final = learn(concepts, target)
print("Final S:", s_final, sep="\n")
print("Final G:", g_final, sep="\n")
data.head()
Data for Candidate (candidate.csv)

Sky     Airtemp  Humidity  Wind    Water  Forecast  WaterSport
Sunny   Warm     Normal    Strong  Warm   Same      Yes
Sunny   Warm     High      Strong  Warm   Same      Yes
Cloudy  Cold     High      Strong  Warm   Change    No
Sunny   Cold     High      Strong  Cool   Change    Yes
3. Linear Regression import numpy as np
import matplotlib.pyplot as plt
(Remaining columns of the last two Candidate rows: Wind/Water = Strong Warm and Strong Cool; Forecast/WaterSport = Change No and Change Yes.)
def estimate_coef(x, y):
    """Least-squares fit of y = b_0 + b_1*x.

    Arguments:
        x, y -- 1-D numpy arrays of equal length

    Returns the tuple (b_0, b_1).
    """
    # number of observations/points
    n = np.size(x)
    # mean of x and y vector
    m_x, m_y = np.mean(x), np.mean(y)
    # cross-deviation and deviation about x.
    # BUG FIX: the subtraction must happen OUTSIDE np.sum — placing the
    # constant n*m*m inside the sum over-counts it n times.
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x
    # regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x
    return (b_0, b_1)
def plot_regression_line(x, y, b):
    """Scatter the observations and draw the fitted line y = b[0] + b[1]*x."""
    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1] * x
    # plotting the regression line
    plt.plot(x, y_pred, color="g")
    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    # function to show plot
    plt.show()
# observations
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
# estimating coefficients
b = estimate_coef(x, y)
print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
# plotting regression line
plot_regression_line(x, y, b)
4. Logistic Regression
import csv
import numpy as np
import matplotlib.pyplot as plt
def loadCSV(filename):
    """Load a numeric CSV file into a 2-D numpy array of floats."""
    with open(filename, "r") as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
    for i in range(len(dataset)):
        # convert every field from string to float
        dataset[i] = [float(x) for x in dataset[i]]
    return np.array(dataset)
def normalize(X):
    """Min-max normalise each column of X into [0, 1]."""
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    rng = maxs - mins
    # 1 - (max - x)/range  ==  (x - min)/range
    norm_X = 1 - ((maxs - X) / rng)
    return norm_X
def logistic_func(beta, X):
    """Logistic (sigmoid) function of X·betaᵀ."""
    return 1.0 / (1 + np.exp(-np.dot(X, beta.T)))
def log_gradient(beta, X, y):
    """Gradient of the logistic-regression cost w.r.t. beta."""
    first_calc = logistic_func(beta, X) - y.reshape(X.shape[0], -1)
    final_calc = np.dot(first_calc.T, X)
    return final_calc


def cost_func(beta, X, y):
    """Mean cross-entropy cost J(beta)."""
    log_func_v = logistic_func(beta, X)
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)
def grad_desc(X, y, beta, lr=.01, converge_change=.001):
    """Batch gradient descent until the cost change drops below
    `converge_change`; returns (beta, number of iterations)."""
    cost = cost_func(beta, X, y)
    change_cost = 1
    num_iter = 1
    while change_cost > converge_change:
        old_cost = cost
        beta = beta - (lr * log_gradient(beta, X, y))
        cost = cost_func(beta, X, y)
        change_cost = old_cost - cost
        num_iter += 1
    return beta, num_iter


def pred_values(beta, X):
    """Threshold predicted probabilities at 0.5 into 0/1 labels."""
    pred_prob = logistic_func(beta, X)
    pred_value = np.where(pred_prob >= .5, 1, 0)
    return np.squeeze(pred_value)
def plot_reg(X, y, beta):
    """Scatter the two classes and draw the fitted decision boundary."""
    # labelled observations
    x_0 = X[np.where(y == 0.0)]
    x_1 = X[np.where(y == 1.0)]
    # plotting points with different colours per label
    plt.scatter([x_0[:, 1]], [x_0[:, 2]], c='b', label='y = 0')
    plt.scatter([x_1[:, 1]], [x_1[:, 2]], c='r', label='y = 1')
    # decision boundary: beta0 + beta1*x1 + beta2*x2 = 0
    x1 = np.arange(0, 1, 0.1)
    x2 = -(beta[0, 0] + beta[0, 1] * x1) / beta[0, 2]
    plt.plot(x1, x2, c='k', label='reg line')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()
# load the dataset
dataset = loadCSV('logistic_data.csv')
# normalizing feature matrix
X = normalize(dataset[:, :-1])
# stacking a column of ones in the feature matrix (intercept term)
X = np.hstack((np.matrix(np.ones(X.shape[0])).T, X))
# response vector
y = dataset[:, -1]
# initial beta values
beta = np.matrix(np.zeros(X.shape[1]))
# beta values after running gradient descent
beta, num_iter = grad_desc(X, y, beta)
print("Estimated regression coefficients:", beta)
print("No. of iterations:", num_iter)
# predicted labels
y_pred = pred_values(beta, X)
# number of correctly predicted labels
print("Correctly predicted labels:", np.sum(y == y_pred))
# plotting regression line
plot_reg(X, y, beta)
Data Set
4.5192 2.6487 1 2.4443 1.5438 1 4.2409 1.899 1 5.8097 2.4711 1 6.4423 3.359 1 5.8097 3.2406 1
6.3917 3.8128 1 6.8725 4.4441 1 6.7966 3.6747 1 8.163 4.7401 1 7.4038 3.8917 1 7.6316 4.602 1 7.7581 5.7265 1 6.5688 4.9571 1 5.3543 3.9903 1 4.4686 3.0236 1 2.9757 2.0568 1 2.4443 1.2676 1 0.9008 1.169 1 2.1154 1.7411 1 3.2794 1.386 1 4.165 1.5636 1 4.8482 1.8793 1 3.33 2.7868 1 5.1518 3.5563 1 6.2652 4.0693 1 6.2652 4.3849 1 7.2014 1.5438 1 7.6569 2.412 1 6.1387 1.7806 1 4.4939 1.4057 1 4.8735 2.6093 1 5.5314 3.0828 1 6.0121 3.9311 1 7.1508 4.7598 1 7.7075 5.3122 1 8.3148 5.7068 1 8.5172 5.1149 1 8.7449 5.4109 1 7.8593 3.8128 1 6.999 3.2406 1 5.5061 2.9052 1 4.9241 2.6882 1 6.6447 3.8325 1 7.6822 4.5428 1 8.0364 5.7857 1
8.9221 6.5552 1 7.8593 5.253 1 6.5941 5.2333 1 6.0374 4.7598 1 2.7227 4.5822 0 1.9383 3.6549 0 1.6852 2.9841 0 4.3168 4.4244 0 3.4312 3.7536 0 5.4808 5.2728 0 4.1144 4.8387 0 3.2034 4.4244 0 4.1144 5.3911 0 5.1012 6.0817 0 4.8988 5.5687 0 5.9615 6.4565 0 5.7591 6.0028 0 6.6953 6.7722 0 5.7338 6.6538 0 6.6194 7.1471 0 7.2014 7.5219 0 7.2014 6.8314 0 8.5931 7.6206 0 7.7581 7.1865 0 7.7581 7.7784 0 5.1012 7.6009 0 4.2156 6.496 0 3.4818 5.8055 0 2.3684 5.0163 0 1.7864 4.1876 0 0.9008 3.4379 0 0.9008 5.7857 0 1.9636 6.3382 0 1.4069 4.9571 0 2.419 6.8511 0 2.8745 6.0817 0 4.0132 7.1668 0 4.6711 7.226 0 5.1771 8.1533 0 6.2146 7.4825 0
5.4555 7.0484 0 5.9868 8.5084 0 4.0891 7.5417 0 2.3937 7.2063 0 1.331 6.5355 0 1.7358 5.4503 0 2.4443 5.8449 0 3.1781 4.8979 0 4.6711 5.8055 0 5.9868 7.3641 0 4.6711 6.2592 0 7.581 8.3703 0 4.6457 8.5676 0 4.6457 8.1676 0
5. ID3- Algorithm
Data_loader.py
import csv


def read_data(filename):
    """Read a CSV file; return (metadata, traindata) where metadata is the
    header row and traindata the remaining rows (lists of strings)."""
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)
id3.py
import numpy as np
import math
from data_loader import read_data
class Node:
    """A decision-tree node: `attribute` is the attribute tested here,
    `children` holds (attribute_value, child Node) pairs, and `answer`
    is non-empty only on leaf nodes."""

    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []   # list of (value, subtree) pairs
        self.answer = ""     # class label, set only on leaves

    def __str__(self):
        return self.attribute
def subtables(data, col, delete):
    """Partition `data` rows by the values in column `col`.

    Returns (items, tables): the unique values of the column and a dict
    mapping each value to its sub-array; when `delete` is true the split
    column is removed from each sub-array.
    """
    tables = {}                       # renamed: `dict` shadowed the builtin
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    for x in range(items.shape[0]):
        tables[items[x]] = np.empty((int(count[x]), data.shape[1]),
                                    dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                tables[items[x]][pos] = data[y]
                pos += 1
        if delete:
            tables[items[x]] = np.delete(tables[items[x]], col, 1)
    return items, tables


def entropy(S):
    """Shannon entropy (base 2) of the label vector S."""
    items = np.unique(S)
    if items.size == 1:               # pure set: zero entropy
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        # index the length-1 row so math.log gets a true scalar
        sums += -1 * count[0] * math.log(count[0], 2)
    return sums
def gain_ratio(data, col):
    """Information-gain ratio of splitting `data` on column `col`
    (last column of `data` is the class label)."""
    items, tables = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = tables[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(tables[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)          # intrinsic value (split information)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv
def create_node(data, metadata):
    """Recursively build the ID3 decision tree for `data`
    (last column = class label; `metadata` names the columns)."""
    # Pure node: every row has the same label -> make a leaf.
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node
    # Pick the attribute with the best gain ratio.
    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    # Recurse into the partition induced by the chosen attribute.
    items, tables = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(tables[items[x]], metadata)
        node.children.append((items[x], child))
    return node
def empty(size):
    """Return `size` spaces (indentation for tree printing)."""
    return " " * size
def print_tree(node, level):
    """Pretty-print the decision tree, indenting by nesting level."""
    if node.answer != "":            # leaf: print the class label
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
# Build and display the ID3 tree for the tennis data set.
metadata, traindata = read_data("tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
DataSet
outlook sunny sunny overcast rain rain rain overcast sunny sunny rain
temperature humidity wind answer
hot high weak no
hot high strong no
hot high weak yes
mild high weak yes
cool normal weak yes
cool normal strong no
cool normal strong yes
mild high weak no
cool normal weak yes
mild normal weak yes
sunny     mild  normal  strong  yes
overcast  mild  high    strong  yes
overcast  hot   normal  weak    yes
rain      mild  high    strong  no
6. Naïve Bayes Algorithm
print("\nNaive Bayes Classifier for concept learning problem")
import csv
import math


def safe_div(x, y):
    """Division that returns 0 instead of raising on a zero divisor."""
    if y == 0:
        return 0
    return x / y
def loadCsv(filename):
    """Load a numeric CSV file as a list of float rows."""
    lines = csv.reader(open(filename))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset
def splitDataset(dataset, splitRatio):
    """Deterministically split `dataset` into [trainSet, testSet].

    The first `splitRatio` fraction of rows becomes the training set; the
    random selection is intentionally commented out so runs repeat.
    """
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        # index = random.randrange(len(copy))  # randomised variant
        trainSet.append(copy.pop(0))
    return [trainSet, copy]
def separateByClass(dataset):
    """Group rows by their class label (last element of each row)."""
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated
def mean(numbers):
    """Arithmetic mean (0 for an empty list, via safe_div)."""
    return safe_div(sum(numbers), float(len(numbers)))


def stdev(numbers):
    """Sample standard deviation (n-1 denominator; zero-safe)."""
    avg = mean(numbers)
    variance = safe_div(sum([pow(x - avg, 2) for x in numbers]),
                        float(len(numbers) - 1))
    return math.sqrt(variance)


def summarize(dataset):
    """Column-wise (mean, stdev) pairs, dropping the class column."""
    summaries = [(mean(attribute), stdev(attribute))
                 for attribute in zip(*dataset)]
    del summaries[-1]          # last column is the class label
    return summaries
def summarizeByClass(dataset):
    """Per-class (mean, stdev) summaries keyed by class label."""
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries


def calculateProbability(x, mean, stdev):
    """Gaussian probability density of x under N(mean, stdev); zero-safe."""
    exponent = math.exp(-safe_div(math.pow(x - mean, 2),
                                  (2 * math.pow(stdev, 2))))
    final = safe_div(1, (math.sqrt(2 * math.pi) * stdev)) * exponent
    return final


def calculateClassProbabilities(summaries, inputVector):
    """Product of per-attribute Gaussian likelihoods for each class."""
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities
def predict(summaries, inputVector):
    """Return the class label with the highest posterior probability."""
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel


def getPredictions(summaries, testSet):
    """Predict a label for every row of testSet."""
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions


def getAccuracy(testSet, predictions):
    """Percentage of rows whose prediction matches the true last column."""
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    accuracy = safe_div(correct, float(len(testSet))) * 100.0
    return accuracy
def main():
    """Drive the Naive Bayes demo: load tennis_naive.csv, split 90/10,
    train per-class Gaussian summaries and report accuracy."""
    filename = 'tennis_naive.csv'
    splitRatio = 0.9
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into'.format(len(dataset)))
    print('Number of Training data: ' + (repr(len(trainingSet))))
    print('Number of Test Data: ' + (repr(len(testSet))))
    print("\nThe values assumed for the concept learning attributes are\n")
    print("OUTLOOK=> Sunny=1 Overcast=2 Rain=3\nTEMPERATURE=> Hot=1 Mild=2 "
          "Cool=3\nHUMIDITY=> High=1 Normal=2\nWIND=> Weak=1 Strong=2")
    print("TARGET CONCEPT:PLAY TENNIS=> Yes=10 No=5")
    print("\nThe Training set are:")
    for x in trainingSet:
        print(x)
    print("\nThe Test data set are:")
    for x in testSet:
        print(x)
    print("\n")
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    actual = []
    for i in range(len(testSet)):
        vector = testSet[i]
        actual.append(vector[-1])
    # Each of the five attributes contributes 20% accuracy; if all
    # attributes match the predictions the accuracy is 100%.
    print('Actual values: {0}%'.format(actual))
    print('Predictions: {0}%'.format(predictions))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))


main()
DataSet 11115
11125 2 1 1 1 10 3 2 1 1 10 3 3 2 1 10 33225 2 3 2 2 10 12115 1 3 2 1 10 3 2 2 1 10 1 2 2 2 10 2 2 1 2 10 2 1 2 1 10 32125 12125 12125
7. Document Classifier
# 7. Document Classifier
from sklearn.datasets import fetch_20newsgroups
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np

categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics',
              'sci.med']
twenty_train = fetch_20newsgroups(subset='train', categories=categories,
                                  shuffle=True)
twenty_test = fetch_20newsgroups(subset='test', categories=categories,
                                 shuffle=True)
print(len(twenty_train.data))
print(len(twenty_test.data))
print(twenty_train.target_names)

# prints first data file and its label index
print("\n".join(twenty_train.data[0].split("\n")))
print(twenty_train.target[0])

from sklearn.feature_extraction.text import CountVectorizer
# Each unique word in the vocabulary corresponds to one count feature.
count_vect = CountVectorizer()
# Learn the vocabulary; returns a document-term matrix [n_samples, n_features].
X_train_tf = count_vect.fit_transform(twenty_train.data)
print("DOCUMENT-TERM-MATRIX", X_train_tf)

from sklearn.feature_extraction.text import TfidfTransformer
# Term frequency times inverse document frequency
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)
X_train_tfidf.shape

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn import metrics

mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)
# Transform the test set with the SAME vectoriser/transformer (no refit).
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)
predicted = mod.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(twenty_test.target, predicted))
print(classification_report(twenty_test.target, predicted,
                            target_names=twenty_test.target_names))
print("confusion matrix is\n",
      metrics.confusion_matrix(twenty_test.target, predicted))
8. Bayesian Belief Network
import bayespy as bp
import numpy as np
import csv
from colorama import init
from colorama import Fore, Back, Style
init()

# Enum maps: categorical attribute value -> integer code
ageEnum = {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2,
           'Youth': 3, 'Teen': 4}
genderEnum = {'Male': 0, 'Female': 1}
familyHistoryEnum = {'Yes': 0, 'No': 1}
dietEnum = {'High': 0, 'Medium': 1, 'Low': 2}
lifeStyleEnum = {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}
cholesterolEnum = {'High': 0, 'BorderLine': 1, 'Normal': 2}
heartDiseaseEnum = {'Yes': 0, 'No': 1}

# Read heart_disease_data.csv and encode every row with the enums above.
with open('heart_disease_data.csv') as csvfile:
    lines = csv.reader(csvfile)
    dataset = list(lines)
    data = []
    for x in dataset:
        data.append([ageEnum[x[0]], genderEnum[x[1]],
                     familyHistoryEnum[x[2]], dietEnum[x[3]],
                     lifeStyleEnum[x[4]], cholesterolEnum[x[5]],
                     heartDiseaseEnum[x[6]]])

# Training data for machine learning (todo: should import from csv)
data = np.array(data)
N = len(data)

# One Dirichlet prior + observed Categorical node per input column.
p_age = bp.nodes.Dirichlet(1.0 * np.ones(5))
age = bp.nodes.Categorical(p_age, plates=(N,))
age.observe(data[:, 0])

p_gender = bp.nodes.Dirichlet(1.0 * np.ones(2))
gender = bp.nodes.Categorical(p_gender, plates=(N,))
gender.observe(data[:, 1])

p_familyhistory = bp.nodes.Dirichlet(1.0 * np.ones(2))
familyhistory = bp.nodes.Categorical(p_familyhistory, plates=(N,))
familyhistory.observe(data[:, 2])

p_diet = bp.nodes.Dirichlet(1.0 * np.ones(3))
diet = bp.nodes.Categorical(p_diet, plates=(N,))
diet.observe(data[:, 3])

p_lifestyle = bp.nodes.Dirichlet(1.0 * np.ones(4))
lifestyle = bp.nodes.Categorical(p_lifestyle, plates=(N,))
lifestyle.observe(data[:, 4])

p_cholesterol = bp.nodes.Dirichlet(1.0 * np.ones(3))
cholesterol = bp.nodes.Categorical(p_cholesterol, plates=(N,))
cholesterol.observe(data[:, 5])

# HeartDisease has 2 options (Yes/No); the plates (5, 2, 2, 3, 4, 3)
# correspond to the cardinalities of the six parent variables.
p_heartdisease = bp.nodes.Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))
heartdisease = bp.nodes.MultiMixture(
    [age, gender, familyhistory, diet, lifestyle, cholesterol],
    bp.nodes.Categorical, p_heartdisease)
heartdisease.observe(data[:, 6])
p_heartdisease.update()

# Interactive diagnosis loop: query P(HeartDisease=Yes | evidence).
m = 0
while m == 0:
    print("\n")
    res = bp.nodes.MultiMixture(
        [int(input('Enter Age: ' + str(ageEnum))),
         int(input('Enter Gender: ' + str(genderEnum))),
         int(input('Enter FamilyHistory: ' + str(familyHistoryEnum))),
         int(input('Enter dietEnum: ' + str(dietEnum))),
         int(input('Enter LifeStyle: ' + str(lifeStyleEnum))),
         int(input('Enter Cholesterol: ' + str(cholesterolEnum)))],
        bp.nodes.Categorical,
        p_heartdisease).get_moments()[0][heartDiseaseEnum['Yes']]
    print("Probability(HeartDisease) = " + str(res))
    # print(Style.RESET_ALL)
    m = int(input("Enter for Continue:0, Exit :1 "))
# Data Set (the heart_disease_data.csv rows follow in the document)
SuperSeniorCitize n SuperSeniorCitize n
SeniorCitizen
Teen
Y outh
MiddleAged
Teen
SuperSeniorCitize
n
Y outh
SeniorCitizen
Teen
Teen
MiddleAged
MiddleAged
Y outh SuperSeniorCitize n
SeniorCitizen
Y outh Teen
9. SVM
# -*- coding: utf-8 -*-
Male Yes Female Yes
Male No Male Yes Female Yes Male Yes Male Yes
Male Yes Female Yes Female No Female No Male Yes Female No Male Yes
Female Yes
Male Yes
Female No
Female Yes Male Yes
Medium Medium
High Medium High Medium High
Medium High High Medium Medium High Medium
High
High
Medium
Medium Medium
Sedetary Sedetary
Moderate Sedetary Athlete Active Moderate
Sedetary Athlete Athlete Moderate Sedetary Athlete Active
Athlete
Athlete
Moderate
Athlete Sedetary
High Yes
High Yes
BorderLin
e Yes
Normal No Normal No High Yes High Yes
High Yes Normal No Normal Yes High Yes Normal No High No
High Yes
BorderLin
e No
Normal Yes
BorderLin
e Yes
BorderLin
e No Normal No
"""
Created on Wed Jan 17 23:01:06 2018

@author: SHR
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
from sklearn import svm

style.use("ggplot")

# Toy 2-D data: two linearly separable clusters.
X = np.array([[1, 2], [5, 8],
              [1.5, 1.8], [8, 8], [1, 0.6], [9, 11], [10, 10], [3, 2]])
y = [0, 1, 0, 1, 0, 1, 1, 0]

clf = svm.SVC(kernel='linear')
clf.fit(X, y)
print("Prediction is:", clf.predict([[7, 4.0]]))

w = clf.coef_[0]
print(w)
# Decision boundary: w0*x + w1*y + b = 0  ->  y = -(w0/w1)*x - b/w1
a = -w[0] / w[1]
xx = np.linspace(0, 12)
yy = a * xx - clf.intercept_[0] / w[1]
h0 = plt.plot(xx, yy, 'k-', label="non weighted div")
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.legend()
plt.show()
10. kMeans
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 24 19:24:13 2018

@author: Vinay
"""
import matplotlib.pyplot as plt  # matplotlib inline
import numpy as np
from sklearn.cluster import KMeans

X = np.array([[5, 3],
              [10, 15],
              [15, 12],
              [24, 10],
              [30, 45],
              [85, 70],
              [71, 80],
              [60, 78],
              [55, 52],
              [80, 91]])

plt.scatter(X[:, 0], X[:, 1], label='True Position')

# Fit two clusters, then show centres and per-point labels.
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
print(kmeans.cluster_centers_)
print(kmeans.labels_)
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            color='black')
11. kNN
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 24 19:35:39 2018

@author: Vinay M HARITSA
@ vtricks technologies (gurukulam vidya)
@ for any queries contact : 9620749749
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# (original URL was broken by a stray space inside "machine-learning-databases")
url = ("https://archive.ics.uci.edu/ml/machine-learning-databases/"
       "iris/iris.data")
# Assign column names to the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width',
         'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv(url, names=names)
dataset.head()

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# Standardise features (fit on the training split only).
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
12. ANN
# -*- coding: utf-8 -*-
“””
Created on Wed Oct 24 19:35:39 2018
@author: Vinay M HARITSA
@ vtricks technologies (gurukulam vidya)
@ for any queries contact : 9620749749
@CREDITS : STACKK ABUSE
@ steps are as :
@FEED FORWARD AND BACK PROPOGATION
@ FEED FOWARD HAS 1)DOT PRODUCT OF INPUT AND
WEIGHTS
@2)PASS RESULT FROM STEP 1 TO ACIVATION
FUNCTION(SIGMOID FUNCTION)
@BACK PROPAGATION HAS 2 STEPS
@STEP 1 : CALCULATE COST
@STEP 2 : MINIMIZE COST (GAUSS THEOREM)
Person Person1 Person2 Person3 Person4 Person5
“””
Data set (columns: Smoking, Obesity, Exercise -> Diabetic):
Person1: 0 1 0 -> 1
Person2: 0 0 1 -> 0
Person3: 1 0 0 -> 0
Person4: 1 1 0 -> 1
Person5: 1 1 1 -> 1
## importing numpy package
import numpy as np

# creating the dataset mentioned above
# (columns: smoking, obesity, exercise; label: diabetic)
feature_set = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0],
                        [1, 1, 0], [1, 1, 1]])
# creating labels
labels = np.array([[1, 0, 0, 1, 1]])
labels = labels.reshape(5, 1)

# parameters for the forward network: weights, bias and learning rate
np.random.seed(42)
weights = np.random.rand(3, 1)
bias = np.random.rand(1)
lr = 0.05   # BUG FIX: was 0.0, which made every update a no-op


def sigmoid(x):
    """Logistic activation function."""
    return 1 / (1 + np.exp(-x))


def sigmoid_der(x):
    """Derivative of the sigmoid."""
    return sigmoid(x) * (1 - sigmoid(x))


# Feed-forward + back-propagation training loop; 20000 epochs.
for epoch in range(20000):
    inputs = feature_set
    # feedforward step 1: weighted sum
    XW = np.dot(feature_set, weights) + bias
    # feedforward step 2: activation
    z = sigmoid(XW)
    # backpropagation step 1: error (cost derivative)
    error = z - labels
    if epoch % 1000 == 0:        # report progress periodically
        print(error.sum())
    # backpropagation step 2: gradient w.r.t. weights and bias
    dcost_dpred = error
    dpred_dz = sigmoid_der(z)
    z_delta = dcost_dpred * dpred_dz
    inputs = feature_set.T
    weights -= lr * np.dot(inputs, z_delta)
    for num in z_delta:
        bias -= lr * num

# One final forward/backward pass (values left for inspection).
XW = np.dot(feature_set, weights) + bias
z = sigmoid(XW)
error = z - labels
dcost_dpred = error
dpred_dz = sigmoid_der(z)
# slope = input x dcost_dpred x dpred_dz
z_delta = dcost_dpred * dpred_dz
inputs = feature_set.T
weights -= lr * np.dot(inputs, z_delta)
