Videos related to this segment (click the title to watch)
03 ML1 - 2A: A first version (13:22)
03 ML1 - 2B: A better version (10:28)
03 ML1 - 2C: Cross-validation implementation (21:07)

################################################################################

Introduction to Scientific Programming and Machine Learning with Julia


Run each script in a new, clean Julia session

GitHub: https://github.com/sylvaticus/IntroSPMLJuliaCourse

Licence (applies to all material of the course: scripts, videos, quizzes, ...)

Creative Commons By Attribution (CC BY 4.0), Antonello Lobianco

################################################################################

0302 - The Perceptron algorithm for linear classification

First, some code to set up the environment:

julia> cd(@__DIR__)
julia> using Pkg
julia> Pkg.activate(".")
  Activating project at `~/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning`
julia> # If using a Julia version other than 1.10, please uncomment and run the following line (the guarantee of reproducibility will however be lost):
       # Pkg.resolve()
julia> Pkg.instantiate()
julia> using Random
julia> Random.seed!(123)
Random.TaskLocalRNG()

Perceptron elementary operations
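Two elementary operations make up the algorithm. A record (x, y), with label y ∈ {-1, +1}, is classified correctly when y * (θ' * x) > 0, so we flag a classification error when y * (θ' * x) <= 0 (using eps() as a numerical tolerance for zero). On an error, the parameters are updated with the rule θ ← θ + y * x, which tilts the separating hyperplane towards classifying that record correctly: for example, starting from θ = [0,0], the misclassified record x = [2,4], y = -1 gives θ = [-2,-4], as we will see below.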

julia> using StatsPlots
julia> function plot2DClassifierWithData(X,y,θ;d1=1,d2=2,origin=false,pid=1)
           colors = [y == -1 ? "red" : "green" for y in y]
           labels = [y == -1 ? "-1" : "+1" for y in y]
           minD1,maxD1 = extrema(X[:,d1])
           minD2,maxD2 = extrema(X[:,d2])
           myplot = scatter(X[:,d1],X[:,d2], colour=colors, title="Linear classifier in 2D", xlabel="Dimx: $d1", ylabel="Dimy: $d2", group=labels)
           constTerm = 0.0
           if !origin
               d1 += 1
               d2 += 1
               constTerm = -θ[1]/θ[d2]
           end
           d2Class(x) = constTerm - x * θ[d1]/θ[d2]
           if θ[d2] == 0
               vline!([0], color="blue", label="", linewidth=5)
           else
               plot!(d2Class, min(θ[d1],minD1), max(maxD1,θ[d1]), color="blue", label="", linewidth=5)
           end
           plot!([0,θ[d1]],[0,θ[d2]], arrow=true, color=:black, linewidth=2, label="")
           display(myplot)
           savefig("currentPlot$(pid).svg")
       end
plot2DClassifierWithData (generic function with 1 method)
julia> isClassificationError(θ,y,x) = y * (θ' * x) <= eps()
isClassificationError (generic function with 1 method)
julia> perceptronUpdate(θ,y,x) = return θ .+ y .* x
perceptronUpdate (generic function with 1 method)
julia> X = [ 2 4; -6 1]
2×2 Matrix{Int64}:
  2  4
 -6  1
julia> y = [-1,-1]
2-element Vector{Int64}:
 -1
 -1
julia> θ₀ = [0,0]
2-element Vector{Int64}:
 0
 0
julia> θ = θ₀
2-element Vector{Int64}:
 0
 0
julia> ϵ = isClassificationError(θ,y[1],X[1,:])
true
julia> θ = perceptronUpdate(θ,y[1],X[1,:])
2-element Vector{Int64}:
 -2
 -4
julia> plot2DClassifierWithData(X,y,θ,origin=true,pid=1)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot1.svg"

julia> ϵ = isClassificationError(θ,y[1],X[1,:])
false
julia> ϵ = isClassificationError(θ,y[2],X[2,:])
true
julia> θ = perceptronUpdate(θ,y[2],X[2,:])
2-element Vector{Int64}:
  4
 -5
julia> plot2DClassifierWithData(X,y,θ,origin=true,pid=2)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot2.svg"

julia> ϵ = isClassificationError(θ,y[2],X[2,:])
false
julia> ϵ = isClassificationError(θ,y[1],X[1,:])
false
julia> X = [ 2 4; 1 -2]
2×2 Matrix{Int64}:
 2   4
 1  -2
julia> θ = θ₀
2-element Vector{Int64}:
 0
 0
julia> ϵ = isClassificationError(θ,y[1],X[1,:])
true
julia> θ = perceptronUpdate(θ,y[1],X[1,:])
2-element Vector{Int64}:
 -2
 -4
julia> plot2DClassifierWithData(X,y,θ, origin=true,pid=3)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot3.svg"

julia> ϵ = isClassificationError(θ,y[1],X[1,:])
false
julia> ϵ = isClassificationError(θ,y[2],X[2,:])
true
julia> θ = perceptronUpdate(θ,y[2],X[2,:])
2-element Vector{Int64}:
 -3
 -2
julia> plot2DClassifierWithData(X,y,θ, origin=true,pid=4)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot4.svg"

julia> ϵ = isClassificationError(θ,y[1],X[1,:])
false
julia> ϵ = isClassificationError(θ,y[2],X[2,:])
true
julia> θ = perceptronUpdate(θ,y[2],X[2,:])
2-element Vector{Int64}:
 -4
  0
julia> plot2DClassifierWithData(X,y,θ,origin=true,pid=5)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot5.svg"

julia> ϵ = isClassificationError(θ,y[1],X[1,:])
false
julia> ϵ = isClassificationError(θ,y[2],X[2,:])
false
julia> θ
2-element Vector{Int64}:
 -4
  0
julia> X = [ 2 4; -2 2]
2×2 Matrix{Int64}:
  2  4
 -2  2
julia> y = [-1,1]
2-element Vector{Int64}:
 -1
  1
julia> θ = θ₀
2-element Vector{Int64}:
 0
 0
julia> ϵ = isClassificationError(θ,y[1],X[1,:])
true
julia> θ = perceptronUpdate(θ,y[1],X[1,:])
2-element Vector{Int64}:
 -2
 -4
julia> plot2DClassifierWithData(X,y,θ, origin=true,pid=6)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot6.svg"

julia> ϵ = isClassificationError(θ,y[2],X[2,:])
true
julia> θ = perceptronUpdate(θ,y[2],X[2,:])
2-element Vector{Int64}:
 -4
 -2
julia> plot2DClassifierWithData(X,y,θ,origin=true,pid=7)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot7.svg"

The complete algorithm

Tip

The algorithm implemented from scratch on this page forms the core of the PerceptronClassifier model in BetaML, a multiclass linear classifier.
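For comparison, using the library model directly would look roughly like the following minimal sketch (exact hyperparameter names and the shape of the returned predictions may differ across BetaML versions, so check the BetaML documentation for your installed version):

julia> using BetaML
julia> m  = PerceptronClassifier()  # the BetaML multiclass perceptron, default hyperparameters
julia> ŷp = fit!(m, X, y)           # in BetaML, fit! trains the model and returns the training predictions
julia> ŷ  = mode(ŷp)                # predictions are class=>probability maps; mode extracts the most likely class
julia> accuracy(ŷ, y)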

julia> function perceptronOrigin(X,y,epochs=1;verbose=false)
           (nR,nD) = size(X)
           local θ = zeros(nD)
           for t in 1:epochs
               for n in 1:nR
                   if verbose
                       println("$n: X[n,:] \t θ: $θ")
                   end
                   if isClassificationError(θ,y[n],X[n,:])
                       θ = perceptronUpdate(θ,y[n],X[n,:])
                       if verbose
                           println("**update! New theta: $θ")
                       end
                   end
               end
           end
           return θ
       end
perceptronOrigin (generic function with 2 methods)
julia> θopt = perceptronOrigin(X,y,verbose=true)
1: X[n,:]    θ: [0.0, 0.0]
**update! New theta: [-2.0, -4.0]
2: X[n,:]    θ: [-2.0, -4.0]
**update! New theta: [-4.0, -2.0]
2-element Vector{Float64}:
 -4.0
 -2.0
julia> plot2DClassifierWithData(X,y,θopt, origin=true,pid=8)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot8.svg"

Loading binary supervised data

julia> using BetaML, DelimitedFiles
julia> baseDir = joinpath(dirname(pathof(BetaML)),"..","test","data")
"/home/runner/.julia/packages/BetaML/IyufA/src/../test/data"
julia> perceptronData = readdlm(joinpath(dirname(pathof(BetaML)),"..","test","data","binary2DData.csv"),'\t')
200×3 Matrix{Float64}:
 -1.0   1.76    0.4
 -1.0   0.979   2.24
 -1.0   1.87   -0.977
 -1.0   0.95   -0.151
 -1.0  -0.103   0.411
 -1.0   0.144   1.45
 -1.0   0.761   0.122
 -1.0   0.444   0.334
 -1.0   1.49   -0.205
 -1.0   0.313  -0.854
  ⋮
  1.0  -0.256   0.977
  1.0   2.04    0.343
  1.0   1.01    0.528
  1.0   3.65    2.16
  1.0   2.57    1.78
  1.0   1.65    0.384
  1.0   1.71    1.24
  1.0   2.86    3.14
  1.0   3.47    2.85
julia> nR = size(perceptronData,1)
200
julia> idx = shuffle(1:nR)
200-element Vector{Int64}:
 123
 131
  74
  23
  19
  78
  43
 130
  83
 186
   ⋮
 137
 175
 182
  37
  71
  89
 142
  82
 170
julia> perceptronData = perceptronData[idx,:]
200×3 Matrix{Float64}:
  1.0   1.69     0.324
  1.0   0.811    1.49
 -1.0  -0.913    1.12
 -1.0  -0.5097  -0.4381
 -1.0   1.23     1.2
 -1.0  -0.0985  -0.6635
 -1.0   1.49     1.9
  1.0   0.417    2.61
 -1.0  -1.23     0.844
  1.0   2.28     1.01
  ⋮
  1.0   3.96     2.39
  1.0   2.58     2.35
  1.0   2.93     2.34
 -1.0   1.14    -1.23
 -1.0  -1.49     0.439
 -1.0  -0.8034  -0.6895
  1.0   1.31     3.54
 -1.0   0.949    0.0876
  1.0   1.32     3.66
julia> X = copy(perceptronData[:,[2,3]])
200×2 Matrix{Float64}:
  1.69     0.324
  0.811    1.49
 -0.913    1.12
 -0.5097  -0.4381
  1.23     1.2
 -0.0985  -0.6635
  1.49     1.9
  0.417    2.61
 -1.23     0.844
  2.28     1.01
  ⋮
  3.96     2.39
  2.58     2.35
  2.93     2.34
  1.14    -1.23
 -1.49     0.439
 -0.8034  -0.6895
  1.31     3.54
  0.949    0.0876
  1.32     3.66
julia> y = convert(Array{Int64,1},copy(perceptronData[:,1]))
200-element Vector{Int64}:
  1
  1
 -1
 -1
 -1
 -1
 -1
  1
 -1
  1
  ⋮
  1
  1
  1
 -1
 -1
 -1
  1
 -1
  1

A better organisation

Now we rewrite the perceptron algorithm, storing all its parameters in a structure and exposing what could be a generic interface for any supervised model (train!, predict): this is the approach used by most ML libraries. We will also see how to measure the classification error. While we are at it, we add the constant (bias) term with the "constant addition to the data" trick, i.e. prepending a 1 to each record. Note that editing the feature matrix at every call is NOT efficient; we do it here only for simplicity. A better way is to model the constant term explicitly as a separate parameter of the perceptron model.
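To make the trick concrete, here is a minimal sketch (the name Xaug is just for illustration):

julia> # Prepend a constant feature equal to 1 to every record, so that the first
       # component of θ acts as the bias (intercept) of the linear classifier.
       Xaug = hcat(ones(size(X,1)), X)   # each row becomes [1.0, x1, x2, ...]
       # The score of a record x is then θ[1] + θ[2]*x1 + θ[3]*x2, i.e. θ' * [1.0; x],
       # which is exactly what the predict and update! functions below compute via vcat(1.0,x).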

julia> abstract type SupervisedModel end
julia> abstract type TrainingOptions end
julia> mutable struct Perceptron <: SupervisedModel
           θ::Vector{Float64}
       end
julia> mutable struct PerceptronTrainingOptions <: TrainingOptions
           epochs::Int64
           verbose::Bool
           shuffle::Bool
           function PerceptronTrainingOptions(;epochs=1,verbose=false,shuffle=false)
               return new(epochs,verbose,shuffle)
           end
       end
julia> function predict(model::Perceptron,x::AbstractVector)
           x = vcat(1.0,x)
           x' * model.θ > eps() ? (return 1) : (return -1)
       end
predict (generic function with 1 method)
julia> function predict(model::Perceptron,X::AbstractMatrix)
           return [predict(model,r) for r in eachrow(X)]
       end
predict (generic function with 2 methods)
julia> function update!(model::Perceptron,X::Vector,y)
           X = vcat(1.0,X)
           model.θ = model.θ .+ y .* X
           return model.θ
       end
update! (generic function with 1 method)
julia> function train!(model::Perceptron,X,y,ops=PerceptronTrainingOptions()::TrainingOptions)
           epochs  = ops.epochs
           verbose = ops.verbose
           (nR,nD) = size(X)
           nD += 1
           for t in 1:epochs
               errors = 0
               if ops.shuffle   # more efficient!
                   idx = shuffle(1:nR)
                   X = X[idx,:]
                   y = y[idx]
               end
               for n in 1:nR
                   if verbose
                       println("$n: X[n,:] \t θ: $(model.θ)")
                   end
                   if predict(model,X[n,:]) != y[n]
                       errors += 1
                       θ = update!(model,X[n,:],y[n])
                       if verbose
                           println("**update! New theta: $(model.θ)")
                       end
                   end
               end
               if verbose
                   println("Epoch $t errors: $errors")
               end
           end
           return model.θ
       end
train! (generic function with 2 methods)

Testing the Perceptron algorithm

julia> m   = Perceptron(zeros(size(X,2)+1))
Perceptron([0.0, 0.0, 0.0])
julia> ops = PerceptronTrainingOptions()
PerceptronTrainingOptions(1, false, false)
julia> train!(m,X,y,ops)
3-element Vector{Float64}:
 -5.0
  1.7424999999999995
  2.6404999999999994
julia> plot2DClassifierWithData(X,y,m.θ,pid=9)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot9.svg"

julia> ŷ = predict(m,X)
200-element Vector{Int64}:
 -1
  1
 -1
 -1
  1
 -1
  1
  1
 -1
  1
  ⋮
  1
  1
  1
 -1
 -1
 -1
  1
 -1
  1
julia> inSampleAccuracy = sum(y .== ŷ)/length(y)
0.89
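Note that this is the in-sample accuracy: the model is being evaluated on the very same records it was trained on, so it is an optimistic estimate of how the classifier would perform on new data.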

Let's see whether shuffling the records at each epoch and training for more epochs improves the accuracy...

julia> ops = PerceptronTrainingOptions(verbose=false,epochs=5,shuffle=true)
PerceptronTrainingOptions(5, false, true)
julia> m = Perceptron(zeros(size(X,2)+1))
Perceptron([0.0, 0.0, 0.0])
julia> train!(m,X,y,ops)
3-element Vector{Float64}:
 -6.0
  3.2406999999999955
  3.5374000000000034
julia> plot2DClassifierWithData(X,y,m.θ,pid=10)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot10.svg"

julia> ŷ = predict(m,X)
200-element Vector{Int64}:
  1
  1
 -1
 -1
  1
 -1
  1
  1
 -1
  1
  ⋮
  1
  1
  1
 -1
 -1
 -1
  1
 -1
  1
julia> inSampleAccuracy = sum(y .== ŷ)/length(y)
0.92
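With five epochs and per-epoch shuffling the in-sample accuracy indeed improves, from 0.89 to 0.92.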

Cross-validation and hyperparameters optimisation

Let's now evaluate the model on records it was not trained on. We split the data into separate training and test subsets using the BetaML partition() function.
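partition splits all the arrays passed to it jointly, so the rows of X and the elements of y stay aligned: here 60% of the records go to the training subset and 40% to the test subset (by default the records are also shuffled in the process).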

julia> ((xtrain,xtest),(ytrain,ytest)) = partition([X,y],[0.6,0.4])
2-element Vector{Vector}:
 AbstractMatrix{Float64}[[-0.861 1.91; 1.35 1.48; … ; 0.0637 2.19; 0.396 -1.09], [1.37 1.52; 1.18 -0.18; … ; 1.98 2.38; -1.49 0.439]]
 AbstractVector{Int64}[[-1, 1, 1, -1, 1, -1, -1, 1, -1, -1  …  1, 1, 1, 1, 1, 1, -1, -1, 1, -1], [1, -1, 1, -1, 1, -1, -1, -1, -1, -1  …  1, -1, -1, 1, 1, 1, 1, -1, 1, -1]]
julia> m = Perceptron(zeros(size(X,2)+1))
Perceptron([0.0, 0.0, 0.0])
julia> ops = PerceptronTrainingOptions(epochs=5,shuffle=true)
PerceptronTrainingOptions(5, false, true)
julia> train!(m,xtrain,ytrain,ops)
3-element Vector{Float64}:
 -4.0
  3.7297999999999973
  1.829999999999997
julia> plot2DClassifierWithData(xtrain,ytrain,m.θ,pid=11)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot11.svg"

julia> ŷtrain = predict(m,xtrain)
120-element Vector{Int64}:
 -1
  1
  1
 -1
  1
 -1
  1
  1
 -1
 -1
  ⋮
  1
  1
  1
  1
  1
  1
 -1
  1
 -1
julia> trainAccuracy = accuracy(ŷtrain,ytrain)
0.9166666666666666
julia> sum(ytrain .== ŷtrain)/length(ytrain) # @edit accuracy(ŷtrain,ytrain)
0.9166666666666666
julia> ŷtest = predict(m,xtest)
80-element Vector{Int64}:
  1
  1
  1
  1
  1
 -1
 -1
  1
 -1
 -1
  ⋮
 -1
  1
  1
  1
  1
  1
 -1
  1
 -1
julia> testAccuracy = accuracy(ŷtest,ytest)
0.8625
julia> plot2DClassifierWithData(xtest,ytest,m.θ,pid=12)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot12.svg"

julia> cfOut = ConfusionMatrix()
A BetaML.Utils.ConfusionMatrix BetaMLModel (unfitted)
julia> fit!(cfOut,ŷtest,ytest)
2×2 Matrix{Float64}:
 0.804878   0.195122
 0.0769231  0.923077
julia> print(cfOut)
A BetaML.Utils.ConfusionMatrix BetaMLModel (fitted)

-----------------------------------------------------------------
*** CONFUSION MATRIX ***

Scores actual (rows) vs predicted (columns):
3×3 Matrix{Any}:
 "Labels"   "1"   "-1"
 "1"       33     8
 "-1"       3    36
Normalised scores actual (rows) vs predicted (columns):
3×3 Matrix{Any}:
 "Labels"   "1"         "-1"
 "1"        0.804878    0.195122
 "-1"       0.0769231   0.923077

*** CONFUSION REPORT ***
- Accuracy:               0.8625
- Misclassification rate: 0.13749999999999996
- Number of classes:      2

  N Class  precision  recall(TPR)  specificity(TNR)  f1score  actual_count(support)  predicted_count
  1 1          0.917        0.805             0.923    0.857                     41               36
  2 -1         0.818        0.923             0.805    0.867                     39               44

- Simple   avg.  0.867        0.864             0.864    0.862
- Weighted avg.  0.869        0.863             0.865    0.862
-----------------------------------------------------------------
Output of `info(cm)`:
- mean_precision: (0.8674242424242424, 0.8686553030303029)
- fitted_records: 80
- specificity: [0.9230769230769231, 0.8048780487804879]
- precision: [0.9166666666666666, 0.8181818181818182]
- misclassification: 0.13749999999999996
- mean_recall: (0.8639774859287055, 0.8625)
- n_categories: 2
- normalised_scores: [0.8048780487804879 0.1951219512195122; 0.07692307692307693 0.9230769230769231]
- tn: [36, 33]
- mean_f1score: (0.8623063683304647, 0.8621772805507746)
- actual_count: [41, 39]
- accuracy: 0.8625
- recall: [0.8048780487804879, 0.9230769230769231]
- f1score: [0.8571428571428571, 0.8674698795180723]
- mean_specificity: (0.8639774859287055, 0.8654549718574109)
- predicted_count: [36, 44]
- scores: [33 8; 3 36]
- tp: [33, 36]
- fn: [8, 3]
- categories: [1, -1]
- fp: [3, 8]
julia> res = info(cfOut)
Dict{String, Any} with 21 entries:
  "mean_precision"     => (0.867424, 0.868655)
  "fitted_records"     => 80
  "specificity"        => [0.923077, 0.804878]
  "precision"          => [0.916667, 0.818182]
  "misclassification"  => 0.1375
  "mean_recall"        => (0.863977, 0.8625)
  "n_categories"       => 2
  "normalised_scores"  => [0.804878 0.195122; 0.0769231 0.923077]
  "tn"                 => [36, 33]
  "mean_f1score"       => (0.862306, 0.862177)
  "actual_count"       => [41, 39]
  "accuracy"           => 0.8625
  "recall"             => [0.804878, 0.923077]
  "f1score"            => [0.857143, 0.86747]
  "mean_specificity"   => (0.863977, 0.865455)
  "predicted_count"    => [36, 44]
  "scores"             => [33 8; 3 36]
  "tp"                 => [33, 36]
  "fn"                 => [8, 3]
  ⋮                    => ⋮
julia> heatmap(string.(res["categories"]),string.(res["categories"]),res["normalised_scores"],seriescolor=cgrad([:white,:blue]),xlabel="Predicted",ylabel="Actual", title="Confusion Matrix (normalised scores)")
Plot{Plots.GRBackend() n=1}

Let's now use cross-validation

julia> ((xtrain,xvalidation,xtest),(ytrain,yvalidation,ytest)) = partition([X,y],[0.6,0.2,0.2]) # very few records per subset... let's go back to using only two subsets, but with cross-validation
2-element Vector{Vector}:
 AbstractMatrix{Float64}[[0.947 -0.155; -0.77 0.539; … ; 1.32 3.66; 1.86 3.14], [-1.1475 -0.4378; 1.31 0.786; … ; 0.577 -0.208; -0.4136 -0.7475], [1.14 -1.23; 1.24 0.562; … ; 0.873 1.27; 2.1 2.58]]
 AbstractVector{Int64}[[-1, -1, 1, 1, -1, -1, -1, 1, 1, 1  …  -1, 1, 1, 1, -1, -1, -1, 1, 1, 1], [-1, 1, -1, 1, -1, 1, 1, -1, 1, 1  …  -1, -1, -1, 1, -1, -1, -1, -1, -1, -1], [-1, 1, 1, 1, -1, -1, -1, 1, -1, 1  …  1, -1, -1, -1, -1, -1, 1, 1, 1, 1]]
julia> ((xtrain,xtest),(ytrain,ytest)) = partition([X,y],[0.6,0.4])
2-element Vector{Vector}:
 AbstractMatrix{Float64}[[1.52 2.62; -0.74 1.54; … ; 2.84 1.75; 1.94 1.89], [-0.659 2.61; 0.844 2.78; … ; -1.1 0.0522; 2.75 0.811]]
 AbstractVector{Int64}[[1, -1, 1, -1, 1, -1, 1, 1, -1, -1  …  1, 1, 1, -1, 1, -1, -1, 1, 1, 1], [1, 1, 1, 1, 1, -1, -1, -1, -1, -1  …  -1, 1, 1, -1, -1, -1, 1, -1, -1, 1]]
julia> sampler = KFold(nsplits=10)
BetaML.Utils.KFold(10, 1, true, Random._GLOBAL_RNG())
julia> ops = PerceptronTrainingOptions(epochs=10,shuffle=true)
PerceptronTrainingOptions(10, false, true)
julia> (acc,σ) = cross_validation([xtrain,ytrain],sampler) do trainData,valData,rng
           (xtrain,ytrain) = trainData; (xval,yval) = valData
           m = Perceptron(zeros(size(xtrain,2)+1))
           train!(m,xtrain,ytrain,ops)
           ŷval = predict(m,xval)
           valAccuracy = accuracy(ŷval,yval)
           return valAccuracy
       end
(0.8583333333333334, 0.08827915878928168)
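cross_validation calls the do-block once per fold, passing it the training and validation subsets for that fold, and returns summary statistics of the value computed in the block: here the mean and standard deviation of the validation accuracy over the 10 folds. We can exploit this to select the hyperparameters (number of epochs and shuffling) that maximise the cross-validated accuracy with a simple grid search: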
julia> epochsSet = 1:10:301
1:10:301
julia> shuffleSet = [false,true]
2-element Vector{Bool}:
 0
 1
julia> bestE = 0
0
julia> bestShuffle = false
false
julia> bestAcc = 0.0
0.0
julia> for e in epochsSet, s in shuffleSet
           global bestE, bestShuffle, bestAcc
           local acc
           local ops = PerceptronTrainingOptions(epochs=e,shuffle=s)
           (acc,_) = cross_validation([xtrain,ytrain],sampler) do trainData,valData,rng
               (xtrain,ytrain) = trainData; (xval,yval) = valData
               m = Perceptron(zeros(size(xtrain,2)+1))
               train!(m,xtrain,ytrain,ops)
               ŷval = predict(m,xval)
               valAccuracy = accuracy(ŷval,yval)
               return valAccuracy
           end
           if acc > bestAcc
               bestAcc = acc
               bestE = e
               bestShuffle = s
           end
       end
julia> bestAcc
0.925
julia> bestE
161
julia> bestShuffle
true
julia> ops = PerceptronTrainingOptions(epochs=bestE,shuffle=bestShuffle)
PerceptronTrainingOptions(161, false, true)
julia> m = Perceptron(zeros(size(xtest,2)+1))
Perceptron([0.0, 0.0, 0.0])
julia> train!(m,xtrain,ytrain,ops)
3-element Vector{Float64}:
 -5.0
  2.5832999999999586
  4.0829999999999735
julia> ŷtest = predict(m,xtest)
80-element Vector{Int64}:
  1
  1
  1
  1
  1
 -1
 -1
  1
 -1
 -1
  ⋮
  1
  1
 -1
 -1
 -1
  1
 -1
 -1
  1
julia> testAccuracy = accuracy(ŷtest,ytest)
0.875
julia> plot2DClassifierWithData(xtest,ytest,m.θ,pid=13)
"/home/runner/work/SPMLJ/SPMLJ/buildedDoc/03_-_ML1_-_Introduction_to_Machine_Learning/currentPlot13.svg"
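Note that the best cross-validated accuracy (0.925) was estimated on validation folds drawn from the training data and was used to choose the hyperparameters; the accuracy on the held-out test set (0.875) is the more honest estimate of out-of-sample performance.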

View this file on GitHub.


This page was generated using Literate.jl.