Neural networks are an awesome modeling technique for finding complex patterns in our data. Most neural network packages in R have levers for customizing the model architecture. For example, the R package RSNNS (Stuttgart Neural Network Simulator) contains a multi-argument function called mlp. The mlp function creates a multi-layer perceptron based on the argument values provided by the developer. If we want to build the best model possible, we need a way to test the model architecture. One option is to run the function manually and test different sets of configurations until we find the best model. Another, more automated, approach to parameter evaluation is to randomly generate the configuration sets.

Lots of parameters to choose from:


mlp(x, y, size = c(5), maxit = 100,
  initFunc = "Randomize_Weights", initFuncParams = c(-0.3, 0.3),
  learnFunc = "Std_Backpropagation", learnFuncParams = c(0.2, 0),
  updateFunc = "Topological_Order", updateFuncParams = c(0),
  hiddenActFunc = "Act_Logistic", shufflePatterns = TRUE, linOut = FALSE,
  outputActFunc = if (linOut) "Act_Identity" else "Act_Logistic",
  inputsTest = NULL, targetsTest = NULL, pruneFunc = NULL,
  pruneFuncParams = NULL, ...)
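
For orientation, here is a minimal sketch of fitting an mlp on R's built-in iris data. The dataset, the toy.* names, and the particular argument values are only illustrative and are not part of the tuning workflow below.


library(RSNNS)

data(iris)
iris.shuffled <- iris[sample(nrow(iris)), ]              # shuffle the rows
toy.inputs  <- iris.shuffled[, 1:4]
toy.targets <- decodeClassLabels(iris.shuffled[, 5])     # one-hot encode the class labels

toy.split <- splitForTrainingAndTest(toy.inputs, toy.targets, ratio = 0.2)
toy.split <- normTrainingAndTestSet(toy.split)           # normalize the inputs

toy.model <- mlp(toy.split$inputsTrain, toy.split$targetsTrain,
                 size = c(5), maxit = 100,
                 learnFunc = "Std_Backpropagation", learnFuncParams = c(0.2, 0),
                 inputsTest = toy.split$inputsTest, targetsTest = toy.split$targetsTest)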

A technique I have found useful is to randomly generate the configurations using either sample() or a random uniform distribution, wrap the model-fitting function in a loop, and run many simulations over many configuration sets. R has a popular package called "foreach" which can be used for our loop. An added benefit of the foreach package is its compatibility with Azure: if the developer specifies %dopar% instead of %do%, the workload will execute against a specified number of VMs in Azure. Running the workload against Azure can be dramatically faster; in some cases I have seen roughly a 10x reduction in processing time.
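
A minimal sketch of that pattern looks like the following; the cfg.* names are placeholders, and the full script later in this post fills in the actual mlp fit and evaluation. Swapping %do% for %dopar% is all it takes to push the iterations to a registered parallel backend.


library(foreach)

# Draw one random configuration per iteration and return it as a summary row.
my.sketch <- foreach(i = 1:100, .combine = rbind, .packages = "RSNNS") %do% {

    cfg.maxit <- sample(50:400, 1)    # training iterations
    cfg.size  <- sample(4:16, 1)      # hidden-layer neurons
    cfg.lr    <- runif(1, 0.1, 0.5)   # learning rate

    # ... fit mlp() with this configuration and compute error metrics here ...

    c(maxit = cfg.maxit, size = cfg.size, learningrate = cfg.lr)
}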

The developer needs to set up an Azure Batch account and an Azure Storage account before they can use the %dopar% functionality. After that, it's only a few quick configuration steps, which can be run inside a local R session. Here are 20 single-core machines in the boot process:

These were configured directly from my R session.

Here is a sample script for testing the service.


install.packages("RSNNS")
install.packages("clusterSim")
install.packages("dplyr")
install.packages("curl")
install.packages("httr")
install.packages("devtools")
install.packages("bitops")
install.packages("party")
install.packages("rpart")
install.packages("e1071")
install.packages("scales")
install.packages("reshape")
install.packages("ggplot2")
library(devtools) # use devtools to install packages from GitHub
install_github("Azure/rAzureBatch")
install_github("Azure/doAzureParallel")


sessionInfo()
.libPaths("D:\\Rpackages")

# 1) Import Data

getwd()
setwd('C:\\Users\\mshar\\Desktop\\SQL_Saturday')
#data from kaggle.com 
my.data <- read.csv('HR_comma_sep.csv')


# Take a look

head(my.data)
class(my.data)
nrow(my.data)

# 2) Data Prep

my.target <- my.data$left

my.inputs <- my.data[, -10] #Removes target var

my.numericCol <- sapply(my.inputs, is.numeric) # a data frame is a list


my.numericInputs <- my.inputs[, my.numericCol]
my.nominalInputs <- my.inputs[, my.numericCol == FALSE]
my.RawInputs <- my.numericInputs[, 3:5]


library(clusterSim)

my.normInputs <- data.Normalization(my.RawInputs, type = "n1", normalization = "column")

library(RSNNS)

my.nominalInputsBool <- apply(my.nominalInputs, 2, decodeClassLabels)

my.data <- cbind(my.numericInputs[, 1:2], my.normInputs, my.nominalInputsBool, my.target)
head(my.data)

#write.csv(my.finaldata, file = 'C:\\Users\\mshar\\Desktop\\SQL_Saturday\\cleanData.csv')

#shuffle data
my.data <- my.data[sample(nrow(my.data)), ]

my.splitData <- splitForTrainingAndTest(my.data[, 1:18], my.data[, 19], .33)


my.model <- mlp(x = my.splitData$inputsTrain, y = my.splitData$targetsTrain,
                size = c(5), maxit = 50,
                learnFunc = "Std_Backpropagation",
                learnFuncParams = c(0.2, 0.0))


#Testing Data
my.prediction <- predict(my.model, my.splitData$inputsTest)
my.error <- sqrt(mean((my.prediction - my.splitData$targetsTest) ^ 2)) # RMSE
my.prediction <- ifelse(my.prediction >= .9, 1, 0) # classify with a 0.9 cutoff

my.output <- cbind(my.prediction, my.splitData$targetsTest)


#Evaluation 

confusionMatrix(my.splitData$targetsTest, my.prediction)
my.TruePositiveRate <- sum(my.output[my.splitData$targetsTest == 1, 1]) / sum(my.output[my.splitData$targetsTest == 1, 2])
my.TrueNegativeRate <- (sum(ifelse(my.output[my.splitData$targetsTest == 0, 1] == 0, 1, 0))) / (nrow(my.output) - sum(my.output[my.splitData$targetsTest == 1, 2]))

my.TruePositiveRate
my.TrueNegativeRate
my.error


library(scales)
library(reshape)
library(devtools)
library(ggplot2)

# plot.nnet() is not included in the packages above; it needs to be sourced separately
# (it is commonly pulled in from a community gist, e.g. via devtools)
plot.nnet(my.model)


# Performs well, but we can tune the network architecture


# 1. Use intuition and research to find the most optimal configuration sets

# or

# 2. Randomly generate configuration sets, then run the mlp function
#    with the random configs in a loop




NeuralNetworkSimulation <- function(my.maxit, my.numHiddenNeurons, my.learningRate,
                                    my.learnfunctionswitch) {


 
    #----------------------------------------------------

    my.learnfunction <- switch(my.learnfunctionswitch, "Std_Backpropagation", "BackpropBatch",
        "BackpropChunk", "BackpropMomentum", "BackpropWeightDecay", "Rprop", "Quickprop", "SCG")


    #----------------------------------------------------
    startdate <- Sys.time()

    my.model <- mlp(x = my.splitData$inputsTrain, y = my.splitData$targetsTrain,
                    size = my.numHiddenNeurons,
                    maxit = my.maxit,
                    learnFuncParams = c(my.learningRate),
                    learnFunc = my.learnfunction
                    )

    my.executionTime <- difftime(Sys.time(), startdate, units = "secs")
    my.prediction <- predict(my.model, my.splitData$inputsTest)
    my.error <- sqrt(mean((my.prediction - my.splitData$targetsTest) ^ 2)) # RMSE
    my.prediction <- ifelse(my.prediction >= .9, 1, 0) # classify with a 0.9 cutoff
    my.output <- cbind(my.prediction, my.splitData$targetsTest)
    my.TruePositiveRate <- sum(my.output[my.splitData$targetsTest == 1, 1]) / sum(my.output[my.splitData$targetsTest == 1, 2])
    my.TrueNegativeRate <- (sum(ifelse(my.output[my.splitData$targetsTest == 0, 1] == 0, 1, 0))) / (nrow(my.output) - sum(my.output[my.splitData$targetsTest == 1, 2]))


    my.results <- cbind(
                      iterations = my.maxit,
                      hiddenlayerneuronsL1 = my.numHiddenNeurons,
                      learningfunction = my.learnfunction,
                      learningrate = my.learningRate,
                      test_rmse = my.error,
                      TruePositiveRate = my.TruePositiveRate,
                      TrueNegativeRate = my.TrueNegativeRate,
                      executionTime = my.executionTime)



    return(my.results)

}



NeuralNetworkSimulation(
                        my.maxit = sample(50:400, 1),
                        my.numHiddenNeurons = sample(4:16, 1),
                        my.learningRate = runif(1, .1, .5),
                        my.learnfunctionswitch = sample(1:8, 1)
                        )




## Execute simulation to analyze the optimal network configurations

## Resource intensive as data grows

## Run against doAzureParallel backend for scale and many simulations at once



library(doAzureParallel)

generateClusterConfig("pool_config.json") #  Generate pool config file
generateCredentialsConfig("credentials.json")
setCredentials("credentials.json")


pool <- makeCluster("pool_config.json") #  Create pool
registerDoAzureParallel(pool) #Register pool as backend

# Validate that the cluster is configured
getDoParWorkers()



# Invoke the NeuralNetworkSimulation() runs
parStartdate <- Sys.time()
my.packages <- c("RSNNS")
my.iterations <- 1000
# %dopar% sends the iterations to the registered doAzureParallel backend (use %do% to run locally)
my.simulation <- foreach(i = 1:my.iterations, .combine = 'c', .packages = my.packages) %dopar% {

    NeuralNetworkSimulation(
                        my.maxit = sample(50:400, 1),
                        my.numHiddenNeurons = sample(4:16, 1),
                        my.learningRate = runif(1, .1, .5),
                        my.learnfunctionswitch = sample(1:8, 1)
        )

}
parExecutionTime <- difftime(Sys.time(), parStartdate, units = "mins")
#Write to flat file
my.simulationstg <- matrix(my.simulation, nrow = my.iterations, ncol = 8, byrow = TRUE)
df <- data.frame(my.simulationstg)
colnames(df) <- c("iterations", "hiddenlayerneuronsL1", "learningfunction",
    "learningrate", "test_rmse", "TruePositiveRate", "TrueNegativeRate",
    "executionTime")

write.csv(df, file = "NetworkTuninglocal.csv")



# Shut down the cluster
stopCluster(pool)
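

One possible next step, assuming the CSV written above, is to surface the best-performing configuration sets by sorting the results on test error:


# Inspect the simulation results and list the lowest-error configuration sets
results <- read.csv("NetworkTuninglocal.csv", stringsAsFactors = FALSE)
results$test_rmse <- as.numeric(results$test_rmse)
head(results[order(results$test_rmse), ], 10)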