library(HMM)
library(readr)
library(stringdist)

# ---- Discover answer files and parse their names ---------------------------
# Produces three parallel character vectors, one entry per usable answer file:
#   usernames - the alphanumeric token found between a "-" and a "." in the
#               file's base name
#   scenarios - the single digit (1-3) found between two "-" in the base name
#   filenames - the full path of the file
# `hmmResults` is initialised here and filled in by the per-user HMM loop.
hmmResults <- c()

files <- list.files(
  path = "~/answers",
  # `pattern` is a regular expression, not a glob: match a literal ".csv"
  # extension at the end of the name.
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)

# Return the first match of `pattern` in each element of `x`, or NA where an
# element has no match. (Plain regmatches() drops non-matching elements, which
# would leave the result out of step with `x`.)
extractFirst <- function(pattern, x) {
  hit <- regexpr(pattern, x)
  out <- rep(NA_character_, length(x))
  out[hit > 0] <- regmatches(x, hit)
  out
}

# Parse the base name only, so that "-" or "." characters in directory names
# cannot be mistaken for the username or scenario token.
baseNames <- basename(files)
userTokens <- extractFirst("-[A-Za-z0-9]+\\.", baseNames)
scenarioTokens <- extractFirst("-[1-3]-", baseNames)

# Keep only files where both tokens were found, so the three vectors stay
# aligned element by element.
parsed <- !is.na(userTokens) & !is.na(scenarioTokens)
if (any(!parsed)) {
  warning(
    "Skipping answer files with unrecognised names: ",
    paste(files[!parsed], collapse = ", "),
    call. = FALSE
  )
}

# Strip the delimiters that were part of the match ("-user." -> "user",
# "-1-" -> "1").
usernames <- gsub("^-|\\.$", "", userTokens[parsed])
scenarios <- gsub("-", "", scenarioTokens[parsed], fixed = TRUE)
filenames <- files[parsed]
# ---- Per-user HMM anomaly detection ----------------------------------------
# For every (username, answer file) pair:
#   1. read the timestamps in the answer file and look up the week of each
#      one in the user's rows of `cert_r4_2_dataset`;
#   2. train a 10-state / 7-symbol HMM on the user's first `nTrainWeeks` weeks
#      of activity;
#   3. score every later week against the current model, then retrain the
#      model with that week included;
#   4. take the lowest-scoring week and record it if it is one of the weeks
#      from the answer file, otherwise record "FALSE".
# `cert_r4_2_dataset` must already exist, with `user`, `date`, `week` and
# `activity` columns.
hmmStates <- as.numeric(1:10)
hmmSymbols <- as.numeric(1:7) # labels representing the user's activities
nTrainWeeks <- 5

# Baum-Welch training with the settings used throughout this script.
fitHmm <- function(hmm, observations) {
  baumWelch(
    hmm,
    observations,
    maxIterations = 20,
    pseudoCount = 0.1,
    delta = 0.01
  )
}

# One slot per user; collected in a list so the loop does not regrow a vector.
results <- vector("list", length(usernames))

for (i in seq_along(usernames)) {
  # Only the third column of the answer file (timestamps) is read.
  answerFile <-
    read_csv(filenames[i],
             col_names = FALSE,
             col_types = cols_only(X3 = col_guess()))
  answerFile$X3 <-
    as.POSIXct(answerFile$X3, format = "%m/%d/%Y %H:%M:%S", tz = "UTC")

  # Filter the dataset to the rows relevant to the chosen user. `%in%` never
  # yields NA, so rows with a missing user are not dragged in as NA rows.
  user <- cert_r4_2_dataset[cert_r4_2_dataset$user %in% usernames[i], ]

  # Match answer-file dates to user dates: the week(s) in which the attack
  # ACTUALLY occurred.
  week <- user$week[match(answerFile$X3, user$date)]

  # One activity sequence per week.
  allWeeks <- split(user$activity, user$week)

  if (length(allWeeks) <= nTrainWeeks) {
    # Nothing is left to score after the training weeks; record NA instead of
    # failing on undefined columns further down.
    warning(
      "User ", usernames[i], " has ", length(allWeeks),
      " week(s) of activity; more than ", nTrainWeeks, " are required.",
      call. = FALSE
    )
    results[[i]] <- NA
    next
  }

  # Convert the per-week sequences into a data frame with one column per
  # week, padding shorter weeks with NA up to the longest week.
  res <-
    as.data.frame(do.call(
      cbind,
      lapply(allWeeks, `length<-`, max(lengths(allWeeks)))
    ))

  ############## HMM Phase ########################
  hmm <- initHMM(hmmStates, hmmSymbols)
  # Train the model with the first weeks of user activity.
  model <- fitHmm(hmm, na.omit(unlist(res[1:nTrainWeeks])))

  scoredWeeks <- seq(nTrainWeeks + 1, length(res))
  weekScores <- numeric(length(scoredWeeks))
  for (w in scoredWeeks) {
    # Score week w against the current model using the log forward
    # probabilities; the score is the sum over states of the final column.
    # NOTE(review): summing log values across states is not the sequence
    # log-likelihood (that would be a log-sum-exp of the column) - confirm
    # this scoring is intended before changing it.
    logForwardProbabilities <- forward(model$hmm, na.omit(unlist(res[w])))
    weekScores[w - nTrainWeeks] <-
      sum(logForwardProbabilities[, ncol(logForwardProbabilities)])
    print(w) # Show the current week so progress is visible in the console.
    # ... and update the model with week w.
    model <- fitHmm(model$hmm, na.omit(unlist(res[1:w])))
  }

  ########## Find Lowest Probability #############
  # The first scored week is column nTrainWeeks + 1, hence the offset.
  # NOTE(review): hmmWeek is a column position in `res`; it equals the week
  # label only if the user's weeks are numbered 1..N without gaps - confirm.
  lowest <- which.min(weekScores)
  if (length(lowest) == 0) {
    # Every score was NA/NaN, so no week can be singled out.
    results[[i]] <- NA
    next
  }
  hmmWeek <- lowest + nTrainWeeks

  # Record the detected week on a hit, the string "FALSE" on a miss (this
  # makes the combined result a character vector whenever any miss occurs).
  results[[i]] <- if (hmmWeek %in% week) hmmWeek else "FALSE"
}

hmmResults <- unlist(results)
if (is.null(hmmResults)) {
  hmmResults <- character(0)
}

fullResults <-
  data.frame(usernames, scenarios, filenames, hmmResults)