Research Article

Distance Measurement Methods for Improved Insider Threat Detection

Algorithm 1

CERT data extrapolation code.
library(lubridate)
library(readr)
#Read Each File ############################
# Each CERT r4.2 log is loaded with read_csv(); the columns listed in cols()
# are dropped at read time with col_skip(), and every other column in the
# CSV is kept with its guessed type.
logon <- read_csv(
  "~/r4.2/logon.csv",
  col_types = cols(
    id = col_skip(),
    pc = col_skip()
  )
)
device <- read_csv(
  "~/r4.2/device.csv",
  col_types = cols(
    id = col_skip(),
    pc = col_skip()
  )
)
http <- read_csv(
  "~/r4.2/http.csv",
  col_types = cols(
    content = col_skip(),
    id = col_skip(),
    pc = col_skip(),
    url = col_skip()
  )
)
email <- read_csv(
  "~/r4.2/email.csv",
  col_types = cols(
    attachments = col_skip(),
    bcc = col_skip(),
    cc = col_skip(),
    content = col_skip(),
    from = col_skip(),
    id = col_skip(),
    pc = col_skip(),
    size = col_skip(),
    to = col_skip()
  )
)
file <- read_csv(
  "~/r4.2/file.csv",
  col_types = cols(
    content = col_skip(),
    filename = col_skip(),
    id = col_skip(),
    pc = col_skip()
  )
)
# The http, email and file tables get a constant `activity` label naming the
# log they came from (the scalar is recycled to every row).
# NOTE(review): logon and device presumably already carry an `activity`
# column from their CSVs - confirm against the data.
http["activity"] <- "Http"
email["activity"] <- "Email"
file["activity"] <- "File"
###############################################
#Assign Tags to Each Activity ################
# Logon = 1, Logoff = 2, Connect = 3, Disconnect = 4, Http = 5, Email = 6,  File = 7
# Every label in a table's `activity` column is overwritten with its tag.
# The column must be addressed as `table$activity` on both sides of the
# assignment; the bare names `logonactivity`, `deviceactivity`, ... do not
# exist and would stop the script with "object not found".
# NOTE(review): if `activity` is a character column, replace() stores the
# tags as the strings "1".."7" rather than numbers - confirm that downstream
# code expects that.
logon$activity <- replace(logon$activity, logon$activity == "Logon", 1)
logon$activity <- replace(logon$activity, logon$activity == "Logoff", 2)
device$activity <- replace(device$activity, device$activity == "Connect", 3)
device$activity <- replace(device$activity, device$activity == "Disconnect", 4)
http$activity <- replace(http$activity, http$activity == "Http", 5)
email$activity <- replace(email$activity, email$activity == "Email", 6)
file$activity <- replace(file$activity, file$activity == "File", 7)
##############################################
#Data Frame Conversion and Join ##############
# Turn each table into a plain data.frame before stacking them.
logon <- as.data.frame(logon)
device <- as.data.frame(device)
http <- as.data.frame(http)
email <- as.data.frame(email)
file <- as.data.frame(file)
# Stack the five tables: the k-th columns of all tables are concatenated with
# c(), and the result takes its column names from `logon`. Columns are paired
# by position, not by name.
# NOTE(review): this assumes all five tables have the same columns in the
# same order - confirm against the CSV layouts.
join <- as.data.frame(Map(c, logon, device, http, email, file))
##############################################
#Parse and Sort data by Date/Time ############
# Parse the `date` column (month/day/year hour:minute:second text) into
# POSIXct so rows can be ordered chronologically. Values that do not match
# the format become NA, and order() places those rows last.
join$date <- as.POSIXct(join$date, format = "%m/%d/%Y %H:%M:%S")
join <- join[order(join$date), ]
# Label the weeks starting at week 1. A "week" is a 604800-second
# (7 * 24 * 3600) window counted from the earliest timestamp, which is row 1
# after the sort above. The elapsed time is requested explicitly in seconds
# because plain POSIXct subtraction picks its units automatically.
join$week <- (as.numeric(difftime(join$date, join$date[1], units = "secs")) %/%
  604800) + 1
##############################################