# PART 1 - DATASET FOR OSSC HAZARD RATIO
# Whitespace-delimited table embedded as a string literal: the first non-blank
# line is the header and every following non-blank line is one observation.
# Blank lines inside the literal are skipped by read.table()
# (blank.lines.skip defaults to TRUE).
Input <- ("
Age Treat Dist Survival Time Hazard
59 1 1 0 48 0.2019
62 1 1 0 132 0.1895
66 1 2 1 13 0.3341
53 1 1 0 132 0.2292
60 1 1 0 120 0.1977
30 1 1 1 8 0.3729


63 1 1 0 72 0.1855
63 0 1 1 2 0.5061
71 0 1 1 1 0.4273
81 0 1 1 12 0.3459
43 0 1 0 24 0.7726
71 1 1 0 24 0.1566
69 1 1 0 24 0.1634
57 1 1 1 6 0.2106
78 1 1 0 24 0.1351
")
# `text = Input` lets read.table() open and close its own connection, so no
# textConnection is left open.
# NOTE: `data` masks utils::data() for the rest of the session; the name is
# kept so that existing references to it keep working.
data <- read.table(text = Input, header = TRUE)
print(data)

# PART 2 - PERFORMING BOOTSTRAP PROCEDURE
# Draw 1000 row indices with replacement from the original observations.
# Call set.seed() before this point if reproducible resamples are required.
mydata <- data
iboot <- sample(seq_len(nrow(mydata)), size = 1000, replace = TRUE)
bootdata <- mydata[iboot, ]
print(bootdata)

# PART 3 - RANDOMLY SPLIT THE BOOTSTRAP DATA 70:30
# 70 percent of the bootstrap rows form the training set, the remaining
# 30 percent form the test set.
# NOTE(review): the split is made AFTER resampling, so copies of the same
# original row can land in both sets; the test MSE below is therefore an
# optimistic estimate of out-of-sample error.
index <- sample(seq_len(nrow(bootdata)), round(0.70 * nrow(bootdata)))
train_data <- as.data.frame(bootdata[index, ])
test_data <- as.data.frame(bootdata[-index, ])

# PART 4 - BUILD THE MODEL ON THE TRAINING DATA
# Linear regression of Hazard on Age, Treat and Dist.
Model3 <- lm(Hazard ~ Age + Treat + Dist, data = train_data)
print(summary(Model3))

# Prediction MSE of the model on the held-out test set. `index` refers to rows
# of `bootdata`, so the hold-out rows are `test_data` (not rows of `data`).
test <- test_data
predict_lm <- predict(Model3, test)
MSE.lm <- mean((predict_lm - test$Hazard)^2)
print(MSE.lm)

# PART 5 - MODEL EVALUATION
# The predictions are computed once above and reused under the names the
# script has always exposed.
preds <- predict_lm
distPred <- preds
test_data$PredictedHazard <- preds

# Side-by-side table of observed and predicted Hazard for the test rows.
modelEval <- cbind(test_data$Hazard, preds)
colnames(modelEval) <- c("Actual", "Predicted")
modelEval <- as.data.frame(modelEval)
# max = 15 caps the number of printed entries, not the number of rows.
print(modelEval, max = 15)