

# Dummy-encode the predictors of the train and test sets. In the formula,
# `0 +` drops the intercept column and `.` stands for every column of the data
# except the response `job_change`.
# NOTE(review): `enrollee_id` is not excluded, so it enters the model as a
# numeric feature -- confirm that this is intended.
candidates_var_train <- model.matrix(
  object = job_change ~ 0 + .,
  data = candidates_train
)
candidates_var_test <- model.matrix(
  object = job_change ~ 0 + .,
  data = candidates_test
)

# Wrap each design matrix in an xgb.DMatrix together with a numeric label:
# 1 where job_change is "Interested", 0 for any other non-missing value.
candidates_train_xgb <- xgb.DMatrix(
  data = candidates_var_train,
  label = as.numeric(candidates_train[["job_change"]] == "Interested")
)
candidates_test_xgb <- xgb.DMatrix(
  data = candidates_var_test,
  label = as.numeric(candidates_test[["job_change"]] == "Interested")
)


# Encode the prediction set exactly like the training data.
#
# The training matrix is the dummy-coded output of model.matrix() (columns such
# as "genderFemale", "genderMale", ...). Calling as.matrix() on the raw data
# frame instead keeps the original column names ("gender", ...) and, because of
# the factor columns, yields a character matrix. Predicting on that fails with
# "Feature names stored in `object` and `newdata` are different!".
#
# `job_change` is dropped if it happens to be present so that a placeholder
# response can never be encoded as a feature.
# NOTE(review): model.matrix() silently drops rows containing NA under the
# default na.action -- confirm candidates_predict has no missing values.
predictor_names <- setdiff(colnames(candidates_predict), "job_change")
candidates_predict_var <- model.matrix(
  ~ 0 + .,
  data = candidates_predict[, predictor_names, drop = FALSE]
)

# Fail loudly if the encoding still lacks a feature the training matrix has
# (e.g. because factor levels differ between the two data sets).
missing_features <- setdiff(
  colnames(candidates_var_train),
  colnames(candidates_predict_var)
)
if (length(missing_features) > 0) {
  stop(
    "Prediction data lacks features used in training: ",
    paste(missing_features, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the training features, in the training column order.
candidates_predict_var <- candidates_predict_var[
  , colnames(candidates_var_train),
  drop = FALSE
]

candidates_predict_sparse <- as(candidates_predict_var, "sparseMatrix")
candidates_predict_xgb <- xgb.DMatrix(data = candidates_predict_sparse)


Error in predict.xgb.Booster(xgb_model, newdata = candidates_predict_sparse,  : 
Feature names stored in `object` and `newdata` are different!




# dput() dump of a 10-row tibble holding predictors only: enrollee_id,
# city_development_index, gender, enrolled_university, company_size,
# company_type, last_new_job, training_hours, education_detail and
# experience_detail. There is no job_change column.
# NOTE(review): presumably a sample of candidates_predict -- confirm.
# The categorical columns are factors; each factor except experience_detail
# has a "keine Angabe" level (presumably the marker for unspecified values).
structure(list(enrollee_id = c(23427, 17605, 20912, 13948, 15205, 
15140, 21736, 19800, 23755, 12148), city_development_index = c(0.698, 
0.896, 0.754, 0.926, 0.92, 0.878, 0.926, 0.767, 0.689, 0.92), 
gender = structure(c(4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), levels = c("Female", "Male", "Other", "keine Angabe"
), class = "factor"), enrolled_university = structure(c(4L, 
2L, 1L, 2L, 1L, 3L, 3L, 2L, 2L, 2L), levels = c("Full time course", 
"no_enrollment", "Part time course", "keine Angabe"), class = "factor"), 
company_size = structure(c(9L, 9L, 9L, 5L, 3L, 9L, 3L, 6L, 
2L, 9L), levels = c("<10", "10/49", "100-500", "1000-4999", 
"10000+", "50-99", "500-999", "5000-9999", "keine Angabe"
), class = "factor"), company_type = structure(c(7L, 7L, 
7L, 6L, 6L, 7L, 6L, 6L, 6L, 7L), levels = c("Early Stage Startup", 
"Funded Startup", "NGO", "Other", "Public Sector", "Pvt Ltd", 
"keine Angabe"), class = "factor"), last_new_job = structure(c(6L, 
6L, 6L, 1L, 1L, 1L, 1L, 1L, 5L, 5L), levels = c("1", "2", 
"3", "4", ">4", "never", "keine Angabe"), class = "factor"), 
training_hours = c(63, 10, 46, 18, 55, 4, 324, 26, 140, 158
), education_detail = structure(c(8L, 7L, 7L, 21L, 8L, 22L, 
7L, 7L, 7L, 19L), levels = c("Graduate Arts", "Graduate Business Degree", 
"Graduate Humanities", "Graduate No Major", "Graduate no major discipline", 
"Graduate Other", "Graduate STEM", "High School", "keine Angabe", 
"Masters Arts", "Masters Business Degree", "Masters Humanities", 
"Masters No Major", "Masters no major discipline", "Masters Other", 
"Masters STEM", "Phd Arts", "Phd Business Degree", "Phd Humanities", 
"Phd Other", "Phd STEM", "Primary School"), class = "factor"), 
experience_detail = structure(c(23L, 23L, 23L, 23L, 23L, 
21L, 23L, 17L, 10L, 23L), levels = c("<1", ">20", "1", "10", 
"11", "12", "13", "14", "15", "16", "17", "18", "19", "2", 
"20", "3", "4", "5", "6", "7", "8", "9", "no relevant experience"
), class = "factor")), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))


# dput() dump of a 10-row tibble with the same predictor columns as the
# prediction sample plus a character column job_change ("Interested" /
# "Not interested"), placed between training_hours and education_detail.
# The object carries an na.action attribute of class "omit" that lists 13 row
# indices, i.e. rows with missing values were removed before this dump.
# NOTE(review): presumably a sample of candidates_train or candidates_test --
# which of the two is not visible here; confirm.
structure(list(enrollee_id = c(26270, 3166, 20087, 8518, 8899, 
25403, 14514, 3300, 10364, 5220), city_development_index = c(0.92, 
0.887, 0.698, 0.92, 0.92, 0.92, 0.624, 0.84, 0.926, 0.754), gender = structure(c(1L, 
2L, 2L, 2L, 4L, 2L, 2L, 4L, 4L, 2L), levels = c("Female", "Male", 
"Other", "keine Angabe"), class = "factor"), enrolled_university = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L), levels = c("Full time course", 
"no_enrollment", "Part time course", "keine Angabe"), class = "factor"), 
company_size = structure(c(7L, 9L, 1L, 9L, 9L, 3L, 9L, 2L, 
5L, 9L), levels = c("<10", "10/49", "100-500", "1000-4999", 
"10000+", "50-99", "500-999", "5000-9999", "keine Angabe"
), class = "factor"), company_type = structure(c(2L, 7L, 
2L, 7L, 7L, 6L, 7L, 6L, 4L, 7L), levels = c("Early Stage Startup", 
"Funded Startup", "NGO", "Other", "Public Sector", "Pvt Ltd", 
"keine Angabe"), class = "factor"), last_new_job = structure(c(3L, 
1L, 1L, 1L, 6L, 1L, 6L, 3L, 5L, 4L), levels = c("1", "2", 
"3", "4", ">4", "never", "keine Angabe"), class = "factor"), 
training_hours = c(127, 36, 7, 39, 53, 168, 111, 52, 107, 
46), job_change = c("Interested", "Not interested", "Not interested", 
"Not interested", "Not interested", "Not interested", "Not interested", 
"Not interested", "Not interested", "Not interested"), education_detail = structure(c(3L, 
7L, 16L, 22L, 22L, 3L, 8L, 7L, 8L, 6L), levels = c("Graduate Arts", 
"Graduate Business Degree", "Graduate Humanities", "Graduate No Major", 
"Graduate no major discipline", "Graduate Other", "Graduate STEM", 
"High School", "keine Angabe", "Masters Arts", "Masters Business Degree", 
"Masters Humanities", "Masters No Major", "Masters no major discipline", 
"Masters Other", "Masters STEM", "Phd Arts", "Phd Business Degree", 
"Phd Humanities", "Phd Other", "Phd STEM", "Primary School"
), class = "factor"), experience_detail = structure(c(17L, 
5L, 18L, 23L, 23L, 14L, 23L, 8L, 5L, 2L), levels = c("<1", 
">20", "1", "10", "11", "12", "13", "14", "15", "16", "17", 
"18", "19", "2", "20", "3", "4", "5", "6", "7", "8", "9", 
"no relevant experience"), class = "factor")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`505` = 505L, 
`688` = 688L, `1355` = 1355L, `1498` = 1498L, `1594` = 1594L, 
`3607` = 3607L, `4897` = 4897L, `5743` = 5743L, `5863` = 5863L, 
`5908` = 5908L, `6377` = 6377L, `7449` = 7449L, `7578` = 7578L
), class = "omit"))  


# dput() dump of a second 10-row tibble with the predictor columns plus the
# character column job_change ("Interested" / "Not interested").
# Its na.action attribute (class "omit") lists 7 row indices, i.e. rows with
# missing values were removed before this dump.
# NOTE(review): presumably a sample of candidates_train or candidates_test --
# which of the two is not visible here; confirm.
structure(list(enrollee_id = c(402, 27107, 8722, 6588, 4167, 
19061, 17139, 14928, 10164, 8612), city_development_index = c(0.762, 
0.92, 0.624, 0.926, 0.92, 0.926, 0.624, 0.92, 0.926, 0.92), gender = structure(c(2L, 
2L, 4L, 2L, 4L, 2L, 4L, 2L, 2L, 4L), levels = c("Female", "Male", 
"Other", "keine Angabe"), class = "factor"), enrolled_university = structure(c(2L, 
2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L), levels = c("Full time course", 
"no_enrollment", "Part time course", "keine Angabe"), class = "factor"), 
company_size = structure(c(1L, 6L, 9L, 2L, 6L, 3L, 7L, 3L, 
3L, 9L), levels = c("<10", "10/49", "100-500", "1000-4999", 
"10000+", "50-99", "500-999", "5000-9999", "keine Angabe"
), class = "factor"), company_type = structure(c(6L, 6L, 
7L, 6L, 6L, 6L, 6L, 6L, 6L, 7L), levels = c("Early Stage Startup", 
"Funded Startup", "NGO", "Other", "Public Sector", "Pvt Ltd", 
"keine Angabe"), class = "factor"), last_new_job = structure(c(5L, 
1L, 6L, 5L, 6L, 2L, 1L, 3L, 4L, 4L), levels = c("1", "2", 
"3", "4", ">4", "never", "keine Angabe"), class = "factor"), 
training_hours = c(18, 46, 26, 18, 106, 50, 148, 40, 42, 
50), job_change = c("Interested", "Interested", "Not interested", 
"Not interested", "Not interested", "Not interested", "Interested", 
"Not interested", "Interested", "Not interested"), education_detail = structure(c(7L, 
7L, 8L, 7L, 7L, 16L, 7L, 7L, 21L, 7L), levels = c("Graduate Arts", 
"Graduate Business Degree", "Graduate Humanities", "Graduate No Major", 
"Graduate no major discipline", "Graduate Other", "Graduate STEM", 
"High School", "keine Angabe", "Masters Arts", "Masters Business Degree", 
"Masters Humanities", "Masters No Major", "Masters no major discipline", 
"Masters Other", "Masters STEM", "Phd Arts", "Phd Business Degree", 
"Phd Humanities", "Phd Other", "Phd STEM", "Primary School"
), class = "factor"), experience_detail = structure(c(7L, 
20L, 23L, 10L, 3L, 5L, 8L, 2L, 2L, 23L), levels = c("<1", 
">20", "1", "10", "11", "12", "13", "14", "15", "16", "17", 
"18", "19", "2", "20", "3", "4", "5", "6", "7", "8", "9", 
"no relevant experience"), class = "factor")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`531` = 531L, 
`615` = 615L, `715` = 715L, `1000` = 1000L, `1148` = 1148L, `1318` = 1318L, 
`1416` = 1416L), class = "omit"))


> colnames(candidates_train)
[1] "enrollee_id"            "city_development_index" "gender"                 "enrolled_university"    "company_size"          
[6] "company_type"           "last_new_job"           "training_hours"         "job_change"             "education_detail"      
[11] "experience_detail"     
> colnames(candidates_var_train)
[1] "enrollee_id"                                  "city_development_index"                      
[3] "genderFemale"                                 "genderMale"                                  
[5] "genderOther"                                  "genderkeine Angabe"                                                 
[69] "experience_detail6"                           "experience_detail7"                          
[71] "experience_detail8"                           "experience_detail9"                          
[73] "experience_detailno relevant experience"
> colnames(candidates_predict_sparse)
[1] "enrollee_id"            "city_development_index" "gender"                 "enrolled_university"    "company_size"          
[6] "company_type"           "last_new_job"           "training_hours"         "education_detail"       "experience_detail"   


# The two-sided formula below names `job_change` as the response, so that
# column has to exist in the data. The prediction set has none, hence a
# constant placeholder is added; its value is irrelevant because `.` expands to
# every column except the response.
candidates_predict[["job_change"]] <- 0
candidates_predict_dummied <- model.matrix(
  object = job_change ~ 0 + .,
  data = candidates_predict
)
# Now you have the same structure and you can use it to predict:
> predict(xgb_model, candidates_predict_dummied)
[1]  0.3696896434  0.1225184500  0.0037288326 -0.0001312745 -0.1928645670 -0.0001312745 -0.2914776802  0.1280405670  0.3696896434
[10] -0.0001312745
