R - 使用 `recipes::step_dummy()` 时,是否有方法指定参考水平(reference level)?



使用 step_dummy() 创建虚拟变量(dummy variable)时,是否有方法指定参考水平(reference level)?我可以通过设置 one_hot = TRUE 然后删除参考水平对应的列来做到这一点,但不知道是否可以在 step_dummy() 本身中直接指定。

library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
# Load the okc data set used by every example below.
data(okc)

# Default dummy encoding: the first factor level ("anything") acts as the
# reference level, so no "diet_anything" column shows up in the output.
default_rec <- recipe(Class ~ ., data = okc) %>%
  step_dummy(diet)

default_rec %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#>  [1] "diet_halal"               "diet_kosher"             
#>  [3] "diet_mostly.anything"     "diet_mostly.halal"       
#>  [5] "diet_mostly.kosher"       "diet_mostly.other"       
#>  [7] "diet_mostly.vegan"        "diet_mostly.vegetarian"  
#>  [9] "diet_other"               "diet_strictly.anything"  
#> [11] "diet_strictly.halal"      "diet_strictly.kosher"    
#> [13] "diet_strictly.other"      "diet_strictly.vegan"     
#> [15] "diet_strictly.vegetarian" "diet_vegan"              
#> [17] "diet_vegetarian"
# One-hot encoding: with one_hot = TRUE no level is dropped, so all 18 diet
# levels get their own indicator column.
one_hot_rec <- recipe(Class ~ ., data = okc) %>%
  step_dummy(diet, one_hot = TRUE)

one_hot_rec %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#>  [1] "diet_anything"            "diet_halal"              
#>  [3] "diet_kosher"              "diet_mostly.anything"    
#>  [5] "diet_mostly.halal"        "diet_mostly.kosher"      
#>  [7] "diet_mostly.other"        "diet_mostly.vegan"       
#>  [9] "diet_mostly.vegetarian"   "diet_other"              
#> [11] "diet_strictly.anything"   "diet_strictly.halal"     
#> [13] "diet_strictly.kosher"     "diet_strictly.other"     
#> [15] "diet_strictly.vegan"      "diet_strictly.vegetarian"
#> [17] "diet_vegan"               "diet_vegetarian"
# Workaround: make "vegan" the reference level by one-hot encoding every
# level and then removing the diet_vegan indicator column by hand.
drop_vegan_rec <- recipe(Class ~ ., data = okc) %>%
  step_dummy(diet, one_hot = TRUE) %>%
  step_select(-diet_vegan)

drop_vegan_rec %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#>  [1] "diet_anything"            "diet_halal"              
#>  [3] "diet_kosher"              "diet_mostly.anything"    
#>  [5] "diet_mostly.halal"        "diet_mostly.kosher"      
#>  [7] "diet_mostly.other"        "diet_mostly.vegan"       
#>  [9] "diet_mostly.vegetarian"   "diet_other"              
#> [11] "diet_strictly.anything"   "diet_strictly.halal"     
#> [13] "diet_strictly.kosher"     "diet_strictly.other"     
#> [15] "diet_strictly.vegan"      "diet_strictly.vegetarian"
#> [17] "diet_vegetarian"

由 reprex 包(v2.0.1)创建于 2021-11-19

来自 step_dummy() 的文档:

默认情况下,被排除的虚拟变量(即参考单元格)将对应于被转换的无序因子的第一个水平。

我们可以使用 step_relevel(),通过设置 ref_level 参数来指定新的参考水平。

library(tidymodels)
# Load the okc data set for this example.
data(okc)

# Set "vegan" as the reference level of diet before dummy encoding, so
# step_dummy() leaves out diet_vegan instead of the original first level.
relevel_rec <- recipe(Class ~ ., data = okc) %>%
  step_relevel(diet, ref_level = "vegan") %>%
  step_dummy(diet)

relevel_rec %>%
  prep() %>%
  bake(new_data = NULL) %>%
  select(starts_with("diet")) %>%
  names()
#> Warning: There are new levels in a factor: NA
#>  [1] "diet_anything"            "diet_halal"              
#>  [3] "diet_kosher"              "diet_mostly.anything"    
#>  [5] "diet_mostly.halal"        "diet_mostly.kosher"      
#>  [7] "diet_mostly.other"        "diet_mostly.vegan"       
#>  [9] "diet_mostly.vegetarian"   "diet_other"              
#> [11] "diet_strictly.anything"   "diet_strictly.halal"     
#> [13] "diet_strictly.kosher"     "diet_strictly.other"     
#> [15] "diet_strictly.vegan"      "diet_strictly.vegetarian"
#> [17] "diet_vegetarian"

相关内容

  • 没有找到相关文章

最新更新