R 数据整理:重塑(reshape)



如何将数据从 df1 转换为 df2?

# Input: one row per match, listing both teams and the winner
# ("no result" marks a match without a winner).
a <- c(
  "New Zealand", "Afghanistan", "Afghanistan",
  "New Zealand", "Afghanistan", "Australia"
)
b <- c(
  "Sri Lanka", "Zimbabwe", "Zimbabwe",
  "Sri Lanka", "Zimbabwe", "India"
)
d <- c(
  "no result", "Zimbabwe", "Zimbabwe",
  "New Zealand", "Afghanistan", "Australia"
)
df1 <- data.frame(Team1 = a, Team2 = b, Winner = d)

# Desired output: one row per country with its match, win, loss and draw
# totals.
Country <- c(
  "New Zealand", "Sri Lanka", "Afghanistan",
  "Zimbabwe", "Australia", "India"
)
Match <- c(2, 2, 3, 3, 1, 1)
Win <- c(1, 0, 1, 2, 1, 0)
Loss <- c(0, 1, 2, 1, 0, 1)
Draw <- c(1, 1, 0, 0, 0, 0)
df2 <- data.frame(
  Country = Country,
  Match = Match,
  Win = Win,
  Loss = Loss,
  Draw = Draw
)

提前谢谢。

以下是使用 data.table 的大致思路:

library(data.table)

# Stack Team1 and Team2 into a single Country column so that every match
# contributes one row per participating team. as.data.table() works on a copy;
# setDT() would instead convert the caller's df1 to a data.table by reference.
df1_melted <- melt(
  as.data.table(df1),
  id.vars = "Winner",
  measure.vars = c("Team1", "Team2"),
  value.name = "Country"
)

# Per country: matches played, wins, losses (someone else won) and draws
# (matches recorded as "no result").
df2b <- df1_melted[
  ,
  .(
    Matches = .N,
    Win = sum(Winner == Country),
    Loss = sum(Winner != Country & Winner != "no result"),
    Draw = sum(Winner == "no result")
  ),
  by = Country
]
df2b
       Country Matches Win Loss Draw
1: New Zealand       2   1    0    1
2: Afghanistan       3   1    2    0
3:   Australia       1   1    0    0
4:   Sri Lanka       2   0    1    1
5:    Zimbabwe       3   2    1    0
6:       India       1   0    1    0

使用 dplyr 也能得到相同的结果:

library(tidyverse)

a <- c("New Zealand", "Afghanistan", "Afghanistan", "New Zealand", "Afghanistan", "Australia")
b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
d <- c("no result", "Zimbabwe", "Zimbabwe", "New Zealand", "Afghanistan", "Australia")
df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

df1 %>%
  # One row per (match, team): Team1/Team2 are stacked into a Country column.
  # pivot_longer()/pivot_wider() replace the superseded gather()/spread().
  pivot_longer(c(Team1, Team2), names_to = "Team", values_to = "Country") %>%
  # Label each row from that team's point of view; "no result" overrides the
  # Win/Loss label and counts as a Draw.
  mutate(
    Result = replace(
      ifelse(Country == Winner, "Win", "Loss"),
      Winner == "no result",
      "Draw"
    )
  ) %>%
  group_by(Country, Result) %>%
  summarise(count = n()) %>%
  # Spread the outcome counts into Win/Loss/Draw columns; outcomes a country
  # never had become 0 instead of NA.
  pivot_wider(names_from = Result, values_from = count, values_fill = 0) %>%
  mutate(Match = Win + Loss + Draw) %>%
  select(Country, Match, Win, Loss, Draw)

# A tibble: 6 x 5
# Groups:   Country [6]
    Country     Match   Win  Loss  Draw
    <chr>       <dbl>  <dbl> <dbl> <dbl>
1 Afghanistan     3     1     2     0
2   Australia     1     1     0     0
3       India     1     0     1     0
4 New Zealand     2     1     0     1
5   Sri Lanka     2     0     1     1
6    Zimbabwe     3     2     1     0

下面是另一种使用 dplyr 的方法:

#' Summarise one team's results
#'
#' Keeps the matches in which `team` appears as `Team1` or `Team2` and counts
#' how many it played, won, lost and drew. A match whose `Winner` is
#' "no result" counts as a draw; a match won by anyone else counts as a loss.
#'
#' dplyr functions are called with an explicit `dplyr::` prefix, so the
#' function neither attaches packages nor depends on what the caller has
#' attached, and a missing dplyr installation fails loudly instead of
#' `require()` silently returning `FALSE`.
#'
#' @param team Name of the team to summarise (a single string).
#' @param df Data frame with columns `Team1`, `Team2` and `Winner`.
#' @return A tibble with columns `country`, `match`, `win`, `loss` and `draw`;
#'   one row when `team` appears in `df`, zero rows otherwise.
tableresults <- function(team, df) {
  played <- dplyr::filter(df, Team1 == team | Team2 == team)

  # 0/1 indicator columns so that the totals below are plain sums.
  flagged <- dplyr::mutate(
    played,
    win = ifelse(Winner == team, 1, 0),
    draw = ifelse(Winner == "no result", 1, 0),
    loss = ifelse(!Winner %in% c("no result", team), 1, 0),
    country = team
  )

  # Grouping by the constant `country` column keeps the team name in the
  # output and yields zero rows (not a row of zeros) for an unknown team.
  totals <- dplyr::summarise(
    dplyr::group_by(flagged, country),
    match = dplyr::n(),
    win = sum(win),
    loss = sum(loss),
    draw = sum(draw)
  )

  dplyr::ungroup(totals)
}
# Every team that appears in either column, listed once. De-duplicating
# (Team1, Team2) pairs is not enough: a team that plays two different opponents
# would be listed twice and end up with duplicate rows in the result.
countries <- unique(c(as.character(df1$Team1), as.character(df1$Team2)))

# Build one summary tibble per team and bind them in a single step, rather
# than growing the result inside a loop; this also handles an empty team list,
# where 1:length(countries) would iterate over c(1, 0).
results_tbl <- bind_rows(lapply(countries, tableresults, df = df1))

结果:

> results_tbl
# A tibble: 6 x 5
  country     match   win  loss  draw
  <chr>       <int> <dbl> <dbl> <dbl>
1 New Zealand     2     1     0     1
2 Afghanistan     3     1     2     0
3 Australia       1     1     0     0
4 Sri Lanka       2     0     1     1
5 Zimbabwe        3     2     1     0
6 India           1     0     1     0

最新更新