函数if else语句从不同的数据集读取



我对r很陌生,我试图让我的函数在用户输入日期(mm/dd/yy)和季节(yyyy/yy)时输出一个表,其中包含我在函数中指定的列。我对如何设置我的初始数据帧感到困惑。

所以有3个季节,我从(3个url)。我的IF语句应该读入一个df(叫它dfmess,因为它很乱)。我该怎么做呢,希望这是有意义的,有人可以帮助我。如果有人能帮我整理一下这个烂摊子,我将非常感激。

    Season_2015_16 <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
    Season_2014_15 <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
    Season_2013_14 <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)
    **dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE)
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE)
    dfmess <- read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE)**
    EPL_Standings <- function(Standingdate, season){
      Standingdate <- as.Date(Standingdate, format = "%m/%d/%Y")
      Standingdate <- as.Date("08/30/15", format = "%m/%d/%y")
      if (season == "2015/16"){
        read.csv("http://www.football-data.co.uk/mmz4281/1516/E0.csv", stringsAsFactors = FALSE) -> dfmess
      } else if (season == "2014/15") {
        read.csv("http://www.football-data.co.uk/mmz4281/1415/E0.csv", stringsAsFactors = FALSE) -> dfmess
      } else if (season == "2013/14"){
        read.csv("http://www.football-data.co.uk/mmz4281/1314/E0.csv", stringsAsFactors = FALSE) -> dfmess
      }
    dfmess[dfmess$Date <= Standingdate, 2:6]
  #Team Record

  ddply(dfmess, 
        .(HomeTeam)
        , summarize
        , win = sum(FTR == "H")
        , Loss = sum(FTR == "A")
        , Draws = sum(FTR == "D")
  ) -> homewins1    #Homerecord
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , win = sum(FTR == "A")
        , Loss = sum(FTR == "H")
        , Draws = sum(FTR == "D")
  ) -> awaywins1  #Awayrecord
  merge(homewins1, awaywins1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> homeawayrec
  #homeawayrec
  (homeawayrec$win.x + homeawayrec$win.y) -> homeawayrec$totalwins #adds away and home wins together, then puts on table. renamed column later
  (homeawayrec$Loss.x + homeawayrec$Loss.y) -> homeawayrec$Total.Losses #same as above, but Losses. Named consistently with what I changed too after I changed above column. 
  (homeawayrec$Draws.x + homeawayrec$Draws.y) -> homeawayrec$Total.Draws #same as above, but with Draws. 
  #names(homeawayrec) <- c("Teams","Home.Wins", "Home.Losses", "Home.Draws", "Away.Wins", "Away.Losses", "Away.Draws", "Total.Wins") #Name all columns. make sure name right thing. 
  #homeawayrec[, c("Teams", "Total.Wins", "Total.Draws", "Total.Losses")] -> WDL
  #Need to make home and away record, should be easy. 
  #homewins1 #homerecord
  #awaywins1 #awayrecord
  #Matches Played
  (homeawayrec$totalwins + homeawayrec$Total.Draws + homeawayrec$Total.Losses) -> homeawayrec$MatchesPlayed
  #homeawayrec
  #homeawayrec[, c("Teams", "MatchesPlayed")] -> Matches.Played
  #Matches.Played #number of matches played
  #POINTS (need to make it per match)
  #ddply(dfmess, 
  # .(HomeTeam)                    #DDPLY Points was eliminated because multiplying the wins by 3 and draws by 1 was easier. 
  #  , summarize
  #   , win = 3*(sum(FTR == "H"))
  #    , Loss = 0*(sum(FTR == "A"))
  #     , Draws = 1*(sum(FTR == "D"))
  #) -> HomePoints   #team points at home
  #ddply(dfmess, 
  #     .(AwayTeam)
  #    , summarize
  #   , win = 3*(sum(FTR == "A"))
  #  , Loss = 0*(sum(FTR == "H"))
  #      , Draws = 1*(sum(FTR == "D"))
  #) -> AwayPoints   #team points on road
  #merge(HomePoints, AwayPoints, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> Pointshomeaway
  #(HomePoints$win +HomePoints$Draws + AwayPoints$win + AwayPoints$Draws) -> Pointshomeaway$TotalPoints
  #names(Pointshomeaway) <- c("Teams", "HomeWinPoints", "HomeLossPoints", "HomeDrawPoints", "AwayWinPoints", "AwayLossPoints", "AwayDrawPoints", "TotalPoints")
  #Pointshomeaway[, c("Teams", "TotalPoints")] -> Points.standings #table with just total points, total points standings. 
  #homeawayrec
  (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws))) -> homeawayrec$Points
  #points per match and point percentage %
  #homeawayrec
  (3*(homeawayrec$totalwins)+(1*(homeawayrec$Total.Draws)))/(homeawayrec$MatchesPlayed) -> homeawayrec$PPM
  #homeawayrec[, c("Teams", "PointsPerMatch")] -> PointsPer.Standings #standings with teams and points per match. 
  #Point Percentage
  (homeawayrec$Points)/(3*(homeawayrec$MatchesPlayed)) -> homeawayrec$PtPct
  #homeawayrec
  #Goals Scored
  ddply(dfmess,                           #THIS WORKED, make sure HomeTeam and FTHG/AwayTeam and FTAG match up. Goals allowed will be the inverse for each. 
        .(HomeTeam)
        , summarize
        , goalsscored1 = sum(FTHG)
  ) -> Homegoalmade1
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , goalsscored1 = sum(FTAG)
  ) -> Awaygoalsmade1
  merge(Homegoalmade1, Awaygoalsmade1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsmade1 #merged home/away goalsmmade1 THIS WORKED
  (goalsmade1$goalsscored1.x + goalsmade1$goalsscored1.y) -> goalsmade1$allgoalsmade1 #total goalsmade
  #goalsmade1
  goalsmade1[, c("HomeTeam", "allgoalsmade1")] -> GS
  merge(homeawayrec, GS, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec1
  #homeawayrec
  #homeawayrec1
  #Goals Scored Per Match
  ((homeawayrec1$allgoalsmade1)/(homeawayrec1$MatchesPlayed)) -> homeawayrec1$GSM
  #homeawayrec1
  #Goals Allowed
  ddply(dfmess,                           #THIS WORKED, make sure HomeTeam and FTAG/AwayTeam and FTHG match up. Goals scored will be the inverse for each. 
        .(HomeTeam)
        , summarize
        , goalsscored1 = sum(FTAG)
  ) -> Homegoalallowed1
  ddply(dfmess, 
        .(AwayTeam)
        , summarize
        , goalsscored1 = sum(FTHG)
  ) -> Awaygoalsallowed1
  merge(Homegoalallowed1, Awaygoalsallowed1, by.x = "HomeTeam", by.y = "AwayTeam", all = TRUE) -> goalsallowed1
  (goalsallowed1$goalsscored1.x + goalsallowed1$goalsscored1.y) -> goalsallowed1$GA
  #goalsallowed1
  goalsallowed1[, c("HomeTeam", "allgoalsallowed1")] -> GoalsAllowedall
  merge(homeawayrec1, goalsallowed1, by.x = "HomeTeam", by.y = "HomeTeam", all = TRUE) -> homeawayrec2
  #Goals Allowed Per Match
  ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
  names(homeawayrec2)
  #putting columns in correct order
  #if I rename columns the function gets confused and won't work?
  homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
  #Last 10 matches played
  ((tail(homeawayrec2$totalwins, n=10)) (tail(homeawayrec2$Total.Draws, n=10)) (tail(homeawayrec2$Total.Losses, n=10)))
  tail(homeawayrec2[, c("totalwins", "Total.Draws", "Total.Losses")], n=10)
  homeawayrec2
  print(homeawayrec2)
  return(dfmess)
}
      ((homeawayrec2$GA)/homeawayrec2$MatchesPlayed) -> homeawayrec2$GAM
      names(homeawayrec2)
      homeawayrec2[, c(1, 8, 10, 9, 2, 4, 3, 5, 7, 6, 11, 12, 13, 14, 15, 16, 19, 20)]
      print(homeawayrec2)
      return(dfmess)
    }
    A <- EPL_Standings("09/26/14", "2013/14") #example numbers, placeholders. 
    head(A)

从这里开始。一般来说,最好将相似的数据保存在同一个表中。

library(dplyr)
library(lubridate)
library(magrittr)
data =
  data_frame(start_season = c(2013, 2014, 2015)) %>%
  mutate(url =
           (start_season - 2000) %>%
           paste0(., . + 1) %>%
           sprintf("http://www.football-data.co.uk/mmz4281/%s/E0.csv", . ) ) %>%
  group_by(start_season) %>%
  do(read.csv(.$url)) %>%
  mutate(Date = dmy(Date) %>% as.Date) %>%
  arrange(Date)
cutoffs =
  data %>%
  group_by(start_season) %>%
  summarize(start_date = first(Date))
EPL_Standings = function(specific_date) {
  specific_date = mdy(specific_date) %>% as.Date
  this_season_so_far = 
    cutoffs %>%
    filter(specific_date > start_date) %>%
    last %>%
    left_join(data) %>%
    filter(Date <= specific_date)
  # some sort of processing here
}

最新更新