在 R 中将多阶段事件数据转换为纵向数据

我有一个关于职位晋升的数据集。对于每个人，我都有他们被公司雇用、每次晋升以及离开公司的确切日期，另外还有一些个人特征（如种族、学历）。我使用的是 R。

SubjectID Entry     Stage1    Stage2    Stage3    Stage4   Exit     Race  Edu
1      1/12/1990 1/12/1990 1/12/1990  4/3/1994          5/5/1994 B     M
2      1/17/1991 1/17/1991 3/3/1991   3/18/1992 1/1/1993         W     C 
3      1/24/1991 1/24/1991 5/6/1994                              B     M

我想将这些数据转换为长格式的纵向数据集：每个日期对应一行，记录该人在当天所处的阶段，同时保留不随时间变化的特征。观察期的截止日期是 1/1/1995，此后没有任何观测。我研究过 reshape 包，但没有找到我需要的功能。

我的数据：

structure(list(ï..Name = structure(c(2L, 1L, 4L, 3L), .Label = c("Ademulegun, Sauel Adesujo", "Bassey, Wellington Umo", "Imo, U. O.", "Lawan, Umar"), class = "factor"), Mons = c(0L, 0L, 0L, 0L), Sandhurst = c(0L, 0L, 1L, 0L), Entry = structure(c(2L, 3L, 1L, 4L), .Label = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955"), class = "factor"), Second.Lieutenant = structure(c(2L, 3L, 1L, 4L), .Label = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955"), class = "factor"), Lieutenant = structure(c(2L, 1L, 4L, 3L), .Label = c("12/20/1949", "4/30/1949", "5/3/1958", "8/1/1955"), class = "factor"), Captain = structure(c(2L, 3L, 1L, 4L), .Label = c("", "2/7/1951", "3/5/1952", "5/3/1958"), class = "factor"), Major = structure(c(4L, 3L, 1L, 2L), .Label = c("", "1/15/1963", "12/27/1958", "6/21/1957"), class = "factor"), Lieutenant.Colonel = structure(c(4L, 3L, 1L, 2L), .Label = c("", "1/15/1963", "10/3/1962", "8/30/1962"), class = "factor"), Colonel = structure(c(3L, 2L, 1L, 1L), .Label = c("", "10/3/1962", "2/26/1966"), class = "factor"), Brigadier.General = structure(c(3L, 2L, 1L, 1L), .Label = c("", "10/3/1962", "2/26/1966"), class = "factor"), Depature = structure(c(2L, 1L, 3L, 4L), .Label = c("1/15/1966", "11/1/1966", "5/8/1956", "7/6/1967"), class = "factor"), ethnicity = structure(c(1L, 4L, 3L, 2L), .Label = c("Efik", "Igbo", "Kanuri", "Yoruba"), class = "factor")), class = "data.frame", row.names = c(NA, -4L))

我正在寻找这样的东西：

Name  Date       Mons Sandhurst Ethnicity Rank
Bassey 4/30/1949  0     0       Efik      Lieutenant
Bassey 5/1/1949   0     0       Efik      Lieutenant
....
Bassey 2/7/1951   0     0       Efik      Captain

data.table solution

library(data.table)

示例数据

# Example data: one row per officer, one column per career event.
# Event dates are stored as m/d/Y factors; a blank string appears where no
# date was recorded for that rank.
df <- structure(
  list(
    Name = factor(
      c("Bassey, Wellington Umo", "Ademulegun, Sauel Adesujo",
        "Lawan, Umar", "Imo, U. O."),
      levels = c("Ademulegun, Sauel Adesujo", "Bassey, Wellington Umo",
                 "Imo, U. O.", "Lawan, Umar")
    ),
    Mons = c(0L, 0L, 0L, 0L),
    Sandhurst = c(0L, 0L, 1L, 0L),
    Entry = factor(
      c("4/30/1949", "6/11/1949", "2/6/1953", "6/4/1955"),
      levels = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955")
    ),
    Second.Lieutenant = factor(
      c("4/30/1949", "6/11/1949", "2/6/1953", "6/4/1955"),
      levels = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955")
    ),
    Lieutenant = factor(
      c("4/30/1949", "12/20/1949", "8/1/1955", "5/3/1958"),
      levels = c("12/20/1949", "4/30/1949", "5/3/1958", "8/1/1955")
    ),
    Captain = factor(
      c("2/7/1951", "3/5/1952", "", "5/3/1958"),
      levels = c("", "2/7/1951", "3/5/1952", "5/3/1958")
    ),
    Major = factor(
      c("6/21/1957", "12/27/1958", "", "1/15/1963"),
      levels = c("", "1/15/1963", "12/27/1958", "6/21/1957")
    ),
    Lieutenant.Colonel = factor(
      c("8/30/1962", "10/3/1962", "", "1/15/1963"),
      levels = c("", "1/15/1963", "10/3/1962", "8/30/1962")
    ),
    Colonel = factor(
      c("2/26/1966", "10/3/1962", "", ""),
      levels = c("", "10/3/1962", "2/26/1966")
    ),
    Brigadier.General = factor(
      c("2/26/1966", "10/3/1962", "", ""),
      levels = c("", "10/3/1962", "2/26/1966")
    ),
    Depature = factor(
      c("11/1/1966", "1/15/1966", "5/8/1956", "7/6/1967"),
      levels = c("1/15/1966", "11/1/1966", "5/8/1956", "7/6/1967")
    ),
    ethnicity = factor(
      c("Efik", "Yoruba", "Kanuri", "Igbo"),
      levels = c("Efik", "Igbo", "Kanuri", "Yoruba")
    )
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

使用 data.table 的 melt 快速重塑数据

library(data.table)

# Columns describing the person; every other column holds an event date.
id_cols <- c("Name", "Mons", "Sandhurst", "ethnicity")
date_cols <- setdiff(names(df), id_cols)

# Convert to data.table by reference (as the original call did).
setDT(df)

# Blank cells are stored as "" rather than NA, so melt(na.rm = TRUE) would
# keep them. Turn the date columns into character and map "" to NA first.
df[, (date_cols) := lapply(.SD, function(x) {
  x <- as.character(x)
  x[!nzchar(x)] <- NA_character_
  x
}), .SDcols = date_cols]

# Wide -> long: one row per (person, event) with a recorded date.
# Left as the final top-level expression so the result is printed.
data.table::melt(
  data = df,
  id.vars = id_cols,
  measure.vars = date_cols,
  variable.name = "Rank",
  value.name = "Date",
  na.rm = TRUE
)

结果

#                         Name Mons Sandhurst ethnicity               Rank       Date
# 1:    Bassey, Wellington Umo    0         0      Efik              Entry  4/30/1949
# 2: Ademulegun, Sauel Adesujo    0         0    Yoruba              Entry  6/11/1949
# 3:               Lawan, Umar    0         1    Kanuri              Entry   2/6/1953
# 4:                Imo, U. O.    0         0      Igbo              Entry   6/4/1955
# 5:    Bassey, Wellington Umo    0         0      Efik  Second.Lieutenant  4/30/1949
# 6: Ademulegun, Sauel Adesujo    0         0    Yoruba  Second.Lieutenant  6/11/1949
# 7:               Lawan, Umar    0         1    Kanuri  Second.Lieutenant   2/6/1953
# 8:                Imo, U. O.    0         0      Igbo  Second.Lieutenant   6/4/1955
# 9:    Bassey, Wellington Umo    0         0      Efik         Lieutenant  4/30/1949

根据需要重新排序...

注意：如果希望"空"日期在 melt 后的数据中消失，请确保源数据中的空白日期为 NA；这样 melt 函数的 na.rm = TRUE 才会将它们移除。

相关内容

最新更新

热门标签：