r-用于强制转换数据的矢量化解决方案.能够为每个非空单元格设置长格式,并将列+行索引作为引用



我从excel表中读取了以下数据。表。

structure(list(AC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 280.905387337496, NA, 278.107886102079, NA, 273.477288966662, 
NA, 271.411774831244, NA, 273.638082495827, NA, 272.50162891041, 
NA, 280.674549724992, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA), AD = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 275.737756776872, 
NA, 269.727645491455, NA, 266.930144256038, NA, 263.89490362062, 
NA, 263.621819435203, NA, 261.212208799786, NA, 261.633573914368, 
NA, 265.549082578951, NA, 261.625447343534, NA, 268.803616458116, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), AE = c(NA, NA, NA, NA, NA, 
NA, 277.733589716248, NA, 270.562934780831, NA, 264.953226695414, 
NA, 258.852399059996, NA, 255.063274274579, NA, 250.388883039162, 
NA, NA, NA, 248.428790868327, NA, 251.88758963291, NA, 255.240031297492, 
NA, 258.895903512075, NA, 259.733312576658, NA, 264.890696791241, 
NA, NA, NA, NA, NA, NA), AF = c(NA, NA, NA, NA, NA, 280.641907320207, 
NA, 267.471460684789, NA, 269.244186599372, NA, 250.102101613955, 
NA, 245.493401528538, NA, 242.83666704312, NA, 240.439569007703, 
NA, 238.261441472286, NA, 239.827709486868, NA, 241.347055251451, 
NA, 248.193640466034, NA, 252.819239180617, NA, 257.366634145199, 
NA, 262.117358859782, NA, NA, NA, NA, NA), AG = c(NA, NA, NA, 
NA, 275.779900324165, NA, 265.215202638748, NA, 251.181386603331, 
NA, 243.322538667914, NA, 236.940177532496, NA, 232.863262947079, 
NA, 231.417122511662, NA, 229.620629276244, NA, 231.333920340827, 
NA, 233.78232665541, NA, 236.352730819992, NA, 242.010619034575, 
NA, 247.896862199158, NA, 254.787241513741, NA, 260.814251428323, 
NA, NA, NA, NA), AH = c(NA, NA, NA, 275.525658278124, NA, 262.261367142707, 
NA, 252.735215257289, NA, NA, NA, 230.470301436455, NA, 223.928404651038, 
NA, 220.39266001562, NA, 220.607567230203, NA, 222.001786994786, 
NA, 223.987227109368, NA, 228.712926623951, NA, 233.128939288534, 
NA, NA, NA, 244.669942517699, NA, 253.171319082282, NA, 260.011647996865, 
NA, NA, NA), AI = c(NA, NA, 282.635081332083, NA, 260.852837746665, 
NA, 250.416394211248, NA, 238.343928225831, NA, 228.376707190414, 
NA, 220.780623854996, NA, 216.303306069579, NA, 211.960498734162, 
NA, 212.769754448744, NA, 217.648733313327, NA, 223.14708587791, 
NA, 224.854120642492, NA, 231.669424357075, NA, 239.157280321658, 
NA, 245.750485386241, NA, 254.311296800823, NA, 260.441535665406, 
NA, NA), AJ = c(NA, 275.902440636041, NA, 263.470237400624, NA, 
250.209074415207, NA, 236.422382229789, NA, 226.336291494372, 
NA, 232.454017758955, NA, 211.735345173538, NA, 207.87740108812, 
NA, 205.076771702703, NA, 209.677345217286, NA, 213.211219581868, 
NA, 220.924302346451, NA, 226.000354661034, NA, 233.162883025617, 
NA, 240.888478390199, NA, 248.160492004782, NA, 255.157228419365, 
NA, 262.885951933947, NA), AK = c(275.94224469, NA, 263.456862904583, 
NA, 251.916182419165, NA, 239.543414033748, NA, 227.402128748331, 
NA, 220.710080762913, NA, NA, NA, 205.361506592079, NA, 201.609919606662, 
NA, 202.234614471244, NA, 206.181404635827, NA, 212.52122955041, 
NA, NA, NA, 224.972524429575, NA, 235.879448344158, NA, 245.969925108741, 
NA, 251.865552723323, NA, 259.625557737906, NA, 270.551250552489
), AL = c(NA, 266.993938658541, NA, 254.377174573124, NA, 242.248401887707, 
NA, 230.210345552289, NA, 220.593477316872, NA, 211.549060531455, 
NA, 208.279208646038, NA, 199.70714251062, NA, 197.219328125203, 
NA, 199.608299589786, NA, 206.755187204368, NA, 214.727906418951, 
NA, 221.439981183534, NA, 230.498168448116, NA, 239.859786262699, 
NA, 250.044107527282, NA, 256.896949041865, NA, 266.433743256447, 
NA), AM = c(267.9847003125, NA, 257.617076077083, NA, NA, NA, 
234.773454506248, NA, 224.221269420831, NA, 215.236287485413, 
NA, 216.001749099996, NA, 240.078184514579, NA, 195.038145679162, 
NA, NA, NA, 199.822344908327, NA, 209.29031982291, NA, 217.478881387492, 
NA, 225.761287452075, NA, 235.013420016658, NA, 240.539925931241, 
NA, NA, NA, 261.143179710406, NA, 271.408832874989), AN = c(NA, 
263.963230531041, NA, 251.818817095624, NA, 240.403262610207, 
NA, 228.756225024789, NA, 219.802524589372, NA, 220.561729903955, 
NA, 230.492671018538, NA, 196.73274108312, NA, 194.845531497703, 
NA, 195.767400612286, NA, 201.544158526868, NA, 230.178308491451, 
NA, 220.827333006034, NA, 231.299444070617, NA, 239.419186335199, 
NA, 250.066473799782, NA, 259.515679814365, NA, 270.191120628947, 
NA), AO = c(272.452167735, NA, 260.576775199583, NA, 245.435594064165, 
NA, 235.121148378748, NA, 225.438588993331, NA, 217.986400557913, 
NA, 218.379612322496, NA, 212.388269937079, NA, 214.577039801662, 
NA, 198.553720366244, NA, 204.327350130827, NA, 209.12186894541, 
NA, 222.431212059992, NA, 228.070331374575, NA, 240.991783889158, 
NA, 247.528682253741, NA, 258.373043168323, NA, 268.366547282906, 
NA, 277.843906647489), AP = c(NA, 271.180341803541, NA, 259.239258118124, 
NA, 244.310791182707, NA, 235.945182947289, NA, 224.714189311872, 
NA, 217.327692026455, NA, 210.863998991038, NA, 206.54308870562, 
NA, 206.661023270203, NA, 207.998936334786, NA, 211.429581749368, 
NA, 217.963351963951, NA, 226.036171978534, NA, 235.022411793116, 
NA, 247.168083107699, NA, 258.262696022282, NA, 269.213414036865, 
NA, 278.556262951447, NA), AQ = c(NA, NA, 265.332032422083, NA, 
254.889332586665, NA, 245.353796251248, NA, 236.181125315831, 
NA, 246.862822430414, NA, 220.754347394996, NA, 215.923548659579, 
NA, 215.375187274162, NA, NA, NA, 218.104191953327, NA, 221.05935856791, 
NA, 226.570223732492, NA, 237.214383047075, NA, 248.631195411658, 
NA, 262.907136876241, NA, 274.280155140823, NA, 280.288396155406, 
NA, NA), AR = c(NA, NA, NA, 263.925696040624, NA, 252.303683655207, 
NA, 243.440699569789, NA, NA, NA, 230.009681348955, NA, 225.379084213538, 
NA, 221.49298677812, NA, 222.258448392703, NA, 223.881023107286, 
NA, 227.865351071868, NA, 233.567033386451, NA, 240.823406251034, 
NA, NA, NA, 262.968837980199, NA, 274.582723794782, NA, 287.400947359365, 
NA, NA, NA), AS = c(NA, NA, NA, NA, 262.678895309165, NA, 254.335184123748, 
NA, 245.910141038331, NA, 239.365116102914, NA, 235.247535567496, 
NA, 231.226927682079, NA, 229.921553996662, NA, 230.167742711244, 
NA, 232.816350625827, NA, 239.98200714041, NA, 247.019409504992, 
NA, 255.849241819575, NA, 265.332857484158, NA, 275.798712248741, 
NA, 285.457504313323, NA, NA, NA, NA), AT = c(NA, NA, NA, NA, 
NA, 264.316248877707, NA, 257.226925842289, NA, 251.720446706872, 
NA, 244.484100621455, NA, 241.489525936038, NA, 236.97154220062, 
NA, 237.802694965203, NA, 242.043531229786, NA, 247.401117044368, 
NA, 252.452144158951, NA, 261.447768423534, NA, 269.483050638116, 
NA, 282.454553552699, NA, 292.948561667282, NA, NA, NA, NA, NA
), AU = c(NA, NA, NA, NA, NA, NA, 267.320603996248, NA, 261.507566160831, 
NA, 255.888473625413, NA, 252.030529539996, NA, 251.472783704579, 
NA, 250.708579969162, NA, NA, NA, 255.129913798327, NA, 261.06307921291, 
NA, 268.313195777492, NA, 276.135763792075, NA, 288.994653306658, 
NA, 297.358391271241, NA, NA, NA, NA, NA, NA), AV = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 266.701626279372, NA, 262.824913293955, 
NA, 262.920950808538, NA, 260.03898952312, NA, 261.182957287703, 
NA, 265.007749602286, NA, 269.689655016868, NA, 275.579026331451, 
NA, 283.470413646034, NA, 292.428500110616, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), AW = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 270.731079462496, NA, 272.475652027079, NA, 271.630116391662, 
NA, 273.868936656244, NA, 277.452861420827, NA, 283.75827668541, 
NA, 290.848857599992, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA)), row.names = c(NA, -37L), class = c("tbl_df", "tbl", 
"data.frame"))

有没有一种矢量化的方法可以将此表转换为以下格式?结果应该是一个包含两列("单元格"one_answers"值"(的新data.frame/data.table。在原始表中不是NA的每个单元格都将是转换后的表中的一行。列"value"的内容是值,列"cell"的内容应包括来自原始单元格(例如AC9(的连接列和行信息

structure(list(cell = c("AC13", "AC15", "AC17", "AC19", "AC21", 
"AC23", "AC25", "AD10", "AD12", "AD14", "AD16", "AD18", "AD20", 
"AD22", "AD24", "AD26", "AD28", "AE7", "AE9", "AE11", "AE13", 
"AE15", "AE17", "AE21", "AE23", "AE25", "AE27", "AE29", "AE31", 
"AF6", "AF8", "AF10", "AF12", "AF14", "AF16", "AF18", "AF20", 
"AF22", "AF24", "AF26", "AF28", "AF30", "AF32", "AG5", "AG7", 
"AG9", "AG11", "AG13", "AG15", "AG17", "AG19", "AG21", "AG23", 
"AG25", "AG27", "AG29", "AG31", "AG33", "AH4", "AH6", "AH8", 
"AH12", "AH14", "AH16", "AH18", "AH20", "AH22", "AH24", "AH26", 
"AH30", "AH32", "AH34", "AI3", "AI5", "AI7", "AI9", "AI11", "AI13", 
"AI15", "AI17", "AI19", "AI21", "AI23", "AI25", "AI27", "AI29", 
"AI31", "AI33", "AI35", "AJ2", "AJ4", "AJ6", "AJ8", "AJ10", "AJ12", 
"AJ14", "AJ16", "AJ18", "AJ20", "AJ22", "AJ24", "AJ26", "AJ28", 
"AJ30", "AJ32", "AJ34", "AJ36", "AK1", "AK3", "AK5", "AK7", "AK9", 
"AK11", "AK15", "AK17", "AK19", "AK21", "AK23", "AK27", "AK29", 
"AK31", "AK33", "AK35", "AK37", "AL2", "AL4", "AL6", "AL8", "AL10", 
"AL12", "AL14", "AL16", "AL18", "AL20", "AL22", "AL24", "AL26", 
"AL28", "AL30", "AL32", "AL34", "AL36", "AM1", "AM3", "AM7", 
"AM9", "AM11", "AM13", "AM15", "AM17", "AM21", "AM23", "AM25", 
"AM27", "AM29", "AM31", "AM35", "AM37", "AN2", "AN4", "AN6", 
"AN8", "AN10", "AN12", "AN14", "AN16", "AN18", "AN20", "AN22", 
"AN24", "AN26", "AN28", "AN30", "AN32", "AN34", "AN36", "AO1", 
"AO3", "AO5", "AO7", "AO9", "AO11", "AO13", "AO15", "AO17", "AO19", 
"AO21", "AO23", "AO25", "AO27", "AO29", "AO31", "AO33", "AO35", 
"AO37", "AP2", "AP4", "AP6", "AP8", "AP10", "AP12", "AP14", "AP16", 
"AP18", "AP20", "AP22", "AP24", "AP26", "AP28", "AP30", "AP32", 
"AP34", "AP36", "AQ3", "AQ5", "AQ7", "AQ9", "AQ11", "AQ13", "AQ15", 
"AQ17", "AQ21", "AQ23", "AQ25", "AQ27", "AQ29", "AQ31", "AQ33", 
"AQ35", "AR4", "AR6", "AR8", "AR12", "AR14", "AR16", "AR18", 
"AR20", "AR22", "AR24", "AR26", "AR30", "AR32", "AR34", "AS5", 
"AS7", "AS9", "AS11", "AS13", "AS15", "AS17", "AS19", "AS21", 
"AS23", "AS25", "AS27", "AS29", "AS31", "AS33", "AT6", "AT8", 
"AT10", "AT12", "AT14", "AT16", "AT18", "AT20", "AT22", "AT24", 
"AT26", "AT28", "AT30", "AT32", "AU7", "AU9", "AU11", "AU13", 
"AU15", "AU17", "AU21", "AU23", "AU25", "AU27", "AU29", "AU31", 
"AV10", "AV12", "AV14", "AV16", "AV18", "AV20", "AV22", "AV24", 
"AV26", "AV28", "AW13", "AW15", "AW17", "AW19", "AW21", "AW23", 
"AW25"), value = c(280.905387337496, 278.107886102079, 273.477288966662, 
271.411774831244, 273.638082495827, 272.50162891041, 280.674549724992, 
275.737756776872, 269.727645491455, 266.930144256038, 263.89490362062, 
263.621819435203, 261.212208799786, 261.633573914368, 265.549082578951, 
261.625447343534, 268.803616458116, 277.733589716248, 270.562934780831, 
264.953226695414, 258.852399059996, 255.063274274579, 250.388883039162, 
248.428790868327, 251.88758963291, 255.240031297492, 258.895903512075, 
259.733312576658, 264.890696791241, 280.641907320207, 267.471460684789, 
269.244186599372, 250.102101613955, 245.493401528538, 242.83666704312, 
240.439569007703, 238.261441472286, 239.827709486868, 241.347055251451, 
248.193640466034, 252.819239180617, 257.366634145199, 262.117358859782, 
275.779900324165, 265.215202638748, 251.181386603331, 243.322538667914, 
236.940177532496, 232.863262947079, 231.417122511662, 229.620629276244, 
231.333920340827, 233.78232665541, 236.352730819992, 242.010619034575, 
247.896862199158, 254.787241513741, 260.814251428323, 275.525658278124, 
262.261367142707, 252.735215257289, 230.470301436455, 223.928404651038, 
220.39266001562, 220.607567230203, 222.001786994786, 223.987227109368, 
228.712926623951, 233.128939288534, 244.669942517699, 253.171319082282, 
260.011647996865, 282.635081332083, 260.852837746665, 250.416394211248, 
238.343928225831, 228.376707190414, 220.780623854996, 216.303306069579, 
211.960498734162, 212.769754448744, 217.648733313327, 223.14708587791, 
224.854120642492, 231.669424357075, 239.157280321658, 245.750485386241, 
254.311296800823, 260.441535665406, 275.902440636041, 263.470237400624, 
250.209074415207, 236.422382229789, 226.336291494372, 232.454017758955, 
211.735345173538, 207.87740108812, 205.076771702703, 209.677345217286, 
213.211219581868, 220.924302346451, 226.000354661034, 233.162883025617, 
240.888478390199, 248.160492004782, 255.157228419365, 262.885951933947, 
275.94224469, 263.456862904583, 251.916182419165, 239.543414033748, 
227.402128748331, 220.710080762913, 205.361506592079, 201.609919606662, 
202.234614471244, 206.181404635827, 212.52122955041, 224.972524429575, 
235.879448344158, 245.969925108741, 251.865552723323, 259.625557737906, 
270.551250552489, 266.993938658541, 254.377174573124, 242.248401887707, 
230.210345552289, 220.593477316872, 211.549060531455, 208.279208646038, 
199.70714251062, 197.219328125203, 199.608299589786, 206.755187204368, 
214.727906418951, 221.439981183534, 230.498168448116, 239.859786262699, 
250.044107527282, 256.896949041865, 266.433743256447, 267.9847003125, 
257.617076077083, 234.773454506248, 224.221269420831, 215.236287485413, 
216.001749099996, 240.078184514579, 195.038145679162, 199.822344908327, 
209.29031982291, 217.478881387492, 225.761287452075, 235.013420016658, 
240.539925931241, 261.143179710406, 271.408832874989, 263.963230531041, 
251.818817095624, 240.403262610207, 228.756225024789, 219.802524589372, 
220.561729903955, 230.492671018538, 196.73274108312, 194.845531497703, 
195.767400612286, 201.544158526868, 230.178308491451, 220.827333006034, 
231.299444070617, 239.419186335199, 250.066473799782, 259.515679814365, 
270.191120628947, 272.452167735, 260.576775199583, 245.435594064165, 
235.121148378748, 225.438588993331, 217.986400557913, 218.379612322496, 
212.388269937079, 214.577039801662, 198.553720366244, 204.327350130827, 
209.12186894541, 222.431212059992, 228.070331374575, 240.991783889158, 
247.528682253741, 258.373043168323, 268.366547282906, 277.843906647489, 
271.180341803541, 259.239258118124, 244.310791182707, 235.945182947289, 
224.714189311872, 217.327692026455, 210.863998991038, 206.54308870562, 
206.661023270203, 207.998936334786, 211.429581749368, 217.963351963951, 
226.036171978534, 235.022411793116, 247.168083107699, 258.262696022282, 
269.213414036865, 278.556262951447, 265.332032422083, 254.889332586665, 
245.353796251248, 236.181125315831, 246.862822430414, 220.754347394996, 
215.923548659579, 215.375187274162, 218.104191953327, 221.05935856791, 
226.570223732492, 237.214383047075, 248.631195411658, 262.907136876241, 
274.280155140823, 280.288396155406, 263.925696040624, 252.303683655207, 
243.440699569789, 230.009681348955, 225.379084213538, 221.49298677812, 
222.258448392703, 223.881023107286, 227.865351071868, 233.567033386451, 
240.823406251034, 262.968837980199, 274.582723794782, 287.400947359365, 
262.678895309165, 254.335184123748, 245.910141038331, 239.365116102914, 
235.247535567496, 231.226927682079, 229.921553996662, 230.167742711244, 
232.816350625827, 239.98200714041, 247.019409504992, 255.849241819575, 
265.332857484158, 275.798712248741, 285.457504313323, 264.316248877707, 
257.226925842289, 251.720446706872, 244.484100621455, 241.489525936038, 
236.97154220062, 237.802694965203, 242.043531229786, 247.401117044368, 
252.452144158951, 261.447768423534, 269.483050638116, 282.454553552699, 
292.948561667282, 267.320603996248, 261.507566160831, 255.888473625413, 
252.030529539996, 251.472783704579, 250.708579969162, 255.129913798327, 
261.06307921291, 268.313195777492, 276.135763792075, 288.994653306658, 
297.358391271241, 266.701626279372, 262.824913293955, 262.920950808538, 
260.03898952312, 261.182957287703, 265.007749602286, 269.689655016868, 
275.579026331451, 283.470413646034, 292.428500110616, 270.731079462496, 
272.475652027079, 271.630116391662, 273.868936656244, 277.452861420827, 
283.75827668541, 290.848857599992)), row.names = c(NA, -301L), class = "data.frame")

目前,我使用两个嵌套循环来实现所需的结果:

cols <- ncol(dt)
rows <- nrow(dt)
transformedDT <- data.frame(cell=character(), value=numeric())
for (c in 1:cols) {
currentCol <- colnames(dt)[c]
for (r in 1:rows) {
curCell <- dt[[r, c]]
if (!is.na(curCell)){
transformedDT <- rbind(transformedDT, data.frame(cell=paste0(currentCol,r), value=curCell))
}
}
}

由于我有很多这样的文件要处理,所以执行时间需要改进。

data.table解决方案比dplyr解决方案更可取,因为我把data.table作为我的主力。

我们可以在一个新列中创建一个行索引,并以长格式获取数据。粘贴列名和行索引以创建cell列。

library(data.table)
setDT(df)[, row := seq_len(.N)]
df <- melt(df, na.rm = TRUE, id.vars = 'row', variable.name = 'cell')
df[, cell := paste0(cell, row)][, row := NULL]
df
#     cell    value
#  1: AC13 280.9054
#  2: AC15 278.1079
#  3: AC17 273.4773
#  4: AC19 271.4118
#  5: AC21 273.6381
# ---              
#297: AW17 271.6301
#298: AW19 273.8689
#299: AW21 277.4529
#300: AW23 283.7583
#301: AW25 290.8489

等效dplyr答案为:

library(dplyr)
df %>%
mutate(row = row_number()) %>%
tidyr::pivot_longer(cols = -row, names_to = 'cell', values_drop_na = TRUE) %>%
mutate(cell = paste0(cell, row)) %>%
select(-row)

使用基R的reshape。只需事先在列名中添加适当的前缀即可。

names(dat) <- paste0("value.", names(dat))
r <- reshape(transform(dat, id=rownames(dat)), timevar="cell", varying=names(dat), dir="l")
head(na.omit(r))  ## result
#       id cell    value
# 13.AC 13   AC 280.9054
# 15.AC 15   AC 278.1079
# 17.AC 17   AC 273.4773
# 19.AC 19   AC 271.4118
# 21.AC 21   AC 273.6381
# 23.AC 23   AC 272.5016

最新更新