我有一组从 Hadoop 中提取的数据，如下所示：
1234567 ask 13
1234567 death 2
1234567 freek 15
1234567 hurt 2
1234567 pain 10
9999999 death 15
9999999 frerik 14
9999999 hurt 1
9999999 lisa 2
9999999 pain 12
我需要将其转换为如下所示的数据框：
death freek frerik hurt lisa pain
1234567 2 15 0 2 0 10
9999999 15 0 14 1 2 12
实现这一转换的最佳方法是什么？
另一种选择是使用 library(reshape2)（使用 @Richard Scriven 帖子中的示例 df）：
# Reshape the long data frame df to wide form with reshape2: one row per V1
# value, one column per V2 value, cells taken from V3, and 0 wherever a
# V1/V2 combination does not occur. acast() output is wrapped in
# as.data.frame(); the V1 values end up as row names (see the output below).
library(reshape2)
as.data.frame(acast(df, V1~V2, value.var="V3", fill=0))
# Printed result:
# ask death freek frerik hurt lisa pain
#1234567 13 2 15 0 2 0 10
#9999999 0 15 0 14 1 2 12
或使用tidyr
# Same reshape with tidyr: spread() turns each distinct V2 value into its own
# column filled from V3, using 0 for missing combinations. Here V1 remains an
# ordinary column (see the output below).
# dplyr is presumably loaded only to provide the %>% pipe.
library(dplyr)
library(tidyr)
df %>%
spread(V2, V3, fill=0)
# Printed result:
# V1 ask death freek frerik hurt lisa pain
#1 1234567 13 2 15 0 2 0 10
#2 9999999 0 15 0 14 1 2 12
我使用 ArrayList 来存储 A 和 B，并将它们复制到一个二维矩阵中，分别构成标题行和第 1 列；B…
如果有更好的方法，请告诉我。代码如下：
// Pivots a long-format CSV file (args[0]) into a wide table and writes it as
// comma-separated text to args[1].
//
// NOTE(review): every input record is read as
//   [0] = row key, [1] = column key, [2] = "report" value, [3] = cell value
// -- confirm this layout against the actual input file.
//
// Output layout: the first line is the header (" ", "report", then one entry
// per distinct column key in order of first appearance); each following line
// is one distinct row key, its report value and its cell values, with "0" for
// combinations that never occur in the input. Every cell is followed by a
// comma, including the last one on a line.
CSVReader reader = null;
try {
    reader = new CSVReader(new FileReader(args[0]));
} catch (FileNotFoundException e) {
    e.printStackTrace();
    // Without the input file there is nothing to pivot; carrying on would
    // only end in a NullPointerException on the first read.
    return;
}

String[] field_name;
ArrayList<String> listRows = new ArrayList<String>();   // distinct row keys, first-seen order
ArrayList<String> listCols = new ArrayList<String>();   // distinct column keys, first-seen order
ArrayList<String> report = new ArrayList<String>();     // report value of each row key (first record wins)
ArrayList<String[]> records = new ArrayList<String[]>(); // every usable record, kept so the file is read once

try {
    while ((field_name = reader.readNext()) != null) {
        // Skip records that cannot supply all four fields (e.g. blank lines)
        // instead of failing with ArrayIndexOutOfBoundsException.
        if (field_name.length < 4) {
            continue;
        }
        records.add(field_name);

        // Check against everything seen so far, not just the previous record:
        // the same column key normally recurs under every row key, and
        // comparing only with the previous line would register it repeatedly.
        if (!listRows.contains(field_name[0])) {
            listRows.add(field_name[0]);
            report.add(field_name[2]);
        }
        if (!listCols.contains(field_name[1])) {
            listCols.add(field_name[1]);
        }
    }
} catch (IOException e) {
    // Keep whatever was read before the failure, as the original code did.
    e.printStackTrace();
} finally {
    try {
        reader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

int rowCount = listRows.size();
int colCount = listCols.size();

// Row 0 is the header; column 0 holds the row key, column 1 the report value,
// and columns 2.. hold one pivoted column each.
String[][] table = new String[rowCount + 1][colCount + 2];
table[0][0] = " ";
table[0][1] = "report";
for (int col = 0; col < colCount; col++) {
    table[0][col + 2] = listCols.get(col);
}
for (int row = 0; row < rowCount; row++) {
    table[row + 1][0] = listRows.get(row);
    table[row + 1][1] = report.get(row);
}

// Drop every record's value into its (row key, column key) cell; a later
// record for the same cell overwrites an earlier one.
for (String[] record : records) {
    int row = listRows.indexOf(record[0]);
    int col = listCols.indexOf(record[1]);
    table[row + 1][col + 2] = record[3].trim();
}

PrintWriter pout = null;
try {
    pout = new PrintWriter(args[1]);
} catch (FileNotFoundException e1) {
    e1.printStackTrace();
    // The output file cannot be created, so there is nowhere to write to.
    return;
}
try {
    for (int rows = 0; rows < rowCount + 1; rows++) {
        // Iterate over all colCount + 2 table columns so the last pivoted
        // column is written too.
        for (int cols = 0; cols < colCount + 2; cols++) {
            if (table[rows][cols] == null) {
                table[rows][cols] = "0";
            }
            // writing to a csv file.
            pout.write(table[rows][cols] + ",");
        }
        pout.write("\n");
        pout.flush();
    }
} finally {
    pout.close();
}
System.out.print("done");
}
}