MongoDB中使用MapReduce连接两个集合



我知道 MongoDB 不支持连接(join)操作,但我需要用 mapReduce 范式来模拟聚合框架中的 $lookup。

我的两个集合是:

// Employees sample 
{
  "_id" : "1234",
  "first_name" : "John",
  "last_name" : "Bush",
  "departments" : 
  [ 
    { "dep_id" : "d001", "hire_date" : "date001" },
    { "dep_id" : "d004", "hire_date" : "date004" }
  ]
}
{ 
  "_id" : "5678", 
  "first_name" : "Johnny", 
  "last_name" : "Cash", 
  "departments" : [ { "dep_id" : "d001", "hire_date" : "date03" } ] 
}
{ 
  "_id" : "9012", 
  "first_name" : "Susan", 
  "last_name" : "Bowdy", 
  "departments" : [ { "dep_id" : "d004", "hire_date" : "date04" } ] 
}
// Departments sample 
{
  "_id" : "d001",
  "dep_name" : "Sales",
  "employees" : [ "1234", "5678" ]
},
{
  "_id" : "d004",
  "dep_name" : "Quality M",
  "employees" : [ "1234", "9012" ]
}

实际上我希望得到这样的结果:

{
  "_id" : "1234",
  "value" : 
  {
    "first_name" : "John",
    "departments" :
    [
      { "dep_id" : "d001", "dep_name" : "Sales" },
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ]
  }
}
{ 
  "_id" : "5678", 
  "value" : 
  { 
    "first_name" : "Johnny", 
    "departments" : [ { "dep_id" : "d001", "dep_name" : "Sales" } ]
  } 
}
{ 
  "_id" : "9012", 
  "value" : 
  { 
    "first_name" : "Susan", 
    "departments" : [ { "dep_id" : "d004", "dep_name" : "Quality M" } ] 
  } 
}

两个集合的公共(关联)字段是 Employees 中的 dep_id 和 Departments 中的 _id。

我的代码如下,但它没有按我需要的方式工作。

/**
 * Map function for a mapReduce job over the Departments collection.
 *
 * `this` is the department document being mapped. One pair is emitted per
 * entry of its `employees` array, keyed by that entry. The emitted value
 * carries the department's `dep_name`; `dep_id` is emitted as the
 * placeholder 0.
 */
var mapD = function() {
  var idx = 0;
  while (idx < this.employees.length) {
    var employeeId = this.employees[idx];
    emit(employeeId, { dep_id: 0, dep_name: this.dep_name });
    idx += 1;
  }
};
/**
 * Map function for a mapReduce job over the Employees collection.
 *
 * `this` is the employee document being mapped. One pair is emitted per
 * entry of its `departments` array, keyed by the employee's `_id`. The
 * emitted value carries that entry's `dep_id`; `dep_name` is emitted as
 * the placeholder 0.
 */
var mapE = function() {
  var memberships = this.departments;
  for (var pos = 0; pos < memberships.length; pos += 1) {
    var membership = memberships[pos];
    emit(this._id, { dep_id: membership.dep_id, dep_name: 0 });
  }
};
/**
 * Reduce function that folds the values emitted for one key into a single
 * { dep_id, dep_name } pair.
 *
 * A field equal to 0 is the "not set" placeholder and never overwrites a
 * real value, so the result does not depend on the order of `values` when
 * each field has a single real value.
 *
 * Limitation: the result holds one dep_id and one dep_name only. If several
 * values carry real data for the same field, the last one wins.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Objects with optional dep_id / dep_name.
 * @returns {{dep_id: *, dep_name: *}} Merged pair; unset fields remain 0.
 */
var reduceLookUp = function(key, values) {
  var result = {dep_id: 0, dep_name: 0};

  // A field counts as "set" unless it is missing, null, or the placeholder 0.
  var isSet = function(field) {
    return field !== null && field !== undefined && field !== 0;
  };

  values.forEach(function(value) {
    if (isSet(value.dep_name)) {
      // Read from the current element (`value`), not from the `values` array.
      result.dep_name = value.dep_name;
    }
    if (isSet(value.dep_id)) {
      result.dep_id = value.dep_id;
    }
  });
  return result;
};
// Run one mapReduce job per collection. Both jobs use reduceLookUp and the
// `reduce` output action on the same "joined" collection.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });
我真的很感激你的帮助!

在您的问题中,first_name只能从Employees集合中获取,dep_name只能从Departments集合中获取。

这既可以通过 MapReduce 实现,也可以通过聚合框架实现。

1. MapReduce 解决方案

如果你修改你的map和reduce函数如下

/**
 * Map function for the Departments side of the join.
 *
 * `this` is the department document being mapped. One pair is emitted per
 * entry of its `employees` array, keyed by that entry, with the department's
 * `_id` (as dep_id) and `dep_name` as the value.
 */
var mapD = function() {
  var departmentId = this._id;
  var departmentName = this.dep_name;
  var members = this.employees;
  for (var n = 0; n < members.length; n += 1) {
    emit(members[n], { dep_id: departmentId, dep_name: departmentName });
  }
};
/**
 * Map function for the Employees side of the join.
 *
 * `this` is the employee document being mapped. A single pair is emitted,
 * keyed by the employee's `_id`, carrying only `first_name`.
 */
var mapE = function() {
  var employee = { first_name: this.first_name };
  emit(this._id, employee);
};
/**
 * Reduce function shared by the Departments and Employees mapReduce jobs.
 *
 * `values` may mix three shapes:
 *   - { dep_id, dep_name }            a raw value emitted by mapD
 *   - { first_name }                  a raw value emitted by mapE
 *   - { first_name?, departments? }   the output of an earlier reduce call,
 *                                     e.g. the document already stored in the
 *                                     output collection when `out: { reduce }`
 *                                     is used
 *
 * Department lists from earlier reduce outputs are merged with newly emitted
 * department values rather than replaced by them, so the function can be
 * applied again to its own output without losing data.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Values to merge (see shapes above).
 * @returns {Object} Object with `first_name` and/or `departments`, depending
 *   on which of them appeared in `values`.
 */
var reduceLookUp = function(key, values) {
  var results = {};
  var departments = [];
  // Tracks whether any value carried a `departments` field, so that an empty
  // list from an earlier reduce output is still kept in the result.
  var sawDepartments = false;

  values.forEach(function(value) {
    // Output of an earlier reduce: append its departments to the accumulator.
    if (value.departments !== undefined) {
      sawDepartments = true;
      departments = departments.concat(value.departments);
    }

    // Raw value from mapD: rebuild the department entry from its fields.
    var department = {};
    if (value.dep_id !== undefined) department["dep_id"] = value.dep_id;
    if (value.dep_name !== undefined) department["dep_name"] = value.dep_name;
    if (Object.keys(department).length > 0) departments.push(department);

    if (value.first_name !== undefined) results["first_name"] = value.first_name;
  });

  if (sawDepartments || departments.length > 0) results["departments"] = departments;
  return results;
};

那么第一次 MapReduce 调用

// First job: map Departments with mapD and write into "joined" using the `reduce` output action.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });

将向 joined 集合中插入

{ 
  "_id" : "1234", 
  "value" : 
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  }
}

第二次调用

// Second job: map Employees with mapE and write into the same "joined" collection using the `reduce` output action.
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });

应该插入

{ "_id" : "1234", "value" : { "first_name" : "John" } }

但是,根据文档,reduce 输出选项的行为是:

如果输出集合已经存在,则将新结果与现有结果合并。如果现有文档与新结果具有相同的键,则对新文档和现有文档应用 reduce 函数,并用结果覆盖现有文档。

因此,在您的情况下,reduce 函数将以如下参数再次被调用

key = "1234",
values =
[
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  },
  { "first_name" : "John" }
]

,最终结果为

{ 
  "_id" : "1234", 
  "value" : 
  { 
    "first_name" : "John", 
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ] 
  } 
}

2. 聚合框架解决方案

一个更好的解决方案是使用聚合框架而不是 Map-Reduce。这里可以使用 $lookup 阶段从 Employees 集合中获取所需的数据:
// Build one result document per employee id, listing the departments that
// reference that id, starting from the Departments collection.
db.Departments.aggregate([
  // Split each department into one document per entry of its `employees` array.
  { $unwind: "$employees" },
  { 
    // Look up the Employees document whose `_id` equals the unwound employee id;
    // matches are stored in the `employee` array.
    $lookup: 
      { 
        from: "Employees", 
        localField: "employees", 
        foreignField: "_id", 
        as: "employee"
      }
  },
  // Flatten the `employee` array produced by $lookup.
  { $unwind: "$employee" },
  { 
    // Regroup by employee id: keep the first `first_name` seen and collect
    // one { dep_id, dep_name } entry per department document.
    $group: 
      { 
        "_id": "$employees",
        "first_name": { $first: "$employee.first_name" }, 
        "departments": { $push: { dep_id: "$_id", dep_name: "$dep_name" } } 
      } 
  } 
]);

将生成

{ 
  "_id" : "1234",
  "first_name" : "John",
  "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
}

相关内容

  • 没有找到相关文章

最新更新