Elasticsearch 将多类型父/子索引(v5.0)重新索引为连接类型索引(v6.2)



我正在将索引数据从 ES 5.0(父子类型)重新索引到 ES 6.2(连接类型)。

索引

ES 5.0 中的数据作为父子文档以单独的类型存储,对于重新索引,我在新集群中基于 6.2 创建了新的索引/映射。

父文档可以顺利地重新索引到新索引,但子文档抛出如下错误:

{
  "index": "index_two",
  "type": "_doc",
  "id": "AVpisCkMuwDYFnQZiFXl",
  "cause": {
    "type": "mapper_parsing_exception",
    "reason": "failed to parse",
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "[routing] is missing for join field [field_relationship]"
    }
  },
  "status": 400
}

我用来重新索引数据的脚本

  {
  "source": {
    "remote": {
      "host": "http://myescluster.com:9200",
      "socket_timeout": "1m",
      "connect_timeout": "20s"
    },
    "index": "index_two",
    "type": ["actions"],
    "size": 5000,
    "query":{
        "bool":{
            "must":[
                {"term": {"client_id.raw": "cl14ous0ydao"}}
            ]
        }
    }
  },
  "dest": {
    "index": "index_two",
    "type": "_doc"
  },
  "script": {
    "params": {
        "jdata": {
            "name": "actions"
        }
    },
    "source": "ctx._routing=ctx._routing;ctx.remove('_parent');params.jdata.parent=ctx._source.user_id;ctx._source.field_relationship=params.jdata"
  }
}

我已经在 Painless 脚本中传递了路由字段,因为路由值需要从源索引的文档中动态获取。

目标索引的映射

{
  "index_two": {
    "mappings": {
      "_doc": {
        "dynamic_templates": [
          {
            "template_actions": {
              "match_mapping_type": "string",
              "mapping": {
                "fields": {
                  "raw": {
                    "index": true,
                    "ignore_above": 256,
                    "type": "keyword"
                  }
                },
                "type": "text"
              }
            }
          }
        ],
        "date_detection": false,
        "properties": {
          "attributes": {
            "type": "nested"
          }
        },
        "cl_other_params": {
          "type": "nested"
        },
        "cl_triggered_ts": {
          "type": "date"
        },
        "cl_utm_params": {
          "type": "nested"
        },
        "end_ts": {
          "type": "date"
        },
        "field_relationship": {
          "type": "join",
          "eager_global_ordinals": true,
          "relations": {
            "users": [
              "actions",
              "segments"
            ]
          }
        },
        "ip_address": {
          "type": "ip"
        },
        "location": {
          "type": "geo_point"
        },
        "processed_ts": {
          "type": "date"
        },
        "processing_time": {
          "type": "date"
        },
        "products": {
          "type": "nested",
          "properties": {
            "traits": {
              "type": "nested"
            }
          }
        },
        "segment_id": {
          "type": "integer"
        },
        "start_ts": {
          "type": "date"
        }
      }
    }
  }
}

我的示例源文档

    {
    "_index": "index_two",
    "_type": "actions",
    "_id": "AVvKUYcceQCc2OyLKWZ9",
    "_score": 7.4023576,
    "_routing": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
    "_parent": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
    "_source": {
      "user_id": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
      "client_id": "cl14ous0ydao",
      "session_id": "CL-e0ec3941-6dad-4d2d-bc9b",
      "source": "betalist",
      "action": "pageview",
      "action_type": "pageview",
      "device": "Desktop",
      "ip_address": "49.35.14.224",
      "location": "20.7333 , 77",
      "attributes": [
        {
          "key": "url",
          "value": "https://www.google.com/",
          "type": "string"
        }
      ],
      "products": []
    }
  }

我遇到了同样的问题,在 Elasticsearch 官方论坛(discuss)中搜索时,我发现下面的方法有效:

POST _reindex

{
    "source": {
        "index": "old_index",
        "type": "actions"
    },
    "dest": {
        "index": "index_two"
    },
    "script": {
        "source": """
            ctx._type = "_doc";
            String  routingCode = ctx._source.user_id;
            Map join = new HashMap();
            join.put('name', 'actions');
            join.put('parent', routingCode);
            ctx._source.put('field_relationship', join);
            ctx._parent = null;
            ctx._routing = new StringBuffer(routingCode)"""
    }
}

希望这对您有所帮助 :)

我想指出的是,连接(join)字段通常不需要路由,但是如果您在创建父文档之前创建子文档,那么您将遇到此问题。

建议先重新索引所有父文档,然后再重新索引子文档。

相关内容

  • 没有找到相关文章

最新更新