我有一个Javascript对象格式的标记元素列表,我正在NodeJS项目中为其编写导入函数。列表中可能存在这些元素的重复项,因为该列表是来自不同源的列表的组合。
示例列表(这是test.json的内容):
[
//0 - first time this element appears on the list
{
name : "Name 1",
identifier : "string0001",
added_date : "1437013195",
tags : ["tag1", "tag2"]
},
//1 - same as 0 but the added_date is different and the name is different, an additional tag3 is present
{
name : "Name 2",
identifier : "string0001",
added_date : "1437082145",
tags : ["tag1", "tag3"]
},
//3 - a second unique element but it uses the same tags as 0
{
name : "Name 3",
identifier : "string0002",
added_date : "1358426363",
tags : ["tag1", "tag2"]
},
//4 - third unique element with a new tag tag4
{
name : "Name 4",
identifier : "string0003",
added_date : "1422912783",
tags : ["tag1", "tag4"]
},
// 5 - same element as 4, it was added before 4, it's tagged less than 4
{
name : "Name 4",
identifier : "string0003",
added_date : "1358426363",
tags : ["tag1"]
}
]
这里的唯一标识符是属性identifier
,并不关心名称是否不同。在元素0
和1
的情况下,我希望数据库中有一个:Element
节点。
我将有两个节点:
Element
拥有name
、identifier
和added_date
- 具有标记名称的
Tag
- 我的关系是:
Element
TAGGED_WITH
Tag
到目前为止,我用下面这个糟糕的小脚本拼出了一条由 3 个部分组成的查询:先创建 Element,再创建 Tag,最后把 Element 与 Tag 关联起来,然后执行它。
我的脚本不做的是:
MERGE
仅使用标识符的Element
,它使用所有三个属性name
、identifier
和added_date
。- 它不会记录同一标识符是否具有多个名称(例如元素
0
和1
具有两个不同的名称,并且可以存储一组名称:{name: "Name 2", identifier:"string0001", added_date:"1437013195", all_names:["Name 1", "Name 2"]}
)。这并不重要,当我知道如何正确链接查询时,我会处理这个问题 - 同样,added_date属性也可以合并:
{name: "Name 2", identifier:"string0001", added_date:"1437013195", all_names:["Name 1", "Name 2"], all_added_dates: ["1437013195","1437082145"]}
一旦我学会了正确的链接,我将再次解决这个问题。 - 它不会在每个步骤中捕获错误
我的代码是:
// Imports the tagged elements listed in ./test.json into Neo4j.
// For each list entry one :Element node is merged, one :Tag node per tag is
// merged, and a TAGGED_WITH relationship is merged between them.
var neo4j = require('neo4j-driver').v1;
// Create a driver instance, for the user neo4j with password 123456.
// It should be enough to have a single driver per database per application.
var driver = neo4j.driver("bolt://localhost:7687", neo4j.auth.basic("neo4j", "123456"));
// Register a callback to know if driver creation was successful:
driver.onCompleted = function () {
  // proceed with using the driver, it was successfully instantiated
  console.log('successfully connected');
};
// Register a callback to know if driver creation failed.
// This could happen due to wrong credentials or database unavailability:
driver.onError = function (error) {
  console.log('Driver instantiation failed', error);
};
// Create a session to run Cypher statements in.
// Note: Always make sure to close sessions when you are done using them!
var session = driver.session();
var test = require('./test.json');

// One parameterized statement reused for every element. Values travel as
// query parameters instead of being concatenated into the Cypher text, so
// quotes inside names or tags can neither break the statement nor inject
// Cypher. NOTE: the MERGE still matches on all three properties, so two
// entries sharing an identifier but differing in name or added_date produce
// two :Element nodes.
var IMPORT_QUERY = [
  'MERGE (element:Element {identifier: $identifier, name: $name, added_date: $added_date})',
  // WITH separates the updating MERGE from the reading UNWIND.
  'WITH element',
  // An empty tag list yields no rows here; the element itself is already merged.
  'UNWIND $tags AS tagName',
  'MERGE (tag:Tag {name: tagName})',
  'MERGE (element)-[:TAGGED_WITH]->(tag)'
].join('\n');

// Converts a value to a string, mapping null/undefined to '' because MERGE
// cannot match on a null property value.
function asText(value) {
  return value == null ? '' : String(value);
}

// Builds the parameter map for IMPORT_QUERY from one list entry.
// Missing scalar properties default to '' and missing/non-array tags to [].
function toElementParams(obj) {
  return {
    identifier: asText(obj.identifier),
    name: asText(obj.name),
    added_date: asText(obj.added_date),
    tags: Array.isArray(obj.tags) ? obj.tags.map(asText) : []
  };
}

// Runs IMPORT_QUERY for one list entry and resolves once the query finished.
function importElement(obj) {
  var params = toElementParams(obj);
  console.log('gonna create this element', JSON.stringify({
    name: params.name,
    identifier: params.identifier,
    added_date: params.added_date
  }));
  console.log('tagged by', params.tags);
  return session.run(IMPORT_QUERY, params).then(function (result) {
    console.log('added element:', result);
  });
}

// Releases the session and the driver; called exactly once, after the last
// query settled, so no pending query loses its connection.
function closeConnections() {
  session.close();
  driver.close();
}

console.log('query:', IMPORT_QUERY);

// Process the entries strictly one after another on the single session.
// Each step has its own rejection handler, so a failing entry is logged and
// the remaining entries are still imported.
(Array.isArray(test) ? test : [])
  .reduce(function (chain, obj) {
    return chain
      .then(function () {
        return importElement(obj);
      })
      .catch(function (error) {
        console.log(error);
      });
  }, Promise.resolve())
  .then(closeConnections);
我想为执行这些查询编写的,是与下面这个算法对应的 Promise 链:
- 是否已经有标识符
string0001
的元素? - 如果没有创建它;如果是,则使用它(在这里我可能会编写这些增强功能来记录所有其他
added_date
和name
属性)。现在我有一个参考element
- 是否有名为
tag1
的标签?创建或返回标记。现在我有一个参考tag
在 element
和tag
之间建立关系(如果该关系尚不存在)。
预期结果:
如果结果是 JavaScript 数组格式,则数据库中的结果可以可视化为以下内容:
[
{
name : "Name 2", //took the latest name on the list order, notice it is not Name 1 anymore
identifier : "string0001",
added_date : "1437082145", //took the latest added_date on the list order
tags : ["tag1", "tag2", "tag3"]
},
{
name : "Name 3",
identifier : "string0002",
added_date : "1358426363",
tags : ["tag1", "tag2"]
},
{
name : "Name 4",
identifier : "string0003",
added_date : "1358426363", //notice that the element 4 was added later than this element 5 but we took the
// older date because this was merged last. in other words it was the latest element
// with "string0003" in the list.
tags : ["tag1", "tag4"]
}
]
在我进行增强以支持所有名称和所有添加日期之后,它可能看起来像这样:
[
{
name : "Name 2", //took the newest name in terms of added_date
identifier : "string0001",
added_date : "1437082145", //took the greatest added_date
all_added_dates_and_names : [{'1437013195' : 'Name 1', '1437082145' : 'Name 2'}],
tags : ["tag1", "tag2", "tag3"]
},
{
name : "Name 3",
identifier : "string0002",
added_date : "1358426363",
tags : ["tag1", "tag2"]
},
{
name : "Name 4",
identifier : "string0003",
added_date : "1422912783", //took the greatest added_date
all_added_dates_and_names : [{'1422912783' : 'Name 4', '1358426363' : 'Name 4'}],
tags : ["tag1", "tag4"]
}
]
我一直在研究这些以找出最佳实践:
- https://github.com/sebinsua/neo4j-simple(让我知道这是否有任何好处,或者是否有更好的)
- https://neo4j.com/developer/javascript/
- https://neo4j.com/docs/api/javascript-driver/current/我在这里很困惑
- http://neo4j.com/docs/developer-manual/current/drivers/
- 如何在 neo4j 中进行嵌套查询 在这里查找查询
我的期望类似于 https://neo4j.com/developer/javascript/上的示例:
// Example: create a :Person node, read it back, print it, then release the
// session and the driver.
var neo4j = require('neo4j-driver').v1;
var driver = neo4j.driver("bolt://localhost:7687", neo4j.auth.basic("neo4j", "neo4j"));
var session = driver.session();
session
// Step 1: create the node; name and title are supplied as query parameters
// ({name}, {title}) rather than concatenated into the statement text.
.run( "CREATE (a:Person {name: {name}, title: {title}})", {name: "Arthur", title: "King"})
.then( function()
{
// Step 2: returning the promise of the second query chains it after the first.
return session.run( "MATCH (a:Person) WHERE a.name = {name} RETURN a.name AS name, a.title AS title",
{name: "Arthur"})
})
.then( function( result ) {
// Step 3: print "<title> <name>" taken from the first returned record.
console.log( result.records[0].get("title") + " " + result.records[0].get("name") );
// NOTE: the chain has no rejection handler, so these close calls are only
// reached when both queries succeed.
session.close();
driver.close();
});
但我希望它能在每个步骤中捕获错误,并且能够在我的 for 循环中以参数化的方式运行。
有一些复杂情况需要一些更改。
首先是迭代方法。这通常不适用于 Cypher,也不建议使用字符串连接来构造查询。相反,我建议参数化您的输入集合,并在查询中使用 UNWIND 将集合展开为行,以便您的整个 JSON 一次性得到处理。
第二个复杂因素是你的all_added_dates_and_names属性。Neo4j 目前不允许映射类型属性,也不允许映射类型属性的集合。其余选项是将映射转换为字符串,或将每个添加的名称转换为具有附加日期属性的连接节点。
您还需要 APOC 过程来尽可能轻松地构造查询,因为您需要使用集合联合函数。
下面是一个应该可以工作的查询。不过在实际使用时,您需要把开头的 with [...] as json 字面量(以及紧随其后的 unwind json as row)替换为引用您传入的 json 参数的写法:unwind $json as row。
// Imports every entry in a single statement: the rows are grouped by
// identifier, the latest date/name and the union of all tags are computed
// up front, and only then are the Element, NameChange and Tag nodes merged.
// The list literal below is inline sample data bound to `json`.
with [
{
name : "Name 1",
identifier : "string0001",
added_date : "1437013195",
tags : ["tag1", "tag2"]
},
{
name : "Name 2",
identifier : "string0001",
added_date : "1437082145",
tags : ["tag1", "tag3"]
},
{
name : "Name 3",
identifier : "string0002",
added_date : "1358426363",
tags : ["tag1", "tag2"]
},
{
name : "Name 4",
identifier : "string0003",
added_date : "1422912783",
tags : ["tag1", "tag4"]
},
{
name : "Name 4",
identifier : "string0003",
added_date : "1358426363",
tags : ["tag1"]
}
] as json
// expand the list into one row per entry
unwind json as row
// aggregate per identifier: the greatest added_date (converted to an integer),
// every {date, name} pair, and the list of tag lists
with row.identifier as identifier, max(toInt(row.added_date)) as latestDate,
collect({date:toInt(row.added_date), name:row.name}) as allDatesAndNames, collect(row.tags) as allTags
// now union all collections of tags per entry with the same identifier
with identifier, latestDate, allDatesAndNames,
reduce(tagSet = head(allTags), tags in allTags | apoc.coll.union(tagSet, tags)) as allTags
// now get the latest name corresponding with latest date
with identifier, latestDate, allDatesAndNames, allTags,
head([entry in allDatesAndNames where entry.date = latestDate | entry.name]) as latestName
// data pre-processed, now start the merge
merge (el:Element{identifier:identifier})
// overwrite the element's date and name with the latest values computed above
set el.added_date = latestDate, el.name = latestName
// one :NameChange node per {date, name} pair, linked from the element
foreach (entry in allDatesAndNames |
merge (el)-[:NAME_CHANGE]->(:NameChange{date:entry.date, name:entry.name}))
// make sure every tag node exists and is linked to the element
foreach (tagName in allTags |
merge (tag:Tag{name:tagName})
merge (el)-[:TAGGED_WITH]->(tag))
如果还需要考虑这样一种情况:图中已有的日期和名称比本次要导入的任何日期和名称都新,那么可以只合并 :NameChange 节点(忽略查询中处理 latestDate 和 latestName 的部分),然后在最后找出日期最新的 :NameChange 节点,并用该节点的值来设置元素的 added_date 和 name 属性。