1、在现有节点中创建关系
from py2neo import NodeSelector,Graph,Node,Relationship
# Connect to the Neo4j REST endpoint and open a single transaction that
# collects every relationship created below.
graph = Graph('http://127.0.0.1:7474/db/data')
tx = graph.begin()
selector = NodeSelector(graph)
# unique_name -> node (or None when no such node exists), so that each name
# is looked up in the database at most once.
node_cache = {}


def _lookup_node(unique_name):
    """Return the "Node"-labelled node with this unique_name, using the cache.

    The result of ``first()`` is cached as-is, so a name that has no node is
    remembered as None and not queried again.
    """
    if unique_name not in node_cache:
        node_cache[unique_name] = selector.select(
            "Node", unique_name=unique_name).first()
    return node_cache[unique_name]


# Each line of the input file is expected to look like "<name1>;<name2>".
with open("file", "r") as relations:
    for line in relations:
        # Strip the line terminator first: otherwise the second field keeps
        # its trailing "\n" and can never match a stored unique_name.
        line_split = line.rstrip("\r\n").split(";")
        if len(line_split) < 2:
            # Blank or malformed line: there is no pair of names to relate.
            continue
        node1 = _lookup_node(line_split[0])
        node2 = _lookup_node(line_split[1])
        if node1 is None or node2 is None:
            # A relationship needs two existing nodes; report and move on
            # instead of failing in the Relationship constructor.
            print("skipping %r: node not found" % (line,))
            continue
        rs = Relationship(node1, "Relates to", node2)
        tx.create(rs)
# Commit once, after all relationships have been queued on the transaction.
tx.commit()
2、在现有图数据库中添加节点和关系
查询数据库中的节点
# NOTE: `s` (the node name to look up) and `node` (the candidate Node to
# insert) are not defined in this snippet; presumably they come from the
# surrounding program.

# find_one() looks nodes up by label/property and returns only the first match.
data1 = graph.find_one(label='Person', property_key='name', property_value="Alice")
# find() produces its matches lazily, so materialise them into a list before
# testing whether anything was found (len() does not work on the lazy result).
data2 = list(graph.find(label='PersonTest'))
matcher = NodeMatcher(graph)
# Pass the property as a keyword argument so py2neo takes care of quoting;
# a hand-concatenated where-clause breaks as soon as the name contains "'".
print(matcher.match('subject', name=s).first())
# Example output: (_157:subject {name: 'Georgia'})
# Create the node only when no matching node exists in the database yet.
if not data2:
    graph.create(node)
关系查询
# Signature, for reference: match(nodes=None, r_type=None, **properties)
relationship = graph.match_one(r_type='know')
# Goal: when the node to add already exists, inspect its relationships and
# add the new relationship only if it differs from the existing ones.
mma = RelationshipMatcher(graph)
# PITFALL (kept for illustration): wrapping the relationship type in extra
# single quotes makes both lookups below return None even though the
# relationship exists. `m` is presumably a row index into df_data supplied by
# the surrounding loop.
print(mma.match(r_type="'" + df_data['relation'][m] + "'").first())
rel = graph.match_one(r_type="'" + df_data['relation'][m] + "'")
print(rel)
# Passing the bare type name works.
rel = graph.match_one(r_type='contains')
print(rel)
print(mma.match(r_type='contains').first())
# The extra quotes were the mistake: pass each relation value unchanged.
for relation in df_data['relation']:
    print(relation)
    print(mma.match(r_type=relation).first())
思路:
1、要先将 triple_result.json 中的三元组处理成 df_data,之后在创建关系时要用到对应位置的 subject 和 object(注意:下方代码实际读取的文件名是 test_triples.json)。
2、构建 neo4j 语义网络的程序:
import json
import pandas as pd
from py2neo import Graph,Node,Relationship,NodeMatcher
# Connect to the local Neo4j server.
# NOTE(review): the credentials are hard-coded, and the password "neoj4" may
# be a typo for "neo4j" — confirm against the actual server configuration.
graph = Graph("http://localhost:7474", username="neo4j", password="neoj4")
# WARNING: this runs before anything is created, so the graph is rebuilt from
# scratch on every run; per the py2neo documentation delete_all() removes all
# nodes and relationships from the database.
graph.delete_all()
# Node matcher bound to the graph; create_relation() uses it to look nodes up.
matcher = NodeMatcher(graph)
# The bare string literal below is never executed as code. It holds a small
# hand-written example of creating a node and a relationship
# (NOTE(review): `node2` is not defined inside the example).
'''
node1 = Node('animal',name = 'cat')
r1 = Relationship(node2,'has',node1)
graph.create(node1)
'''
def data(path='test_triples.json'):
    """Load triples from a JSON file and prepare them for graph construction.

    The file must contain a list of records, each with a ``'triple_list'`` key
    holding ``[subject, relation, object]`` triples. Duplicate triples are
    dropped (first occurrence wins, order preserved) and every element is
    converted to ``str``.

    Args:
        path: JSON file to read. Defaults to ``'test_triples.json'``, the file
            name that used to be hard-coded.

    Returns:
        A tuple ``(node_sub_key, node_obj_key, df_data)``:
        ``node_sub_key`` is the list of distinct subject names (arbitrary
        order), ``node_obj_key`` the list of distinct object names (arbitrary
        order), and ``df_data`` a DataFrame with columns ``subject``,
        ``relation`` and ``object``, one row per distinct triple.

    Raises:
        ValueError: if a triple does not have exactly three elements.
    """
    # Read in binary mode so json detects the encoding itself instead of
    # relying on the platform's locale; the context manager closes the handle.
    with open(path, 'rb') as fh:
        datas = json.load(fh)

    # De-duplicate while keeping first-seen order. Triples are compared as
    # lists, so this works even if an element is not hashable.
    triplea = []
    for record in datas:
        for triples in record['triple_list']:
            if triples not in triplea:
                triplea.append(triples)

    subject_list = []
    relation_list = []
    object_list = []
    for s, r, o in triplea:
        subject_list.append(str(s))
        relation_list.append(str(r))
        object_list.append(str(o))

    # One row per distinct triple; the row position ties subject, relation
    # and object together when relationships are created later.
    df_data = pd.DataFrame({
        'subject': subject_list,
        'relation': relation_list,
        'object': object_list,
    })

    # Only node names need further de-duplication.
    node_sub_key = list(set(subject_list))
    node_obj_key = list(set(object_list))
    return node_sub_key, node_obj_key, df_data
def create_node(nodes,nodeo):
    """Create one graph node per name: 'subject' nodes first, then 'object' nodes.

    Args:
        nodes: iterable of names to store as nodes labelled 'subject'.
        nodeo: iterable of names to store as nodes labelled 'object'.
    """
    # Walk the two groups in the original order; each node is created with
    # its own graph.create() call and carries the name as its `name` property.
    for label, names in (('subject', nodes), ('object', nodeo)):
        for node_name in names:
            graph.create(Node(label, name=node_name))
def create_relation(df_data):
    """Create one relationship in the graph for every row of ``df_data``.

    For each row the 'subject'-labelled node named ``row['subject']`` is
    linked to the 'object'-labelled node named ``row['object']`` with a
    relationship whose type is ``row['relation']``.

    Args:
        df_data: DataFrame with ``subject``, ``relation`` and ``object``
            columns; rows are processed in order.

    Rows whose start or end node cannot be found are reported and skipped.
    """
    rows = zip(df_data['subject'], df_data['relation'], df_data['object'])
    for m, (subject_name, relation, object_name) in enumerate(rows):
        # Match on the property via keyword argument so py2neo handles the
        # quoting; a concatenated where-clause breaks on names containing "'".
        start = matcher.match('subject', name=subject_name).first()
        end = matcher.match('object', name=object_name).first()
        if start is None or end is None:
            # first() yields None when nothing matches; a relationship needs
            # both endpoints, so skip this row explicitly.
            print('skipping relation %d: node not found' % m)
            continue
        try:
            rel = Relationship(start, relation, end)
            print('创建第%d个关系'%(m))
            graph.create(rel)
        except AttributeError as e:
            # Best-effort, as before: report the failing row and carry on.
            print(e, m)
来源:
图挖掘算法总结:https://blog.csdn.net/siwangtt/article/details/109231135