Skip to content

Commit

Permalink
Merge pull request #57 from rlustemberg/feature/issue-56-neo4j-v5-com…
Browse files Browse the repository at this point in the history
…patibility

Resolves #56 Compatibility with NEO4J v5.x
  • Loading branch information
pgwilliams authored Jan 3, 2023
2 parents 86e7c33 + 4862a13 commit 850eb4a
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 119 deletions.
3 changes: 3 additions & 0 deletions NEO4J/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ Requirements:

NOTE: the database load will fail if these requirements are not met.

##### ADDENDUM:
In order to support Python version 3.9 and Neo4j version 5.x, it is necessary to install the latest version of py2neo (2021.2.3) and to make sure there is a symbolic link from python to python3.

##### Syntax to create the NEO4J database from an RF2 release using the python scripts from this project:

```
Expand Down
12 changes: 7 additions & 5 deletions NEO4J/snomed_g_graphdb_cypher_add_assoc_refset.template
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@

// Create edges for association refset
RETURN 'Creating ASSOCIATION REFSET edges between ObjectConcept nodes';
USING PERIODIC COMMIT 200
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>assoc_refset_new.csv" as line
with line
MATCH (s:ObjectConcept { sctid: line.referencedComponentId }), (d:ObjectConcept { sctid: line.targetComponentId })
WITH s,d,line
CREATE UNIQUE (s)-[:HAS_ASSOCIATION {association: line.association, refsetId: line.refsetId, active: line.active, effectiveTime: line.effectiveTime, referencedComponentId: line.referencedComponentId, targetComponentId: line.targetComponentId, moduleId: line.moduleId, id: line.id } ]->(d);
// Create one HAS_ASSOCIATION edge per CSV row, committing every 200 rows.
// Rows whose source or target ObjectConcept is not found create nothing.
// MERGE is used instead of CREATE UNIQUE: CREATE UNIQUE was removed in Neo4j 4.0
// and is rejected by 5.x. With both endpoints already bound, MERGE likewise
// creates the edge only when no identical one exists.
// NOTE(review): MERGE rejects null property values; this assumes every CSV column
// used below is populated -- confirm against the generated assoc_refset_new.csv.
CALL {
with line
MATCH (s:ObjectConcept { sctid: line.referencedComponentId }), (d:ObjectConcept { sctid: line.targetComponentId })
WITH s,d,line
MERGE (s)-[:HAS_ASSOCIATION {association: line.association, refsetId: line.refsetId, active: line.active, effectiveTime: line.effectiveTime, referencedComponentId: line.referencedComponentId, targetComponentId: line.targetComponentId, moduleId: line.moduleId, id: line.id } ]->(d)
} IN TRANSACTIONS OF 200 ROWS;

72 changes: 42 additions & 30 deletions NEO4J/snomed_g_graphdb_cypher_create.template
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,26 @@

// NEXT STEP -- create INDEXES

// Schema setup: uniqueness constraints and lookup indexes used by the load steps below.
// Each statement appears twice in this diff view: first in the older form
// (CONSTRAINT ON ... ASSERT, INDEX ON :Label(prop)), then in the Neo4j 5 form
// (CONSTRAINT FOR ... REQUIRE, INDEX FOR (n:Label) ON (n.prop)).
CREATE CONSTRAINT ON (c:ObjectConcept) ASSERT c.id IS UNIQUE;
CREATE CONSTRAINT ON (c:ObjectConcept) ASSERT c.sctid IS UNIQUE;
CREATE CONSTRAINT FOR (c:ObjectConcept) REQUIRE c.id IS UNIQUE;
CREATE CONSTRAINT FOR (c:ObjectConcept) REQUIRE c.sctid IS UNIQUE;
// id and sctid are indexed via these constraints, which also enforce uniqueness
// Note: Can't have an "FSN is UNIQUE" constraint, because FSN can have duplicates (inactive concepts)
// for example -- "retired procedure" is the FSN of multiple inactive concepts
CREATE CONSTRAINT ON (c:Description) ASSERT c.id IS UNIQUE;
CREATE INDEX ON :Description(sctid);
CREATE CONSTRAINT FOR (c:Description) REQUIRE c.id IS UNIQUE;
CREATE INDEX FOR (x:Description) ON (x.sctid);
// the index is needed so that setting HAS_DESCRIPTION edges doesn't stall
// there can be more than one description for the same sctid: sctid is not unique, but id is

// ROLE_GROUP nodes. Index needed for defining relationship assignment.
CREATE INDEX ON :RoleGroup(sctid);
CREATE INDEX FOR (x:RoleGroup) ON (x.sctid);

// NEXT STEP -- create CONCEPT nodes

RETURN 'Creating NEW ObjectConcept nodes';
USING PERIODIC COMMIT 200
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>concept_new.csv" as line
with line
CREATE (:ObjectConcept
CALL {
with line
CREATE (n:ObjectConcept
{ nodetype: 'concept',
id: line.id,
sctid: line.id,
Expand All @@ -39,14 +39,17 @@ CREATE (:ObjectConcept
moduleId: line.moduleId,
definitionStatusId: line.definitionStatusId,
FSN: line.FSN,
history: line.history} );
history: line.history} )

} IN TRANSACTIONS OF 200 ROWS;

// NEXT STEP -- create DESCRIPTION nodes (info from Language+Description file)
RETURN 'Creating NEW Description nodes';
USING PERIODIC COMMIT 200

LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>descrip_new.csv" as line
with line
CREATE (:Description
CALL {
with line
CREATE (n:Description
{ nodetype:'description',
id: line.id,
sctid: line.sctid,
Expand All @@ -61,26 +64,31 @@ CREATE (:Description
refsetId: line.refsetId,
caseSignificanceId: line.caseSignificanceId,
languageCode: line.languageCode,
history: line.history} );
history: line.history} )

} IN TRANSACTIONS OF 200 ROWS;

// NEXT STEP - create DESCRIPTION edges
RETURN 'Creating HAS_DESCRIPTION edges for new Description nodes related to ObjectConcept nodes';
USING PERIODIC COMMIT 200

LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>descrip_new.csv" as line
with line
MATCH (c:ObjectConcept { sctid: line.sctid }), (f:Description { id: line.id })
MERGE (c)-[:HAS_DESCRIPTION]->(f);
// Link each Description node to its owning ObjectConcept, one CSV row at a time.
// Rows where either node is missing are skipped; work is committed every 200 rows.
CALL {
  WITH line
  MATCH (concept:ObjectConcept { sctid: line.sctid })
  MATCH (descr:Description { id: line.id })
  MERGE (concept)-[:HAS_DESCRIPTION]->(descr)
} IN TRANSACTIONS OF 200 ROWS;

// --------------------------------------------------------------------------------------
// NEXT STEP -- create ISA relationships
// --------------------------------------------------------------------------------------

RETURN 'Creating NEW ISA edges';
USING PERIODIC COMMIT 200

LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>isa_rel_new.csv" as line
with line
MATCH (c1:ObjectConcept { id: line.sourceId }), (c2:ObjectConcept { id: line.destinationId })
MERGE (c1)-[:ISA { id: line.id,
CALL {
with line
MATCH (c1:ObjectConcept { id: line.sourceId }), (c2:ObjectConcept { id: line.destinationId })
MERGE (c1)-[:ISA { id: line.id,
active: line.active,
effectiveTime: line.effectiveTime,
moduleId: line.moduleId,
Expand All @@ -89,27 +97,31 @@ MERGE (c1)-[:ISA { id: line.id,
characteristicTypeId: line.characteristicTypeId,
sourceId: line.sourceId,
destinationId: line.destinationId,
history: line.history }]->(c2);
history: line.history }]->(c2)
} IN TRANSACTIONS OF 200 ROWS;

// --------------------------------------------------------------------------------------
// NEXT STEP -- create RoleGroup nodes
// --------------------------------------------------------------------------------------
RETURN 'Creating RoleGroup nodes';
USING PERIODIC COMMIT 500
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>rolegroups.csv" as line
with line
MERGE (rg:RoleGroup
// Create RoleGroup nodes from rolegroups.csv, committing every 500 rows.
// MERGE matches on all three properties (nodetype, sctid, rolegroup), so a
// (sctid, rolegroup) pair that occurs on several CSV rows yields a single node.
// The property line ending in ");" is the pre-change line shown by the diff view;
// the line after it is its replacement inside the CALL subquery.
CALL {
with line
MERGE (rg:RoleGroup
{ nodetype:'rolegroup',
sctid: line.sctid,
rolegroup: line.rolegroup});
rolegroup: line.rolegroup})
} IN TRANSACTIONS OF 500 ROWS;

// Add edge in 2nd step, Java memory issue
RETURN 'Creating HAS_ROLE_GROUP edges';
USING PERIODIC COMMIT 500
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>rolegroups.csv" as line
with line
MATCH (c:ObjectConcept { sctid: line.sctid }), (rg:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })
MERGE (c)-[:HAS_ROLE_GROUP]->(rg);
// Attach every RoleGroup node to the ObjectConcept sharing its sctid.
// Runs as a separate pass from node creation; committed every 500 rows.
CALL {
  WITH line
  MATCH (concept:ObjectConcept { sctid: line.sctid })
  MATCH (grp:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })
  MERGE (concept)-[:HAS_ROLE_GROUP]->(grp)
} IN TRANSACTIONS OF 500 ROWS;


// --------------------------------------------------------------------------------------
// NEXT STEP -- create Defining relationships
Expand Down
14 changes: 8 additions & 6 deletions NEO4J/snomed_g_graphdb_cypher_refset_assoc_create.template
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@

// Create edges for association refset
RETURN 'Creating ASSOCIATION REFSET edges between ObjectConcept nodes';
USING PERIODIC COMMIT 200
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>assoc_refset_new.csv" as line
with line
MATCH (s:ObjectConcept { sctid: line.referencedComponentId }), (d:ObjectConcept { sctid: line.targetComponentId })
WITH s,d,line
MERGE (s)-[:HAS_ASSOCIATION {id: line.id,
// Create HAS_ASSOCIATION edges between the referenced and target ObjectConcept
// nodes named on each CSV row, committing every 200 rows.
// MERGE matches on the full property map, so re-running with identical rows does
// not duplicate edges. Rows whose source or target concept is missing create nothing.
// The "history" line ending in ";" is the pre-change line shown by the diff view;
// the line after it is its replacement inside the CALL subquery.
CALL {
with line
MATCH (s:ObjectConcept { sctid: line.referencedComponentId }), (d:ObjectConcept { sctid: line.targetComponentId })
WITH s,d,line
MERGE (s)-[:HAS_ASSOCIATION {id: line.id,
association: line.association,
refsetId: line.refsetId,
active: line.active,
effectiveTime: line.effectiveTime,
referencedComponentId: line.referencedComponentId,
targetComponentId: line.targetComponentId,
moduleId: line.moduleId,
history: line.history } ]->(d);
history: line.history } ]->(d)
} IN TRANSACTIONS OF 200 ROWS;

16 changes: 10 additions & 6 deletions NEO4J/snomed_g_graphdb_cypher_rmv_problem_edges.template
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@
// --------------------------------------------------------------------

// Progress message for the defining-relationship removal step.
// The stray quote after the semicolon was removed: it opened an unterminated string literal.
RETURN 'Removing defining-relationships (DRs) that changed source/destination';
USING PERIODIC COMMIT 200
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>defining_rel_chg.csv" as line
MATCH (rg:RoleGroup)-[r {id: line.id}]->(c:ObjectConcept)
DELETE r;
// Delete every RoleGroup -> ObjectConcept relationship whose id appears in the CSV,
// committing every 200 rows.
// "with line" imports the LOAD CSV row into the subquery; without it, `line` is not
// defined inside the braces. The subquery body carries no ";" -- the statement ends
// only after "IN TRANSACTIONS OF 200 ROWS".
CALL {
with line
MATCH (rg:RoleGroup)-[r {id: line.id}]->(c:ObjectConcept)
DELETE r
} IN TRANSACTIONS OF 200 ROWS;


RETURN 'Removing ISA relationships that changed the source or destination.';
USING PERIODIC COMMIT 200
LOAD csv with headers from "<<<file_protocol>>><<<output_dir>>>defining_rel_chg.csv" as line
MATCH (b:ObjectConcept)-[r {id: line.id}]->(c:ObjectConcept)
DELETE r;
// Delete every ObjectConcept -> ObjectConcept relationship whose id appears in the CSV,
// committing every 200 rows.
// "with line" imports the LOAD CSV row into the subquery; without it, `line` is not
// defined inside the braces.
// NOTE(review): the enclosing LOAD CSV reads defining_rel_chg.csv, the same file as the
// defining-relationship step -- confirm this is the intended source for ISA changes.
CALL {
with line
MATCH (b:ObjectConcept)-[r {id: line.id}]->(c:ObjectConcept)
DELETE r
} IN TRANSACTIONS OF 200 ROWS;

Loading

0 comments on commit 850eb4a

Please sign in to comment.