diff --git a/input/fsh/Alias.fsh b/input/fsh/Alias.fsh index 168e4063c..b5ffa481e 100644 --- a/input/fsh/Alias.fsh +++ b/input/fsh/Alias.fsh @@ -17,4 +17,5 @@ Alias: $mesh = urn:oid:2.16.840.1.113883.6.177 Alias: $ncpi-dob-method = https://nih-ncpi.github.io/ncpi-fhir-ig-2/CodeSystem/research-data-date-of-birth-method Alias: $ncit = http://purl.obolibrary.org/obo/ncit.owl +Alias: $edam = http://edamontology.org Alias: $ucum = http://unitsofmeasure.org \ No newline at end of file diff --git a/input/fsh/examples/files.fsh b/input/fsh/examples/files.fsh new file mode 100644 index 000000000..d9ce16110 --- /dev/null +++ b/input/fsh/examples/files.fsh @@ -0,0 +1,22 @@ +Instance: PT-006SP660 +InstanceOf: NcpiFile +Title: "Example file based on CBTN" +Usage: #example +Description: "Use case of file information from CBTN" +* identifier.value = "PT_006SP660" +* subject = Reference(GF_6BAD9S7D) +* description = "Annotated Variant Call" +* type = $edam#operation_3227 "Variant calling" +* extension[content-version].valueString = "V1" +* status = #current +* content[+] + * attachment.url = "s3://kf-strides-study-us-east-1-prd-sd-54g4wg4r/harmonized-data/family-variants/155bb529-2e7b-474f-ba24-cd0656d5f3d0.CGP.filtered.deNovo.vep.vcf.gz" + * extension[location-access].valueReference = Reference(kf-gru-dac-consent) +* extension[file-format].valueCodeableConcept.coding = $edam#format_3016 "VCF" +* extension[file-size] + * valueQuantity + * value = 1044770380 + * unit = "bytes" +* extension[hash] + * extension[hash-value].valueString = "8f107912d862cf91fbfb77bf9c1bab36-4" + * extension[hash-type].valueCode = #etag \ No newline at end of file diff --git a/input/fsh/modules/files.fsh b/input/fsh/modules/files.fsh new file mode 100644 index 000000000..9e0921ce2 --- /dev/null +++ b/input/fsh/modules/files.fsh @@ -0,0 +1,142 @@ +/* +Files Module profiles and logical Model +*/ + +Logical: CdmFile +Id: SharedDataModelFile +Title: "Shared Data Model for File" +Description: "The **Shared 
Data Model for File**" +* participantID 1..1 reference "The participant(s) for whom this file contains data" +* fileExternalID 0..1 string "A related identifier of this file" +* format 1..1 code "The file format used" +* location 1..* List "List of locations where this data can be accessed" +* location.uri 1..1 uri "The URI at which this data can be accessed" +* location.accessPolicy 0..* reference "If present, only those under the specific Access Policy can access the file in this location." +* fileSize 1..1 Quantity "The size of the file, e.g., in bytes." +* hash 0..* List "Provides a list of hashes for confirming file transfers" +* hash.type 0..1 code "Algorithm used to calculate the hash (and size, where applicable)" +* hash.value 1..1 string "Value of hashing the file" +* contentVersion 0..1 string "Version of the file content" +* description 0..1 string "A description of the file" +* type 1..1 code "The type of data contained in this file. Should be as detailed as possible, e.g., Whole Exome Variant Calls." 
+* relatedFile 0..1 List "Provides a reference to another file that is related to this one" +* relatedFile.file 0..1 reference "The file to which this related file is related" +* relatedFile.type 0..1 code "The relationship of the file to the parent file in reference" + +/* TODO Add Related file to metadata - AH 2024-07-30 */ + +CodeSystem: HashTypeCS +Id: example-hash-type-code-system +Title: "Hash Types Code System" +Description: "Algorithm used to calculate the hash (and size, where applicable)" +* #md5 "md5 hash type" +* #sha256 "sha256 hash type" +* #sha512 "sha512 hash type" +* #sha1 "sha1 hash type" +* #crc32 "crc32 hash type" +* #crc32c "crc32c hash type" +* #etag "etag hash type" + +CodeSystem: RelatedFileTypeCS +Id: related-file-type-code-system +Title: "Related File Type Code System" +Description: "Explains the relationship of this file to the file of reference" +* #index_of "Index of" +* #has_index "Has index" +* #data_dictionary_of "Data dictionary of" +* #has_data_dictionary "Has data dictionary" +* #plink-type-associated-files "Plink-type associated files" + +ValueSet: EDAMOntologyTerms +Id: edam-ontology-terms +Title: "Enumerations for the EDAM ontology" +Description: "Enumerations for the EDAM ontology" +* include codes from system $edam + +Extension: FileFormat +Id: file-format +Title: "The file format used" +Description: "The file format used" +* insert SetContext(DocumentReference) +* value[x] only CodeableConcept +* valueCodeableConcept from edam-ontology-terms (extensible) /* Bind to the EDAMOntologyTerms ValueSet; the $edam alias is the CodeSystem URL, not a ValueSet */ + +Extension: LocationAccess +Id: location-access +Title: "If present, only those under the specific Access Policy can access the file in this location." +Description: "If present, only those under the specific Access Policy can access the file in this location." +* insert SetContext(DocumentReference.content) +* value[x] only Reference +* valueReference ^short = "If present, only those under the specific Access Policy can access the file in this location." 
+ +Extension: FileSize +Id: file-size +Title: "The size of the file, e.g., in bytes." +Description: "The size of the file, e.g., in bytes." +* insert SetContext(DocumentReference) +* value[x] only Quantity +* valueQuantity ^short = "Indicate the size of the file in reference" + +Extension: ContentVersion +Id: content-version +Title: "Version of the contents of the file" +Description: "Version of the contents of the file" +* insert SetContext(DocumentReference) +* value[x] only string +* valueString ^short = "Indicate the version (e.g., V1) for the contents of this file" + +Extension: HashValue +Id: hash-value +Title: "Value of hashing the file" +Description: "Value of hashing the file" +* insert SetContext(DocumentReference.extension) +* value[x] only string +* valueString ^short = "Value of hashing the file" + +Extension: HashType +Id: hash-type +Title: "Algorithm used to calculate the hash (and size, where applicable)" +Description: "Algorithm used to calculate the hash (and size, where applicable)" +* insert SetContext(DocumentReference.extension) +* value[x] only code +* valueCode ^short = "Algorithm used to calculate the hash (and size, where applicable)" + +Extension: HashExtension +Id: hash-extension +Title: "Provides a list of hashes for confirming file transfers" +Description: "Provides a list of hashes for confirming file transfers" +* insert SetContext(DocumentReference) +* extension contains HashValue named hash-value 1..1 +* extension[hash-value] ^short = "Value of hashing the file" +* extension contains HashType named hash-type 1..1 +* extension[hash-type] ^short = "Algorithm used to calculate the hash (and size, where applicable)" + +/** TODO Add Related file to metadata - AH 2024-07-30 */ + +Profile: NcpiFile +Parent: DocumentReference +Id: ncpi-file +Title: "NCPI File" +Description: "Information about a file related to a research participant" +* ^version = "0.0.1" +* ^status = #draft +* identifier 0..* /*File External ID*/ +* identifier ^short = "A 
related external file ID" +* subject 0..1 /*Participant*/ +* subject ^short = "The participant(s) for whom this file contains data (i.e., ParticipantID)" +* extension contains FileFormat named file-format 1..1 /*File Format*/ +* extension[file-format] ^short = "The file format used (EDAM is preferred)" +* content.attachment.url 1..1 /*Location uri*/ +* content.attachment.url ^short = "The URI at which this data can be accessed" +* content.extension contains LocationAccess named location-access 0..* /*Location Access Policy; sliced on content because the LocationAccess context is DocumentReference.content*/ +* content.extension[location-access] ^short = "If present, only those under the specific Access Policy can access the file in this location." +* extension contains FileSize named file-size 1..1 /*File Size*/ +* extension[file-size] ^short = "Indicate the size of the file in reference" +* extension contains HashExtension named hash 0..* /*Hash (contains type and value)*/ +* extension contains ContentVersion named content-version 0..1 /*Content Version*/ +* extension[content-version] ^short = "The version of the content in the file" +* description 0..1 /*Description*/ +* description ^short = "A description of the file" +* type 0..1 /*File Type*/ +* type from edam-ontology-terms (extensible) +* type ^short = "The type of data contained in this file." \ No newline at end of file diff --git a/input/pagecontent/StructureDefinition-SharedDataModelFile-intro.md b/input/pagecontent/StructureDefinition-SharedDataModelFile-intro.md new file mode 100644 index 000000000..e8613a977 --- /dev/null +++ b/input/pagecontent/StructureDefinition-SharedDataModelFile-intro.md @@ -0,0 +1,11 @@ +### NCPI File +#### Introduction +Files are a common research product. In this straightforward representation, we provide basic details of the file and how to access it. Details about what is contained in the file or how the content was generated should be described with other entities, such as data dictionaries, summaries, or assays. 
+ +#### File Definitions +File contains basic file metadata about the file location and contents. Files are typically associated with one or more participants, though they can also include general study documents. The file content may have different access control restrictions when compared to this entity, which is only the file metadata. + +There can be multiple file location references, for example DRS and cloud storage references, though the access approaches for those locations should be reasonably apparent through the Access Policy for the file content. + +#### Example +If a data file is ONLY accessible through DRS, the underlying bucket locations should not be included here as no user would be able to access them directly. However, if there are multiple Access Policies that provide routes to access the data through different URIs, those can be included. Controlled access release via DRS with a consortium access model permitting direct bucket access could both be stated here to permit consistent reference to the File irrespective of the access mechanism. \ No newline at end of file diff --git a/input/pagecontent/StructureDefinition-ncpi-file-intro.md b/input/pagecontent/StructureDefinition-ncpi-file-intro.md new file mode 100644 index 000000000..414123d81 --- /dev/null +++ b/input/pagecontent/StructureDefinition-ncpi-file-intro.md @@ -0,0 +1,40 @@ +#### Key Guidelines +The NCPI File profile is based on the standard resource type, [DocumentReference](https://hl7.org/fhir/r4/documentreference.html) and is intended to represent the files associated with a participant in a research study. + +##### Added Profile Restrictions +In order to ensure that our resources are interoperable across studies, we have employed a number of restrictions that should make consuming Patient resources more consistent. + + +* participantID **should** be a globally unique identifier associated with the patient. 
This practice is intended to make constructing queries for the same patient compatible across different servers (such as QA vs PROD) but also to make the resource URLs more meaningful. + +* fileExternalID **should** have all appropriate Identifiers with a meaningful system/value pair. Such identifiers may include dbGaP accession IDs, global and external IDs, etc. + +* format and relatedFile.type **should** use [EDAM](https://edamontology.org/) terminology (i.e., codes) when available. Other file type code systems are allowed if a suitable EDAM code does not exist. + + +#### Recommended Practices +TODO: Write Recommended Practices + +##### FHIR Mappings +The following fields from the shared data model are to be mapped to the NCPI File as shown below: + +| **Logical Model Property** | **Cardinality** | **NCPI FHIR Mapping** | **Usage Guidance** | **Notes**| +participantID|1..1|subject|The participant(s) for whom this file contains data| +fileExternalID|0..1|identifier.value|A related identifier of this file| +format|1..1|extension[file-format].valueCodeableConcept.coding|The file format used| +location|1..*|content|List of locations where this data can be accessed| +location.uri|1..1|content.attachment.url|The URI at which this data can be accessed| +location.accessPolicy|0..*|content.extension[location-access].valueReference|If present, only those under the specific Access Policy can access the file in this location.| +fileSize|1..1|extension[file-size].valueQuantity.value, extension[file-size].valueQuantity.unit|The size of the file, e.g., in bytes.| +hash|0..*|extension[hash]|Provides a list of hashes for confirming file transfers| +hash.type|0..1|extension[hash-type].valueCode|Algorithm used to calculate the hash (and size, where applicable)| +hash.value|1..1|extension[hash-value].valueString|Value of hashing the file| +contentVersion|0..1|extension[content-version].valueString|Version of the file content| +description|0..1|description|A description of the file| 
+type|1..1|type|The type of data contained in this file. Should be as detailed as possible, e.g., Whole Exome Variant Calls.| +relatedFile|0..1|TODO|Provides a reference to another file that is related to this one| +relatedFile.file|0..1|TODO|The file to which this related file is related| +relatedFile.type|0..1|TODO|The relationship of the file to the parent file in reference| + +##### Note on Related Files +The fields of related files are under the scope of file metadata and that module has yet to be written at the time of the definition of Files. Once a file metadata module is written, the FHIR mappings for related file in this IG should be updated. \ No newline at end of file