From b7df41276dd379b140758973186c7ebc919e6347 Mon Sep 17 00:00:00 2001 From: Github aggregate action Date: Thu, 19 Dec 2024 13:04:28 +0000 Subject: [PATCH] Update from neicnordic/sensitive-data-archive at 13:04 on 2024-12-19 --- docs/dictionary/wordlist.txt | 112 +++++++------ docs/services/api.md | 305 +++++++++++++++++++++++++++++++++++ docs/services/sda.md | 1 - 3 files changed, 367 insertions(+), 51 deletions(-) create mode 100644 docs/services/api.md diff --git a/docs/dictionary/wordlist.txt b/docs/dictionary/wordlist.txt index 163bdc9..037470a 100644 --- a/docs/dictionary/wordlist.txt +++ b/docs/dictionary/wordlist.txt @@ -1,11 +1,52 @@ +ACCESSIONROUTING +AUTHURL +BIGINT +Bigpicture +CENTERPREFIX +CORS +Conf +DNS +DOCTYPE +ECDSA +EL +FK +FS +GRPC +HOSTKEY +INGESTROUTING +JWKPATH +JWTPUBKEYURL +MAPPINGROUTING +Mina's +NSS +NetworkPolicy +PEMKEYPASS +PEMKEYPATH +PGDATA +PREFETCHCOUNT +PRIVATEKEY +RBAC +RESIGNJWT +RegisterFile +SERVERCERT +SERVERKEY +SIGNATUREALG +SPRINGFRAMEWORK +SYNCPUBKEYPATH +SetAccessionID +TCP +TOKENTTL +UDP +URLSearchParams +XC aaf aai aaiconnectprofile abug accessionid accessionids -ACCESSIONROUTING accesskey +accesstoken ack'ed acked adminPassword @@ -15,27 +56,24 @@ aissue aj amqp amqps -apis apiVersion +apis appendChild assignees assigneesreencrypts atitle auth authURL -AUTHURL automagically autonumber backend backupArchive -backuppubkey backupRoutingKey +backuppubkey bbug bfac bigint -BIGINT bigpicture -Bigpicture blockquote bmi bugfix @@ -44,7 +82,6 @@ cacert ccacd cega cegamq -CENTERPREFIX centralega centralega's cgktxeg @@ -62,14 +99,13 @@ cmd cn commandline conf -Conf conffile config confpath const controlledaccessgrants copyheader -CORS +createAt createElement creds cryptographic @@ -83,8 +119,8 @@ datasetid datasetids datasets dbPassword -dbschema dbUser +dbschema ddfqevq decrypt decryptable @@ -95,20 +131,16 @@ decryptor dev discoverable dns -DNS -DOCTYPE doi dsn ebi ecc -ECDSA ef ega +ega's egac egaf -ega's egas -EL 
encodeURIComponent endcoordinate env @@ -117,17 +149,17 @@ erDiagram exportrequests federatedega fega +fileStatus fileid filepath filesystem fjddcmrvlawqmvrbly -FK formsubmission frontend -FS funders -getarchived +gRPC getElementById +getarchived getheader getheaderforstableid getinboxpath @@ -139,10 +171,7 @@ golangci gopath gradle grpc -gRPC -GRPC helpdesk -HOSTKEY hostname howto href @@ -150,8 +179,8 @@ htslib https ietf img +inboxPath incrementing -INGESTROUTING init initd initdb @@ -165,17 +194,15 @@ jku json jsonb jwk -JWKPATH jwks jwt jwtKey jwtPub +jwtSecret jwtpubeyurl jwtpubkeypath jwtpubkeyurl -JWTPUBKEYURL jwts -jwtSecret jwttoken keyfile keypair @@ -193,7 +220,6 @@ localmq logstash makefile mapfilestodataset -MAPPINGROUTING markcompleted markready matchLabels @@ -201,7 +227,6 @@ microservice microservices migratedb mina -Mina's minio miniostorage misspelled words: @@ -220,12 +245,10 @@ namespaceSelector nbis neic neicnordic -NetworkPolicy nginx nodeport notls nss -NSS oidc ol oldpath @@ -241,15 +264,12 @@ ou pacx params pem -PEMKEYPASS -PEMKEYPATH -PGDATA pgvolume phenome pkcs png -podman podSelector +podman policyTypes posix postgres @@ -257,14 +277,14 @@ postgresAdminPassword postgresql pre prefetchcount -PREFETCHCOUNT prepended -PRIVATEKEY programmatically +pubkey publickey querySelector rabbitmq rabbitmqctl +rbacFile readme readthedocs readypath @@ -277,11 +297,13 @@ reencrypted reencrypting reencrypts registerfile -RegisterFile repo reqs requesters -RESIGNJWT +reverification +roleBinding +rolebinding +rolename rootedfilesystem routingError routingkey @@ -290,25 +312,20 @@ samtools schemas sda sda's -secretkey secretName +secretkey sequenceDiagram servercert -SERVERCERT serverkey -SERVERKEY +setAttribute setaccessionid -SetAccessionID setarchived -setAttribute sftp sftpinbox sha -SIGNATUREALG smth somedir speciffic -SPRINGFRAMEWORK src sshd ssl @@ -322,21 +339,18 @@ submitters svc svg syncapi -SYNCPUBKEYPATH sysdevs tada -TCP testsuite +testuser 
thumbsup tls todo -TOKENTTL tryggve tsd ttl txt tz -UDP ui uio unencrypted @@ -344,7 +358,6 @@ unioslo updatedatasetevent uppsala uri -URLSearchParams useif userinfo usit @@ -359,7 +372,6 @@ vscqc wHPVQaYXmdDHg wjkbduq wyenrumyh -XC yaml yihkqimti yml diff --git a/docs/services/api.md b/docs/services/api.md new file mode 100644 index 0000000..abccbb8 --- /dev/null +++ b/docs/services/api.md @@ -0,0 +1,305 @@ +# API + +The API service provides data submitters with functionality to control +their submissions. Users are authenticated with a JWT. + +## Service Description + +Endpoints: + +- `/files` + 1. Parses and validates the JWT token against the public keys, either locally provisioned or from OIDC JWK endpoints. + 2. The `sub` field from the token is extracted and used as the user's identifier + 3. All files belonging to this user are extracted from the database, together with their latest status and creation date + + Example: + + ```bash + $ curl 'https://server/files' -H "Authorization: Bearer $token" + [{"inboxPath":"requester_demo.org/data/file1.c4gh","fileStatus":"uploaded","createAt":"2023-11-13T10:12:43.144242Z"}] + ``` + + If the `token` is invalid, 401 is returned. + +- `/datasets` + - accepts `GET` requests + - Returns all datasets, along with their status and last modified timestamp, for which the user has submitted data. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB failures. + + Example: + + ```bash + $ curl -H "Authorization: Bearer $token" -X GET https://HOSTNAME/datasets + [{"DatasetID":"EGAD74900000101","Status":"deprecated","Timestamp":"2024-11-05T11:31:16.81475Z"}] + ``` + +### Admin endpoints + +Admin endpoints are only available to a set of whitelisted users specified in the application config. 
+ +- `/file/ingest` + - accepts `POST` requests with JSON data with the format: `{"filepath": "", "user": ""}` + - triggers the ingestion of the file. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload i.e. wrong `user` + `filepath` combination. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"filepath": "/uploads/file.c4gh", "user": "testuser"}' https://HOSTNAME/file/ingest + ``` + +- `/file/accession` + - accepts `POST` requests with JSON data with the format: `{"accession_id": "", "filepath": "", "user": ""}` + - assigns accession ID to the file. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload i.e. wrong `user` + `filepath` combination. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_id": "my-id-01", "filepath": "/uploads/file.c4gh", "user": "testuser"}' https://HOSTNAME/file/accession + ``` + +- `/file/verify/:accession` + - accepts `PUT` requests with an accession ID as the last element in the path + - triggers re-verification of the file with the specific accession ID. + + - Error codes + - `200` Query execute ok. + - `404` Error due to non existing accession ID. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X PUT https://HOSTNAME/file/verify/my-id-01 + ``` + +- `/file/:username/:fileid` + - accepts `DELETE` requests + - marks the file as `disabled` in the database, and deletes it from the inbox. 
+ - The file is identified by its id, returned by `/users/:username/files` + + - Response codes + - `200` Query execute ok. + - `400` File id not provided + - `401` Token user is not in the list of admins. + - `404` File not found + - `500` Internal error due to Inbox, DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X DELETE https://HOSTNAME/file/user@demo.org/123abc + ``` + +- `/dataset/create` + - accepts `POST` requests with JSON data with the format: `{"accession_ids": ["", ""], "dataset_id": "", "user": ""}` + - creates a dataset from the list of accession IDs and the dataset ID. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"accession_ids": ["my-id-01", "my-id-02"], "dataset_id": "my-dataset-01", "user": "testuser"}' https://HOSTNAME/dataset/create + ``` + +- `/dataset/release/*dataset` + - accepts `POST` requests with the dataset name as last part of the path + - releases a dataset so that it can be downloaded. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X POST https://HOSTNAME/dataset/release/my-dataset-01 + ``` + +- `/dataset/verify/*dataset` + - accepts `PUT` requests with the dataset name as last part of the path + - triggers reverification of all files in the dataset. + + - Error codes + - `200` Query execute ok. + - `404` Error wrong dataset name. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB or MQ failures. 
+ + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X PUT https://HOSTNAME/dataset/verify/my-dataset-01 + ``` + +- `/datasets/list` + - accepts `GET` requests + - Returns all datasets together with their status and last modified timestamp. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB failures. + + Example: + + ```bash + $ curl -H "Authorization: Bearer $token" -X GET https://HOSTNAME/datasets/list + [{"DatasetID":"EGAD74900000101","Status":"deprecated","Timestamp":"2024-11-05T11:31:16.81475Z"},{"DatasetID":"SYNC-001-12345","Status":"registered","Timestamp":"2024-11-05T11:31:16.965226Z"}] + ``` + +- `/datasets/list/:username` + - accepts `GET` requests with the username as last part of the path + - Returns all datasets, along with their status and last modified timestamp, for which the user has submitted data. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X GET https://HOSTNAME/datasets/list/submission-user + [{"DatasetID":"EGAD74900000101","Status":"deprecated","Timestamp":"2024-11-05T11:31:16.81475Z"}] + ``` + +- `/users` + - accepts `GET` requests + - Returns all users with active uploads as a JSON array + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X GET https://HOSTNAME/users + ``` + + - Error codes + - `200` Query execute ok. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB failure. 
+ +- `/users/:username/files` + - accepts `GET` requests + - Returns all files (that are not part of a dataset) for a user with active uploads as a JSON array + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -X GET https://HOSTNAME/users/submitter@example.org/files + ``` + + - Error codes + - `200` Query execute ok. + - `401` Token user is not in the list of admins. + - `500` Internal error due to DB failure. + +- `/c4gh-keys/add` + - accepts `POST` requests with the hex hash of the key and its description + - registers the key hash in the database. + + - Error codes + - `200` Query execute ok. + - `400` Error due to bad payload. + - `401` Token user is not in the list of admins. + - `409` Key hash already exists in the database. + - `500` Internal error due to DB failures. + + Example: + + ```bash + curl -H "Authorization: Bearer $token" -H "Content-Type: application/json" -X POST -d '{"pubkey": "'"$( base64 -w0 /PATH/TO/c4gh.pub)"'", "description": "this is the key description"}' https://HOSTNAME/c4gh-keys/add + ``` + +#### Configure RBAC + +RBAC is configured according to the JSON schema below. +The path to the JSON file containing the RBAC policies needs to be passed through the `api.rbacFile` config definition. + +The `policy` section will configure access to the defined endpoints. Unless specific rules are set, an endpoint will not be accessible. + +- `action`: can be a single string value, e.g. `GET`, or a regex string with `|` as separator, e.g. `(GET)|(POST)|(PUT)`. In the latter case all actions in the list are allowed. +- `path`: the endpoint. Should be a string value; two wildcard notations are supported: `*`, which matches any value, and `:`, which matches a specific named value. +- `role`: the role that will be able to access the path, `"*"` will match any role or user. 
+ +The `roles` section defines the available roles. + +- `role`: rolename or username from the accesstoken +- `rolebinding`: maps a user/role to another role, this makes roles work as groups which simplifies the policy definitions. + +```json +{ + "policy": [ + { + "role": "admin", + "path": "/c4gh-keys/*", + "action": "(GET)|(POST)|(PUT)" + }, + { + "role": "submission", + "path": "/file/ingest", + "action": "POST" + }, + { + "role": "submission", + "path": "/file/accession", + "action": "POST" + }, + { + "role": "submission", + "path": "/users", + "action": "GET" + }, + { + "role": "submission", + "path": "/users/:username/files", + "action": "GET" + }, + { + "role": "*", + "path": "/files", + "action": "GET" + } + ], + "roles": [ + { + "role": "admin", + "rolebinding": "submission" + }, + { + "role": "dummy@example.org", + "rolebinding": "admin" + }, + { + "role": "test@example.org", + "rolebinding": "submission" + } + ] +} +``` diff --git a/docs/services/sda.md b/docs/services/sda.md index c62152d..9affbc0 100644 --- a/docs/services/sda.md +++ b/docs/services/sda.md @@ -23,4 +23,3 @@ There are also additional support services: 4. [s3inbox](s3inbox.md) proxies uploads to the an S3 compatible storage backend. 5. [sync](sync.md) mirrors ingested data between sites in the [Bigpicture](https://bigpicture.eu/) project. 6. [syncapi](syncapi.md) is used in the [Bigpicture](https://bigpicture.eu/) project for mirroring data between two installations of SDA. -7. [api](api.md) Client and Admin api to control the pipeline