From b3a4ad089217617fd15ecba40b2d5e392b14aec3 Mon Sep 17 00:00:00 2001
From: Adam Coffman
Date: Fri, 25 Oct 2024 10:22:23 -0500
Subject: [PATCH] update clinical trial scraper to use new api, auto fetch titles and descriptions

---
Review notes (text between the three-dash line and the first diff header
is not part of the commit message and is skipped when the patch is applied).

What this patch adds:
  * PopulateClinicalTrialRecord: a background job whose perform passes the
    given record to Scrapers::ClinicalTrial.populate_fields.
  * Scrapers::ClinicalTrial: requests
    https://clinicaltrials.gov/api/v2/studies/<nct_id> as JSON, limited to
    the nctId, briefTitle and briefSummary fields, then copies briefTitle
    into the record's name and the whitespace-squished briefSummary into
    its description and saves the record.
  * Scrapers::ClinicalTrialResponse: parses the response body with
    JSON.parse and exposes nct_id, name and description via Hash#dig, so a
    missing key yields nil rather than raising.
  * ClinicalTrial model: the public study URL now points at /study/<nct_id>,
    and a creation callback enqueues the job when name or description is
    blank.

Changes made during review:
  * The patch text had lost its line breaks and indentation; they are
    restored here. Indentation is reconstructed as conventional two-space
    Ruby -- confirm the context lines of the model hunk against the target
    file before applying.
  * The model callback is after_create_commit rather than after_create, so
    the job is enqueued only once the new row is committed and a worker
    cannot pick it up before the record is visible.
  * populate_fields keeps the record's current name/description when the
    API response has no value for that field, instead of assigning nil.
  * Both code changes keep the per-file line counts, so the hunk headers and
    the diffstat below are unchanged. The post-image blob ids on the index
    lines of the two edited files are the ones from the original commit.

NOTE(review): Util.make_get_request is not part of this patch; the scraper
assumes it returns the response body as a String -- confirm.

 .../jobs/populate_clinical_trial_record.rb    |  5 ++++
 server/app/lib/scrapers/clinical_trial.rb     | 27 +++++++++++++++++++
 .../lib/scrapers/clinical_trial_response.rb   | 24 +++++++++++++++++
 server/app/models/clinical_trial.rb           | 12 ++++++++-
 4 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 server/app/jobs/populate_clinical_trial_record.rb
 create mode 100644 server/app/lib/scrapers/clinical_trial.rb
 create mode 100644 server/app/lib/scrapers/clinical_trial_response.rb

diff --git a/server/app/jobs/populate_clinical_trial_record.rb b/server/app/jobs/populate_clinical_trial_record.rb
new file mode 100644
index 000000000..755335dbe
--- /dev/null
+++ b/server/app/jobs/populate_clinical_trial_record.rb
@@ -0,0 +1,5 @@
+class PopulateClinicalTrialRecord < ApplicationJob
+  def perform(clinical_trial)
+    Scrapers::ClinicalTrial.populate_fields(clinical_trial)
+  end
+end
diff --git a/server/app/lib/scrapers/clinical_trial.rb b/server/app/lib/scrapers/clinical_trial.rb
new file mode 100644
index 000000000..6a40ec8ac
--- /dev/null
+++ b/server/app/lib/scrapers/clinical_trial.rb
@@ -0,0 +1,27 @@
+module Scrapers
+  module ClinicalTrial
+    def self.populate_fields(clinical_trial)
+      resp = call_clinical_trials_api(clinical_trial.nct_id)
+      clinical_trial.description = resp.description.presence || clinical_trial.description
+      clinical_trial.name = resp.name.presence || clinical_trial.name
+      clinical_trial.save
+    end
+
+    def self.call_clinical_trials_api(nct_id)
+      http_resp = Util.make_get_request(url_for_nct_id(nct_id))
+      ClinicalTrialResponse.new(http_resp)
+    end
+
+    def self.url_for_nct_id(nct_id)
+      "https://clinicaltrials.gov/api/v2/studies/#{nct_id}?format=json&fields=#{fields}"
+    end
+
+    def self.fields
+      [
+        "protocolSection.identificationModule.nctId",
+        "protocolSection.identificationModule.briefTitle",
+        "protocolSection.descriptionModule.briefSummary"
+      ].join(",")
+    end
+  end
+end
diff --git a/server/app/lib/scrapers/clinical_trial_response.rb b/server/app/lib/scrapers/clinical_trial_response.rb
new file mode 100644
index 000000000..0e6729458
--- /dev/null
+++ b/server/app/lib/scrapers/clinical_trial_response.rb
@@ -0,0 +1,24 @@
+module Scrapers
+  class ClinicalTrialResponse
+    attr_reader :json
+    def initialize(response_body)
+      @json = JSON.parse(response_body)
+    end
+
+    def nct_id
+      json.dig("protocolSection", "identificationModule", "nctId")
+    end
+
+    def name
+      json.dig("protocolSection", "identificationModule", "briefTitle")
+    end
+
+    def description
+      if desc = json.dig("protocolSection", "descriptionModule", "briefSummary")
+        desc.squish
+      else
+        nil
+      end
+    end
+  end
+end
diff --git a/server/app/models/clinical_trial.rb b/server/app/models/clinical_trial.rb
index d46abab0c..8eb19074e 100644
--- a/server/app/models/clinical_trial.rb
+++ b/server/app/models/clinical_trial.rb
@@ -1,15 +1,25 @@
 class ClinicalTrial < ActiveRecord::Base
   has_and_belongs_to_many :sources
 
+  after_create_commit :populate_additional_fields_if_needed
+
   def self.url_for(nct_id:)
     if nct_id.blank?
       nil
     else
-      "https://clinicaltrials.gov/ct2/show/#{nct_id}"
+      "https://clinicaltrials.gov/study/#{nct_id}"
     end
   end
 
   def link
     Rails.application.routes.url_helpers.url_for("/clinical-trials/#{self.id}")
   end
+
+
+  private
+  def populate_additional_fields_if_needed
+    if self.name.blank? || self.description.blank?
+      PopulateClinicalTrialRecord.perform_later(self)
+    end
+  end
 end