Skip to content

Commit

Permalink
Merge pull request #1294 from NEU-Libraries/feature/load_report_obj_a…
Browse files Browse the repository at this point in the history
…nd_spec

Feature/load report obj and spec
  • Loading branch information
dgcliff authored Oct 10, 2024
2 parents ddfc629 + bcdbd38 commit 2936070
Show file tree
Hide file tree
Showing 23 changed files with 353 additions and 148 deletions.
2 changes: 1 addition & 1 deletion .atlas
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.156
0.0.157
2 changes: 1 addition & 1 deletion .version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.8.1
2.8.8
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem 'debug', platforms: %i[mri mingw x64_mingw]
gem 'faker'
gem 'fix-db-schema-conflicts'
gem 'rspec'
gem 'rspec-rails'

Expand Down
9 changes: 8 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ GEM
addressable (2.8.7)
public_suffix (>= 2.0.2, < 7.0)
ast (2.4.2)
atlas_rb (0.0.61)
atlas_rb (0.0.64)
faraday (~> 2.7)
faraday-follow_redirects (~> 0.3.0)
faraday-multipart (~> 1)
Expand Down Expand Up @@ -178,6 +178,8 @@ GEM
faraday-net_http (3.3.0)
net-http
ffi (1.17.0-x86_64-linux-gnu)
fix-db-schema-conflicts (3.1.1)
rubocop (>= 0.38.0)
fugit (1.11.1)
et-orbi (~> 1, >= 1.2.11)
raabro (~> 1.4)
Expand Down Expand Up @@ -341,6 +343,9 @@ GEM
actionpack (>= 5.2)
railties (>= 5.2)
rexml (3.3.7)
roo (2.10.1)
nokogiri (~> 1)
rubyzip (>= 1.3.0, < 3.0.0)
rsolr (2.6.0)
builder (>= 2.1.2)
faraday (>= 0.9, < 3, != 2.0.0)
Expand Down Expand Up @@ -457,6 +462,7 @@ DEPENDENCIES
enumerations
factory_bot_rails
faker
fix-db-schema-conflicts
good_job
hamlit
importmap-rails
Expand All @@ -469,6 +475,7 @@ DEPENDENCIES
puma
rails
rails-controller-testing
roo
rsolr (>= 1.0, < 3)
rspec
rspec-rails
Expand Down
88 changes: 62 additions & 26 deletions app/controllers/loads_controller.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
# frozen_string_literal: true

require 'zip'
require 'roo'

# TODO: Decide which ingests should be recorded in a load_report.
# Right now, any row whose files look valid becomes an Ingest and is then queued for loading,
# so a load_report's success rate is almost always 100% (which is not always accurate).
# Conversely, a load can be reported as failed even though none of its valid jobs failed.
class LoadsController < ApplicationController
def index
@ingests = Ingest.order(created_at: :desc)
@load_reports = LoadReport.order(created_at: :desc)
end

def show
@load_report = LoadReport.find(params[:id])
end

def create
Expand All @@ -25,39 +30,70 @@ def create
private

def process_zip(zip)
Zip::File.open(zip) do |zip_file|
manifest_file = zip_file.find_entry('manifest.xlsx')
if manifest_file
process_spreadsheet(manifest_file, zip_file)
else
redirect_to loads_path, alert: 'Manifest file not found in ZIP.'
failures = []
load_report = nil

begin
Zip::File.open(zip) do |zip_file|
manifest_file = zip_file.find_entry('manifest.xlsx')
if manifest_file
load_report = process_spreadsheet(manifest_file, zip_file, failures)
else
failures << 'Manifest file not found in ZIP.'
end
end
rescue Zip::Error => e
failures << "Error processing ZIP file: #{e.message}"
end

if failures.empty?
load_report&.finish_load
redirect_to loads_path, notice: 'ZIP file processed successfully.'
else
load_report&.fail_load
redirect_to loads_path, alert: "Errors occurred during processing: #{failures.join(', ')}"
end
rescue Zip::Error => e
redirect_to loads_path, alert: "Error processing ZIP file: #{e.message}"
end

def process_spreadsheet(xlsx_file, zip_file)
def process_spreadsheet(xlsx_file, zip_file, failures)
spreadsheet_content = xlsx_file.get_input_stream.read
spreadsheet = Roo::Spreadsheet.open(StringIO.new(spreadsheet_content), extension: :xlsx)
load_report = LoadReport.create!(status: :in_progress)
load_report.start_load

spreadsheet.each_with_index do |row, index|
next if index.zero?
pid = row[0]
file_name = row[1]
if pid && file_name
xml_entry = zip_file.find_entry(file_name)
if xml_entry
ingest = Ingest.create_from_spreadsheet_row(row)
xml_content = xml_entry.get_input_stream.read
UpdateMetadataJob.perform_later(pid, xml_content, ingest.id)
header_row = spreadsheet.row(1)
header_hash = {}
header_row.each_with_index do |cell, index|
header_hash[cell] = index
end
if header_hash.key?("PIDs") && header_hash.key?("MODS XML File Path")
pid_column = header_hash["PIDs"]
file_path_column = header_hash["MODS XML File Path"]
spreadsheet.each_with_index do |row, index|
next if index.zero?
pid = row[pid_column]
file_name = row[file_path_column]
if pid && file_name
xml_entry = zip_file.find_entry(file_name)
if xml_entry
ingest = Ingest.create_from_spreadsheet_row(pid, file_name, load_report.id)
xml_content = xml_entry.get_input_stream.read
UpdateMetadataJob.perform_later(pid, xml_content, ingest.id)
else
failures << "#{file_name} file not found in ZIP"
end
else
redirect_to loads_path, alert: "#{file_name} file not found in ZIP: "; return
failures << "Missing PID or filename in row #{index + 1}"
end
end

load_report
else
failures << "Cannot find header labels"
load_report
end
redirect_to loads_path, notice: 'ZIP file processed successfully.'
rescue StandardError => e
redirect_to loads_path, alert: "Error processing spreadsheet: #{e.message}"
failures << "Error processing spreadsheet: #{e.message}"
load_report
end
end
13 changes: 8 additions & 5 deletions app/models/ingest.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
# frozen_string_literal: true

class Ingest < ApplicationRecord
enum status: { pending: 0, completed: 1, failed: 2 }
belongs_to :load_report

enum :status, { pending: 0, completed: 1, failed: 2 }

validates :pid, presence: true
validates :xml_filename, presence: true
validates :status, presence: true

def self.create_from_spreadsheet_row(row)
def self.create_from_spreadsheet_row(pid, file_name, load_report_id)
create!(
pid: row[0],
xml_filename: row[1],
status: :pending
pid: pid,
xml_filename: file_name,
status: :pending,
load_report: LoadReport.find(load_report_id)
)
end
end
26 changes: 26 additions & 0 deletions app/models/load_report.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

# Summary record for one batch load. Groups the Ingest rows created for that
# load and tracks the overall status plus start/finish timestamps.
class LoadReport < ApplicationRecord
  # NOTE(review): no `dependent:` option is set; the ingests migration declares a
  # NOT NULL foreign key to load_reports, so destroying a report that still has
  # ingests will presumably be rejected by the database - confirm intended behavior.
  has_many :ingests

  enum :status, { in_progress: 0, completed: 1, failed: 2 }

  validates :status, presence: true

  # Marks the load as in progress and stamps the start time.
  # Returns the result of `update` (false if the record failed to save).
  def start_load
    update(status: :in_progress, started_at: Time.current)
  end

  # Marks the load as completed and stamps the finish time.
  # Returns the result of `update` (false if the record failed to save).
  def finish_load
    update(status: :completed, finished_at: Time.current)
  end

  # Marks the load as failed and stamps the finish time.
  # Returns the result of `update` (false if the record failed to save).
  def fail_load
    update(status: :failed, finished_at: Time.current)
  end

  # Percentage (0-100, rounded to two decimals) of this report's ingests whose
  # status is `completed`. Returns 0 when the report has no persisted ingests.
  def success_rate
    # Count once and reuse it: this both guards the division and avoids the
    # extra existence query the view would otherwise trigger for every row.
    total = ingests.count
    return 0 if total.zero?

    (ingests.completed.count.to_f / total * 100).round(2)
  end
end
35 changes: 0 additions & 35 deletions app/views/loads/index.html.erb

This file was deleted.

30 changes: 30 additions & 0 deletions app/views/loads/index.html.haml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
%h2.py-3
Ingest Management

.upload-form
%h3.py-3
Upload ZIP File
= form_tag loads_path, multipart: true do
.input-group.mb-3
%input#file.form-control{name: "file", type: "file", accept: 'application/zip,application/x-zip-compressed'}/
= submit_tag "Upload", class: 'btn btn-primary'
%div
%h2.py-3
Load Reports
%table.table
%thead
%tr
%th Status
%th Started At
%th Finished At
%th Success Rate
%th View
%tbody
- @load_reports.each do |load_report|
%tr
%td= load_report.status
%td= load_report.started_at
%td= load_report.finished_at
%td= load_report.success_rate
%td
%a.btn.btn-sm.btn-primary{:href => load_path(load_report.id)} View
17 changes: 17 additions & 0 deletions app/views/loads/show.html.haml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
%h2.py-3
Load Report
%table.table
%thead
%tr
%th PID
%th XML Filename
%th Status
%th Created At
%tbody
- @load_report.ingests.each do |ingest|
%tr
%td= ingest.pid
%td= ingest.xml_filename
%td= ingest.status
%td= ingest.created_at

3 changes: 1 addition & 2 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@

resources :communities
resources :collections
resources :loads
resources :works

# xml
get '/xml/editor/:id' => 'xml#editor', as: 'xml_editor'
put '/xml/validate' => 'xml#validate'
put '/xml/update' => 'xml#update'

resources :loads, only: [:index, :create]
end
11 changes: 11 additions & 0 deletions db/migrate/20240926172305_create_load_reports.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Creates the load_reports table that backs the LoadReport model.
class CreateLoadReports < ActiveRecord::Migration[7.2]
def change
create_table :load_reports do |t|
# Integer-backed status, required, defaulting to 0
# (LoadReport's enum maps 0 to in_progress).
t.integer :status, null: false, default: 0
# Nullable: written by LoadReport#start_load and #finish_load / #fail_load.
t.datetime :started_at
t.datetime :finished_at

# Adds the created_at / updated_at columns.
t.timestamps
end
end
end
1 change: 1 addition & 0 deletions db/migrate/20240926172306_create_ingests.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ def change
t.string :pid, null: false
t.string :xml_filename, null: false
t.integer :status, null: false, default: 0
t.references :load_report, foreign_key: true, null: false

t.timestamps
end
Expand Down
Loading

0 comments on commit 2936070

Please sign in to comment.