
Merge pull request #3158 from nulib/deploy/staging
Deploy Meadow v6.1.0
mbklein authored Aug 9, 2022
2 parents b74ac89 + d2e2023 commit dc2db19
Showing 34 changed files with 864 additions and 267 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/build.yml
@@ -3,6 +3,7 @@ name: meadow
 on: [push]
 jobs:
   dependencies:
+    if: ${{ ! (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/deploy/') || startsWith(github.ref, 'refs/heads/build/')) }}
     runs-on: ubuntu-latest
     env:
       MIX_ENV: test
@@ -222,9 +223,6 @@ jobs:
         run: mix ecto.rollback --all
         working-directory: app
   publish:
-    needs:
-      - elixir-test
-      - js-test
     if: ${{ (!github.event.pull_request) && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/deploy/') || startsWith(github.ref, 'refs/heads/build/')) }}
     runs-on: ubuntu-latest
     steps:
32 changes: 0 additions & 32 deletions app/assets/js/hooks/useAcceptedMimeTypes.js
@@ -73,38 +73,6 @@ export default function useAcceptedMimeTypes() {
         }
         break;
       case "P":
-        switch (workTypeId) {
-          case "IMAGE":
-            if (!isImage) {
-              isValid = false;
-              code = "invalid-image";
-              message =
-                "Image work types Preservation fileset roles must be image mime type";
-            }
-            break;
-          case "AUDIO":
-            if (!isAudio) {
-              isValid = false;
-              code = "invalid-audio";
-              message =
-                "Audio work types Preservation fileset roles must be audio mime type";
-            }
-            break;
-          case "VIDEO":
-            if (!isVideo) {
-              isValid = false;
-              code = "invalid-video";
-              message =
-                "Video work types Preservation fileset roles must be video mime type";
-            }
-            break;
-          default:
-            console.error(`Invalid work type id: ${workTypeId}`);
-            isValid = false;
-            code = "invalid-work-type";
-            message = "Work type is invalid";
-            break;
-        }
         break;
       default:
         console.error(`Invalid file set role: ${fileSetRole}`);
28 changes: 7 additions & 21 deletions app/assets/js/hooks/useAcceptedMimeTypes.test.js
@@ -67,28 +67,14 @@ describe("useAcceptedMimeTypes hook", () => {
   describe("Preservation role", () => {
     const { isFileValid } = useAcceptedMimeTypes();
 
-    it("returns the correct mime types for Image work type", () => {
-      const result = isFileValid("P", "IMAGE", "image/tiff");
-      const resultBad = isFileValid("P", "IMAGE", "audio/tiff");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-image");
-    });
-
-    it("returns the correct mime types for Audio work type", () => {
-      const result = isFileValid("P", "AUDIO", "audio/flac");
-      const resultBad = isFileValid("P", "AUDIO", "video/mp4");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-audio");
-    });
+    it("accepts all file types", () => {
+      const results = [
+        isFileValid("P", "VIDEO", "video/x-mts"),
+        isFileValid("P", "IMAGE", "application/octet-stream"),
+        isFileValid("P", "AUDIO", "video/mp4"),
+      ];
 
-    it("returns the correct mime types for Video work type", () => {
-      const result = isFileValid("P", "VIDEO", "video/mp4");
-      const resultBad = isFileValid("P", "VIDEO", "audio/mp4");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-video");
+      expect(results.every((result) => result.isValid)).toBeTruthy();
     });
   });
});
31 changes: 15 additions & 16 deletions app/assets/package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions app/lib/meadow/application/children.ex
@@ -12,6 +12,7 @@ defmodule Meadow.Application.Children do
       "batch_driver" => Meadow.BatchDriver,
       "csv_update_driver" => Meadow.CSVMetadataUpdateDriver,
       "index_worker" => {Meadow.Data.IndexWorker, interval: Config.index_interval()},
+      "reindex_worker" => {Meadow.Data.ReindexWorker, interval: Config.index_interval()},
       "database_listeners" => [
        Meadow.ARKListener,
        Meadow.FilesetDeleteListener,
40 changes: 40 additions & 0 deletions app/lib/meadow/config.ex
@@ -17,6 +17,46 @@ defmodule Meadow.Config do
     |> Keyword.get(:primary_index)
   end
 
+  def indexes do
+    Application.get_env(:meadow, Meadow.ElasticsearchCluster)
+    |> Keyword.get(:indexes)
+    |> Map.keys()
+  end
+
+  def v1_index, do: elasticsearch_index() |> to_string()
+
+  def v2_index(model) when is_atom(model) do
+    model
+    |> search_model_from_schema()
+    |> v2_index()
+  end
+
+  def v2_index(model) do
+    model_regex = model |> kebab_case() |> Regex.compile!()
+
+    indexes()
+    |> Enum.map(&to_string/1)
+    |> Enum.find(&(&1 =~ model_regex))
+  end
+
+  # def v2_pipeline(model), do: "#{Env.prefix()}-v1-to-v2-#{kebab_case(model)}"
+  def v2_pipeline(index_name) when is_binary(index_name) do
+    index_name
+    |> String.to_existing_atom()
+    |> v2_pipeline()
+  end
+
+  def v2_pipeline(index_name) do
+    Application.get_env(:meadow, Meadow.ElasticsearchCluster)
+    |> Keyword.get(:indexes)
+    |> get_in([index_name, :default_pipeline])
+  end
+
+  def search_model_from_schema(schema),
+    do: schema |> to_string() |> String.split(".") |> List.last()
+
+  defp kebab_case(model), do: model |> Macro.underscore() |> String.replace("_", "-")
+
   @doc "Retrieve shared links index name"
   def shared_links_index do
     Application.get_env(:meadow, :shared_links_index)
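
Note: the new helpers resolve a schema module to its v2 index by kebab-casing the model name and scanning the configured index names. A minimal sketch of the lookup chain, using hypothetical index and pipeline names (real values come from the Meadow.ElasticsearchCluster config):

# Assumed config shape -- names here are hypothetical:
# config :meadow, Meadow.ElasticsearchCluster,
#   indexes: %{"meadow-v2-file-set": %{default_pipeline: "v1-to-v2-file-set"}}

Meadow.Config.search_model_from_schema(Meadow.Data.Schemas.FileSet)
#=> "FileSet"

Meadow.Config.v2_index(Meadow.Data.Schemas.FileSet)
#=> "meadow-v2-file-set"   # first configured index matching ~r/file-set/

Meadow.Config.v2_pipeline("meadow-v2-file-set")
#=> "v1-to-v2-file-set"    # that index's :default_pipeline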
34 changes: 29 additions & 5 deletions app/lib/meadow/data/indexer.ex
@@ -9,6 +9,7 @@ defmodule Meadow.Data.Indexer do
   alias Meadow.Data.Schemas.{Collection, FileSet, Work}
   alias Meadow.ElasticsearchCluster, as: Cluster
   alias Meadow.ElasticsearchDiffStore, as: Store
+  alias Meadow.Search.Client, as: SearchClient
 
   require Logger

@@ -42,8 +43,29 @@
   end
 
   def reindex_all! do
-    IndexTimes.reset_all!()
-    synchronize_index()
+    with now <- NaiveDateTime.utc_now() do
+      IndexTimes.reset_all!()
+      synchronize_index()
+      delete_outdated_documents(now)
+    end
+
+    Logger.info("Reindex complete")
   end
+
+  defp delete_outdated_documents(time) do
+    query = %{
+      query: %{
+        range: %{
+          indexed_at: %{
+            lt: time
+          }
+        }
+      }
+    }
+
+    for index <- Config.indexes() do
+      SearchClient.delete_by_query(index, query)
+    end
+  end
 
   def synchronize_schema(schema) do
@@ -67,14 +89,16 @@
   end
 
   def encode!(id, :deleted) do
-    %{delete: %{_index: index(), _id: id}}
-    |> json_encode()
+    for index <- Config.indexes() do
+      %{delete: %{_index: index, _id: id}}
+    end
+    |> Enum.map_join("\n", &json_encode/1)
   end
 
   def encode!(indexable, _) do
     [
       %{index: %{_index: index(), _id: indexable.id}},
-      indexable |> Elasticsearch.Document.encode()
+      Elasticsearch.Document.encode(indexable)
     ]
     |> Enum.map_join("\n", &json_encode/1)
   end
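
Note: deletes now fan out to every configured index in one bulk payload. A sketch of the output, assuming Config.indexes() returns two hypothetical names:

# Assuming Config.indexes() returns [:meadow, :"meadow-v2-work"] (hypothetical):
Meadow.Data.Indexer.encode!("abc123", :deleted)
# produces, roughly, two bulk actions joined by a newline:
#   {"delete":{"_id":"abc123","_index":"meadow"}}
#   {"delete":{"_id":"abc123","_index":"meadow-v2-work"}}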
18 changes: 18 additions & 0 deletions app/lib/meadow/data/reindex_worker.ex
@@ -0,0 +1,18 @@
+defmodule Meadow.Data.ReindexWorker do
+  @moduledoc """
+  IntervalTask that reindexes from V1 to V2
+  """
+  alias Meadow.Data.Reindexer
+  alias Meadow.IntervalTask
+
+  use IntervalTask, default_interval: 1_000, function: :synchronize
+
+  @impl IntervalTask
+  def initial_state(_args),
+    do: %{override: true, tasks: %{}}
+
+  def synchronize(%{tasks: tasks} = state) do
+    tasks = Reindexer.synchronize(tasks)
+    {:noreply, Map.replace(state, :tasks, tasks)}
+  end
+end
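
Note: a sketch of how the worker's state evolves, assuming Meadow.IntervalTask invokes the configured :synchronize function once per interval (task ids below are hypothetical):

# Tick 1: no reindex tasks in flight, so each schema starts one.
state = %{override: true, tasks: %{}}
{:noreply, %{tasks: tasks}} = Meadow.Data.ReindexWorker.synchronize(state)

# tasks now maps schema modules to OpenSearch task ids, e.g.:
# %{
#   Meadow.Data.Schemas.Collection => "oTUl:123",
#   Meadow.Data.Schemas.FileSet => "oTUl:124",
#   Meadow.Data.Schemas.Work => "oTUl:125"
# }

# Later ticks re-check those ids: schemas whose task finished start a
# fresh reindex pass, unfinished ones keep their task id.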
52 changes: 52 additions & 0 deletions app/lib/meadow/data/reindexer.ex
@@ -0,0 +1,52 @@
+defmodule Meadow.Data.Reindexer do
+  @moduledoc """
+  Reindexes from v1 to v2 using the OpenSearch Reindex API.
+  """
+  use Meadow.Utils.Logging
+
+  alias Meadow.Config
+  alias Meadow.Data.Schemas.{Collection, FileSet, Work}
+  alias Meadow.Search.Client, as: SearchClient
+
+  require Logger
+
+  def synchronize(tasks) do
+    with_log_metadata module: __MODULE__ do
+      [FileSet, Work, Collection]
+      |> Enum.map(&process_schema(&1, Map.get(tasks, &1, nil)))
+      |> Enum.into(%{})
+    end
+  end
+
+  defp process_schema(schema, task_id) do
+    if SearchClient.task_completed?(task_id) do
+      synchronize_schema(schema)
+    else
+      {schema, task_id}
+    end
+  end
+
+  defp synchronize_schema(schema) do
+    destination = Config.v2_index(schema)
+
+    case SearchClient.latest_v2_indexed_time(schema) do
+      {:ok, indexed_at} ->
+        case SearchClient.reindex(schema, indexed_at) do
+          {:ok, task} ->
+            Logger.info(
+              "Documents newer than #{indexed_at} reindexing into #{destination}, task: #{task}"
+            )
+
+            {schema, task}
+
+          {:error, error} ->
+            Logger.error("Error reindexing into #{destination}: #{inspect(error)}")
+            {schema, nil}
+        end
+
+      {:error, error} ->
+        Logger.error("Error reindexing into #{destination}: #{inspect(error)}")
+        {schema, nil}
+    end
+  end
+end
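
Note: SearchClient.reindex/2 is outside this diff, but per the moduledoc it presumably wraps the OpenSearch Reindex API. A plausible (assumed) request shape that ties the new Config helpers together; the real client may differ:

# Assumed body, with variables as in synchronize_schema/1 above:
%{
  source: %{
    index: Meadow.Config.v1_index(),
    query: %{range: %{indexed_at: %{gt: indexed_at}}}
  },
  dest: %{
    index: Meadow.Config.v2_index(schema),
    pipeline: schema |> Meadow.Config.v2_index() |> Meadow.Config.v2_pipeline()
  }
}
# POSTed to /_reindex?wait_for_completion=false; the returned task id is
# what process_schema/2 later polls via SearchClient.task_completed?/1.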
3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/collection.ex
@@ -26,7 +26,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.Collection do
       }
     end,
     title: collection.title,
-    visibility: format(collection.visibility)
+    visibility: format(collection.visibility),
+    indexed_at: NaiveDateTime.utc_now()
   }
 end

3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/file_set.ex
@@ -24,7 +24,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.FileSet do
     streamingUrl: FileSets.distribution_streaming_uri_for(file_set),
     visibility: format(file_set.work.visibility),
     webvtt: file_set.structural_metadata.value,
-    workId: file_set.work.id
+    workId: file_set.work.id,
+    indexed_at: NaiveDateTime.utc_now()
   }
 end

3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/work.ex
@@ -62,7 +62,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.Work do
       url -> url <> "/full/!300,300/0/default.jpg"
     end,
     visibility: format(work.visibility),
-    workType: format(work.work_type)
+    workType: format(work.work_type),
+    indexed_at: NaiveDateTime.utc_now()
   }
   |> Map.merge(AdministrativeMetadataDocument.encode(work.administrative_metadata))
   |> Map.merge(DescriptiveMetadataDocument.encode(work.descriptive_metadata))
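
Note: all three encoders (Collection, FileSet, Work) now stamp indexed_at at encode time, which is what makes the cleanup in reindex_all! safe. A sketch of the mechanism, with a hypothetical timestamp:

now = ~N[2022-08-09 12:00:00]  # reindex_all! captures this before syncing
# synchronize_index/0 re-encodes every live record, giving each document a
# fresh indexed_at >= now. A document whose database row is gone is never
# re-encoded, keeps its old stamp, and is swept by:
%{query: %{range: %{indexed_at: %{lt: now}}}}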