
Merge pull request #3158 from nulib/deploy/staging
Deploy Meadow v6.1.0
mbklein authored Aug 9, 2022
2 parents b74ac89 + d2e2023 commit dc2db19
Showing 34 changed files with 864 additions and 267 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/build.yml
@@ -3,6 +3,7 @@ name: meadow
 on: [push]
 jobs:
   dependencies:
+    if: ${{ ! (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/deploy/') || startsWith(github.ref, 'refs/heads/build/')) }}
     runs-on: ubuntu-latest
     env:
       MIX_ENV: test
@@ -222,9 +223,6 @@ jobs:
         run: mix ecto.rollback --all
         working-directory: app
   publish:
-    needs:
-      - elixir-test
-      - js-test
     if: ${{ (!github.event.pull_request) && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/deploy/') || startsWith(github.ref, 'refs/heads/build/')) }}
     runs-on: ubuntu-latest
     steps:
32 changes: 0 additions & 32 deletions app/assets/js/hooks/useAcceptedMimeTypes.js
@@ -73,38 +73,6 @@ export default function useAcceptedMimeTypes() {
         }
         break;
       case "P":
-        switch (workTypeId) {
-          case "IMAGE":
-            if (!isImage) {
-              isValid = false;
-              code = "invalid-image";
-              message =
-                "Image work types Preservation fileset roles must be image mime type";
-            }
-            break;
-          case "AUDIO":
-            if (!isAudio) {
-              isValid = false;
-              code = "invalid-audio";
-              message =
-                "Audio work types Preservation fileset roles must be audio mime type";
-            }
-            break;
-          case "VIDEO":
-            if (!isVideo) {
-              isValid = false;
-              code = "invalid-video";
-              message =
-                "Video work types Preservation fileset roles must be video mime type";
-            }
-            break;
-          default:
-            console.error(`Invalid work type id: ${workTypeId}`);
-            isValid = false;
-            code = "invalid-work-type";
-            message = "Work type is invalid";
-            break;
-        }
         break;
       default:
         console.error(`Invalid file set role: ${fileSetRole}`);
28 changes: 7 additions & 21 deletions app/assets/js/hooks/useAcceptedMimeTypes.test.js
@@ -67,28 +67,14 @@ describe("useAcceptedMimeTypes hook", () => {
   describe("Preservation role", () => {
     const { isFileValid } = useAcceptedMimeTypes();
 
-    it("returns the correct mime types for Image work type", () => {
-      const result = isFileValid("P", "IMAGE", "image/tiff");
-      const resultBad = isFileValid("P", "IMAGE", "audio/tiff");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-image");
-    });
-
-    it("returns the correct mime types for Audio work type", () => {
-      const result = isFileValid("P", "AUDIO", "audio/flac");
-      const resultBad = isFileValid("P", "AUDIO", "video/mp4");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-audio");
-    });
+    it("accepts all file types", () => {
+      const results = [
+        isFileValid("P", "VIDEO", "video/x-mts"),
+        isFileValid("P", "IMAGE", "application/octet-stream"),
+        isFileValid("P", "AUDIO", "video/mp4"),
+      ];
 
-    it("returns the correct mime types for Video work type", () => {
-      const result = isFileValid("P", "VIDEO", "video/mp4");
-      const resultBad = isFileValid("P", "VIDEO", "audio/mp4");
-      expect(result.isValid).toBeTruthy();
-      expect(resultBad.isValid).toBeFalsy();
-      expect(resultBad.code).toEqual("invalid-video");
+      expect(results.every((result) => result.isValid)).toBeTruthy();
     });
   });
});
31 changes: 15 additions & 16 deletions app/assets/package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions app/lib/meadow/application/children.ex
@@ -12,6 +12,7 @@ defmodule Meadow.Application.Children do
       "batch_driver" => Meadow.BatchDriver,
       "csv_update_driver" => Meadow.CSVMetadataUpdateDriver,
       "index_worker" => {Meadow.Data.IndexWorker, interval: Config.index_interval()},
+      "reindex_worker" => {Meadow.Data.ReindexWorker, interval: Config.index_interval()},
       "database_listeners" => [
        Meadow.ARKListener,
        Meadow.FilesetDeleteListener,
40 changes: 40 additions & 0 deletions app/lib/meadow/config.ex
@@ -17,6 +17,46 @@ defmodule Meadow.Config do
     |> Keyword.get(:primary_index)
   end
 
+  def indexes do
+    Application.get_env(:meadow, Meadow.ElasticsearchCluster)
+    |> Keyword.get(:indexes)
+    |> Map.keys()
+  end
+
+  def v1_index, do: elasticsearch_index() |> to_string()
+
+  def v2_index(model) when is_atom(model) do
+    model
+    |> search_model_from_schema()
+    |> v2_index()
+  end
+
+  def v2_index(model) do
+    model_regex = model |> kebab_case() |> Regex.compile!()
+
+    indexes()
+    |> Enum.map(&to_string/1)
+    |> Enum.find(&(&1 =~ model_regex))
+  end
+
+  # def v2_pipeline(model), do: "#{Env.prefix()}-v1-to-v2-#{kebab_case(model)}"
+  def v2_pipeline(index_name) when is_binary(index_name) do
+    index_name
+    |> String.to_existing_atom()
+    |> v2_pipeline()
+  end
+
+  def v2_pipeline(index_name) do
+    Application.get_env(:meadow, Meadow.ElasticsearchCluster)
+    |> Keyword.get(:indexes)
+    |> get_in([index_name, :default_pipeline])
+  end
+
+  def search_model_from_schema(schema),
+    do: schema |> to_string() |> String.split(".") |> List.last()
+
+  defp kebab_case(model), do: model |> Macro.underscore() |> String.replace("_", "-")
+
   @doc "Retrieve shared links index name"
   def shared_links_index do
     Application.get_env(:meadow, :shared_links_index)
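
Note: the new helpers resolve a schema module to its v2 index by kebab-casing the model name and scanning the configured index names. A minimal sketch of the lookup chain, using hypothetical index and pipeline names (real values come from the Meadow.ElasticsearchCluster config):

# Assumed config shape -- names here are hypothetical:
# config :meadow, Meadow.ElasticsearchCluster,
#   indexes: %{"meadow-v2-file-set": %{default_pipeline: "v1-to-v2-file-set"}}

Meadow.Config.search_model_from_schema(Meadow.Data.Schemas.FileSet)
#=> "FileSet"

Meadow.Config.v2_index(Meadow.Data.Schemas.FileSet)
#=> "meadow-v2-file-set"   # first configured index matching ~r/file-set/

Meadow.Config.v2_pipeline("meadow-v2-file-set")
#=> "v1-to-v2-file-set"    # that index's :default_pipeline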
34 changes: 29 additions & 5 deletions app/lib/meadow/data/indexer.ex
@@ -9,6 +9,7 @@ defmodule Meadow.Data.Indexer do
   alias Meadow.Data.Schemas.{Collection, FileSet, Work}
   alias Meadow.ElasticsearchCluster, as: Cluster
   alias Meadow.ElasticsearchDiffStore, as: Store
+  alias Meadow.Search.Client, as: SearchClient
 
   require Logger

@@ -42,8 +43,29 @@
   end
 
   def reindex_all! do
-    IndexTimes.reset_all!()
-    synchronize_index()
+    with now <- NaiveDateTime.utc_now() do
+      IndexTimes.reset_all!()
+      synchronize_index()
+      delete_outdated_documents(now)
+    end
+
+    Logger.info("Reindex complete")
   end
+
+  defp delete_outdated_documents(time) do
+    query = %{
+      query: %{
+        range: %{
+          indexed_at: %{
+            lt: time
+          }
+        }
+      }
+    }
+
+    for index <- Config.indexes() do
+      SearchClient.delete_by_query(index, query)
+    end
+  end
 
   def synchronize_schema(schema) do
@@ -67,14 +89,16 @@
   end
 
   def encode!(id, :deleted) do
-    %{delete: %{_index: index(), _id: id}}
-    |> json_encode()
+    for index <- Config.indexes() do
+      %{delete: %{_index: index, _id: id}}
+    end
+    |> Enum.map_join("\n", &json_encode/1)
   end
 
   def encode!(indexable, _) do
     [
       %{index: %{_index: index(), _id: indexable.id}},
-      indexable |> Elasticsearch.Document.encode()
+      Elasticsearch.Document.encode(indexable)
     ]
     |> Enum.map_join("\n", &json_encode/1)
   end
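
Note: deletes now fan out to every configured index in one bulk payload. A sketch of the output, assuming Config.indexes() returns two hypothetical names:

# Assuming Config.indexes() returns [:meadow, :"meadow-v2-work"] (hypothetical):
Meadow.Data.Indexer.encode!("abc123", :deleted)
# produces, roughly, two bulk actions joined by a newline:
#   {"delete":{"_id":"abc123","_index":"meadow"}}
#   {"delete":{"_id":"abc123","_index":"meadow-v2-work"}}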
18 changes: 18 additions & 0 deletions app/lib/meadow/data/reindex_worker.ex
@@ -0,0 +1,18 @@
+defmodule Meadow.Data.ReindexWorker do
+  @moduledoc """
+  IntervalTask that reindexes from V1 to V2
+  """
+  alias Meadow.Data.Reindexer
+  alias Meadow.IntervalTask
+
+  use IntervalTask, default_interval: 1_000, function: :synchronize
+
+  @impl IntervalTask
+  def initial_state(_args),
+    do: %{override: true, tasks: %{}}
+
+  def synchronize(%{tasks: tasks} = state) do
+    tasks = Reindexer.synchronize(tasks)
+    {:noreply, Map.replace(state, :tasks, tasks)}
+  end
+end
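
Note: a sketch of how the worker's state evolves, assuming Meadow.IntervalTask invokes the configured :synchronize function once per interval (task ids below are hypothetical):

# Tick 1: no reindex tasks in flight, so each schema starts one.
state = %{override: true, tasks: %{}}
{:noreply, %{tasks: tasks}} = Meadow.Data.ReindexWorker.synchronize(state)

# tasks now maps schema modules to OpenSearch task ids, e.g.:
# %{
#   Meadow.Data.Schemas.Collection => "oTUl:123",
#   Meadow.Data.Schemas.FileSet => "oTUl:124",
#   Meadow.Data.Schemas.Work => "oTUl:125"
# }

# Later ticks re-check those ids: schemas whose task finished start a
# fresh reindex pass, unfinished ones keep their task id.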
52 changes: 52 additions & 0 deletions app/lib/meadow/data/reindexer.ex
@@ -0,0 +1,52 @@
+defmodule Meadow.Data.Reindexer do
+  @moduledoc """
+  Reindexes from v1 to v2 using the OpenSearch Reindex API.
+  """
+  use Meadow.Utils.Logging
+
+  alias Meadow.Config
+  alias Meadow.Data.Schemas.{Collection, FileSet, Work}
+  alias Meadow.Search.Client, as: SearchClient
+
+  require Logger
+
+  def synchronize(tasks) do
+    with_log_metadata module: __MODULE__ do
+      [FileSet, Work, Collection]
+      |> Enum.map(&process_schema(&1, Map.get(tasks, &1, nil)))
+      |> Enum.into(%{})
+    end
+  end
+
+  defp process_schema(schema, task_id) do
+    if SearchClient.task_completed?(task_id) do
+      synchronize_schema(schema)
+    else
+      {schema, task_id}
+    end
+  end
+
+  defp synchronize_schema(schema) do
+    destination = Config.v2_index(schema)
+
+    case SearchClient.latest_v2_indexed_time(schema) do
+      {:ok, indexed_at} ->
+        case SearchClient.reindex(schema, indexed_at) do
+          {:ok, task} ->
+            Logger.info(
+              "Documents newer than #{indexed_at} reindexing into #{destination}, task: #{task}"
+            )
+
+            {schema, task}
+
+          {:error, error} ->
+            Logger.error("Error reindexing into #{destination}: #{inspect(error)}")
+            {schema, nil}
+        end
+
+      {:error, error} ->
+        Logger.error("Error reindexing into #{destination}: #{inspect(error)}")
+        {schema, nil}
+    end
+  end
+end
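
Note: SearchClient.reindex/2 is outside this diff, but per the moduledoc it presumably wraps the OpenSearch Reindex API. A plausible (assumed) request shape that ties the new Config helpers together; the real client may differ:

# Assumed body, with variables as in synchronize_schema/1 above:
%{
  source: %{
    index: Meadow.Config.v1_index(),
    query: %{range: %{indexed_at: %{gt: indexed_at}}}
  },
  dest: %{
    index: Meadow.Config.v2_index(schema),
    pipeline: schema |> Meadow.Config.v2_index() |> Meadow.Config.v2_pipeline()
  }
}
# POSTed to /_reindex?wait_for_completion=false; the returned task id is
# what process_schema/2 later polls via SearchClient.task_completed?/1.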
3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/collection.ex
@@ -26,7 +26,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.Collection do
       }
     end,
     title: collection.title,
-    visibility: format(collection.visibility)
+    visibility: format(collection.visibility),
+    indexed_at: NaiveDateTime.utc_now()
   }
 end

3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/file_set.ex
@@ -24,7 +24,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.FileSet do
     streamingUrl: FileSets.distribution_streaming_uri_for(file_set),
     visibility: format(file_set.work.visibility),
     webvtt: file_set.structural_metadata.value,
-    workId: file_set.work.id
+    workId: file_set.work.id,
+    indexed_at: NaiveDateTime.utc_now()
   }
 end

3 changes: 2 additions & 1 deletion app/lib/meadow/indexing/work.ex
@@ -62,7 +62,8 @@ defimpl Elasticsearch.Document, for: Meadow.Data.Schemas.Work do
       url -> url <> "/full/!300,300/0/default.jpg"
     end,
     visibility: format(work.visibility),
-    workType: format(work.work_type)
+    workType: format(work.work_type),
+    indexed_at: NaiveDateTime.utc_now()
   }
   |> Map.merge(AdministrativeMetadataDocument.encode(work.administrative_metadata))
   |> Map.merge(DescriptiveMetadataDocument.encode(work.descriptive_metadata))
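
Note: all three encoders (Collection, FileSet, Work) now stamp indexed_at at encode time, which is what makes the cleanup in reindex_all! safe. A sketch of the mechanism, with a hypothetical timestamp:

now = ~N[2022-08-09 12:00:00]  # reindex_all! captures this before syncing
# synchronize_index/0 re-encodes every live record, giving each document a
# fresh indexed_at >= now. A document whose database row is gone is never
# re-encoded, keeps its old stamp, and is swept by:
%{query: %{range: %{indexed_at: %{lt: now}}}}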