From fb8ad68c2de7163635bd40d38ff4b69be5f1b830 Mon Sep 17 00:00:00 2001
From: NTPape <10488949+NTPape@users.noreply.github.com>
Date: Sun, 9 Jun 2024 23:39:09 +0200
Subject: [PATCH] feat: introduce `FileSystem.unjarOnce`

`specs2-html` currently copies all of its HTML resources (41 files) for each specification executed with HTML output, reading and traversing `specs2-html.jar` four times in the process and overwriting its own previously copied files over and over. This is inefficient and puts unnecessary strain on the disk. The newly introduced `unjarOnce` method is used by `specs2-html` to unjar its resources only once for each combination of target location and filter expression.
---
 .github/workflows/ci.yml                      |  4 +-
 .../main/scala/org/specs2/io/FileSystem.scala | 37 +++++++++++++++++++
 .../org/specs2/reporter/HtmlPrinter.scala     |  2 +-
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 99cf83c2c6..c9b7064643 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        scala: [3.4.2]
+        scala: [3.3.3]
         java: [temurin@18]
     runs-on: ${{ matrix.os }}
     steps:
@@ -56,7 +56,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        scala: [3.4.2]
+        scala: [3.3.3]
         java: [temurin@18]
     runs-on: ${{ matrix.os }}
     steps:
diff --git a/common/shared/src/main/scala/org/specs2/io/FileSystem.scala b/common/shared/src/main/scala/org/specs2/io/FileSystem.scala
index fef8a24e46..61304a174a 100644
--- a/common/shared/src/main/scala/org/specs2/io/FileSystem.scala
+++ b/common/shared/src/main/scala/org/specs2/io/FileSystem.scala
@@ -58,6 +58,40 @@ case class FileSystem(logger: Logger) extends FilePathReader:
   def mkdirs(path: FilePath): Operation[Unit] =
     mkdirs(path.dir)
 
+  /** LRU cache remembering which (jar URL, destination, filter) triples were already requested for unjaring. */
+  private object UnjarLRUCache:
+    private var unjarLRUCache: Map[(URL, DirectoryPath, String), Long] = Map.empty // params -> last-seen nanoTime
+    private val maxSize = 1000 // upper bound on the number of cached entries
+
+    /** Returns true if `params` was already cached; always (re)records it with the current timestamp. */
+    def alreadyUnjared(params: (URL, DirectoryPath, String)): Boolean =
+      UnjarLRUCache.synchronized:
+        val alreadyUnjared = unjarLRUCache.contains(params)
+        unjarLRUCache += params -> System.nanoTime // inserts on a miss, refreshes the timestamp on a hit
+        if !alreadyUnjared then clean() // the size can only have grown on a miss
+        alreadyUnjared
+
+    /** Evicts the entries with the oldest timestamps until the cache holds at most `maxSize` entries. */
+    private def clean(): Unit = while unjarLRUCache.size > maxSize do unjarLRUCache -= unjarLRUCache.minBy(_._2)._1
+
+  /** Unjar the jar (or zip file) specified by "jarUrl" to the "dest" directory. Filters files which shouldn't be
+    * extracted with a regular expression. This is skipped if the same arguments were seen before and are still in
+    * the LRU cache. NOTE(review): arguments are cached before `unjar` runs; confirm failures need no retry.
+    * @param jarUrl
+    *   URL of the jar file
+    * @param dest
+    *   destination directory path
+    * @param regexFilter
+    *   regular expression filtering files which shouldn't be extracted; the expression must capture the path of an
+    *   entry as group 1 which will then be used relative to dest as target path for that entry
+    *
+    * @see
+    *   [[unjar]]
+    */
+  def unjarOnce(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
+    if UnjarLRUCache.alreadyUnjared((jarUrl, dest, regexFilter)) then Operation.ok(())
+    else unjar(jarUrl, dest, regexFilter)
+
   /** Unjar the jar (or zip file) specified by "path" to the "dest" directory. Filters files which shouldn't be
     * extracted with a regular expression.
     * @param jarUrl
@@ -67,6 +101,9 @@ case class FileSystem(logger: Logger) extends FilePathReader:
     * @param regexFilter
     *   regular expression filtering files which shouldn't be extracted; the expression must capture the path of an
     *   entry as group 1 which will then be used relative to dirPath as target path for that entry
+    *
+    * @see
+    *   [[unjarOnce]]
     */
   def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
     val regex = compile(regexFilter)
diff --git a/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala b/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
index a079d60296..95024ce625 100644
--- a/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
+++ b/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
@@ -187,7 +187,7 @@ case class HtmlPrinter(env: Env, searchPage: SearchPage, logger: Logger = Consol
       case Some(url) =>
         val fs = env.fileSystem
         if url.getProtocol.equalsIgnoreCase("jar") then
-          fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
+          fs.unjarOnce(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
         else fs.copyDir(DirectoryPath.unsafe(url.toURI), outputDir / src)
       case _ =>
         val message = s"no resource found for path ${(base / src).path}"