From fb8ad68c2de7163635bd40d38ff4b69be5f1b830 Mon Sep 17 00:00:00 2001 From: NTPape <10488949+NTPape@users.noreply.github.com> Date: Sun, 9 Jun 2024 23:39:09 +0200 Subject: [PATCH] feat: introduce `FileSystem.unjarOnce` `specs2-html` currently copies all of its html resources (41 files) for each specification (executed with html output), reading and traversing the `specs2-html.jar` four times in the process, overwriting its own previously copied files over and over. This is inefficient and puts unnecessary strain on the disk. The newly introduced `unjarOnce` method lets `specs2-html` unjar its resources only once per jar, target location and filter expression. --- .github/workflows/ci.yml | 4 +- .../main/scala/org/specs2/io/FileSystem.scala | 37 +++++++++++++++++++ .../org/specs2/reporter/HtmlPrinter.scala | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99cf83c2c6..c9b7064643 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - scala: [3.4.2] + scala: [3.3.3] java: [temurin@18] runs-on: ${{ matrix.os }} steps: @@ -56,7 +56,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - scala: [3.4.2] + scala: [3.3.3] java: [temurin@18] runs-on: ${{ matrix.os }} steps: diff --git a/common/shared/src/main/scala/org/specs2/io/FileSystem.scala b/common/shared/src/main/scala/org/specs2/io/FileSystem.scala index fef8a24e46..61304a174a 100644 --- a/common/shared/src/main/scala/org/specs2/io/FileSystem.scala +++ b/common/shared/src/main/scala/org/specs2/io/FileSystem.scala @@ -58,6 +58,40 @@ case class FileSystem(logger: Logger) extends FilePathReader: def mkdirs(path: FilePath): Operation[Unit] = mkdirs(path.dir) + /** Unjaring the same thing over and over is inefficient. LRU cache to keep track of what was already done. 
*/ + private object UnjarLRUCache: + private var unjarLRUCache: Map[(URL, DirectoryPath, String), Long] = Map.empty + private val maxSize = 1000 + + /** Checks if the given parameters were already processed; either way records them with a fresh timestamp. */ + def alreadyUnjared(params: (URL, DirectoryPath, String)): Boolean = + UnjarLRUCache.synchronized: + val alreadyUnjared = unjarLRUCache.contains(params) + unjarLRUCache += params -> System.nanoTime + if !alreadyUnjared then clean() + alreadyUnjared + + /** Evicts the entries with the smallest timestamp until the cache holds at most maxSize entries. */ + private def clean(): Unit = while unjarLRUCache.size > maxSize do unjarLRUCache -= unjarLRUCache.minBy(_._2)._1 + + /** Unjar the jar (or zip file) specified by "jarUrl" to the "dest" directory. Filters files which shouldn't be + * extracted with a regular expression. This is only done once per argument list (unless eventually evicted from LRU + * cache). NOTE(review): arguments are registered before [[unjar]] runs, so a failed unjar is not retried — confirm. + * @param jarUrl + * URL of the jar file + * @param dest + * destination directory path + * @param regexFilter + * regular expression filtering files which shouldn't be extracted; the expression must capture the path of an + * entry as group 1 which will then be used relative to dest as target path for that entry + * + * @see + * [[unjar]] + */ + def unjarOnce(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] = + if UnjarLRUCache.alreadyUnjared((jarUrl, dest, regexFilter)) then Operation.ok(()) + else unjar(jarUrl, dest, regexFilter) + /** Unjar the jar (or zip file) specified by "path" to the "dest" directory. Filters files which shouldn't be * extracted with a regular expression. 
* @param jarUrl @@ -67,6 +101,9 @@ case class FileSystem(logger: Logger) extends FilePathReader: * @param regexFilter * regular expression filtering files which shouldn't be extracted; the expression must capture the path of an * entry as group 1 which will then be used relative to dirPath as target path for that entry + * + * @see + * [[unjarOnce]] */ def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] = val regex = compile(regexFilter) diff --git a/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala b/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala index a079d60296..95024ce625 100644 --- a/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala +++ b/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala @@ -187,7 +187,7 @@ case class HtmlPrinter(env: Env, searchPage: SearchPage, logger: Logger = Consol case Some(url) => val fs = env.fileSystem if url.getProtocol.equalsIgnoreCase("jar") then - fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$") + fs.unjarOnce(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$") else fs.copyDir(DirectoryPath.unsafe(url.toURI), outputDir / src) case _ => val message = s"no resource found for path ${(base / src).path}"