From 4375b8a13bf15dc6c8b09f094ef1b6eefe84b043 Mon Sep 17 00:00:00 2001 From: catalinii Date: Thu, 19 Aug 2021 18:52:50 -0700 Subject: [PATCH] [SPARK-28098] Fix NPE when specifying parquet files instead of paths (#41) --- .../sql/execution/datasources/InMemoryFileIndex.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index 227b8a03ec662..c902a9decb303 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -99,18 +99,20 @@ class InMemoryFileIndex( new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f) cachedLeafDirToChildrenFiles = if (readPartitionWithSubdirectoryEnabled) { - files.toArray.groupBy(file => getRootPathsLeafDir(file.getPath.getParent)) + files.toArray.groupBy(file => getRootPathsLeafDir(file.getPath.getParent, file.getPath)) } else { files.toArray.groupBy(_.getPath.getParent) } cachedPartitionSpec = null } - private def getRootPathsLeafDir(path: Path): Path = { - if (rootPaths.contains(path)) { + private def getRootPathsLeafDir(path: Path, child: Path): Path = { + if (rootPaths.contains(child)) { + path + } else if (rootPaths.contains(path)) { path } else { - getRootPathsLeafDir(path.getParent) + getRootPathsLeafDir(path.getParent, path) } }