createVariantAlleles(Path filePath) throws IOEx
VCFCodec codec = new VCFCodec();
codec.setVCFHeader(header, header.getVCFHeaderVersion() == null ? VCFHeaderVersion.VCF4_1 : header.getVCFHeaderVersion());
- BufferedReader reader;
- if (filePath.toFile().getName().endsWith(".gz"))
- reader = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(new FileInputStream(filePath.toFile()))));
- else
- reader = Files.newBufferedReader(filePath);
+ BufferedReader reader = openFileForReading(filePath);
return reader.lines()
- .onClose(() -> {try {reader.close();} catch (IOException ignored) {}})
+ .onClose(closeReader(reader))
.map(toVariantContext(codec))
.flatMap(Optional::stream)
.map(toVariants())
.flatMap(Optional::stream);
}
+ private static BufferedReader openFileForReading(Path filePath) throws IOException {
+ BufferedReader reader;
+ if (filePath.toFile().getName().endsWith(".gz"))
+ reader = new BufferedReader(
+ new InputStreamReader(
+ new GZIPInputStream(Files.newInputStream(filePath)),
+ StandardCharsets.UTF_8));
+ else
+ reader = Files.newBufferedReader(filePath, StandardCharsets.UTF_8);
+ return reader;
+ }
+
+ private static Runnable closeReader(BufferedReader reader) {
+ return () -> {
+ try {
+ LOGGER.trace("Closing VCF file");
+ reader.close();
+ } catch (IOException e) {
+ LOGGER.warn("Error while closing the VCF file", e);
+ }
+ };
+ }
+
/**
* One variant context might represent multiple sequence variants or a single symbolic variant/breakend.
* This function melts the variant context to a collection of variants.
diff --git a/svanna-io/src/main/java/org/monarchinitiative/svanna/io/service/SilentGenesGeneService.java b/svanna-io/src/main/java/org/monarchinitiative/svanna/io/service/SilentGenesGeneService.java
index 58fd046a..d21a3d2b 100644
--- a/svanna-io/src/main/java/org/monarchinitiative/svanna/io/service/SilentGenesGeneService.java
+++ b/svanna-io/src/main/java/org/monarchinitiative/svanna/io/service/SilentGenesGeneService.java
@@ -1,6 +1,5 @@
package org.monarchinitiative.svanna.io.service;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.monarchinitiative.svanna.core.service.GeneService;
import org.monarchinitiative.svanna.core.service.QueryResult;
import org.monarchinitiative.svanna.io.service.jannovar.IntervalArray;
@@ -23,6 +22,7 @@
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
+import java.util.zip.GZIPInputStream;
public class SilentGenesGeneService implements GeneService {
@@ -64,7 +64,7 @@ public static SilentGenesGeneService of(GenomicAssembly assembly, Path silentGen
private static InputStream openForReading(Path silentGenesJsonPath) throws IOException {
if (silentGenesJsonPath.toFile().getName().endsWith(".gz")) {
LOGGER.debug("Assuming the file is gzipped");
- return new BufferedInputStream(new GzipCompressorInputStream(Files.newInputStream(silentGenesJsonPath)));
+ return new BufferedInputStream(new GZIPInputStream(Files.newInputStream(silentGenesJsonPath)));
} else {
return new BufferedInputStream(Files.newInputStream(silentGenesJsonPath));
}
diff --git a/svanna-io/src/test/java/org/monarchinitiative/svanna/io/parse/VcfVariantParserTest.java b/svanna-io/src/test/java/org/monarchinitiative/svanna/io/parse/VcfVariantParserTest.java
index e95dda80..fc8297d6 100644
--- a/svanna-io/src/test/java/org/monarchinitiative/svanna/io/parse/VcfVariantParserTest.java
+++ b/svanna-io/src/test/java/org/monarchinitiative/svanna/io/parse/VcfVariantParserTest.java
@@ -4,15 +4,12 @@
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeaderVersion;
+import org.junit.jupiter.api.*;
import org.monarchinitiative.svanna.core.reference.SvannaVariant;
import org.monarchinitiative.svanna.core.reference.VariantAware;
import org.monarchinitiative.svanna.core.reference.Zygosity;
import org.monarchinitiative.svanna.io.FullSvannaVariant;
import org.monarchinitiative.svanna.io.TestDataConfig;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.DisplayName;
-import org.junit.jupiter.api.Nested;
-import org.junit.jupiter.api.Test;
import org.monarchinitiative.svart.*;
import org.monarchinitiative.svart.assembly.GenomicAssembly;
import org.monarchinitiative.svart.assembly.GenomicAssemblies;
@@ -31,7 +28,8 @@
@SpringBootTest(classes = TestDataConfig.class)
public class VcfVariantParserTest {
- private static final Path SV_EXAMPLE_PATH = Paths.get("src/test/resources/org/monarchinitiative/svanna/io/parse/sv_example.vcf");
+ private static final Path TEST_VCF_DIR = Paths.get("src/test/resources/org/monarchinitiative/svanna/io/parse");
+ private static final Path SV_EXAMPLE_PATH = TEST_VCF_DIR.resolve("sv_example.vcf");
private static final VCFCodec VCF_CODEC = new VCFCodec();
@BeforeAll
@@ -426,6 +424,41 @@ public void toVariants_breakendVariant() {
}
}
+ /**
+ * Per issue 235,
+ * HTSlib >=1.17 produces a gzipped file that cannot be read by commons-compress's `GzipCompressorInputStream`.
+ * As a fix, the class was replaced by JRE's {@link java.util.zip.GZIPInputStream}.
+ *
+ * Here we test that both older and newer VCFs can be correctly read by SvAnna's code.
+ */
+ @Nested
+ public class GzipQuirkTests {
+
+ private final GenomicAssembly GRCh38p13 = GenomicAssemblies.GRCh38p13();
+ private VcfVariantParser instance;
+
+ @BeforeEach
+ public void setUp() {
+ instance = new VcfVariantParser(GRCh38p13);
+ }
+
+ @Test
+ public void loadHtslibLeq16() throws Exception {
+ Path input = TEST_VCF_DIR.resolve("htslib_16.vcf.gz");
+ List alleles = instance.createVariantAlleleList(input);
+
+ assertThat(alleles, hasSize(8));
+ }
+
+ @Test
+ public void loadHtslibGeq17() throws Exception {
+ Path input = TEST_VCF_DIR.resolve("htslib_17.vcf.gz");
+ List alleles = instance.createVariantAlleleList(input);
+
+ assertThat(alleles, hasSize(8));
+ }
+ }
+
private static GenomicAssembly testAssembly(List contigs) {
return GenomicAssembly.of("toy", "Wookie", "9999", "Han Solo", "2100-01-01",
"GB1", "RS1", contigs);
diff --git a/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_16.vcf.gz b/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_16.vcf.gz
new file mode 100644
index 00000000..07c50b3f
Binary files /dev/null and b/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_16.vcf.gz differ
diff --git a/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_17.vcf.gz b/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_17.vcf.gz
new file mode 100644
index 00000000..5da69e82
Binary files /dev/null and b/svanna-io/src/test/resources/org/monarchinitiative/svanna/io/parse/htslib_17.vcf.gz differ
diff --git a/svanna-model/pom.xml b/svanna-model/pom.xml
index 87eca9af..9698f940 100644
--- a/svanna-model/pom.xml
+++ b/svanna-model/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.3
+ 1.0.4
4.0.0
diff --git a/svanna-test/pom.xml b/svanna-test/pom.xml
index c5d40d9c..519ac549 100644
--- a/svanna-test/pom.xml
+++ b/svanna-test/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.3
+ 1.0.4
4.0.0