diff --git a/examples/application.properties b/examples/application.properties index d0d77b367..5584303e2 100644 --- a/examples/application.properties +++ b/examples/application.properties @@ -4,11 +4,8 @@ spring.profiles.active=production -# EXTERNAL APPLICATIONS +# OpenCGA app.opencga.path=/path/to/opencga -app.vep.path=/path/to/variant_effect_predictor.pl -app.vep.num-forks=4 - # STEPS MANAGEMENT ## Job repository database @@ -29,15 +26,7 @@ spring.data.mongodb.port= spring.data.mongodb.authentication-database= spring.data.mongodb.username= spring.data.mongodb.password= -spring.data.mongodb.database= config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics -# TODO The following 2 properties will be used exclusive after removing readers and writers dependency -# on OpenCGA. At the moment they need to be specified in both. -db.collections.files.name=files -db.collections.variants.name=variants - # LOGGING # https://docs.spring.io/spring-boot/docs/current/reference/html/boot-features-logging.html diff --git a/examples/initialize-database.properties b/examples/initialize-database.properties index 287bf83f1..fb0187b4f 100644 --- a/examples/initialize-database.properties +++ b/examples/initialize-database.properties @@ -4,3 +4,5 @@ spring.batch.job.names=init-database-job # INITIALIZATION PARAMETERS input.gtf=/home/cyenyxe/tmp/sample.gtf.gz +# Database configuration +spring.data.mongodb.database= \ No newline at end of file diff --git a/examples/load-aggregated-vcf.properties b/examples/load-aggregated-vcf.properties index cfe2c6c2b..9ef43b59e 100755 --- a/examples/load-aggregated-vcf.properties +++ b/examples/load-aggregated-vcf.properties @@ -17,10 +17,11 @@ output.dir.statistics=/path/to/statistics-files/ input.fasta=/path/to/homo_sapiens/sequence.fa # VEP +app.vep.path=/path/to/variant_effect_predictor.pl app.vep.cache.path=/path/to/vep/cache_folder app.vep.cache.version=82 
app.vep.cache.species=homo_sapiens - +app.vep.num-forks=4 # STEPS MANAGEMENT @@ -28,7 +29,9 @@ app.vep.cache.species=homo_sapiens statistics.skip=false annotation.skip=false -## Repeat steps -## true: The already COMPLETED steps will be rerun. This is restarting the job from the beginning -## false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false \ No newline at end of file +# Database configuration +spring.data.mongodb.database= +# TODO The following 2 properties will be used exclusively after removing readers and writers dependency +# on OpenCGA. At the moment they need to be specified in both. +db.collections.files.name=files +db.collections.variants.name=variants \ No newline at end of file diff --git a/examples/load-genotyped-vcf.properties b/examples/load-genotyped-vcf.properties index b9df595f4..2738b8593 100755 --- a/examples/load-genotyped-vcf.properties +++ b/examples/load-genotyped-vcf.properties @@ -19,9 +19,11 @@ output.dir.statistics=/path/to/statistics-files/ # VEP +app.vep.path=/path/to/variant_effect_predictor.pl app.vep.cache.path=/path/to/vep/cache_folder app.vep.cache.version=82 app.vep.cache.species=homo_sapiens +app.vep.num-forks=4 # STEPS MANAGEMENT @@ -30,8 +32,9 @@ app.vep.cache.species=homo_sapiens statistics.skip=false annotation.skip=false -## Repeat steps -## true: The already COMPLETED steps will be rerun. This is restarting the job from the beginning -## false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - +# Database configuration +spring.data.mongodb.database= +# TODO The following 2 properties will be used exclusively after removing readers and writers dependency +# on OpenCGA. At the moment they need to be specified in both. 
+db.collections.files.name=files +db.collections.variants.name=variants \ No newline at end of file diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/Application.java b/src/main/java/uk/ac/ebi/eva/pipeline/Application.java index f69825763..9519bc8a8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/Application.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/Application.java @@ -17,6 +17,9 @@ import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.batch.JobLauncherCommandLineRunner; +import org.springframework.boot.autoconfigure.data.mongo.MongoDataAutoConfiguration; +import org.springframework.context.ConfigurableApplicationContext; /** @@ -28,7 +31,7 @@ * Append any parameter as needed. * TODO document all parameters */ -@SpringBootApplication +@SpringBootApplication(exclude = {MongoDataAutoConfiguration.class, JobLauncherCommandLineRunner.class}) public class Application { public static final String VARIANT_WRITER_MONGO_PROFILE = "variant-writer-mongo"; @@ -45,6 +48,7 @@ public class Application { public static final String MONGO_EXPERIMENTAL_PROFILE = "experimental"; public static void main(String[] args) throws Exception { - SpringApplication.run(Application.class, args); + ConfigurableApplicationContext context = SpringApplication.run(Application.class, args); + System.exit(SpringApplication.exit(context)); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/IntegrationTestJobLauncherCommandLineRunner.java b/src/main/java/uk/ac/ebi/eva/pipeline/IntegrationTestJobLauncherCommandLineRunner.java deleted file mode 100644 index bc9432f35..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/IntegrationTestJobLauncherCommandLineRunner.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2015-2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.pipeline; - -import org.springframework.batch.core.JobExecutionException; -import org.springframework.batch.core.explore.JobExplorer; -import org.springframework.batch.core.launch.JobLauncher; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.boot.autoconfigure.batch.JobLauncherCommandLineRunner; -import org.springframework.context.annotation.Profile; -import org.springframework.stereotype.Component; -import uk.ac.ebi.eva.pipeline.parameters.ParametersFromProperties; - -/** - * Custom JobLauncherCommandLineRunner that retrieves all known jobParameters from the application context - * and injects them in the running instance of the job. Used for integration tests. - */ -@Component -@Profile("integrationTest") -public class IntegrationTestJobLauncherCommandLineRunner extends JobLauncherCommandLineRunner { - - @Value("${spring.batch.job.names:#{null}}") - private String springBatchJob; - - @Autowired - private ParametersFromProperties parametersFromProperties; - - public IntegrationTestJobLauncherCommandLineRunner(JobLauncher jobLauncher, JobExplorer jobExplorer) { - super(jobLauncher, jobExplorer); - } - - public void run(String... 
args) throws JobExecutionException { - if (springBatchJob != null) { - setJobNames(springBatchJob); - } - launchJobFromProperties(parametersFromProperties.getProperties()); - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/ChunkSizeCompletionPolicyConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/ChunkSizeCompletionPolicyConfiguration.java new file mode 100644 index 000000000..235f12324 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/ChunkSizeCompletionPolicyConfiguration.java @@ -0,0 +1,37 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.configuration; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import uk.ac.ebi.eva.pipeline.parameters.ChunkSizeParameters; + +/** + * Spring configuration to inject a SimpleCompletionPolicy that modifies the chunk size with the configured + * JobParameters chunk size. 
+ */ +@Configuration +public class ChunkSizeCompletionPolicyConfiguration { + + @Bean + @StepScope + public SimpleCompletionPolicy chunkSizecompletionPolicy(ChunkSizeParameters chunkSizeParameters) { + return new SimpleCompletionPolicy(chunkSizeParameters.getChunkSize()); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionApplicationListenerConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionApplicationListenerConfiguration.java new file mode 100644 index 000000000..20be2c904 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionApplicationListenerConfiguration.java @@ -0,0 +1,34 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.configuration; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import uk.ac.ebi.eva.pipeline.runner.JobExecutionApplicationListener; + +/** + * This configuration adds to the application context a JobExecutionApplicationListener that holds the state of the + * last job execution. This is used from the runner to decide the return code of the application. 
+ */ +@Configuration +public class JobExecutionApplicationListenerConfiguration { + + @Bean + public JobExecutionApplicationListener applicationJobInstanceListener() { + return new JobExecutionApplicationListener(); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionDeciderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionDeciderConfiguration.java new file mode 100644 index 000000000..8a28dddfb --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/JobExecutionDeciderConfiguration.java @@ -0,0 +1,45 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.configuration; + +import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; +import org.springframework.batch.core.job.flow.JobExecutionDecider; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import uk.ac.ebi.eva.pipeline.jobs.deciders.SkipStepDecider; +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_SKIP_STEP_DECIDER; +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.STATISTICS_SKIP_STEP_DECIDER; + +/** + * This class defines the beans for the deciders to skip annotation and statistics step. 
+ */ +@Configuration +@EnableBatchProcessing +public class JobExecutionDeciderConfiguration { + + @Bean(ANNOTATION_SKIP_STEP_DECIDER) + public JobExecutionDecider annotationSkipStepDecider() { + return new SkipStepDecider(JobParametersNames.ANNOTATION_SKIP); + } + + @Bean(STATISTICS_SKIP_STEP_DECIDER) + public JobExecutionDecider statisticsSkipStepDecider() { + return new SkipStepDecider(JobParametersNames.STATISTICS_SKIP); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java index 407ac0885..e5565e764 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/MongoConfiguration.java @@ -15,10 +15,9 @@ */ package uk.ac.ebi.eva.pipeline.configuration; -import java.net.UnknownHostException; -import java.util.Collections; - -import org.springframework.beans.factory.annotation.Autowired; +import com.mongodb.MongoClient; +import com.mongodb.MongoCredential; +import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.data.mongodb.MongoDbFactory; @@ -29,79 +28,78 @@ import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver; import org.springframework.data.mongodb.core.convert.MappingMongoConverter; import org.springframework.data.mongodb.core.mapping.MongoMappingContext; - -import com.mongodb.MongoClient; -import com.mongodb.MongoCredential; -import com.mongodb.ReadPreference; - -import uk.ac.ebi.eva.utils.MongoConnection; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.utils.MongoDBHelper; +import java.net.UnknownHostException; +import java.util.Collections; + /** * Utility class dealing with MongoDB 
connections using pipeline options */ @Configuration public class MongoConfiguration { - @Autowired - private MongoMappingContext mongoMappingContext; - @Bean public MongoMappingContext mongoMappingContext() { return new MongoMappingContext(); } - public MongoOperations getDefaultMongoOperations(String database) throws UnknownHostException { - MongoClient mongoClient = new MongoClient(); - mongoClient.setReadPreference(ReadPreference.primary()); - MongoDbFactory mongoFactory = getMongoDbFactory(mongoClient, database); - MongoTemplate mongoTemplate = new MongoTemplate(mongoFactory, getMappingMongoConverter(mongoFactory)); - return mongoTemplate; + @Bean + @StepScope + public MongoOperations mongoTemplate(DatabaseParameters databaseParameters, MongoMappingContext mongoMappingContext) + throws UnknownHostException { + return getMongoOperations(databaseParameters.getDatabaseName(),databaseParameters.getMongoConnection(), + mongoMappingContext); } - public MongoOperations getMongoOperations(String database, MongoConnection connection) + public static MongoOperations getMongoOperations(String databaseName, MongoConnection mongoConnection, + MongoMappingContext mongoMappingContext) throws UnknownHostException { - MongoClient mongoClient = getMongoClient(connection); - MongoDbFactory mongoFactory = getMongoDbFactory(mongoClient, database); - MongoTemplate mongoTemplate = new MongoTemplate(mongoFactory, getMappingMongoConverter(mongoFactory)); - return mongoTemplate; + MongoClient mongoClient = getMongoClient(mongoConnection); + MongoDbFactory mongoFactory = getMongoDbFactory(mongoClient, databaseName); + MappingMongoConverter mappingMongoConverter = getMappingMongoConverter(mongoFactory, mongoMappingContext); + return new MongoTemplate(mongoFactory, mappingMongoConverter); } - - public MongoClient getMongoClient(MongoConnection connection) throws UnknownHostException { + + private static MongoDbFactory getMongoDbFactory(MongoClient client, String database) { + return new 
SimpleMongoDbFactory(client, database); + } + + private static MongoClient getMongoClient(MongoConnection mongoConnection) throws UnknownHostException { String authenticationDatabase = null; String user = null; String password = null; MongoClient mongoClient; // The Mongo API is not happy to deal with empty strings for authentication DB, user and password - if (connection.getAuthenticationDatabase() != null && !connection.getAuthenticationDatabase().trim().isEmpty()) { - authenticationDatabase = connection.getAuthenticationDatabase(); + if (mongoConnection.getAuthenticationDatabase() != null && !mongoConnection.getAuthenticationDatabase().trim() + .isEmpty()) { + authenticationDatabase = mongoConnection.getAuthenticationDatabase(); } - if (connection.getUser() != null && !connection.getUser().trim().isEmpty()) { - user = connection.getUser(); + if (mongoConnection.getUser() != null && !mongoConnection.getUser().trim().isEmpty()) { + user = mongoConnection.getUser(); } - if (connection.getPassword() != null && !connection.getPassword().trim().isEmpty()) { - password = connection.getPassword(); + if (mongoConnection.getPassword() != null && !mongoConnection.getPassword().trim().isEmpty()) { + password = mongoConnection.getPassword(); } if (user == null || password == null) { - mongoClient = new MongoClient(MongoDBHelper.parseServerAddresses(connection.getHosts())); + mongoClient = new MongoClient(MongoDBHelper.parseServerAddresses(mongoConnection.getHosts())); } else { mongoClient = new MongoClient( - MongoDBHelper.parseServerAddresses(connection.getHosts()), - Collections.singletonList(MongoCredential.createCredential(connection.getUser(), - authenticationDatabase, connection.getPassword().toCharArray()))); + MongoDBHelper.parseServerAddresses(mongoConnection.getHosts()), + Collections.singletonList(MongoCredential.createCredential(mongoConnection.getUser(), + authenticationDatabase, mongoConnection.getPassword().toCharArray()))); } - 
mongoClient.setReadPreference(connection.getReadPreference()); + mongoClient.setReadPreference(mongoConnection.getReadPreference()); return mongoClient; } - private MongoDbFactory getMongoDbFactory(MongoClient client, String database) { - return new SimpleMongoDbFactory(client, database); - } - - private MappingMongoConverter getMappingMongoConverter(MongoDbFactory mongoFactory) { + private static MappingMongoConverter getMappingMongoConverter(MongoDbFactory mongoFactory, + MongoMappingContext mongoMappingContext) { DbRefResolver dbRefResolver = new DefaultDbRefResolver(mongoFactory); MappingMongoConverter mongoConverter = new MappingMongoConverter(dbRefResolver, mongoMappingContext); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/PostgreDataSourceConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/PostgreDataSourceConfiguration.java index fda336b37..dc9b954e9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/PostgreDataSourceConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/PostgreDataSourceConfiguration.java @@ -16,7 +16,7 @@ package uk.ac.ebi.eva.pipeline.configuration; import javax.sql.DataSource; -import org.springframework.beans.factory.annotation.Autowired; + import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Primary; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java index a7a052dc9..63cc9fd2e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/GeneReaderConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed 
under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,10 +19,10 @@ import org.springframework.batch.item.ItemStreamReader; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; + import uk.ac.ebi.eva.pipeline.io.readers.GeneReader; import uk.ac.ebi.eva.pipeline.model.FeatureCoordinates; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENE_READER; @@ -34,8 +34,8 @@ public class GeneReaderConfiguration { @Bean(GENE_READER) @StepScope - public ItemStreamReader geneReader(JobOptions jobOptions) { - return new GeneReader(jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_GTF)); + public ItemStreamReader geneReader(InputParameters inputParameters) { + return new GeneReader(inputParameters.getGtf()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/NonAnnotatedVariantsMongoReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/NonAnnotatedVariantsMongoReaderConfiguration.java index eb175737c..12f44310e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/NonAnnotatedVariantsMongoReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/NonAnnotatedVariantsMongoReaderConfiguration.java @@ -16,37 +16,25 @@ package uk.ac.ebi.eva.pipeline.configuration.readers; import org.springframework.batch.core.configuration.annotation.StepScope; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; import org.springframework.data.mongodb.core.MongoOperations; - -import 
uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.NonAnnotatedVariantsMongoReader; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; - -import java.net.UnknownHostException; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.NON_ANNOTATED_VARIANTS_READER; /** - * Configuration to inject a NonannotatedVariants bean that reads from a mongo database in the pipeline + * Configuration to inject a NonAnnotatedVariantsMongoReader bean that reads from a mongo database in the pipeline */ @Configuration -@Import({ MongoConfiguration.class }) public class NonAnnotatedVariantsMongoReaderConfiguration { - @Autowired - private MongoConfiguration mongoConfiguration; - @Bean(NON_ANNOTATED_VARIANTS_READER) @StepScope - public NonAnnotatedVariantsMongoReader nonAnnotatedVariantsMongoReader(JobOptions jobOptions) - throws UnknownHostException { - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); - return new NonAnnotatedVariantsMongoReader(mongoOperations, jobOptions.getDbCollectionsVariantsName()); + public NonAnnotatedVariantsMongoReader nonAnnotatedVariantsMongoReader(MongoOperations mongoOperations, + DatabaseParameters databaseParameters) { + return new NonAnnotatedVariantsMongoReader(mongoOperations, databaseParameters.getCollectionVariantsName()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java index 0ef304ece..b433237ae 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VariantAnnotationReaderConfiguration.java @@ -19,20 +19,22 @@ import 
org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemStreamReader; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_READER; /** * Configuration to inject a AnnotationFlatFileReader as a Variant Annotation Reader in the pipeline. */ +@Configuration public class VariantAnnotationReaderConfiguration { @Bean(VARIANT_ANNOTATION_READER) @StepScope - public ItemStreamReader variantAnnotationReader(JobOptions jobOptions) { - return new AnnotationFlatFileReader(jobOptions.getVepOutput()); + public ItemStreamReader variantAnnotationReader(AnnotationParameters annotationParameters) { + return new AnnotationFlatFileReader(annotationParameters.getVepOutput()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java index a33381c6b..b4ccd5a8b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/readers/VcfReaderConfiguration.java @@ -16,19 +16,17 @@ package uk.ac.ebi.eva.pipeline.configuration.readers; import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemStreamReader; -import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import 
uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.pipeline.io.readers.AggregatedVcfReader; import uk.ac.ebi.eva.pipeline.io.readers.UnwindingItemStreamReader; import uk.ac.ebi.eva.pipeline.io.readers.VcfReader; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; +import java.io.File; import java.io.IOException; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_READER; @@ -41,7 +39,7 @@ public class VcfReaderConfiguration { @Bean(VARIANT_READER) @StepScope - public ItemStreamReader unwindingReader(VcfReader vcfReader) throws Exception { + public ItemStreamReader unwindingReader(VcfReader vcfReader) { return new UnwindingItemStreamReader<>(vcfReader); } @@ -49,24 +47,22 @@ public ItemStreamReader unwindingReader(VcfReader vcfReader) throws Exc * The aggregation type is passed so that spring won't cache the instance of VcfReader if it is already built * with other aggregation type. * - * @param aggregationType to decide whether to instantiate a VcfReader or AggregatedVcfReader. * @return a VcfReader for the given aggregation type. * @throws IOException if the file doesn't exist, because it has to be read to see if it's compressed. 
*/ @Bean @StepScope - public VcfReader vcfReader(@Value("${" + JobParametersNames.INPUT_VCF_AGGREGATION + "}") - String aggregationType, - JobOptions jobOptions) throws IOException { - VariantSource.Aggregation aggregation = VariantSource.Aggregation.valueOf(aggregationType); - if (VariantSource.Aggregation.NONE.equals(aggregation)) { - return new VcfReader( - (VariantSource) jobOptions.getVariantOptions().get(VariantStorageManager.VARIANT_SOURCE), - jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF)); + public VcfReader vcfReader(InputParameters parameters) throws IOException { + String fileId = parameters.getVcfId(); + String studyId = parameters.getStudyId(); + File vcfFile = new File(parameters.getVcf()); + VariantSource.Aggregation vcfAggregation = parameters.getVcfAggregation(); + + if (VariantSource.Aggregation.NONE.equals(vcfAggregation)) { + return new VcfReader(fileId, studyId, vcfFile); } else { - return new AggregatedVcfReader( - (VariantSource) jobOptions.getVariantOptions().get(VariantStorageManager.VARIANT_SOURCE), - jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF)); + return new AggregatedVcfReader(fileId, studyId, vcfAggregation, parameters.getAggregatedMappingFile(), + vcfFile); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java index e5e9084f3..ccc98e689 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/GeneWriterConfiguration.java @@ -17,34 +17,23 @@ import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; 
-import org.springframework.context.annotation.Import; import org.springframework.data.mongodb.core.MongoOperations; - -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.writers.GeneWriter; import uk.ac.ebi.eva.pipeline.model.FeatureCoordinates; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; - -import java.net.UnknownHostException; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENE_WRITER; @Configuration -@Import({ MongoConfiguration.class }) public class GeneWriterConfiguration { - @Autowired - private MongoConfiguration mongoConfiguration; - @Bean(GENE_WRITER) @StepScope - public ItemWriter geneWriter(JobOptions jobOptions) throws UnknownHostException { - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); - return new GeneWriter(mongoOperations, jobOptions.getDbCollectionsFeaturesName()); + public ItemWriter geneWriter(MongoOperations mongoOperations, + DatabaseParameters databaseParameters) { + return new GeneWriter(mongoOperations, databaseParameters.getCollectionFeaturesName()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java index 9a876ddea..e7878f57f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantAnnotationWriterConfiguration.java @@ -18,36 +18,24 @@ import org.opencb.biodata.models.variant.annotation.VariantAnnotation; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; -import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; import uk.ac.ebi.eva.pipeline.Application; -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; - -import java.net.UnknownHostException; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_WRITER; @Configuration -@Import({ MongoConfiguration.class }) public class VariantAnnotationWriterConfiguration { - @Autowired - private MongoConfiguration mongoConfiguration; - @Bean(VARIANT_ANNOTATION_WRITER) @StepScope @Profile(Application.VARIANT_ANNOTATION_MONGO_PROFILE) - public ItemWriter variantAnnotationItemWriter(JobOptions jobOptions) throws UnknownHostException { - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); - String collections = jobOptions.getPipelineOptions().getString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME); - return new VepAnnotationMongoWriter(mongoOperations, collections); + public ItemWriter variantAnnotationItemWriter(MongoOperations mongoOperations, + DatabaseParameters databaseParameters) { + return new VepAnnotationMongoWriter(mongoOperations, databaseParameters.getCollectionVariantsName()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java index e7ee815b6..f12dd3add 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VariantWriterConfiguration.java @@ -15,53 +15,46 @@ */ package uk.ac.ebi.eva.pipeline.configuration.writers; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_WRITER; - -import org.opencb.opencga.storage.core.variant.VariantStorageManager; +import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.item.ItemWriter; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; import org.springframework.data.mongodb.core.MongoOperations; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.pipeline.Application; -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.writers.VariantMongoWriter; import uk.ac.ebi.eva.pipeline.model.converters.data.VariantToMongoDbObjectConverter; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; + +import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_WRITER; @Configuration -@Import({ MongoConfiguration.class }) public class VariantWriterConfiguration { - @Autowired - private MongoConfiguration mongoConfiguration; - @Bean(VARIANT_WRITER) @StepScope @Profile(Application.VARIANT_WRITER_MONGO_PROFILE) - public ItemWriter variantMongoWriter(JobOptions jobOptions) throws Exception { - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); - - return new 
VariantMongoWriter(jobOptions.getDbCollectionsVariantsName(), - mongoOperations, - variantToMongoDbObjectConverter(jobOptions)); + public ItemWriter variantMongoWriter(InputParameters inputParameters, MongoOperations mongoOperations, + DatabaseParameters databaseParameters) { + return new VariantMongoWriter(databaseParameters.getCollectionVariantsName(), mongoOperations, + variantToMongoDbObjectConverter(inputParameters)); } @Bean @StepScope - public VariantToMongoDbObjectConverter variantToMongoDbObjectConverter(JobOptions jobOptions) throws Exception { - return new VariantToMongoDbObjectConverter( - jobOptions.getVariantOptions().getBoolean(VariantStorageManager.INCLUDE_STATS), - jobOptions.getVariantOptions().getBoolean(VariantStorageManager.CALCULATE_STATS), - jobOptions.getVariantOptions().getBoolean(VariantStorageManager.INCLUDE_SAMPLES), - (VariantStorageManager.IncludeSrc) jobOptions.getVariantOptions() - .get(VariantStorageManager.INCLUDE_SRC)); + public VariantToMongoDbObjectConverter variantToMongoDbObjectConverter(InputParameters inputParameters) { + boolean includeSamples, includeStats; + if (VariantSource.Aggregation.NONE.equals(inputParameters.getVcfAggregation())) { + includeSamples = true; + includeStats = false; + } else { + includeSamples = false; + includeStats = true; + } + return new VariantToMongoDbObjectConverter(includeStats, includeSamples); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepInputFlatFileWriterConfiguration.java b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepInputFlatFileWriterConfiguration.java index 23d077bbb..327af139e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepInputFlatFileWriterConfiguration.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/configuration/writers/VepInputFlatFileWriterConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ import org.springframework.context.annotation.Configuration; import uk.ac.ebi.eva.pipeline.io.writers.VepInputFlatFileWriter; import uk.ac.ebi.eva.pipeline.model.VariantWrapper; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VEP_INPUT_WRITER; @@ -30,8 +30,8 @@ public class VepInputFlatFileWriterConfiguration { @Bean(VEP_INPUT_WRITER) @StepScope - public ItemStreamWriter vepInputFlatFileWriter(JobOptions jobOptions) { - return new VepInputFlatFileWriter(jobOptions.getVepInput()); + public ItemStreamWriter vepInputFlatFileWriter(AnnotationParameters annotationParameters) { + return new VepInputFlatFileWriter(annotationParameters.getVepInput()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AggregatedVcfLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AggregatedVcfLineMapper.java index eb369c49a..d7dbc9482 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AggregatedVcfLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/AggregatedVcfLineMapper.java @@ -17,10 +17,14 @@ import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.item.file.LineMapper; - +import org.springframework.util.Assert; import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.utils.FileUtils; +import java.io.FileInputStream; +import java.io.IOException; import java.util.List; +import java.util.Properties; import static org.junit.Assert.assertNotNull; @@ -30,34 +34,46 @@ * The actual implementation is reused from {@link VariantVcfFactory}. 
*/ public class AggregatedVcfLineMapper implements LineMapper> { - private final VariantSource source; + private final String fileId; + private final String studyId; private VariantVcfFactory factory; - public AggregatedVcfLineMapper(VariantSource source) { - switch (source.getAggregation()) { + public AggregatedVcfLineMapper(String fileId, String studyId, VariantSource.Aggregation aggregation, + String mappingFilePath) throws IOException { + Assert.notNull(fileId); + Assert.notNull(studyId); + Assert.notNull(aggregation); + + this.fileId = fileId; + this.studyId = studyId; + + Properties mappings = null; + if(mappingFilePath!=null){ + mappings = FileUtils.getPropertiesFile(new FileInputStream(mappingFilePath)); + } + + switch (aggregation) { case EVS: - factory = new VariantVcfEVSFactory(); + factory = new VariantVcfEVSFactory(mappings); break; case EXAC: - factory = new VariantVcfExacFactory(); + factory = new VariantVcfExacFactory(mappings); break; case BASIC: - factory = new VariantAggregatedVcfFactory(); + factory = new VariantAggregatedVcfFactory(mappings); break; case NONE: throw new IllegalArgumentException( this.getClass().getSimpleName() + " should be used to read aggregated VCFs only, " + "but the VariantSource.Aggregation is set to NONE"); } - this.source = source; } @Override public List mapLine(String line, int lineNumber) throws Exception { assertNotNull(this.getClass().getSimpleName() + " should be used to read aggregated VCFs only " + - "(hint: do not set VariantSource.Aggregation to NONE)", - factory); - return factory.create(source, line); + "(hint: do not set VariantSource.Aggregation to NONE)", factory); + return factory.create(fileId, studyId, line); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactory.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactory.java index 71878f52f..3c8bacc99 100644 --- 
a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactory.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactory.java @@ -17,7 +17,6 @@ package uk.ac.ebi.eva.pipeline.io.mappers; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; import uk.ac.ebi.eva.commons.models.data.Variant; @@ -63,29 +62,42 @@ public VariantAggregatedVcfFactory() { } /** - * @param tagMap Properties that contains case-sensitive tag mapping for aggregation data. A valid example structure - * of this file is: - *
-     * {@code
+     * @param mappings Properties that contains case-sensitive tag mapping for aggregation data. A valid example
+     *                 structure of this file is:
+     *                 
+     *                 {@code
      *
-     * EUR.AF=EUR_AF
-     * EUR.AC=AC_EUR
-     * EUR.AN=EUR_AN
-     * EUR.GTC=EUR_GTC
-     * ALL.AF=AF
-     * ALL.AC=TAC
-     * ALL.AN=AN
-     * ALL.GTC=GTC
-     * }
-     * 
- *

- *

- * where the right side of the '=' is how the values appear in the vcf, and left side is how it will loaded. It must - * be a bijection, i.e. there must not be repeated entries in any side. The part before the '.' can be any string - * naming the group. The part after the '.' must be one of AF, AC, AN or GTC. + * EUR.AF=EUR_AF + * EUR.AC=AC_EUR + * EUR.AN=EUR_AN + * EUR.GTC=EUR_GTC + * ALL.AF=AF + * ALL.AC=TAC + * ALL.AN=AN + * ALL.GTC=GTC + * } + *

+ *

+ *

+ * where the right side of the '=' is how the values appear in the vcf, and left side is how it will be + * loaded. It must be a bijection, i.e. there must not be repeated entries in any side. The part + * before the '.' can be any string naming the group. The part after the '.' must be one of AF, + * AC, AN or GTC. */ - public VariantAggregatedVcfFactory(Properties tagMap) { - this.tagMap = tagMap; + public VariantAggregatedVcfFactory(Properties mappings) { + if (mappings == null) { + loadDefaultMappings(); + } else { + loadMappings(mappings); + } + } + + protected void loadDefaultMappings() { + // No default mapping. + } + + protected void loadMappings(Properties mappings) { + this.tagMap = mappings; if (tagMap != null) { this.reverseTagMap = new LinkedHashMap<>(tagMap.size()); for (String tag : tagMap.stringPropertyNames()) { @@ -97,18 +109,22 @@ public VariantAggregatedVcfFactory(Properties tagMap) { } @Override - protected void parseSplitSampleData(Variant variant, VariantSource source, String[] fields, + protected void parseSplitSampleData(Variant variant, String fileId, String studyId, String[] fields, String[] alternateAlleles, String[] secondaryAlternates, int alternateAlleleIdx) throws NonStandardCompliantSampleField { - // Nothing to do + if (fields.length > 8) { + throw new IllegalArgumentException("Aggregated VCFs should not have column FORMAT nor " + + "further sample columns, i.e. 
there should be only 8 columns"); + } } @Override - protected void setOtherFields(Variant variant, VariantSource source, Set ids, float quality, String filter, - String info, String format, int numAllele, String[] alternateAlleles, String line) { + protected void setOtherFields(Variant variant, String fileId, String studyId, Set ids, float quality, + String filter, String info, String format, int numAllele, String[] alternateAlleles, + String line) { // Fields not affected by the structure of REF and ALT fields variant.setIds(ids); - VariantSourceEntry sourceEntry = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = variant.getSourceEntry(fileId, studyId); if (quality > -1) { sourceEntry.addAttribute("QUAL", String.valueOf(quality)); } @@ -116,22 +132,22 @@ protected void setOtherFields(Variant variant, VariantSource source, Set sourceEntry.addAttribute("FILTER", filter); } if (!info.isEmpty()) { - parseInfo(variant, source.getFileId(), source.getStudyId(), info, numAllele); + parseInfo(variant, fileId, studyId, info, numAllele); } sourceEntry.setFormat(format); sourceEntry.addAttribute("src", line); if (tagMap == null) { - parseStats(variant, source, numAllele, alternateAlleles, info); + parseStats(variant, fileId, studyId, numAllele, alternateAlleles, info); } else { - parseCohortStats(variant, source, numAllele, alternateAlleles, info); + parseCohortStats(variant, fileId, studyId, numAllele, alternateAlleles, info); } } - protected void parseStats(Variant variant, VariantSource source, int numAllele, String[] alternateAlleles, + protected void parseStats(Variant variant, String fileId, String studyId, int numAllele, String[] alternateAlleles, String info) { - VariantSourceEntry file = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file = variant.getSourceEntry(fileId, studyId); VariantStats vs = new VariantStats(variant); Map stats = new LinkedHashMap<>(); String[] 
splittedInfo = info.split(";"); @@ -149,10 +165,11 @@ protected void parseStats(Variant variant, VariantSource source, int numAllele, file.setStats(vs); } - protected void parseCohortStats(Variant variant, VariantSource source, int numAllele, String[] alternateAlleles, - String info) { - VariantSourceEntry file = variant.getSourceEntry(source.getFileId(), source.getStudyId()); - Map> cohortStats = new LinkedHashMap<>(); // cohortName -> (statsName -> statsValue): EUR->(AC->3,2) + protected void parseCohortStats(Variant variant, String fileId, String studyId, int numAllele, + String[] alternateAlleles, String info) { + VariantSourceEntry file = variant.getSourceEntry(fileId, studyId); + Map> cohortStats = new LinkedHashMap<>(); + // cohortName -> (statsName -> statsValue): EUR->(AC->3,2) String[] splittedInfo = info.split(";"); for (String attribute : splittedInfo) { String[] assignment = attribute.split("="); @@ -268,7 +285,7 @@ protected void addStats(Variant variant, VariantSourceEntry sourceEntry, int num getGenotype(i, alleles); gtc = Integer.parseInt(gtcs[i]); gt = mapToMultiallelicIndex(alleles[0], numAllele) + "/" + mapToMultiallelicIndex(alleles[1], - numAllele); + numAllele); } else { // GTC=0/0:0,0/1:5,1/1:8 Matcher matcher = numNum.matcher(gtcSplit[0]); if (matcher.matches()) { // number/number:number @@ -302,7 +319,7 @@ protected void addStats(Variant variant, VariantSourceEntry sourceEntry, int num * returns in alleles[] the genotype specified in index in the sequence: * 0/0, 0/1, 1/1, 0/2, 1/2, 2/2, 0/3... * - * @param index in this sequence, starting in 0 + * @param index in this sequence, starting in 0 * @param alleles returned genotype. 
*/ public static void getGenotype(int index, Integer alleles[]) { @@ -333,7 +350,7 @@ protected Genotype parseGenotype(String gt, Variant variant, int numAllele, Stri m = singleRef.matcher(gt); if (m.matches()) { // R g = new Genotype(variant.getReference() + "/" + variant.getReference(), variant.getReference(), - variant.getAlternate()); + variant.getAlternate()); return g; } @@ -354,7 +371,7 @@ protected Genotype parseGenotype(String gt, Variant variant, int numAllele, Stri m = refRef.matcher(gt); if (m.matches()) { // RR g = new Genotype(variant.getReference() + "/" + variant.getReference(), variant.getReference(), - variant.getAlternate()); + variant.getAlternate()); return g; } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactory.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactory.java index 2649ddef2..64731e354 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactory.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactory.java @@ -16,21 +16,22 @@ */ package uk.ac.ebi.eva.pipeline.io.mappers; -import org.opencb.biodata.models.variant.VariantSource; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; import uk.ac.ebi.eva.commons.models.data.VariantStats; +import uk.ac.ebi.eva.utils.FileUtils; +import java.io.IOException; import java.util.Properties; import java.util.Set; /** - * Overrides the methods in VariantAggregatedVcfFactory that take care of the fields QUAL, FILTER and INFO, to support + * Overrides the methods in VariantAggregatedVcfFactory that take care of the fields QUAL, FILTER and INFO, to support * the specific format of Exome Variant Server VCFs. 
*/ public class VariantVcfEVSFactory extends VariantAggregatedVcfFactory { + private static final String EVS_MAPPING_FILE = "/mappings/evs-mapping.properties"; public VariantVcfEVSFactory() { this(null); @@ -61,13 +62,22 @@ public VariantVcfEVSFactory(Properties tagMap) { super(tagMap); } + @Override + protected void loadDefaultMappings() { + try { + loadMappings(FileUtils.getPropertiesFile(FileUtils.getResourceAsStream(EVS_MAPPING_FILE))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } @Override - protected void setOtherFields(Variant variant, VariantSource source, Set ids, float quality, String filter, - String info, String format, int numAllele, String[] alternateAlleles, String line) { + protected void setOtherFields(Variant variant, String fileId, String studyId, Set ids, float quality, + String filter, String info, String format, int numAllele, String[] alternateAlleles, + String line) { // Fields not affected by the structure of REF and ALT fields variant.setIds(ids); - VariantSourceEntry sourceEntry = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = variant.getSourceEntry(fileId, studyId); if (quality > -1) { sourceEntry.addAttribute("QUAL", String.valueOf(quality)); } @@ -75,21 +85,21 @@ protected void setOtherFields(Variant variant, VariantSource source, Set sourceEntry.addAttribute("FILTER", filter); } if (!info.isEmpty()) { - parseInfo(variant, source.getFileId(), source.getStudyId(), info, numAllele); + parseInfo(variant, fileId, studyId, info, numAllele); } sourceEntry.setFormat(format); sourceEntry.addAttribute("src", line); if (tagMap == null) { // whether we can parse population stats or not - parseEVSAttributes(variant, source, numAllele, alternateAlleles); + parseEVSAttributes(variant, fileId, studyId, numAllele, alternateAlleles); } else { parseCohortEVSInfo(variant, sourceEntry, numAllele, alternateAlleles); } } - private void parseEVSAttributes(Variant variant, 
VariantSource source, int numAllele, String[] alternateAlleles) { - VariantSourceEntry file = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + private void parseEVSAttributes(Variant variant, String fileId, String studyId, int numAllele, String[] alternateAlleles) { + VariantSourceEntry file = variant.getSourceEntry(fileId, studyId); VariantStats stats = new VariantStats(variant); if (file.hasAttribute("MAF")) { String splitsMAF[] = file.getAttribute("MAF").split(","); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactory.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactory.java index d6aff73dc..3de6ef2b4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactory.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactory.java @@ -17,12 +17,13 @@ package uk.ac.ebi.eva.pipeline.io.mappers; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.VariantSource; import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; import uk.ac.ebi.eva.commons.models.data.VariantStats; +import uk.ac.ebi.eva.utils.FileUtils; +import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; import java.util.Properties; @@ -43,6 +44,8 @@ public class VariantVcfExacFactory extends VariantAggregatedVcfFactory { private static final String COMMA = ","; + private static final String EXAC_MAPPING_FILE = "/mappings/exac-mapping.properties"; + public VariantVcfExacFactory() { this(null); } @@ -72,11 +75,19 @@ public VariantVcfExacFactory(Properties tagMap) { super(tagMap); } + @Override + protected void loadDefaultMappings() { + try { + loadMappings(FileUtils.getPropertiesFile(FileUtils.getResourceAsStream(EXAC_MAPPING_FILE))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } @Override - protected void parseStats(Variant variant, VariantSource source, 
int numAllele, String[] alternateAlleles, + protected void parseStats(Variant variant, String fileId, String studyId, int numAllele, String[] alternateAlleles, String info) { - VariantSourceEntry sourceEntry = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = variant.getSourceEntry(fileId, studyId); VariantStats stats = new VariantStats(variant); if (sourceEntry.hasAttribute(AC_HET)) { // heterozygous genotype count @@ -119,9 +130,9 @@ protected void parseStats(Variant variant, VariantSource source, int numAllele, @Override - protected void parseCohortStats(Variant variant, VariantSource source, int numAllele, String[] alternateAlleles, + protected void parseCohortStats(Variant variant, String fileId, String studyId, int numAllele, String[] alternateAlleles, String info) { - VariantSourceEntry sourceEntry = variant.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = variant.getSourceEntry(fileId, studyId); String[] attributes = info.split(";"); Map ans = new LinkedHashMap<>(); Map acs = new LinkedHashMap<>(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactory.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactory.java index 8e65df77e..202a4c1b9 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactory.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactory.java @@ -19,7 +19,6 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.feature.Genotype; import org.opencb.biodata.models.variant.VariantFactory; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; import org.opencb.biodata.models.variant.exceptions.NotAVariantException; @@ -51,11 +50,12 @@ public class VariantVcfFactory { * as Ensembl does, except for insertions, where start is greater than end: * 
http://www.ensembl.org/info/docs/tools/vep/vep_formats.html#vcf * - * @param source context of the variant: studyId, fileId, etc + * @param fileId, + * @param studyId * @param line Contents of the line in the file * @return The list of Variant objects that can be created using the fields from a VCF record */ - public List create(VariantSource source, + public List create(String fileId, String studyId, String line) throws IllegalArgumentException, NotAVariantException { String[] fields = line.split("\t"); if (fields.length < 8) { @@ -119,14 +119,14 @@ public List create(VariantSource source, Variant variant = new Variant(chromosome, keyFields.start, keyFields.end, keyFields.reference, keyFields.alternate); String[] secondaryAlternates = getSecondaryAlternates(variant, keyFields.getNumAllele(), alternateAlleles); - VariantSourceEntry file = new VariantSourceEntry(source.getFileId(), source.getStudyId(), + VariantSourceEntry file = new VariantSourceEntry(fileId, studyId, secondaryAlternates, format); variant.addSourceEntry(file); try { - parseSplitSampleData(variant, source, fields, alternateAlleles, secondaryAlternates, altAlleleIdx); + parseSplitSampleData(variant, fileId, studyId, fields, alternateAlleles, secondaryAlternates, altAlleleIdx); // Fill the rest of fields (after samples because INFO depends on them) - setOtherFields(variant, source, ids, quality, filter, info, format, keyFields.getNumAllele(), + setOtherFields(variant, fileId, studyId, ids, quality, filter, info, format, keyFields.getNumAllele(), alternateAlleles, line); variants.add(variant); } catch (NonStandardCompliantSampleField ex) { @@ -242,10 +242,10 @@ protected String[] getSecondaryAlternates(Variant variant, int numAllele, String return secondaryAlternates; } - protected void parseSplitSampleData(Variant variant, VariantSource source, String[] fields, + protected void parseSplitSampleData(Variant variant, String fileId, String studyId, String[] fields, String[] alternateAlleles, String[] 
secondaryAlternates, int alternateAlleleIdx) throws NonStandardCompliantSampleField { - String[] formatFields = variant.getSourceEntry(source.getFileId(), source.getStudyId()).getFormat().split(":"); + String[] formatFields = variant.getSourceEntry(fileId, studyId).getFormat().split(":"); for (int i = 9; i < fields.length; i++) { Map map = new TreeMap<>(); @@ -263,7 +263,7 @@ protected void parseSplitSampleData(Variant variant, VariantSource source, Strin } // Add sample to the variant entry in the source file - variant.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(map); + variant.getSourceEntry(fileId, studyId).addSampleData(map); } } @@ -315,22 +315,22 @@ private String processGenotypeField(int alternateAlleleIdx, String genotype) { return genotype.intern(); } - protected void setOtherFields(Variant variant, VariantSource source, Set ids, float quality, String filter, + protected void setOtherFields(Variant variant, String fileId, String studyId, Set ids, float quality, String filter, String info, String format, int numAllele, String[] alternateAlleles, String line) { // Fields not affected by the structure of REF and ALT fields variant.setIds(ids); if (quality > -1) { - variant.getSourceEntry(source.getFileId(), source.getStudyId()) + variant.getSourceEntry(fileId, studyId) .addAttribute("QUAL", String.valueOf(quality)); } if (!filter.isEmpty()) { - variant.getSourceEntry(source.getFileId(), source.getStudyId()).addAttribute("FILTER", filter); + variant.getSourceEntry(fileId, studyId).addAttribute("FILTER", filter); } if (!info.isEmpty()) { - parseInfo(variant, source.getFileId(), source.getStudyId(), info, numAllele); + parseInfo(variant, fileId, studyId, info, numAllele); } - variant.getSourceEntry(source.getFileId(), source.getStudyId()).addAttribute("src", line); + variant.getSourceEntry(fileId, studyId).addAttribute("src", line); } protected void parseInfo(Variant variant, String fileId, String studyId, String info, int numAllele) { 
diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VcfLineMapper.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VcfLineMapper.java index 5ba01a53d..08cdfd0cf 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VcfLineMapper.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/mappers/VcfLineMapper.java @@ -15,15 +15,11 @@ */ package uk.ac.ebi.eva.pipeline.io.mappers; -import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.item.file.LineMapper; - import uk.ac.ebi.eva.commons.models.data.Variant; import java.util.List; -import static org.junit.Assert.assertNotNull; - /** * Maps a String (in VCF format) to a list of variants. *

@@ -31,25 +27,19 @@ */ public class VcfLineMapper implements LineMapper> { - private final VariantSource source; + private final String fileId; + private final String studyId; private final VariantVcfFactory factory; - public VcfLineMapper(VariantSource source) { - if (!VariantSource.Aggregation.NONE.equals(source.getAggregation())) { - throw new IllegalArgumentException( - this.getClass().getSimpleName() + " should be used to read genotyped VCFs only, " + - "but the VariantSource.Aggregation set to " + source.getAggregation().toString()); - } - this.source = source; + public VcfLineMapper(String fileId, String studyId) { + this.fileId = fileId; + this.studyId = studyId; this.factory = new VariantVcfFactory(); } @Override public List mapLine(String line, int lineNumber) { - assertNotNull(this.getClass().getSimpleName() + " should be used to read genotyped VCFs only " + - "(hint: set VariantSource.Aggregation to NONE)", - factory); - return factory.create(source, line); + return factory.create(fileId, studyId, line); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReader.java index c6ac2bc68..9d2b76245 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReader.java @@ -16,7 +16,6 @@ package uk.ac.ebi.eva.pipeline.io.readers; import org.opencb.biodata.models.variant.VariantSource; - import uk.ac.ebi.eva.pipeline.io.mappers.AggregatedVcfLineMapper; import java.io.File; @@ -32,11 +31,8 @@ */ public class AggregatedVcfReader extends VcfReader { - public AggregatedVcfReader(VariantSource source, String file) throws IOException { - this(source, new File(file)); - } - - public AggregatedVcfReader(VariantSource source, File file) throws IOException { - super(new AggregatedVcfLineMapper(source), file); + public AggregatedVcfReader(String fileId, String studyId, 
VariantSource.Aggregation aggregation, + String mappingFilePath, File file) throws IOException { + super(new AggregatedVcfLineMapper(fileId, studyId, aggregation, mappingFilePath), file); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReader.java index 2e21612dd..6451cadda 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReader.java @@ -23,8 +23,6 @@ import org.springframework.data.mongodb.core.MongoOperations; -import java.net.UnknownHostException; - /** * Mongo variant reader using an ItemReader cursor based. This is speeding up * the reading of the variant in big collections. The @@ -33,8 +31,7 @@ */ public class NonAnnotatedVariantsMongoReader extends MongoDbCursorItemReader { - public NonAnnotatedVariantsMongoReader(MongoOperations template, String collectionsVariantsName) - throws UnknownHostException { + public NonAnnotatedVariantsMongoReader(MongoOperations template, String collectionsVariantsName) { super(); setTemplate(template); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/PedReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/PedReader.java new file mode 100644 index 000000000..dad2e31dd --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/PedReader.java @@ -0,0 +1,118 @@ +/* + * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.io.readers; + +import org.opencb.biodata.formats.pedigree.io.PedigreePedReader; +import org.opencb.biodata.formats.pedigree.io.PedigreeReader; +import org.opencb.biodata.models.pedigree.Pedigree; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream; +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; + +import java.io.IOException; + +/** + * ItemReader that parses a PED file + *

+ * PED specs + * http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped + */ +public class PedReader implements ResourceAwareItemReaderItemStream { + private static final Logger logger = LoggerFactory.getLogger(PedReader.class); + + private boolean readAlreadyDone; + + private PedigreeReader pedigreeReader; + + private Resource resource; + + public PedReader() { + this.readAlreadyDone = false; + } + + public PedReader(String pedigreePath) { + this(); + setResource(new FileSystemResource(pedigreePath)); + } + + @Override + public void setResource(Resource resource) { + this.resource = resource; + } + + /** + * The ItemReader interface requires a null to be returned after all the elements are read, and we will just + * read one Pedigree from a ped file. + */ + @Override + public Pedigree read() throws Exception { + if (readAlreadyDone) { + return null; + } else { + readAlreadyDone = true; + return doRead(); + } + } + + private Pedigree doRead() { + if (pedigreeReader == null) { + throw new IllegalStateException("The method PedReader.open() should be called before reading"); + } + return pedigreeReader.read().get(0); + } + + @Override + public void open(ExecutionContext executionContext) throws ItemStreamException { + readAlreadyDone = false; + checkResourceIsProvided(); + String resourcePath = getResourcePath(); + pedigreeReader = new PedigreePedReader(resourcePath); + doOpen(resourcePath); + } + + private void checkResourceIsProvided() { + if (resource == null) { + throw new ItemStreamException("Resource was not provided."); + } + } + + private String getResourcePath() { + try { + return resource.getFile().getAbsolutePath(); + } catch (IOException innerException) { + throw new ItemStreamException(innerException); + } + } + + private void doOpen(String path) { + if (!pedigreeReader.open()) { + throw new ItemStreamException("Couldn't open file " + path); + } + } + + @Override + public void update(ExecutionContext executionContext) throws ItemStreamException { + } 
+ + @Override + public void close() throws ItemStreamException { + pedigreeReader.close(); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReader.java b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReader.java index 574b3167c..50a9f53e1 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReader.java @@ -18,20 +18,50 @@ import org.opencb.biodata.formats.variant.vcf4.io.VariantVcfReader; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.VariantStudy; -import org.springframework.batch.item.ItemReader; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream; +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntity; import java.io.File; +import java.io.IOException; -public class VcfHeaderReader implements ItemReader { +/** + * Before providing the VariantSource as argument to a VcfReader (that uses the VariantVcfFactory inside + * the mapper), we need to fill some attributes from the header: + *

    + *
  • Common VCF fields (FORMAT, INFO, ALT, FILTER, contig)
  • + *
  • Other fields (may be custom fields from users: reference, source...)
  • + *
  • Sample names
  • + *
  • Full header string
  • + *
+ *

+ * As some tags from the header will appear more than once (INFO, contig, ALT...), they are stored in a Map + * where the key is the field tag, and the value is a list of lines: + *

+ * {@code INFO -> [{ "id" : "CIEND", "number" : "2", "type" : "Integer", "description" : "Confidence..." }, ... ]} + *

+ * We are breaking retrocompatibility here, since the previous structure was wrong. In fields that are different but + * start with the same key, only the last line was stored, e.g.: {@literal "ALT" : "ID=CN124,Description=...\>"}. + * Now ALT would map to a list of deconstructed objects: {@code ALT -> [ {id, description}, ...] } + *

+ * Look at the test to see how is this checked. + */ +public class VcfHeaderReader implements ResourceAwareItemReaderItemStream { /** * The header of the VCF can be retrieved using `source.getMetadata().get(VARIANT_FILE_HEADER_KEY)`. */ public static final String VARIANT_FILE_HEADER_KEY = "header"; - private final File file; + private boolean readAlreadyDone; + + private VariantVcfReader variantReader; + + private Resource resource; private final VariantSource source; @@ -41,45 +71,80 @@ public VcfHeaderReader(File file, String studyName, VariantStudy.StudyType type, VariantSource.Aggregation aggregation) { - this.file = file; - this.source = new VariantSource(file.getName(), fileId, studyId, studyName, type, aggregation); + this(file, new VariantSource(file.getName(), fileId, studyId, studyName, type, aggregation)); } public VcfHeaderReader(File file, VariantSource source) { - this.file = file; this.source = source; + this.readAlreadyDone = false; + setResource(new FileSystemResource(file)); + } + + @Override + public void setResource(Resource resource) { + this.resource = resource; + source.setFileName(resource.getFilename()); } /** - * Before providing the VariantSource as argument to a VcfReader (that uses the VariantVcfFactory inside - * the mapper), we need to fill some attributes from the header: - *

    - *
  • Common VCF fields (FORMAT, INFO, ALT, FILTER, contig)
  • - *
  • Other fields (may be custom fields from users: reference, source...)
  • - *
  • Sample names
  • - *
  • Full header string
  • - *
- *

- * As some tags from the header will appear more than once (INFO, contig, ALT...), they are stored in a Map - * where the key is the field tag, and the value is a list of lines: - *

- * {@code INFO -> [{ "id" : "CIEND", "number" : "2", "type" : "Integer", "description" : "Confidence..." }, ... ]} - *

- * We are breaking retrocompatibility here, since the previous structure was wrong. In fields that are different but - * start with the same key, only the last line was stored, e.g.: {@literal "ALT" : "ID=CN124,Description=...\>"}. - * Now ALT would map to a list of deconstructed objects: {@code ALT -> [ {id, description}, ...] } - *

- * Look at the test to see how is this checked. + * The ItemReader interface requires a null to be returned after all the elements are read, and we will just + * read one VariantSourceEntity from a VCF */ @Override - public VariantSourceEntity read() { - VariantVcfReader reader = new VariantVcfReader(source, file.getPath()); - reader.open(); - reader.pre(); - reader.post(); - reader.close(); - - source.addMetadata(VARIANT_FILE_HEADER_KEY, reader.getHeader()); + public VariantSourceEntity read() throws Exception { + if (readAlreadyDone) { + return null; + } else { + readAlreadyDone = true; + return doRead(); + } + } + + private VariantSourceEntity doRead() { + if (variantReader == null) { + throw new IllegalStateException("The method VcfHeaderReader.open() should be called before reading"); + } + variantReader.pre(); + source.addMetadata(VARIANT_FILE_HEADER_KEY, variantReader.getHeader()); return new VariantSourceEntity(source); } + + @Override + public void open(ExecutionContext executionContext) throws ItemStreamException { + readAlreadyDone = false; + checkResourceIsProvided(); + String resourcePath = getResourcePath(); + variantReader = new VariantVcfReader(source, resourcePath); + doOpen(resourcePath); + } + + private void checkResourceIsProvided() { + if (resource == null) { + throw new ItemStreamException("Resource was not provided."); + } + } + + private String getResourcePath() { + try { + return resource.getFile().getAbsolutePath(); + } catch (IOException e) { + throw new ItemStreamException(e); + } + } + + private void doOpen(String path) { + if (!variantReader.open()) { + throw new ItemStreamException("Couldn't open file " + path); + } + } + + @Override + public void update(ExecutionContext executionContext) throws ItemStreamException { + } + + @Override + public void close() throws ItemStreamException { + variantReader.post(); + variantReader.close(); + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReader.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReader.java index 014e28579..1d8a3f324 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReader.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReader.java @@ -15,7 +15,6 @@ */ package uk.ac.ebi.eva.pipeline.io.readers; -import org.opencb.biodata.models.variant.VariantSource; import org.springframework.batch.item.file.FlatFileItemReader; import org.springframework.batch.item.file.LineMapper; import org.springframework.core.io.Resource; @@ -38,12 +37,14 @@ */ public class VcfReader extends FlatFileItemReader> { - public VcfReader(VariantSource source, String file) throws IOException { - this(source, new File(file)); + public VcfReader(String fileId, String studyId, String file) + throws IOException { + this(fileId, studyId, new File(file)); } - public VcfReader(VariantSource source, File file) throws IOException { - this(new VcfLineMapper(source), file); + public VcfReader(String fileId, String studyId, File file) + throws IOException { + this(new VcfLineMapper(fileId, studyId), file); } public VcfReader(LineMapper> lineMapper, File file) throws IOException { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java index 81bbdd894..ee6558923 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJob.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Job; -import org.springframework.batch.core.JobExecutionListener; import org.springframework.batch.core.Step; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; @@ -32,11 +31,10 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; - import uk.ac.ebi.eva.pipeline.jobs.flows.AnnotationFlowOptional; import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; import uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep; -import uk.ac.ebi.eva.pipeline.listeners.VariantOptionsConfigurerListener; +import uk.ac.ebi.eva.pipeline.parameters.validation.job.AggregatedVcfJobParametersValidator; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.AGGREGATED_VCF_JOB; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_FILE_STEP; @@ -58,15 +56,6 @@ public class AggregatedVcfJob { private static final Logger logger = LoggerFactory.getLogger(AggregatedVcfJob.class); - //job default settings - private static final boolean INCLUDE_SAMPLES = false; - - private static final boolean COMPRESS_GENOTYPES = false; - - private static final boolean CALCULATE_STATS = true; - - private static final boolean INCLUDE_STATS = true; - @Autowired @Qualifier(VEP_ANNOTATION_OPTIONAL_FLOW) private Flow annotationFlowOptional; @@ -87,8 +76,7 @@ public Job aggregatedVcfJob(JobBuilderFactory jobBuilderFactory) { JobBuilder jobBuilder = jobBuilderFactory .get(AGGREGATED_VCF_JOB) .incrementer(new RunIdIncrementer()) - .listener(aggregatedJobListener()); - + .validator(new AggregatedVcfJobParametersValidator()); FlowJobBuilder builder = jobBuilder .flow(variantLoaderStep) .next(loadFileStep) @@ -97,13 +85,4 @@ public Job 
aggregatedVcfJob(JobBuilderFactory jobBuilderFactory) { return builder.build(); } - - @Bean - @Scope("prototype") - JobExecutionListener aggregatedJobListener() { - return new VariantOptionsConfigurerListener(INCLUDE_SAMPLES, - COMPRESS_GENOTYPES, - CALCULATE_STATS, - INCLUDE_STATS); - } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java index 072680b22..73fe32a72 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJob.java @@ -44,6 +44,8 @@ * Optional flow: variantsAnnotGenerateInput --> (annotationCreate --> annotationLoad) * annotationCreate and annotationLoad steps are only executed if variantsAnnotGenerateInput is generating a * non-empty VEP input file + * + * TODO add a new AnnotationJobParametersValidator */ @Configuration diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java index 949211139..2dbc5751e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/DatabaseInitializationJob.java @@ -41,6 +41,8 @@ *

* 1. create the needed indexes in the DBs * 2. load genomic features for the species + * + * TODO add a new DatabaseInitializationJobParametersValidator */ @Configuration @EnableBatchProcessing diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java index 25f35a855..3ed57f7a0 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJob.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,11 +18,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Job; -import org.springframework.batch.core.JobExecutionListener; import org.springframework.batch.core.Step; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; -import org.springframework.batch.core.configuration.annotation.JobScope; import org.springframework.batch.core.job.builder.FlowJobBuilder; import org.springframework.batch.core.job.builder.JobBuilder; import org.springframework.batch.core.job.flow.Flow; @@ -33,11 +31,10 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; - import uk.ac.ebi.eva.pipeline.jobs.flows.ParallelStatisticsAndAnnotationFlow; import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; import uk.ac.ebi.eva.pipeline.jobs.steps.VariantLoaderStep; -import uk.ac.ebi.eva.pipeline.listeners.VariantOptionsConfigurerListener; +import uk.ac.ebi.eva.pipeline.parameters.validation.job.GenotypedVcfJobParametersValidator; import 
static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENOTYPED_VCF_JOB; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_FILE_STEP; @@ -57,16 +54,8 @@ @EnableBatchProcessing @Import({VariantLoaderStep.class, LoadFileStep.class, ParallelStatisticsAndAnnotationFlow.class}) public class GenotypedVcfJob { - private static final Logger logger = LoggerFactory.getLogger(GenotypedVcfJob.class); - - //job default settings - private static final boolean INCLUDE_SAMPLES = true; - - private static final boolean COMPRESS_GENOTYPES = true; - private static final boolean CALCULATE_STATS = false; - - private static final boolean INCLUDE_STATS = false; + private static final Logger logger = LoggerFactory.getLogger(GenotypedVcfJob.class); @Autowired @Qualifier(PARALLEL_STATISTICS_AND_ANNOTATION) @@ -88,7 +77,7 @@ public Job genotypedVcfJob(JobBuilderFactory jobBuilderFactory) { JobBuilder jobBuilder = jobBuilderFactory .get(GENOTYPED_VCF_JOB) .incrementer(new RunIdIncrementer()) - .listener(genotypedJobListener()); + .validator(new GenotypedVcfJobParametersValidator()); FlowJobBuilder builder = jobBuilder .flow(variantLoaderStep) .next(loadFileStep) @@ -98,12 +87,4 @@ public Job genotypedVcfJob(JobBuilderFactory jobBuilderFactory) { return builder.build(); } - @Bean - @JobScope - public JobExecutionListener genotypedJobListener() { - return new VariantOptionsConfigurerListener(INCLUDE_SAMPLES, - COMPRESS_GENOTYPES, - CALCULATE_STATS, - INCLUDE_STATS); - } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java index 366cb5f9e..3ab56281a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJob.java @@ -33,10 +33,11 @@ import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_FLOW; import static 
uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_JOB; -import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_OPTIONAL_FLOW; /** * Configuration to run a full Statistics job: variantStatsFlow: statsCreate --> statsLoad + * + * TODO add a new PopulationStatisticsJobParametersValidator */ @Configuration @EnableBatchProcessing diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/EmptyVepInputDecider.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/EmptyVepInputDecider.java new file mode 100644 index 000000000..7e44dde7e --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/EmptyVepInputDecider.java @@ -0,0 +1,74 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.jobs.deciders; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.core.job.flow.FlowExecutionStatus; +import org.springframework.batch.core.job.flow.JobExecutionDecider; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.utils.URLHelper; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +/** + * Decider used to skip step(s) if the file vepInput is empty + */ +public class EmptyVepInputDecider implements JobExecutionDecider { + private static final Logger logger = LoggerFactory.getLogger(EmptyVepInputDecider.class); + + public static final String STOP_FLOW = "STOP_FLOW"; + + public static final String CONTINUE_FLOW = "CONTINUE_FLOW"; + + @Override + public FlowExecutionStatus decide(JobExecution jobExecution, StepExecution stepExecution) { + String vepInput = getVepInput(jobExecution); + + if (getFileSize(vepInput) <= 0) { + logger.info("File {} is empty so following steps will not run", vepInput); + return new FlowExecutionStatus(STOP_FLOW); + } + + return new FlowExecutionStatus(CONTINUE_FLOW); + } + + private String getVepInput(JobExecution jobExecution) { + JobParameters jobParameters = jobExecution.getJobParameters(); + + return URLHelper.resolveVepInput( + jobParameters.getString(JobParametersNames.OUTPUT_DIR_ANNOTATION), + jobParameters.getString(JobParametersNames.INPUT_STUDY_ID), + jobParameters.getString(JobParametersNames.INPUT_VCF_ID)); + } + + private long getFileSize(String file) { + long fileSize; + + try { + fileSize = Files.size(Paths.get(file)); + } catch (IOException e) { + throw new RuntimeException("File {} is not readable", e); + } + return fileSize; + } +} diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/SkipStepDecider.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/SkipStepDecider.java index d6bbae4bc..409f37a9e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/SkipStepDecider.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/deciders/SkipStepDecider.java @@ -15,7 +15,6 @@ */ package uk.ac.ebi.eva.pipeline.jobs.deciders; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.JobExecution; @@ -29,31 +28,21 @@ public class SkipStepDecider implements JobExecutionDecider { private static final Logger logger = LoggerFactory.getLogger(SkipStepDecider.class); - private String skipStep; - private ObjectMap pipelineOptions; - public static final String SKIP_STEP = "SKIP_STEP"; public static final String DO_STEP = "DO_STEP"; - /** - * @param pipelineOptions ObjectMap that will have a boolean for .get(skipStep), telling whether to skip or not - * @param skipStep name of the key that the user sets to skip a step, e.g. "annotation.create.skip" - * It's recommended to use pre-defined constants, such as AnnotationJob.SKIP_ANNOT to avoid - * misspelling mistakes. 
- */ - public SkipStepDecider(ObjectMap pipelineOptions, String skipStep) { - this.skipStep = skipStep; - this.pipelineOptions = pipelineOptions; + public final String jobParameterName; + + public SkipStepDecider(String jobParameterName) { + this.jobParameterName = jobParameterName; } @Override public FlowExecutionStatus decide(JobExecution jobExecution, StepExecution stepExecution) { - if (Boolean.parseBoolean(pipelineOptions.getString(skipStep))) { - logger.info("Skipping step because {} is enabled", skipStep); + if (Boolean.parseBoolean(jobExecution.getJobParameters().getString(jobParameterName))) { + logger.info("Step skipped due to {} enabled", jobParameterName); return new FlowExecutionStatus(SKIP_STEP); } - - logger.info("Running step because {} is disabled", skipStep); return new FlowExecutionStatus(DO_STEP); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java index f01e634d2..f554f1107 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlow.java @@ -25,11 +25,11 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import uk.ac.ebi.eva.pipeline.jobs.deciders.EmptyFileDecider; + +import uk.ac.ebi.eva.pipeline.jobs.deciders.EmptyVepInputDecider; import uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep; import uk.ac.ebi.eva.pipeline.jobs.steps.GenerateVepAnnotationStep; import uk.ac.ebi.eva.pipeline.jobs.steps.VepInputGeneratorStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENERATE_VEP_ANNOTATION_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENERATE_VEP_INPUT_STEP; @@ -60,16 +60,15 @@ public class AnnotationFlow { private Step generateVepAnnotationStep; 
@Bean(VEP_ANNOTATION_FLOW) - public Flow vepAnnotationFlow(JobOptions jobOptions) { - EmptyFileDecider emptyFileDecider = new EmptyFileDecider(jobOptions.getPipelineOptions().getString(JobOptions - .VEP_INPUT)); + public Flow vepAnnotationFlow() { + EmptyVepInputDecider emptyVepInputDecider = new EmptyVepInputDecider(); return new FlowBuilder(VEP_ANNOTATION_FLOW) .start(generateVepInputStep) - .next(emptyFileDecider).on(EmptyFileDecider.CONTINUE_FLOW) + .next(emptyVepInputDecider).on(EmptyVepInputDecider.CONTINUE_FLOW) .to(generateVepAnnotationStep) .next(annotationLoadStep) - .from(emptyFileDecider).on(EmptyFileDecider.STOP_FLOW) + .from(emptyVepInputDecider).on(EmptyVepInputDecider.STOP_FLOW) .end(BatchStatus.COMPLETED.toString()) .build(); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java index 71289adc5..c1c704a96 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/AnnotationFlowOptional.java @@ -19,15 +19,15 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.job.builder.FlowBuilder; import org.springframework.batch.core.job.flow.Flow; +import org.springframework.batch.core.job.flow.JobExecutionDecider; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import org.springframework.context.annotation.Scope; +import uk.ac.ebi.eva.pipeline.configuration.JobExecutionDeciderConfiguration; import uk.ac.ebi.eva.pipeline.jobs.deciders.SkipStepDecider; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import static 
uk.ac.ebi.eva.pipeline.configuration.BeanNames.ANNOTATION_SKIP_STEP_DECIDER; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VEP_ANNOTATION_FLOW; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VEP_ANNOTATION_OPTIONAL_FLOW; @@ -39,24 +39,18 @@ */ @Configuration @EnableBatchProcessing -@Import({AnnotationFlow.class}) +@Import({AnnotationFlow.class, JobExecutionDeciderConfiguration.class}) public class AnnotationFlowOptional { @Bean(VEP_ANNOTATION_OPTIONAL_FLOW) - Flow vepAnnotationOptionalFlow(@Qualifier(VEP_ANNOTATION_FLOW) Flow vepAnnotationFlow, - SkipStepDecider skipStepDecider) { + public Flow vepAnnotationOptionalFlow(@Qualifier(VEP_ANNOTATION_FLOW) Flow vepAnnotationFlow, + @Qualifier(ANNOTATION_SKIP_STEP_DECIDER) JobExecutionDecider decider) { return new FlowBuilder(VEP_ANNOTATION_OPTIONAL_FLOW) - .start(skipStepDecider).on(SkipStepDecider.DO_STEP) + .start(decider).on(SkipStepDecider.DO_STEP) .to(vepAnnotationFlow) - .from(skipStepDecider).on(SkipStepDecider.SKIP_STEP) + .from(decider).on(SkipStepDecider.SKIP_STEP) .end(BatchStatus.COMPLETED.toString()) .build(); } - @Bean - @Scope("prototype") - SkipStepDecider skipStepDecider(JobOptions jobOptions) { - return new SkipStepDecider(jobOptions.getPipelineOptions(), JobParametersNames.ANNOTATION_SKIP); - } - } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java index 7cdc51975..8cd91f9f2 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/flows/PopulationStatisticsOptionalFlow.java @@ -19,15 +19,13 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.job.builder.FlowBuilder; import org.springframework.batch.core.job.flow.Flow; -import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.batch.core.job.flow.JobExecutionDecider; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; -import org.springframework.context.annotation.Scope; +import uk.ac.ebi.eva.pipeline.configuration.JobExecutionDeciderConfiguration; import uk.ac.ebi.eva.pipeline.jobs.deciders.SkipStepDecider; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_FLOW; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_STATISTICS_OPTIONAL_FLOW; @@ -39,31 +37,18 @@ */ @Configuration @EnableBatchProcessing -@Import({PopulationStatisticsFlow.class}) +@Import({PopulationStatisticsFlow.class, JobExecutionDeciderConfiguration.class}) public class PopulationStatisticsOptionalFlow { - @Autowired - @Qualifier(CALCULATE_STATISTICS_FLOW) - private Flow calculateStatisticsflow; - - @Autowired - @Qualifier(STATISTICS_SKIP_STEP_DECIDER) - private SkipStepDecider skipStepDecider; - @Bean(CALCULATE_STATISTICS_OPTIONAL_FLOW) - public Flow calculateStatisticsOptionalFlow() { + public Flow calculateStatisticsOptionalFlow(@Qualifier(CALCULATE_STATISTICS_FLOW) Flow calculateStatisticsflow, + @Qualifier(STATISTICS_SKIP_STEP_DECIDER) JobExecutionDecider decider) { return new FlowBuilder(CALCULATE_STATISTICS_OPTIONAL_FLOW) - .start(skipStepDecider).on(SkipStepDecider.DO_STEP) + .start(decider).on(SkipStepDecider.DO_STEP) .to(calculateStatisticsflow) - .from(skipStepDecider).on(SkipStepDecider.SKIP_STEP) + .from(decider).on(SkipStepDecider.SKIP_STEP) .end(BatchStatus.COMPLETED.toString()) .build(); } - @Bean(STATISTICS_SKIP_STEP_DECIDER) - @Scope("prototype") - SkipStepDecider 
skipStepDecider(JobOptions jobOptions) { - return new SkipStepDecider(jobOptions.getPipelineOptions(), JobParametersNames.STATISTICS_SKIP); - } - } \ No newline at end of file diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java index 51eebc15b..ee8662e59 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStep.java @@ -17,7 +17,6 @@ package uk.ac.ebi.eva.pipeline.jobs.steps; import org.opencb.biodata.models.variant.annotation.VariantAnnotation; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Step; @@ -26,18 +25,19 @@ import org.springframework.batch.item.ItemStreamReader; import org.springframework.batch.item.ItemWriter; import org.springframework.batch.item.file.FlatFileParseException; +import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; +import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import uk.ac.ebi.eva.pipeline.configuration.readers.VariantAnnotationReaderConfiguration; import uk.ac.ebi.eva.pipeline.configuration.writers.VariantAnnotationWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.AnnotationFlatFileReader; import uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter; import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import static 
uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_VEP_ANNOTATION_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_ANNOTATION_READER; @@ -60,7 +60,8 @@ @Configuration @EnableBatchProcessing -@Import({VariantAnnotationReaderConfiguration.class, VariantAnnotationWriterConfiguration.class}) +@Import({VariantAnnotationReaderConfiguration.class, VariantAnnotationWriterConfiguration.class, + ChunkSizeCompletionPolicyConfiguration.class}) public class AnnotationLoaderStep { private static final Logger logger = LoggerFactory.getLogger(AnnotationLoaderStep.class); @@ -73,19 +74,16 @@ public class AnnotationLoaderStep { private ItemWriter variantAnnotationItemWriter; @Bean(LOAD_VEP_ANNOTATION_STEP) - public Step loadVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + public Step loadVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, + SimpleCompletionPolicy chunkSizeCompletionPolicy) { logger.debug("Building '" + LOAD_VEP_ANNOTATION_STEP + "'"); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - boolean startIfcomplete = pipelineOptions.getBoolean(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); - final int chunkSize = pipelineOptions.getInt(JobParametersNames.CONFIG_CHUNK_SIZE); - return stepBuilderFactory.get(LOAD_VEP_ANNOTATION_STEP) - .chunk(chunkSize) + .chunk(chunkSizeCompletionPolicy) .reader(variantAnnotationReader) .writer(variantAnnotationItemWriter) .faultTolerant().skipLimit(50).skip(FlatFileParseException.class) - .allowStartIfComplete(startIfcomplete) + .allowStartIfComplete(jobOptions.isAllowStartIfComplete()) .listener(new SkippedItemListener()) .build(); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java index 858f3e414..9c8ab583f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CalculateStatisticsStep.java @@ -40,7 +40,7 @@ public class CalculateStatisticsStep { @Bean @StepScope - PopulationStatisticsGeneratorStep populationStatisticsGeneratorStep() { + public PopulationStatisticsGeneratorStep populationStatisticsGeneratorStep() { return new PopulationStatisticsGeneratorStep(); } @@ -48,7 +48,7 @@ PopulationStatisticsGeneratorStep populationStatisticsGeneratorStep() { public TaskletStep calculateStatisticsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + CALCULATE_STATISTICS_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, CALCULATE_STATISTICS_STEP, - populationStatisticsGeneratorStep(), jobOptions); + populationStatisticsGeneratorStep(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java index 0f1743844..ebf260312 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/CreateDatabaseIndexesStep.java @@ -40,7 +40,7 @@ public class CreateDatabaseIndexesStep { @Bean @StepScope - IndexesGeneratorStep indexesGeneratorStep() { + public IndexesGeneratorStep indexesGeneratorStep() { return new IndexesGeneratorStep(); } @@ -48,7 +48,7 @@ IndexesGeneratorStep indexesGeneratorStep() { public TaskletStep createDatabaseIndexesStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + CREATE_DATABASE_INDEXES_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, CREATE_DATABASE_INDEXES_STEP, indexesGeneratorStep(), - jobOptions); + jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java index 
1a58cb268..4b645b23b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GeneLoaderStep.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Step; @@ -25,11 +24,14 @@ import org.springframework.batch.item.ItemStreamReader; import org.springframework.batch.item.ItemWriter; import org.springframework.batch.item.file.FlatFileParseException; +import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; + +import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import uk.ac.ebi.eva.pipeline.configuration.readers.GeneReaderConfiguration; import uk.ac.ebi.eva.pipeline.configuration.writers.GeneWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.GeneLineMapper; @@ -39,7 +41,6 @@ import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; import uk.ac.ebi.eva.pipeline.model.FeatureCoordinates; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENES_LOAD_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENE_READER; @@ -60,7 +61,7 @@ @Configuration @EnableBatchProcessing 
-@Import({GeneReaderConfiguration.class, GeneWriterConfiguration.class}) +@Import({GeneReaderConfiguration.class, GeneWriterConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) public class GeneLoaderStep { private static final Logger logger = LoggerFactory.getLogger(GeneLoaderStep.class); @@ -74,19 +75,17 @@ public class GeneLoaderStep { private ItemWriter writer; @Bean(GENES_LOAD_STEP) - public Step genesLoadStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + public Step genesLoadStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, + SimpleCompletionPolicy chunkSizeCompletionPolicy) { logger.debug("Building '" + GENES_LOAD_STEP + "'"); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - boolean startIfcomplete = pipelineOptions.getBoolean(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); - return stepBuilderFactory.get(GENES_LOAD_STEP) - .chunk(jobOptions.getPipelineOptions().getInt(JobParametersNames.CONFIG_CHUNK_SIZE)) + .chunk(chunkSizeCompletionPolicy) .reader(reader) .processor(new GeneFilterProcessor()) .writer(writer) .faultTolerant().skipLimit(50).skip(FlatFileParseException.class) - .allowStartIfComplete(startIfcomplete) + .allowStartIfComplete(jobOptions.isAllowStartIfComplete()) .listener(new SkippedItemListener()) .build(); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java index 15d6c00d0..dc23708c8 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/GenerateVepAnnotationStep.java @@ -40,7 +40,7 @@ public class GenerateVepAnnotationStep { @Bean @StepScope - VepAnnotationGeneratorStep vepAnnotationGeneratorStep() { + public VepAnnotationGeneratorStep vepAnnotationGeneratorStep() { return new VepAnnotationGeneratorStep(); } @@ -48,7 +48,7 @@ VepAnnotationGeneratorStep 
vepAnnotationGeneratorStep() { public TaskletStep generateVepAnnotationStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + GENERATE_VEP_ANNOTATION_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, GENERATE_VEP_ANNOTATION_STEP, - vepAnnotationGeneratorStep(), jobOptions); + vepAnnotationGeneratorStep(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java index 55b8538b2..95bd76c8d 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStep.java @@ -23,9 +23,6 @@ import org.springframework.batch.core.step.tasklet.TaskletStep; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; - -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.FileLoaderStep; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; @@ -37,21 +34,21 @@ */ @Configuration @EnableBatchProcessing -@Import({ MongoConfiguration.class }) public class LoadFileStep { private static final Logger logger = LoggerFactory.getLogger(LoadFileStep.class); @Bean @StepScope - FileLoaderStep fileLoaderStep() { + public FileLoaderStep fileLoaderStep() { return new FileLoaderStep(); } @Bean(LOAD_FILE_STEP) public TaskletStep loadFileStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + LOAD_FILE_STEP + "'"); - return TaskletUtils.generateStep(stepBuilderFactory, LOAD_FILE_STEP, fileLoaderStep(), jobOptions); + return TaskletUtils.generateStep(stepBuilderFactory, LOAD_FILE_STEP, fileLoaderStep(), + jobOptions.isAllowStartIfComplete()); } } diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java index a77476efb..5cbbf308f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadStatisticsStep.java @@ -24,7 +24,6 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.VepAnnotationGeneratorStep; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.utils.TaskletUtils; @@ -41,7 +40,7 @@ public class LoadStatisticsStep { @Bean @StepScope - PopulationStatisticsLoaderStep populationStatisticsLoaderStep() { + public PopulationStatisticsLoaderStep populationStatisticsLoaderStep() { return new PopulationStatisticsLoaderStep(); } @@ -49,7 +48,7 @@ PopulationStatisticsLoaderStep populationStatisticsLoaderStep() { public TaskletStep loadStatisticsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { logger.debug("Building '" + LOAD_STATISTICS_STEP + "'"); return TaskletUtils.generateStep(stepBuilderFactory, LOAD_STATISTICS_STEP, - populationStatisticsLoaderStep(), jobOptions); + populationStatisticsLoaderStep(), jobOptions.isAllowStartIfComplete()); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java index e71f4525c..6a7b3436b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStep.java @@ -15,7 +15,6 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Step; @@ -24,17 +23,18 
@@ import org.springframework.batch.item.ItemStreamReader; import org.springframework.batch.item.ItemWriter; import org.springframework.batch.item.file.FlatFileParseException; +import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import uk.ac.ebi.eva.commons.models.data.Variant; +import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import uk.ac.ebi.eva.pipeline.configuration.readers.VcfReaderConfiguration; import uk.ac.ebi.eva.pipeline.configuration.writers.VariantWriterConfiguration; import uk.ac.ebi.eva.pipeline.listeners.SkippedItemListener; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.LOAD_VARIANTS_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_READER; @@ -48,7 +48,7 @@ */ @Configuration @EnableBatchProcessing -@Import({VariantWriterConfiguration.class, VcfReaderConfiguration.class}) +@Import({VariantWriterConfiguration.class, VcfReaderConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class}) public class VariantLoaderStep { private static final Logger logger = LoggerFactory.getLogger(VariantLoaderStep.class); @@ -62,18 +62,16 @@ public class VariantLoaderStep { private ItemWriter variantWriter; @Bean(LOAD_VARIANTS_STEP) - public Step loadVariantsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + public Step loadVariantsStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, + SimpleCompletionPolicy chunkSizeCompletionPolicy) { logger.debug("Building '" + LOAD_VARIANTS_STEP + "'"); - ObjectMap pipelineOptions = 
jobOptions.getPipelineOptions(); - boolean startIfcomplete = pipelineOptions.getBoolean(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); - return stepBuilderFactory.get(LOAD_VARIANTS_STEP) - .chunk(jobOptions.getPipelineOptions().getInt(JobParametersNames.CONFIG_CHUNK_SIZE)) + .chunk(chunkSizeCompletionPolicy) .reader(reader) .writer(variantWriter) .faultTolerant().skipLimit(50).skip(FlatFileParseException.class) - .allowStartIfComplete(startIfcomplete) + .allowStartIfComplete(jobOptions.isAllowStartIfComplete()) .listener(new SkippedItemListener()) .build(); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStep.java index 9c0caf707..cfbaafac5 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStep.java @@ -16,18 +16,19 @@ package uk.ac.ebi.eva.pipeline.jobs.steps; import com.mongodb.DBObject; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.Step; import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; import org.springframework.batch.item.ItemStreamWriter; +import org.springframework.batch.repeat.policy.SimpleCompletionPolicy; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; +import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration; import uk.ac.ebi.eva.pipeline.configuration.readers.NonAnnotatedVariantsMongoReaderConfiguration; import 
uk.ac.ebi.eva.pipeline.configuration.writers.VepInputFlatFileWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.NonAnnotatedVariantsMongoReader; @@ -35,7 +36,6 @@ import uk.ac.ebi.eva.pipeline.jobs.steps.processors.AnnotationProcessor; import uk.ac.ebi.eva.pipeline.model.VariantWrapper; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENERATE_VEP_INPUT_STEP; import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.NON_ANNOTATED_VARIANTS_READER; @@ -58,7 +58,8 @@ */ @Configuration @EnableBatchProcessing -@Import({NonAnnotatedVariantsMongoReaderConfiguration.class, VepInputFlatFileWriterConfiguration.class}) +@Import({NonAnnotatedVariantsMongoReaderConfiguration.class, VepInputFlatFileWriterConfiguration.class, + ChunkSizeCompletionPolicyConfiguration.class}) public class VepInputGeneratorStep { private static final Logger logger = LoggerFactory.getLogger(VepInputGeneratorStep.class); @@ -72,19 +73,16 @@ public class VepInputGeneratorStep { private ItemStreamWriter writer; @Bean(GENERATE_VEP_INPUT_STEP) - public Step generateVepInputStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions) { + public Step generateVepInputStep(StepBuilderFactory stepBuilderFactory, JobOptions jobOptions, + SimpleCompletionPolicy chunkSizeCompletionPolicy) { logger.debug("Building '" + GENERATE_VEP_INPUT_STEP + "'"); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - boolean startIfcomplete = pipelineOptions.getBoolean(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); - int chunkSize = pipelineOptions.getInt(JobParametersNames.CONFIG_CHUNK_SIZE); - return stepBuilderFactory.get(GENERATE_VEP_INPUT_STEP) - .chunk(chunkSize) + .chunk(chunkSizeCompletionPolicy) .reader(reader) .processor(new AnnotationProcessor()) .writer(writer) - .allowStartIfComplete(startIfcomplete) + 
.allowStartIfComplete(jobOptions.isAllowStartIfComplete()) .build(); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java index bd18f8baa..5ceaf5b3b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/FileLoaderStep.java @@ -15,22 +15,17 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps.tasklets; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.datastore.core.ObjectMap; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.batch.core.StepContribution; import org.springframework.batch.core.scope.context.ChunkContext; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; - import uk.ac.ebi.eva.commons.models.data.VariantSourceEntity; -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.VcfHeaderReader; import uk.ac.ebi.eva.pipeline.io.writers.VariantSourceEntityMongoWriter; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; import java.io.File; import java.util.Collections; @@ -46,26 +41,29 @@ public class FileLoaderStep implements Tasklet { @Autowired - private MongoConfiguration mongoConfiguration; + private MongoOperations mongoOperations; + + @Autowired + private InputParameters inputParameters; @Autowired - private JobOptions jobOptions; + private DatabaseParameters dbParameters; @Override public RepeatStatus execute(StepContribution contribution, ChunkContext 
chunkContext) throws Exception { - ObjectMap variantOptions = jobOptions.getVariantOptions(); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - - VariantSource variantSource = variantOptions.get(VariantStorageManager.VARIANT_SOURCE, VariantSource.class); - File file = new File(pipelineOptions.getString(JobParametersNames.INPUT_VCF)); + File file = new File(inputParameters.getVcf()); - VcfHeaderReader vcfHeaderReader = new VcfHeaderReader(file, variantSource); + VcfHeaderReader vcfHeaderReader = new VcfHeaderReader(file, + inputParameters.getVcfId(), + inputParameters.getStudyId(), + inputParameters.getStudyName(), + inputParameters.getStudyType(), + inputParameters.getVcfAggregation()); + vcfHeaderReader.open(null); VariantSourceEntity variantSourceEntity = vcfHeaderReader.read(); - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); VariantSourceEntityMongoWriter variantSourceEntityMongoWriter = new VariantSourceEntityMongoWriter( - mongoOperations, jobOptions.getDbCollectionsFilesName()); + mongoOperations, dbParameters.getCollectionFilesName()); variantSourceEntityMongoWriter.write(Collections.singletonList(variantSourceEntity)); return RepeatStatus.FINISHED; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java index a489cd3d2..a9e60d181 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/IndexesGeneratorStep.java @@ -22,9 +22,7 @@ import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; - -import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import 
uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; /** * This step initializes the indexes in the databases. @@ -34,18 +32,16 @@ public class IndexesGeneratorStep implements Tasklet { @Autowired - private MongoConfiguration mongoConfiguration; + private MongoOperations mongoOperations; @Autowired - private JobOptions jobOptions; + private DatabaseParameters databaseParameters; @Override public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { - MongoOperations operations = mongoConfiguration.getMongoOperations( - jobOptions.getDbName(), jobOptions.getMongoConnection()); - operations.getCollection(jobOptions.getDbCollectionsFeaturesName()) - .createIndex(new BasicDBObject("name", 1), new BasicDBObject("sparse", true).append("background", true)); - + mongoOperations.getCollection(databaseParameters.getCollectionFeaturesName()) + .createIndex(new BasicDBObject("name", 1), new BasicDBObject("sparse", true) + .append("background", true)); return RepeatStatus.FINISHED; } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PedLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PedLoaderStep.java deleted file mode 100644 index d2697e699..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PedLoaderStep.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.pipeline.jobs.steps.tasklets; - -import org.opencb.biodata.formats.pedigree.io.PedigreePedReader; -import org.opencb.biodata.formats.pedigree.io.PedigreeReader; -import org.opencb.biodata.models.pedigree.Pedigree; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.batch.core.StepContribution; -import org.springframework.batch.core.configuration.annotation.StepScope; -import org.springframework.batch.core.scope.context.ChunkContext; -import org.springframework.batch.core.step.tasklet.Tasklet; -import org.springframework.batch.repeat.RepeatStatus; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; -import org.springframework.stereotype.Component; - -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; - -/** - * Tasklet that parse and load a PED file into Mongo - *

- * PED specs - * http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped - *

- * TODO: only reading for now, to be completed.. - */ -public class PedLoaderStep implements Tasklet { - private static final Logger logger = LoggerFactory.getLogger(PedLoaderStep.class); - - @Autowired - private JobOptions jobOptions; - - private Pedigree pedigree; - - @Override - public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { - PedigreeReader pedigreeReader = new PedigreePedReader(jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_PEDIGREE)); - pedigreeReader.open(); - pedigree = pedigreeReader.read().get(0); - pedigreeReader.close(); - - return RepeatStatus.FINISHED; - } - - public Pedigree getPedigree() { - return pedigree; - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java index 8abf18599..7a08b774a 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsGeneratorStep.java @@ -22,22 +22,24 @@ import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsManager; +import org.opencb.opencga.storage.mongodb.variant.MongoDBVariantStorageManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.StepContribution; -import org.springframework.batch.core.configuration.annotation.StepScope; import org.springframework.batch.core.scope.context.ChunkContext; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; -import 
org.springframework.stereotype.Component; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; +import uk.ac.ebi.eva.pipeline.parameters.OutputParameters; import uk.ac.ebi.eva.utils.URLHelper; import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Paths; /** * Tasklet that calculates statistics. See {@link org.opencb.biodata.models.variant.stats.VariantStats} for a list of @@ -50,22 +52,23 @@ public class PopulationStatisticsGeneratorStep implements Tasklet { private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsGeneratorStep.class); @Autowired - private JobOptions jobOptions; + private InputParameters inputParameters; + + @Autowired + private OutputParameters outputParameters; + + @Autowired + private DatabaseParameters dbParameters; @Override public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { - ObjectMap variantOptions = jobOptions.getVariantOptions(); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - // HashMap> samples = new HashMap<>(); // TODO fill properly. 
if this is null overwrite will take on // samples.put("SOME", new HashSet<>(Arrays.asList("HG00096", "HG00097"))); + ObjectMap variantOptions = getVariantOptions(); VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - VariantSource variantSource = variantOptions.get(VariantStorageManager.VARIANT_SOURCE, VariantSource.class); - VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor( - variantOptions.getString(VariantStorageManager.DB_NAME), variantOptions); - URI outdirUri = URLHelper.createUri(pipelineOptions.getString(JobParametersNames.OUTPUT_DIR_STATISTICS)); - URI statsOutputUri = outdirUri.resolve(VariantStorageManager.buildFilename(variantSource)); + VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(dbParameters.getDatabaseName(), variantOptions); + URI statsOutputUri = getStatsBaseUri(); VariantStatisticsManager variantStatisticsManager = new VariantStatisticsManager(); QueryOptions statsOptions = new QueryOptions(variantOptions); @@ -75,4 +78,52 @@ public RepeatStatus execute(StepContribution contribution, ChunkContext chunkCon return RepeatStatus.FINISHED; } + + private URI getStatsBaseUri() throws URISyntaxException { + return URLHelper.getStatsBaseUri( + outputParameters.getOutputDirStatistics(), inputParameters.getStudyId(), inputParameters.getVcfId()); + } + + private ObjectMap getVariantOptions() { + + VariantSource source = getVariantSource(); + + // OpenCGA options with default values (non-customizable) + String compressExtension = ".gz"; + boolean annotate = false; + VariantStorageManager.IncludeSrc includeSourceLine = VariantStorageManager.IncludeSrc.FIRST_8_COLUMNS; + + ObjectMap variantOptions = new ObjectMap(); + variantOptions.put(VariantStorageManager.VARIANT_SOURCE, source); + variantOptions.put(VariantStorageManager.OVERWRITE_STATS, outputParameters.getStatisticsOverwrite()); + variantOptions.put(VariantStorageManager.INCLUDE_SRC, includeSourceLine); + 
variantOptions.put("compressExtension", compressExtension); + variantOptions.put(VariantStorageManager.ANNOTATE, annotate); + variantOptions.put(VariantStatisticsManager.BATCH_SIZE, inputParameters.getChunkSize()); + + variantOptions.put(VariantStorageManager.DB_NAME, dbParameters.getDatabaseName()); + MongoConnection mongoConnection = dbParameters.getMongoConnection(); + variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_NAME, + dbParameters.getDatabaseName()); + variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_HOSTS, + mongoConnection.getHosts()); + variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_AUTHENTICATION_DB, + mongoConnection.getAuthenticationDatabase()); + variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_USER, + mongoConnection.getUser()); + variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_PASS, + mongoConnection.getPassword()); + + return variantOptions; + } + + private VariantSource getVariantSource() { + return new VariantSource( + Paths.get(inputParameters.getVcf()).getFileName().toString(), + inputParameters.getVcfId(), + inputParameters.getStudyId(), + inputParameters.getStudyName(), + inputParameters.getStudyType(), + inputParameters.getVcfAggregation()); + } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java index 6bcc34ed9..61bf69a66 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/PopulationStatisticsLoaderStep.java @@ -40,9 +40,10 @@ import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Autowired; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import 
uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; -import uk.ac.ebi.eva.utils.MongoDBHelper; +import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters; +import uk.ac.ebi.eva.pipeline.parameters.InputParameters; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; +import uk.ac.ebi.eva.pipeline.parameters.OutputParameters; import uk.ac.ebi.eva.utils.URLHelper; import java.io.FileInputStream; @@ -50,7 +51,6 @@ import java.io.InputStream; import java.net.URI; import java.net.UnknownHostException; -import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; @@ -97,12 +97,14 @@ public class PopulationStatisticsLoaderStep implements Tasklet { private static final Logger logger = LoggerFactory.getLogger(PopulationStatisticsLoaderStep.class); - private static final String VARIANT_STATS_SUFFIX = ".variants.stats.json.gz"; + @Autowired + private InputParameters inputParameters; - private static final String SOURCE_STATS_SUFFIX = ".source.stats.json.gz"; + @Autowired + private OutputParameters outputParameters; @Autowired - private JobOptions jobOptions; + private DatabaseParameters dbParameters; private JsonFactory jsonFactory; @@ -116,51 +118,64 @@ public PopulationStatisticsLoaderStep() { @Override public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { - ObjectMap variantOptions = jobOptions.getVariantOptions(); - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - - VariantSource variantSource = variantOptions.get(VariantStorageManager.VARIANT_SOURCE, VariantSource.class); - URI outdirUri = URLHelper.createUri(pipelineOptions.getString(JobParametersNames.OUTPUT_DIR_STATISTICS)); - URI statsOutputUri = outdirUri.resolve(MongoDBHelper.buildStorageFileId( - variantSource.getStudyId(), variantSource.getFileId())); - - VariantDBAdaptor dbAdaptor = getDbAdaptor(pipelineOptions); - QueryOptions statsOptions = new QueryOptions(variantOptions); + VariantDBAdaptor 
dbAdaptor = getDbAdaptor(); + URI variantStatsOutputUri = URLHelper.getVariantsStatsUri( + outputParameters.getOutputDirStatistics(), inputParameters.getStudyId(), inputParameters.getVcfId()); + URI sourceStatsOutputUri = URLHelper.getSourceStatsUri( + outputParameters.getOutputDirStatistics(), inputParameters.getStudyId(), inputParameters.getVcfId()); + QueryOptions statsOptions = new QueryOptions(getVariantOptions()); // Load statistics for variants and the file - loadVariantStats(dbAdaptor, statsOutputUri, statsOptions); - loadSourceStats(dbAdaptor, statsOutputUri); + loadVariantStats(dbAdaptor, variantStatsOutputUri, statsOptions); + loadSourceStats(dbAdaptor, sourceStatsOutputUri); return RepeatStatus.FINISHED; } - private VariantDBAdaptor getDbAdaptor(ObjectMap properties) throws UnknownHostException, IllegalOpenCGACredentialsException { - MongoCredentials credentials = getMongoCredentials(properties); - String variantsCollectionName = properties.getString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME); - String filesCollectionName = properties.getString(JobParametersNames.DB_COLLECTIONS_FILES_NAME); + private ObjectMap getVariantOptions() { + ObjectMap variantOptions = new ObjectMap(); + variantOptions.put(VariantStorageManager.VARIANT_SOURCE, getVariantSource()); + variantOptions.put(VariantStorageManager.OVERWRITE_STATS, outputParameters.getStatisticsOverwrite()); + return variantOptions; + } + + private VariantSource getVariantSource() { + return new VariantSource( + Paths.get(inputParameters.getVcf()).getFileName().toString(), + inputParameters.getVcfId(), + inputParameters.getStudyId(), + inputParameters.getStudyName(), + inputParameters.getStudyType(), + inputParameters.getVcfAggregation()); + } + private VariantDBAdaptor getDbAdaptor() throws UnknownHostException, IllegalOpenCGACredentialsException { + MongoCredentials credentials = getMongoCredentials(); + String variantsCollectionName = dbParameters.getCollectionVariantsName(); + String 
filesCollectionName = dbParameters.getCollectionFilesName(); logger.debug("Getting DBAdaptor to database '{}'", credentials.getMongoDbName()); return new VariantMongoDBAdaptor(credentials, variantsCollectionName, filesCollectionName); } - private MongoCredentials getMongoCredentials(ObjectMap properties) throws IllegalOpenCGACredentialsException { - String hosts = properties.getString(JobParametersNames.CONFIG_DB_HOSTS); + private MongoCredentials getMongoCredentials() throws IllegalOpenCGACredentialsException { + MongoConnection mongoConnection = dbParameters.getMongoConnection(); + String hosts = mongoConnection.getHosts(); List dataStoreServerAddresses = MongoCredentials.parseDataStoreServerAddresses(hosts); - String dbName = properties.getString(JobParametersNames.DB_NAME); - String authenticationDatabase = properties.getString(JobParametersNames.CONFIG_DB_AUTHENTICATIONDB, null); - String user = properties.getString(JobParametersNames.CONFIG_DB_USER, null); - String pass = properties.getString(JobParametersNames.CONFIG_DB_PASSWORD, null); + String dbName = dbParameters.getDatabaseName(); + String user = mongoConnection.getUser(); + String pass = mongoConnection.getPassword(); MongoCredentials mongoCredentials = new MongoCredentials(dataStoreServerAddresses, dbName, user, pass); - mongoCredentials.setAuthenticationDatabase(authenticationDatabase); + mongoCredentials.setAuthenticationDatabase(mongoConnection.getAuthenticationDatabase()); return mongoCredentials; } - private void loadVariantStats(VariantDBAdaptor variantDBAdaptor, URI uri, QueryOptions options) throws IOException { + private void loadVariantStats(VariantDBAdaptor variantDBAdaptor, URI variantsStatsUri, QueryOptions options) + throws IOException { + // Open input stream - Path variantInput = Paths.get(uri.getPath() + VARIANT_STATS_SUFFIX); - InputStream variantInputStream = new GZIPInputStream(new FileInputStream(variantInput.toFile())); + InputStream variantInputStream = new GZIPInputStream(new 
FileInputStream(variantsStatsUri.getPath())); // Initialize JSON parser JsonParser parser = jsonFactory.createParser(variantInputStream); @@ -196,14 +211,14 @@ private void loadVariantStats(VariantDBAdaptor variantDBAdaptor, URI uri, QueryO if (writes < variantsNumber) { logger.warn("provided statistics of {} variants, but only {} were updated", variantsNumber, writes); - logger.info("note: maybe those variants didn't had the proper study? maybe the new and the old stats were the same?"); + logger.info( + "note: maybe those variants didn't had the proper study? maybe the new and the old stats were the same?"); } } - private void loadSourceStats(VariantDBAdaptor variantDBAdaptor, URI uri) throws IOException { + private void loadSourceStats(VariantDBAdaptor variantDBAdaptor, URI sourceStatsUri) throws IOException { // Open input stream - Path sourceInput = Paths.get(uri.getPath() + SOURCE_STATS_SUFFIX); - InputStream sourceInputStream = new GZIPInputStream(new FileInputStream(sourceInput.toFile())); + InputStream sourceInputStream = new GZIPInputStream(new FileInputStream(sourceStatsUri.getPath())); // Read from JSON file JsonParser sourceParser = jsonFactory.createParser(sourceInputStream); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/VepAnnotationGeneratorStep.java b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/VepAnnotationGeneratorStep.java index 03f3088b1..16024805f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/VepAnnotationGeneratorStep.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/jobs/steps/tasklets/VepAnnotationGeneratorStep.java @@ -1,5 +1,5 @@ /* - * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * Copyright 2015-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,6 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps.tasklets; -import org.opencb.datastore.core.ObjectMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.StepContribution; @@ -23,8 +22,8 @@ import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Autowired; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +import uk.ac.ebi.eva.pipeline.parameters.AnnotationParameters; import java.io.BufferedInputStream; import java.io.FileOutputStream; @@ -39,40 +38,42 @@ * coordinates of variants and nucleotide changes to determines the effect of the mutations. *

* Input: file listing all the coordinates of variants and nucleotide changes like: + * {@code * 20 60343 60343 G/A + * 20 60419 60419 A/G + * 20 60479 60479 C/T + * ... + * } *

+ * {@code * Output: file containing the VEP output * 20_60343_G/A 20:60343 A - - - intergenic_variant - - - - - - * 20_60419_A/G 20:60419 G - - - intergenic_variant - - - - - - * 20_60479_C/T 20:60479 T - - - intergenic_variant - - - - - rs149529999 GMAF=T:0.0018;AFR_MAF=T:0.01;AMR_MAF=T:0.0028 * .. + * } */ public class VepAnnotationGeneratorStep implements Tasklet { private static final Logger logger = LoggerFactory.getLogger(VepAnnotationGeneratorStep.class); @Autowired - private JobOptions jobOptions; + private AnnotationParameters annotationParameters; @Override public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { - ObjectMap pipelineOptions = jobOptions.getPipelineOptions(); - ProcessBuilder processBuilder = new ProcessBuilder("perl", - pipelineOptions.getString(JobParametersNames.APP_VEP_PATH), - "--cache", - "--cache_version", pipelineOptions.getString(JobParametersNames.APP_VEP_CACHE_VERSION), - "-dir", pipelineOptions.getString(JobParametersNames.APP_VEP_CACHE_PATH), - "--species", pipelineOptions.getString(JobParametersNames.APP_VEP_CACHE_SPECIES), - "--fasta", pipelineOptions.getString(JobParametersNames.INPUT_FASTA), - "--fork", pipelineOptions.getString(JobParametersNames.APP_VEP_NUMFORKS), - "-i", pipelineOptions.getString(JobOptions.VEP_INPUT), - "-o", "STDOUT", - "--force_overwrite", - "--offline", - "--everything" + ProcessBuilder processBuilder = new ProcessBuilder("perl", annotationParameters.getVepPath(), + "--cache", + "--cache_version", annotationParameters.getVepCacheVersion(), + "-dir", annotationParameters.getVepCachePath(), + "--species", annotationParameters.getVepCacheSpecies(), + "--fasta", annotationParameters.getInputFasta(), + "--fork", annotationParameters.getVepNumForks(), + "-i", annotationParameters.getVepInput(), + "-o", "STDOUT", + "--force_overwrite", + "--offline", + "--everything" ); logger.debug("VEP annotation parameters = " + 
Arrays.toString(processBuilder.command().toArray())); @@ -80,18 +81,17 @@ public RepeatStatus execute(StepContribution contribution, ChunkContext chunkCon logger.info("Starting read from VEP output"); Process process = processBuilder.start(); - long written = connectStreams( - new BufferedInputStream(process.getInputStream()), - new GZIPOutputStream(new FileOutputStream(pipelineOptions.getString(JobOptions.VEP_OUTPUT)))); + long written = connectStreams(new BufferedInputStream(process.getInputStream()), + new GZIPOutputStream(new FileOutputStream(annotationParameters.getVepOutput()))); int exitValue = process.waitFor(); logger.info("Finishing read from VEP output, bytes written: " + written); if (exitValue > 0) { - String errorLog = pipelineOptions.getString(JobOptions.VEP_OUTPUT) + ".errors.txt"; + String errorLog = annotationParameters.getVepOutput() + ".errors.txt"; connectStreams(new BufferedInputStream(process.getErrorStream()), new FileOutputStream(errorLog)); throw new Exception("Error while running VEP (exit status " + exitValue + "). See " - + errorLog + " for the errors description from VEP."); + + errorLog + " for the errors description from VEP."); } return RepeatStatus.FINISHED; diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/listeners/VariantOptionsConfigurerListener.java b/src/main/java/uk/ac/ebi/eva/pipeline/listeners/VariantOptionsConfigurerListener.java deleted file mode 100644 index 9627084a9..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/listeners/VariantOptionsConfigurerListener.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.pipeline.listeners; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.batch.core.JobExecution; -import org.springframework.batch.core.JobExecutionListener; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Import; - -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; - -/** - * Modifies the JobOptions instance with job-specific configuration, where only the configuration for the - * running job should be set. This avoids setting incompatible settings for jobs that won't even be executed. - * This is achieved with a `beforeJob` listener, which only gets called when a job is going to be executed. 
- */ -public class VariantOptionsConfigurerListener implements JobExecutionListener { - - private static final Logger logger = LoggerFactory.getLogger(VariantOptionsConfigurerListener.class); - private final boolean includeSamples; - private final boolean compressGenotypes; - private final boolean calculateStats; - private final boolean includeStats; - - @Autowired - private JobOptions jobOptions; - - public VariantOptionsConfigurerListener(boolean includeSamples, - boolean compressGenotypes, - boolean calculateStats, - boolean includeStats) { - this.includeSamples = includeSamples; - this.compressGenotypes = compressGenotypes; - this.calculateStats = calculateStats; - this.includeStats = includeStats; - } - - @Override - public void beforeJob(JobExecution jobExecution) { - logger.debug("Setting up job " + jobExecution.getJobInstance().getJobName()); - jobOptions.configureGenotypesStorage(includeSamples, compressGenotypes); - jobOptions.configureStatisticsStorage(calculateStats, includeStats); - } - - @Override - public void afterJob(JobExecution jobExecution) { - } -} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java index b1aead798..d6ad1c0ad 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverter.java @@ -17,7 +17,6 @@ import com.mongodb.BasicDBObject; import com.mongodb.DBObject; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.core.convert.converter.Converter; @@ -44,11 +43,16 @@ public class VariantToMongoDbObjectConverter implements Converter sourceEntryStats = statsConverter.convert(variantSourceEntry); diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java new file mode 100644 index 000000000..ed901881a --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/AnnotationParameters.java @@ -0,0 +1,101 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import uk.ac.ebi.eva.utils.URLHelper; + +/** + * Service that holds access to the values for annotation steps like VEP etc. + * + * NOTE the @StepScope this is probably because the Step/Tasklet in this case the + * {@link uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.VepAnnotationGeneratorStep} is executed in parallel with statistics + * {@link uk.ac.ebi.eva.pipeline.jobs.flows.PopulationStatisticsFlow} and they are not sharing the same context. + * With @JobScope it will not work! 
+ */ +@Service +@StepScope +public class AnnotationParameters { + private static final String PARAMETER = "#{jobParameters['"; + private static final String END = "']}"; + + @Value(PARAMETER + JobParametersNames.OUTPUT_DIR_ANNOTATION + END) + private String outputDirAnnotation; + + @Value(PARAMETER + JobParametersNames.INPUT_STUDY_ID + END) + private String studyId; + + @Value(PARAMETER + JobParametersNames.INPUT_VCF_ID + END) + private String fileId; + + @Value(PARAMETER + JobParametersNames.APP_VEP_PATH + END) + private String vepPath; + + @Value(PARAMETER + JobParametersNames.APP_VEP_CACHE_VERSION + END) + private String vepCacheVersion; + + @Value(PARAMETER + JobParametersNames.APP_VEP_CACHE_PATH + END) + private String vepCachePath; + + @Value(PARAMETER + JobParametersNames.APP_VEP_CACHE_SPECIES + END) + private String vepCacheSpecies; + + @Value(PARAMETER + JobParametersNames.APP_VEP_NUMFORKS + END) + private String vepNumForks; + + @Value(PARAMETER + JobParametersNames.INPUT_FASTA + END) + private String inputFasta; + + public String getVepPath() { + return vepPath; + } + + public String getVepCacheVersion() { + return vepCacheVersion; + } + + public String getVepCachePath() { + return vepCachePath; + } + + public String getVepCacheSpecies() { + return vepCacheSpecies; + } + + public String getVepNumForks() { + return vepNumForks; + } + + public String getInputFasta() { + return inputFasta; + } + + public String getVepInput() { + return URLHelper.resolveVepInput(outputDirAnnotation, studyId, fileId); + } + + public String getVepOutput() { + return URLHelper.resolveVepOutput(outputDirAnnotation, studyId, fileId); + } + +} + + + + diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ChunkSizeParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ChunkSizeParameters.java new file mode 100644 index 000000000..18ee703d2 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ChunkSizeParameters.java @@ -0,0 +1,35 @@ +/* + * 
Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +@Service +@StepScope +public class ChunkSizeParameters { + + private static final String PARAMETER = "#{jobParameters['"; + private static final String OR_DEFAULT = "']?:'1000'}"; + + @Value(PARAMETER + JobParametersNames.CONFIG_CHUNK_SIZE + OR_DEFAULT) + private Integer chunkSize; + + public Integer getChunkSize() { + return chunkSize; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java new file mode 100644 index 000000000..58f880920 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/DatabaseParameters.java @@ -0,0 +1,68 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +/** + * Service that holds access to the values for database. This include the configuration + * values for database connection that are got as values not parameters. + */ +@Service +@StepScope +public class DatabaseParameters { + + private static final String PARAMETER = "#{jobParameters['"; + private static final String END = "']}"; + + @Value(PARAMETER + JobParametersNames.DB_NAME + END) + private String databaseName; + + @Value(PARAMETER + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME + END) + private String collectionVariantsName; + + @Value(PARAMETER + JobParametersNames.DB_COLLECTIONS_FILES_NAME + END) + private String collectionFilesName; + + @Value(PARAMETER + JobParametersNames.DB_COLLECTIONS_FEATURES_NAME + END) + private String collectionFeaturesName; + + @Autowired + private MongoConnection mongoConnection; + + public MongoConnection getMongoConnection() { + return mongoConnection; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getCollectionVariantsName() { + return collectionVariantsName; + } + + public String getCollectionFilesName() { + return collectionFilesName; + } + + public String getCollectionFeaturesName() { + return collectionFeaturesName; + } +} diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/InputParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/InputParameters.java new file mode 100644 index 000000000..16d2b0ff2 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/InputParameters.java @@ -0,0 +1,104 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters; + +import org.opencb.biodata.models.variant.VariantSource; +import org.opencb.biodata.models.variant.VariantStudy; +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +/** + * Service that holds access to Job input parameters. 
+ */ +@Service +@StepScope +public class InputParameters { + + private static final String PARAMETER = "#{jobParameters['"; + private static final String END = "']}"; + private static final String OR_NULL = "']?:null}"; + + @Value(PARAMETER + JobParametersNames.INPUT_STUDY_ID + END) + private String studyId; + + @Value(PARAMETER + JobParametersNames.INPUT_VCF_ID + END) + private String vcfId; + + @Value(PARAMETER + JobParametersNames.INPUT_VCF + END) + private String vcf; + + @Value(PARAMETER + JobParametersNames.INPUT_GTF + END) + private String gtf; + + @Value(PARAMETER + JobParametersNames.INPUT_PEDIGREE + END) + private String pedigree; + + @Value(PARAMETER + JobParametersNames.INPUT_VCF_AGGREGATION + "']?:'NONE'}") + private String vcfAggregation; + + @Value(PARAMETER + JobParametersNames.INPUT_STUDY_NAME + END) + private String studyName; + + @Value(PARAMETER + JobParametersNames.INPUT_STUDY_TYPE + END) + private VariantStudy.StudyType studyType; + + @Value(PARAMETER + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH + OR_NULL) + private String aggregatedMappingFile; + + @Value(PARAMETER + JobParametersNames.CONFIG_CHUNK_SIZE + "']?:1000}") + private int chunkSize; + + public String getVcf() { + return vcf; + } + + public VariantSource.Aggregation getVcfAggregation() { + return VariantSource.Aggregation.valueOf(vcfAggregation); + } + + public String getStudyId() { + return studyId; + } + + public String getVcfId() { + return vcfId; + } + + public String getStudyName() { + return studyName; + } + + public VariantStudy.StudyType getStudyType() { + return studyType; + } + + public int getChunkSize() { + return chunkSize; + } + + public String getGtf() { + return gtf; + } + + public String getPedigree() { + return pedigree; + } + + public String getAggregatedMappingFile() { + return aggregatedMappingFile; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobOptions.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobOptions.java index 
0813acee4..01b1b36df 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobOptions.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobOptions.java @@ -15,29 +15,14 @@ */ package uk.ac.ebi.eva.pipeline.parameters; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; -import org.opencb.datastore.core.ObjectMap; import org.opencb.opencga.lib.common.Config; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; -import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsManager; -import org.opencb.opencga.storage.mongodb.variant.MongoDBVariantStorageManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import uk.ac.ebi.eva.utils.MongoConnection; - import javax.annotation.PostConstruct; - -import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URI; -import java.nio.file.Paths; -import java.util.Properties; /** * Class to extract configuration from properties files and from command line. 
@@ -50,255 +35,23 @@ public class JobOptions { private static final Logger logger = LoggerFactory.getLogger(JobOptions.class); - public static final String VEP_INPUT = "vep.input"; - public static final String VEP_OUTPUT = "vep.output"; - - // Input - @Value("${" + JobParametersNames.INPUT_VCF + "}") private String input; - @Value("${" + JobParametersNames.INPUT_VCF_ID + "}") private String fileId; - @Value("${" + JobParametersNames.INPUT_VCF_AGGREGATION + "}") private String aggregated; - @Value("${" + JobParametersNames.INPUT_STUDY_TYPE + "}") private String studyType; - @Value("${" + JobParametersNames.INPUT_STUDY_NAME + "}") private String studyName; - @Value("${" + JobParametersNames.INPUT_STUDY_ID + "}") private String studyId; - @Value("${" + JobParametersNames.INPUT_PEDIGREE + ":}") private String pedigree; - @Value("${" + JobParametersNames.INPUT_GTF + "}") private String gtf; - - // Output - @Value("${" + JobParametersNames.OUTPUT_DIR + "}") private String outputDir; - @Value("${" + JobParametersNames.OUTPUT_DIR_ANNOTATION + "}") private String outputDirAnnotation; - @Value("${" + JobParametersNames.OUTPUT_DIR_STATISTICS + "}") private String outputDirStatistics; - - @Value("${" + JobParametersNames.STATISTICS_OVERWRITE + ":false}") private boolean overwriteStats; - - @Value("${" + JobParametersNames.APP_OPENCGA_PATH + "}") private String opencgaAppHome; - - //// OpenCGA options with default values (non-customizable) - private String compressExtension = ".gz"; - private boolean annotate = false; - private VariantStorageManager.IncludeSrc includeSourceLine = VariantStorageManager.IncludeSrc.FIRST_8_COLUMNS; - - /// DB connection (most parameters read from OpenCGA "conf" folder) - @Value("${" + JobParametersNames.CONFIG_DB_HOSTS + ":#{null}}") private String dbHosts; - @Value("${" + JobParametersNames.CONFIG_DB_AUTHENTICATIONDB + ":#{null}}") private String dbAuthenticationDb; - @Value("${" + JobParametersNames.CONFIG_DB_USER + ":#{null}}") private String 
dbUser; - @Value("${" + JobParametersNames.CONFIG_DB_PASSWORD + ":#{null}}") private String dbPassword; - @Value("${" + JobParametersNames.DB_NAME + ":#{null}}") private String dbName; - @Value("${" + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME + ":#{null}}") private String dbCollectionVariantsName; - @Value("${" + JobParametersNames.DB_COLLECTIONS_FILES_NAME + ":#{null}}") private String dbCollectionFilesName; - @Value("${" + JobParametersNames.DB_COLLECTIONS_FEATURES_NAME +"}") private String dbCollectionGenesName; - @Value("${" + JobParametersNames.DB_COLLECTIONS_STATISTICS_NAME + "}") private String dbCollectionStatsName; - @Value("${" + JobParametersNames.CONFIG_DB_READPREFERENCE + "}") private String readPreference; - - // Skip steps - @Value("${" + JobParametersNames.ANNOTATION_SKIP + ":false}") private boolean skipAnnot; - @Value("${" + JobParametersNames.STATISTICS_SKIP + ":false}") private boolean skipStats; - - //VEP - @Value("${" + JobParametersNames.APP_VEP_PATH +"}") private String vepPath; - @Value("${" + JobParametersNames.APP_VEP_CACHE_PATH + "}") private String vepCacheDirectory; - @Value("${" + JobParametersNames.APP_VEP_CACHE_VERSION + "}") private String vepCacheVersion; - @Value("${" + JobParametersNames.APP_VEP_CACHE_SPECIES + "}") private String vepSpecies; - @Value("${" + JobParametersNames.INPUT_FASTA + "}") private String vepFasta; - @Value("${" + JobParametersNames.APP_VEP_NUMFORKS + "}") private String vepNumForks; + @Value("${" + JobParametersNames.APP_OPENCGA_PATH + ":#{null}}") private String opencgaAppHome; + // Pipeline application options. 
@Value("${" + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW + ":false}") private boolean allowStartIfComplete; - @Value("${" + JobParametersNames.CONFIG_CHUNK_SIZE + ":1000}") private int chunkSize; - - private ObjectMap variantOptions = new ObjectMap(); - private ObjectMap pipelineOptions = new ObjectMap(); @PostConstruct - public void loadArgs() throws IOException { + public void loadArgs() { logger.info("Loading job arguments"); if (opencgaAppHome == null || opencgaAppHome.isEmpty()) { opencgaAppHome = System.getenv("OPENCGA_HOME") != null ? System.getenv("OPENCGA_HOME") : "/opt/opencga"; } Config.setOpenCGAHome(opencgaAppHome); - - loadDbConnectionOptions(); - loadOpencgaOptions(); - loadPipelineOptions(); - } - - private void loadDbConnectionOptions() throws IOException { - // TODO This block shall be removed when we use Spring Data parameters only - URI configUri = URI.create(Config.getOpenCGAHome() + "/").resolve("conf/").resolve("storage-mongodb.properties"); - Properties properties = new Properties(); - properties.load(new InputStreamReader(new FileInputStream(configUri.getPath()))); - - if (dbHosts == null) { - dbHosts = properties.getProperty(JobParametersNames.OPENCGA_DB_HOSTS); - } - if (dbAuthenticationDb == null) { - dbAuthenticationDb = properties.getProperty(JobParametersNames.OPENCGA_DB_AUTHENTICATIONDB, ""); - } - if (dbUser == null) { - dbUser = properties.getProperty(JobParametersNames.OPENCGA_DB_USER, ""); - } - if (dbPassword == null) { - dbPassword = properties.getProperty(JobParametersNames.OPENCGA_DB_PASSWORD, ""); - } - if (dbName == null) { - dbName = properties.getProperty(JobParametersNames.OPENCGA_DB_NAME); - } - if (dbCollectionVariantsName == null) { - dbCollectionVariantsName = properties.getProperty(JobParametersNames.OPENCGA_DB_COLLECTIONS_VARIANTS_NAME, "variants"); - } - if (dbCollectionFilesName == null) { - dbCollectionFilesName = properties.getProperty(JobParametersNames.OPENCGA_DB_COLLECTIONS_FILES_NAME, "files"); - } - // 
TODO End of block to be removed when we use Spring Data parameters only - - if (dbHosts == null || dbHosts.isEmpty()) { - throw new IllegalArgumentException("Please provide a database hostname"); - } - if (dbName == null || dbName.isEmpty()) { - throw new IllegalArgumentException("Please provide a database name"); - } - if (dbCollectionVariantsName == null || dbCollectionVariantsName.isEmpty()) { - throw new IllegalArgumentException("Please provide a name for the collection to store the variant information into"); - } - if (dbCollectionFilesName == null || dbCollectionFilesName.isEmpty()) { - throw new IllegalArgumentException("Please provide a name for the collection to store the file information into"); - } - } - - private void loadOpencgaOptions() { - VariantSource source = new VariantSource( - Paths.get(input).getFileName().toString(), - fileId, - studyId, - studyName, - VariantStudy.StudyType.valueOf(studyType), - VariantSource.Aggregation.valueOf(aggregated)); - - // TODO This block shall be removed when we use Spring Data parameters only - variantOptions.put(VariantStorageManager.VARIANT_SOURCE, source); - variantOptions.put(VariantStorageManager.OVERWRITE_STATS, overwriteStats); - variantOptions.put(VariantStorageManager.INCLUDE_SRC, includeSourceLine); - variantOptions.put("compressExtension", compressExtension); - variantOptions.put(VariantStorageManager.ANNOTATE, annotate); - variantOptions.put(VariantStatisticsManager.BATCH_SIZE, chunkSize); - - variantOptions.put(VariantStorageManager.DB_NAME, dbName); - variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_NAME, dbName); - variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_HOSTS, dbHosts); - variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_AUTHENTICATION_DB, dbAuthenticationDb); - variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_USER, dbUser); - 
variantOptions.put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_PASS, dbPassword); - // TODO End of block to be removed when we use Spring Data parameters only - - logger.debug("Using as input: {}", input); - logger.debug("Using as variantOptions: {}", variantOptions.entrySet().toString()); - } - - private void loadPipelineOptions() { - pipelineOptions.put(JobParametersNames.INPUT_VCF, input); - pipelineOptions.put("compressExtension", compressExtension); - pipelineOptions.put(JobParametersNames.OUTPUT_DIR, outputDir); - pipelineOptions.put(JobParametersNames.OUTPUT_DIR_STATISTICS, outputDirStatistics); - pipelineOptions.put(JobParametersNames.INPUT_PEDIGREE, pedigree); - pipelineOptions.put(JobParametersNames.INPUT_GTF, gtf); - pipelineOptions.put(JobParametersNames.DB_NAME, dbName); - pipelineOptions.put(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, dbCollectionVariantsName); - pipelineOptions.put(JobParametersNames.DB_COLLECTIONS_FILES_NAME, dbCollectionFilesName); - pipelineOptions.put(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, dbCollectionGenesName); - pipelineOptions.put(JobParametersNames.DB_COLLECTIONS_STATISTICS_NAME, dbCollectionStatsName); - pipelineOptions.put(JobParametersNames.CONFIG_DB_HOSTS, dbHosts); - pipelineOptions.put(JobParametersNames.CONFIG_DB_AUTHENTICATIONDB, dbAuthenticationDb); - pipelineOptions.put(JobParametersNames.CONFIG_DB_USER, dbUser); - pipelineOptions.put(JobParametersNames.CONFIG_DB_PASSWORD, dbPassword); - pipelineOptions.put(JobParametersNames.CONFIG_DB_READPREFERENCE, readPreference); - pipelineOptions.put(JobParametersNames.ANNOTATION_SKIP, skipAnnot); - pipelineOptions.put(JobParametersNames.STATISTICS_SKIP, skipStats); - - String annotationFilesPrefix = studyId + "_" + fileId; - pipelineOptions.put(VEP_INPUT, URI.create(outputDirAnnotation + "/").resolve(annotationFilesPrefix + "_variants_to_annotate.tsv").toString()); - pipelineOptions.put(VEP_OUTPUT, URI.create(outputDirAnnotation + 
"/").resolve(annotationFilesPrefix + "_vep_annotation.tsv.gz").toString()); - - pipelineOptions.put(JobParametersNames.APP_VEP_PATH, vepPath); - pipelineOptions.put(JobParametersNames.APP_VEP_CACHE_PATH, vepCacheDirectory); - pipelineOptions.put(JobParametersNames.APP_VEP_CACHE_VERSION, vepCacheVersion); - pipelineOptions.put(JobParametersNames.APP_VEP_CACHE_SPECIES, vepSpecies); - pipelineOptions.put(JobParametersNames.INPUT_FASTA, vepFasta); - pipelineOptions.put(JobParametersNames.APP_VEP_NUMFORKS, vepNumForks); - pipelineOptions.put(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW, allowStartIfComplete); - pipelineOptions.put(JobParametersNames.CONFIG_CHUNK_SIZE, chunkSize); - - logger.debug("Using as pipelineOptions: {}", pipelineOptions.entrySet().toString()); - } - - public void configureGenotypesStorage(boolean includeSamples, boolean compressGenotypes) { - variantOptions.put(VariantStorageManager.INCLUDE_SAMPLES, includeSamples); - variantOptions.put(VariantStorageManager.COMPRESS_GENOTYPES, compressGenotypes); - } - - public void configureStatisticsStorage(boolean calculateStats, boolean includeStats) { - variantOptions.put(VariantStorageManager.CALCULATE_STATS, calculateStats); // this is tested by hand - variantOptions.put(VariantStorageManager.INCLUDE_STATS, includeStats); - } - - public ObjectMap getVariantOptions() { - return variantOptions; - } - - public ObjectMap getPipelineOptions() { - return pipelineOptions; - } - - public String getDbCollectionsFeaturesName() { - return getPipelineOptions().getString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME); - } - - public String getDbCollectionsVariantsName() { - return getPipelineOptions().getString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME); - } - - public String getDbCollectionsFilesName() { - return getPipelineOptions().getString(JobParametersNames.DB_COLLECTIONS_FILES_NAME); } - public String getDbCollectionsStatsName() { - return 
getPipelineOptions().getString(JobParametersNames.DB_COLLECTIONS_STATISTICS_NAME); + public boolean isAllowStartIfComplete() { + return allowStartIfComplete; } - public String getVepInput() { - return getPipelineOptions().getString(VEP_INPUT); - } - - public void setVepInputFile(String vepInputFile) { - getPipelineOptions().put(VEP_INPUT, URI.create(vepInputFile)); - } - - public String getDbName() { - return getPipelineOptions().getString(JobParametersNames.DB_NAME); - } - - public void setDbName(String dbName) { - this.dbName = dbName; - getVariantOptions().put(VariantStorageManager.DB_NAME, dbName); - getVariantOptions().put(MongoDBVariantStorageManager.OPENCGA_STORAGE_MONGODB_VARIANT_DB_NAME, dbName); - getPipelineOptions().put(JobParametersNames.DB_NAME, dbName); - } - - public String getVepOutput() { - return getPipelineOptions().getString(VEP_OUTPUT); - } - - public void setVepOutput(String vepOutput) { - getPipelineOptions().put(VEP_OUTPUT, URI.create(vepOutput)); - } - - public void setAppVepPath(File appVepPath) { - getPipelineOptions().put(JobParametersNames.APP_VEP_PATH, appVepPath); - } - - public String getOutputDir() { - return getPipelineOptions().getString(JobParametersNames.OUTPUT_DIR); - } - - public MongoConnection getMongoConnection() { - return new MongoConnection(dbHosts, dbAuthenticationDb, dbUser, dbPassword, readPreference); - } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java index 9a4239a40..bb26aa579 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/JobParametersNames.java @@ -41,6 +41,8 @@ public class JobParametersNames { public static final String INPUT_FASTA = "input.fasta"; + public static final String INPUT_VCF_AGGREGATION_MAPPING_PATH = "input.vcf.aggregation.mapping-path"; + /* * Output */ @@ -136,4 +138,9 @@ public class 
JobParametersNames { public static final String CONFIG_RESTARTABILITY_ALLOW = "config.restartability.allow"; public static final String CONFIG_CHUNK_SIZE = "config.chunk.size"; + + + public static final String PROPERTY_FILE_PROPERTY = "parameters.path"; + + public static final String RESTART_PROPERTY = "force.restart"; } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/MongoConnection.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/MongoConnection.java new file mode 100644 index 000000000..e4b6d3995 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/MongoConnection.java @@ -0,0 +1,68 @@ +/* + * Copyright 2016-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters; + +import com.mongodb.ReadPreference; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +/** + * Container of credentials for a connection to mongo. + * + * The values are injected directly from environment, not from JobParameters. 
+ */ +@Service +public class MongoConnection { + + @Value("${" + JobParametersNames.CONFIG_DB_HOSTS + ":#{null}}") + private String hosts; + + @Value("${" + JobParametersNames.CONFIG_DB_AUTHENTICATIONDB + ":#{null}}") + private String authenticationDatabase; + + @Value("${" + JobParametersNames.CONFIG_DB_USER + ":#{null}}") + private String user; + + @Value("${" + JobParametersNames.CONFIG_DB_PASSWORD + ":#{null}}") + private String password; + + @Value("${" + JobParametersNames.CONFIG_DB_READPREFERENCE + ":#{null}}") + private String readPreference; + + public String getHosts() { + return hosts; + } + + public String getAuthenticationDatabase() { + return authenticationDatabase; + } + + public String getUser() { + return user; + } + + public String getPassword() { + return password; + } + + public String getReadPreferenceName() { + return readPreference; + } + + public ReadPreference getReadPreference() { + return ReadPreference.valueOf(readPreference); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/OutputParameters.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/OutputParameters.java new file mode 100644 index 000000000..290a0dc40 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/OutputParameters.java @@ -0,0 +1,59 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +/** + * Service that holds access to Job input parameters. + */ +@Service +@StepScope +public class OutputParameters { + + private static final String PARAMETER = "#{jobParameters['"; + private static final String END = "']}"; + + @Value(PARAMETER + JobParametersNames.OUTPUT_DIR + END) + private String outputDir; + + @Value(PARAMETER + JobParametersNames.OUTPUT_DIR_ANNOTATION + END) + private String outputDirAnnotation; + + @Value(PARAMETER + JobParametersNames.OUTPUT_DIR_STATISTICS + END) + private String outputDirStatistics; + + @Value(PARAMETER + JobParametersNames.STATISTICS_OVERWRITE + "']?:false}") + private boolean statisticsOverwrite; + + public String getOutputDir() { + return outputDir; + } + + public String getOutputDirAnnotation() { + return outputDirAnnotation; + } + + public String getOutputDirStatistics() { + return outputDirStatistics; + } + + public boolean getStatisticsOverwrite() { + return statisticsOverwrite; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ParametersFromProperties.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ParametersFromProperties.java deleted file mode 100644 index 9f8098eb4..000000000 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/ParametersFromProperties.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2015-2017 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package uk.ac.ebi.eva.pipeline.parameters; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Service; - -import java.lang.reflect.Field; -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * This class resolves all parameters to be used in the jobs from the application context - * (They can be loaded from properties, environmental values...) and can create a - * @see org.springframework.batch.core.JobParameters - */ -@Service -public class ParametersFromProperties { - - private static final Logger logger = LoggerFactory.getLogger(ParametersFromProperties.class); - private static final String PROPERTY = "${"; - private static final String OR_NULL = ":#{null}}"; - private static final String PROPERTY_ID_REGEX = "(?<=\\$\\{).*(?=:#\\{null})"; - - @Value(PROPERTY + JobParametersNames.DB_NAME + OR_NULL) - private String databaseName; - - @Value(PROPERTY + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME + OR_NULL) - private String collectionVariantsName; - - @Value(PROPERTY + JobParametersNames.INPUT_STUDY_ID + OR_NULL) - private String studyId; - - @Value(PROPERTY + JobParametersNames.INPUT_VCF_ID + OR_NULL) - private String vcfId; - - @Value(PROPERTY + JobParametersNames.INPUT_VCF + OR_NULL) - private String vcf; - - @Value(PROPERTY + JobParametersNames.INPUT_VCF_AGGREGATION + OR_NULL) - private String vcfAggregation; - - public Properties getProperties() { - Properties properties = new 
Properties(); - - for (Field field : getClass().getDeclaredFields()) { - try { - if (field.isAnnotationPresent(Value.class)) { - Value value = field.getAnnotation(Value.class); - String fieldName = value.value(); - Matcher matcher = Pattern.compile(PROPERTY_ID_REGEX).matcher(fieldName); - matcher.find(); - String propertyName = matcher.group(0); - if (field.get(this) != null) { - properties.put(propertyName, field.get(this)); - } - } - } catch (IllegalAccessException e) { - logger.debug("Value retrieval error", e); - } - } - return properties; - } - -} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ConfigRestartabilityAllowValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ConfigRestartabilityAllowValidator.java index ea2fff346..bd2b29720 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ConfigRestartabilityAllowValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ConfigRestartabilityAllowValidator.java @@ -33,8 +33,8 @@ public class ConfigRestartabilityAllowValidator implements JobParametersValidato public void validate(JobParameters parameters) throws JobParametersInvalidException { String configRestartabilityAllowValue = parameters.getString(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); - ParametersValidatorUtil.checkIsNotNullOrEmptyString(configRestartabilityAllowValue, - JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); + ParametersValidatorUtil.checkIsValidString(configRestartabilityAllowValue, + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); ParametersValidatorUtil.checkIsBoolean(configRestartabilityAllowValue, JobParametersNames.CONFIG_RESTARTABILITY_ALLOW); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidator.java new file mode 100644 index 000000000..eda246e31 --- /dev/null +++ 
b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidator.java @@ -0,0 +1,37 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +/** + * Checks that the name of the features collection has been filled in. 
+ * + * @throws JobParametersInvalidException If the features collection name is null or empty + */ +public class DbCollectionsFeaturesNameValidator implements JobParametersValidator { + + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + ParametersValidatorUtil + .checkIsValidString(parameters.getString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME), + JobParametersNames.DB_COLLECTIONS_FEATURES_NAME); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFilesNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFilesNameValidator.java index 41c6ecc36..d45ecbb0e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFilesNameValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFilesNameValidator.java @@ -30,7 +30,7 @@ public class DbCollectionsFilesNameValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString( + ParametersValidatorUtil.checkIsValidString( parameters.getString(JobParametersNames.DB_COLLECTIONS_FILES_NAME), JobParametersNames.DB_COLLECTIONS_FILES_NAME); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsVariantsNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsVariantsNameValidator.java index 1aafb6cf7..441e1fb61 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsVariantsNameValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsVariantsNameValidator.java @@ -31,7 +31,7 @@ public class DbCollectionsVariantsNameValidator implements JobParametersValidato @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { ParametersValidatorUtil 
- .checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME), - JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME); + .checkIsValidString(parameters.getString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME), + JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbNameValidator.java index a50bd0732..925a1efaa 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbNameValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbNameValidator.java @@ -30,7 +30,7 @@ public class DbNameValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString( + ParametersValidatorUtil.checkIsValidString( parameters.getString(JobParametersNames.DB_NAME), JobParametersNames.DB_NAME); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidator.java new file mode 100644 index 000000000..1326935d6 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidator.java @@ -0,0 +1,37 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +/** + * Checks that the gtf input file exist and is readable + * + * @throws JobParametersInvalidException If the file is not a valid path, does not exist or is not readable + */ +public class InputGtfValidator implements JobParametersValidator { + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + ParametersValidatorUtil.checkFileExists(parameters.getString(JobParametersNames.INPUT_GTF), + JobParametersNames.INPUT_GTF); + ParametersValidatorUtil.checkFileIsReadable(parameters.getString(JobParametersNames.INPUT_GTF), + JobParametersNames.INPUT_GTF); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyIdValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyIdValidator.java index 741d9b91a..58a34c0c6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyIdValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyIdValidator.java @@ -30,7 +30,7 @@ public class InputStudyIdValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.INPUT_STUDY_ID), - JobParametersNames.INPUT_STUDY_ID); + ParametersValidatorUtil.checkIsValidString(parameters.getString(JobParametersNames.INPUT_STUDY_ID), + JobParametersNames.INPUT_STUDY_ID); } } diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyNameValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyNameValidator.java index 3e4e140b1..3047ef4f4 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyNameValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyNameValidator.java @@ -29,7 +29,7 @@ public class InputStudyNameValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.INPUT_STUDY_NAME), - JobParametersNames.INPUT_STUDY_NAME); + ParametersValidatorUtil.checkIsValidString(parameters.getString(JobParametersNames.INPUT_STUDY_NAME), + JobParametersNames.INPUT_STUDY_NAME); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyTypeValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyTypeValidator.java index c26dd1602..a5302ad9b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyTypeValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputStudyTypeValidator.java @@ -30,8 +30,8 @@ public class InputStudyTypeValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.INPUT_STUDY_TYPE), - JobParametersNames.INPUT_STUDY_TYPE); + ParametersValidatorUtil.checkIsValidString(parameters.getString(JobParametersNames.INPUT_STUDY_TYPE), + JobParametersNames.INPUT_STUDY_TYPE); try { VariantStudy.StudyType.valueOf(parameters.getString(JobParametersNames.INPUT_STUDY_TYPE)); diff --git 
a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidator.java new file mode 100644 index 000000000..73e644916 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidator.java @@ -0,0 +1,38 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +/** + * Checks that the aggregated mapping file exists and is readable. 
+ * + * @throws JobParametersInvalidException If the file is not a valid path, does not exist or is not readable + */ +public class InputVcfAggregationMappingPathValidator implements JobParametersValidator { + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + ParametersValidatorUtil.checkFileExists(parameters.getString( + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH), + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH); + ParametersValidatorUtil.checkFileIsReadable(parameters.getString( + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH), + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH); + } +} \ No newline at end of file diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationValidator.java index 569427437..a341ea36e 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationValidator.java @@ -26,7 +26,7 @@ public class InputVcfAggregationValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString( + ParametersValidatorUtil.checkIsValidString( parameters.getString(JobParametersNames.INPUT_VCF_AGGREGATION), JobParametersNames.INPUT_VCF_AGGREGATION); try { diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfIdValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfIdValidator.java index 5bb43130f..8b752d38f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfIdValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfIdValidator.java @@ -29,7 +29,7 @@ public class InputVcfIdValidator 
implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.INPUT_VCF_ID), - JobParametersNames.INPUT_VCF_ID); + ParametersValidatorUtil.checkIsValidString(parameters.getString(JobParametersNames.INPUT_VCF_ID), + JobParametersNames.INPUT_VCF_ID); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtil.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtil.java index 9c937861f..41975486b 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtil.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtil.java @@ -15,22 +15,54 @@ */ package uk.ac.ebi.eva.pipeline.parameters.validation; -import com.google.common.base.Strings; import org.springframework.batch.core.JobParametersInvalidException; import java.nio.file.Files; import java.nio.file.InvalidPathException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.regex.Pattern; /** * Utility class to hold the low level checks on strings, dirs, files... 
parameters */ public class ParametersValidatorUtil { - static void checkIsNotNullOrEmptyString(String stringToValidate, - String jobParametersName) throws JobParametersInvalidException { - if (Strings.isNullOrEmpty(stringToValidate) || stringToValidate.trim().length() == 0) { + static void checkIsValidString(String stringToValidate, + String jobParametersName) throws JobParametersInvalidException { + checkIsNotNullString(stringToValidate, jobParametersName); + checkDoesNotContainPrintableCharacters(stringToValidate, jobParametersName); + checkLength(stringToValidate, jobParametersName); + } + + /** + * \n or \r are valid non-printable characters + */ + static void checkDoesNotContainPrintableCharacters(String stringToValidate, + String jobParametersName) throws JobParametersInvalidException { + Pattern regex = Pattern.compile("[\\p{C}&&[^\n]&&[^\r]]"); + + if (regex.matcher(stringToValidate).find()) { + throw new JobParametersInvalidException( + String.format("%s in %s contains non printable characters", stringToValidate, jobParametersName)); + } + } + + static void checkIsNotNullString(String stringToValidate, + String jobParametersName) throws JobParametersInvalidException { + if (stringToValidate == null) { + throw new JobParametersInvalidException( + String.format("%s value is null", jobParametersName)); + } + } + + static void checkLength(String stringToValidate, String jobParametersName) throws JobParametersInvalidException { + if (stringToValidate.length() >= 250) { + throw new JobParametersInvalidException( + String.format("%s in %s can't exceed 250 characters", stringToValidate, jobParametersName)); + } + + if (stringToValidate.trim().length() == 0) { throw new JobParametersInvalidException( String.format("%s in %s must be specified", stringToValidate, jobParametersName)); } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/StatisticsOverwriteValidator.java 
b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/StatisticsOverwriteValidator.java index 6f6e2061f..ca28dddc6 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/StatisticsOverwriteValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/StatisticsOverwriteValidator.java @@ -33,7 +33,7 @@ public class StatisticsOverwriteValidator implements JobParametersValidator { public void validate(JobParameters parameters) throws JobParametersInvalidException { String statisticsOverwriteValue = parameters.getString(JobParametersNames.STATISTICS_OVERWRITE); - ParametersValidatorUtil.checkIsNotNullOrEmptyString( + ParametersValidatorUtil.checkIsValidString( statisticsOverwriteValue, JobParametersNames.STATISTICS_OVERWRITE); ParametersValidatorUtil.checkIsBoolean( statisticsOverwriteValue,JobParametersNames.STATISTICS_OVERWRITE); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheSpeciesValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheSpeciesValidator.java index 9c3ea85fd..65bba08cc 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheSpeciesValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheSpeciesValidator.java @@ -31,7 +31,7 @@ public class VepCacheSpeciesValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { ParametersValidatorUtil - .checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.APP_VEP_CACHE_SPECIES), - JobParametersNames.APP_VEP_CACHE_SPECIES); + .checkIsValidString(parameters.getString(JobParametersNames.APP_VEP_CACHE_SPECIES), + JobParametersNames.APP_VEP_CACHE_SPECIES); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheVersionValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheVersionValidator.java index 
6ea571099..5c4a590ca 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheVersionValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/VepCacheVersionValidator.java @@ -30,7 +30,7 @@ public class VepCacheVersionValidator implements JobParametersValidator { @Override public void validate(JobParameters parameters) throws JobParametersInvalidException { ParametersValidatorUtil - .checkIsNotNullOrEmptyString(parameters.getString(JobParametersNames.APP_VEP_CACHE_VERSION), - JobParametersNames.APP_VEP_CACHE_VERSION); + .checkIsValidString(parameters.getString(JobParametersNames.APP_VEP_CACHE_VERSION), + JobParametersNames.APP_VEP_CACHE_VERSION); } } diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java index 8c430bed8..a11fb9dd2 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidator.java @@ -22,12 +22,7 @@ import org.springframework.batch.core.job.DefaultJobParametersValidator; import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; -import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.OptionalValidator; -import uk.ac.ebi.eva.pipeline.parameters.validation.OutputDirAnnotationValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.*; import java.util.Arrays; import java.util.List; @@ -40,8 +35,10 @@ public class 
AnnotationLoaderStepParametersValidator extends DefaultJobParameter public AnnotationLoaderStepParametersValidator() { super(new String[]{JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, JobParametersNames.DB_NAME, - JobParametersNames.OUTPUT_DIR_ANNOTATION}, - new String[]{}); + JobParametersNames.OUTPUT_DIR_ANNOTATION, + JobParametersNames.INPUT_STUDY_ID, + JobParametersNames.INPUT_VCF_ID}, + new String[]{}); } @Override @@ -55,6 +52,8 @@ private CompositeJobParametersValidator compositeJobParametersValidator() { new DbCollectionsVariantsNameValidator(), new DbNameValidator(), new OutputDirAnnotationValidator(), + new InputStudyIdValidator(), + new InputVcfIdValidator(), new OptionalValidator(new ConfigRestartabilityAllowValidator(), JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java new file mode 100644 index 000000000..b52a5ffdb --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidator.java @@ -0,0 +1,68 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters.validation.step; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; +import org.springframework.batch.core.job.CompositeJobParametersValidator; +import org.springframework.batch.core.job.DefaultJobParametersValidator; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigChunkSizeValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFeaturesNameValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.InputGtfValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.OptionalValidator; + +import java.util.Arrays; +import java.util.List; + +/** + * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep} + */ +public class GeneLoaderStepParametersValidator extends DefaultJobParametersValidator { + + public GeneLoaderStepParametersValidator() { + super(new String[]{JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, + JobParametersNames.DB_NAME, + JobParametersNames.INPUT_GTF}, + new String[]{}); + } + + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + super.validate(parameters); + compositeJobParametersValidator().validate(parameters); + } + + private CompositeJobParametersValidator compositeJobParametersValidator() { + final List jobParametersValidators = Arrays.asList( + new DbCollectionsFeaturesNameValidator(), + new DbNameValidator(), + new InputGtfValidator(), + new OptionalValidator(new ConfigRestartabilityAllowValidator(), + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW), + new OptionalValidator(new 
ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE) + ); + + CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); + compositeJobParametersValidator.setValidators(jobParametersValidators); + return compositeJobParametersValidator; + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java new file mode 100644 index 000000000..67083c488 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidator.java @@ -0,0 +1,63 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters.validation.step; + +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.JobParametersValidator; +import org.springframework.batch.core.job.CompositeJobParametersValidator; +import org.springframework.batch.core.job.DefaultJobParametersValidator; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.validation.ConfigRestartabilityAllowValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsFeaturesNameValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.OptionalValidator; + +import java.util.Arrays; +import java.util.List; + +/** + * Validates the job parameters necessary to execute an {@link uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep} + */ +public class IndexesGeneratorStepParametersValidator extends DefaultJobParametersValidator { + + public IndexesGeneratorStepParametersValidator() { + super(new String[]{JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, + JobParametersNames.DB_NAME}, + new String[]{}); + } + + @Override + public void validate(JobParameters parameters) throws JobParametersInvalidException { + super.validate(parameters); + compositeJobParametersValidator().validate(parameters); + } + + private CompositeJobParametersValidator compositeJobParametersValidator() { + final List jobParametersValidators = Arrays.asList( + new DbCollectionsFeaturesNameValidator(), + new DbNameValidator(), + new OptionalValidator(new ConfigRestartabilityAllowValidator(), + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW) + ); + + CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); + compositeJobParametersValidator.setValidators(jobParametersValidators); + return compositeJobParametersValidator; + } + +} diff 
--git a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java index 647dca31f..684a5132f 100644 --- a/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java +++ b/src/main/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidator.java @@ -27,6 +27,7 @@ import uk.ac.ebi.eva.pipeline.parameters.validation.DbCollectionsVariantsNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.DbNameValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputStudyIdValidator; +import uk.ac.ebi.eva.pipeline.parameters.validation.InputVcfAggregationMappingPathValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputVcfAggregationValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputVcfIdValidator; import uk.ac.ebi.eva.pipeline.parameters.validation.InputVcfValidator; @@ -66,8 +67,11 @@ private CompositeJobParametersValidator compositeJobParametersValidator() { new InputVcfIdValidator(), new InputVcfValidator(), new InputVcfAggregationValidator(), + new OptionalValidator(new InputVcfAggregationMappingPathValidator(), + JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH), new OptionalValidator(new ConfigChunkSizeValidator(), JobParametersNames.CONFIG_CHUNK_SIZE), - new OptionalValidator(new ConfigRestartabilityAllowValidator(), JobParametersNames.CONFIG_RESTARTABILITY_ALLOW) + new OptionalValidator(new ConfigRestartabilityAllowValidator(), + JobParametersNames.CONFIG_RESTARTABILITY_ALLOW) ); CompositeJobParametersValidator compositeJobParametersValidator = new CompositeJobParametersValidator(); diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java new file mode 
100644 index 000000000..bfa86e994 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunner.java @@ -0,0 +1,293 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecutionException; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.configuration.JobRegistry; +import org.springframework.batch.core.converter.DefaultJobParametersConverter; +import org.springframework.batch.core.converter.JobParametersConverter; +import org.springframework.batch.core.explore.JobExplorer; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.batch.core.launch.JobParametersNotFoundException; +import org.springframework.batch.core.launch.NoSuchJobException; +import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; +import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.repository.JobRestartException; +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.ExitCodeGenerator; +import org.springframework.boot.autoconfigure.batch.JobLauncherCommandLineRunner; +import org.springframework.context.ApplicationEventPublisherAware; +import org.springframework.stereotype.Component; +import org.springframework.util.PatternMatchUtils; +import org.springframework.util.StringUtils; +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.runner.exceptions.NoJobToExecuteException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.NoParametersHaveBeenPassedException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.NoPreviousJobExecutionException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.NotValidParameterFormatException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.UnexpectedErrorReadingFileException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.UnexpectedFileEncodingException; +import uk.ac.ebi.eva.pipeline.runner.exceptions.UnknownJobException; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Objects; +import java.util.Optional; +import java.util.Properties; + +/** + * This class is a modified version of the default JobLauncherCommandLineRunner. + * Its main differences are: + * -If no job is specified then the execution stops. + * -Job parameters can be passed from command line as normal parameters. + * -Job parameters can be passed from a properties file by the user. + * -The user can restart a job that has been run previously marking the previous execution as failed. 
+ */ +@Component +public class EvaPipelineJobLauncherCommandLineRunner extends JobLauncherCommandLineRunner implements + ApplicationEventPublisherAware, ExitCodeGenerator { + + private static final Logger logger = LoggerFactory.getLogger(EvaPipelineJobLauncherCommandLineRunner.class); + + public static final String SPRING_BATCH_JOB_NAME_PROPERTY = "spring.batch.job.names"; + + public static final int EXIT_WITHOUT_ERRORS = 0; + + public static final int EXIT_WITH_ERRORS = 1; + + @Value("${" + SPRING_BATCH_JOB_NAME_PROPERTY + ":#{null}}") + private String jobName; + + @Value("${" + JobParametersNames.PROPERTY_FILE_PROPERTY + ":#{null}}") + private String propertyFilePath; + + @Value("${" + JobParametersNames.RESTART_PROPERTY + ":false}") + private boolean restartPreviousExecution; + + private Collection jobs; + + private JobRepository jobRepository; + + private JobRegistry jobRegistry; + + private JobParametersConverter converter; + + @Autowired + private JobExecutionApplicationListener jobExecutionApplicationListener; + + private boolean abnormalExit; + + public EvaPipelineJobLauncherCommandLineRunner(JobLauncher jobLauncher, JobExplorer jobExplorer, + JobRepository jobRepository) { + super(jobLauncher, jobExplorer); + jobs = Collections.emptySet(); + this.jobRepository = jobRepository; + abnormalExit = false; + converter = new DefaultJobParametersConverter(); + + } + + @Autowired(required = false) + public void setJobRegistry(JobRegistry jobRegistry) { + this.jobRegistry = jobRegistry; + } + + @Autowired(required = false) + public void setJobParametersConverter(JobParametersConverter converter) { + this.converter = converter; + } + + @Autowired(required = false) + public void setJobs(Collection jobs) { + this.jobs = jobs; + } + + @Override + public void setJobNames(String jobName) { + this.jobName = jobName; + super.setJobNames(jobName); + } + + public void setPropertyFilePath(String propertyFilePath) { + this.propertyFilePath = propertyFilePath; + } + + @Override 
+ public int getExitCode() { + if (!abnormalExit && jobExecutionApplicationListener.isJobExecutionComplete()) { + return EXIT_WITHOUT_ERRORS; + } else { + return EXIT_WITH_ERRORS; + } + } + + @Override + public void run(String... args) throws JobExecutionException { + try { + abnormalExit = false; + + Properties commandLineProperties = getJobParametersFromCommandLine(args) + .orElseThrow(NoParametersHaveBeenPassedException::new); + + Properties fileProperties = getJobParametersFromPropertiesFile(); + configureLauncherPropertiesFromFileProperties(fileProperties); + JobParameters jobParameters = getJobParameters(commandLineProperties, fileProperties); + + checkIfJobNameHasBeenDefined(); + checkIfPropertiesHaveBeenProvided(jobParameters); + if (restartPreviousExecution) { + restartPreviousJobExecution(jobParameters); + } + launchJob(jobParameters); + } catch (NoJobToExecuteException | NoParametersHaveBeenPassedException | UnexpectedFileEncodingException + | FileNotFoundException | UnexpectedErrorReadingFileException | NoPreviousJobExecutionException + | NotValidParameterFormatException | UnknownJobException | JobParametersInvalidException e) { + logger.error(e.getMessage()); + logger.debug("Error trace", e); + abnormalExit = true; + } + } + + private JobParameters getJobParameters(Properties commandLineProperties, Properties fileProperties) { + + // Command line properties have precedence over file defined ones. 
+ Properties properties = new Properties(); + properties.putAll(fileProperties); + properties.putAll(commandLineProperties); + + // Filter all runner specific parameters + properties.remove(SPRING_BATCH_JOB_NAME_PROPERTY); + properties.remove(JobParametersNames.PROPERTY_FILE_PROPERTY); + properties.remove(JobParametersNames.RESTART_PROPERTY); + + return converter.getJobParameters(properties); + + } + + private void configureLauncherPropertiesFromFileProperties(Properties fileProperties) { + if (StringUtils.isEmpty(jobName)) { + jobName = (String) fileProperties.get(SPRING_BATCH_JOB_NAME_PROPERTY); + } else { + if (!Objects.equals(jobName, fileProperties.get(SPRING_BATCH_JOB_NAME_PROPERTY))) { + logger.info("You have passed a job name in your parameter file and in the command line, '" + jobName + + "' will be executed."); + } + } + } + + private Optional getJobParametersFromCommandLine(String[] args) + throws NotValidParameterFormatException { + checkAllParametersStartByDoubleDash(args); + String[] processedArgs = removeStartingHypens(args); + return Optional.ofNullable(StringUtils.splitArrayElementsIntoProperties(processedArgs, "=")); + } + + private void launchJob(JobParameters jobParameters) throws JobExecutionException, UnknownJobException { + for (Job job : this.jobs) { + if (PatternMatchUtils.simpleMatch(jobName, job.getName())) { + execute(job, jobParameters); + return; + } + } + + if (this.jobRegistry != null) { + try { + execute(jobRegistry.getJob(jobName), jobParameters); + } catch (NoSuchJobException ex) { + logger.error("No job found in registry for job name: " + jobName); + } + } + + throw new UnknownJobException(jobName); + } + + @Override + protected void execute(Job job, JobParameters jobParameters) throws JobExecutionAlreadyRunningException, + JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException, + JobParametersNotFoundException { + logger.info("Running job '" + jobName + "' with parameters: " + jobParameters); + 
super.execute(job, jobParameters); + } + + private void restartPreviousJobExecution(JobParameters jobParameters) throws + NoPreviousJobExecutionException { + logger.info("Force restartPreviousExecution of job '" + jobName + "' with parameters: " + jobParameters); + ManageJobsUtils.markLastJobAsFailed(jobRepository, jobName, jobParameters); + } + + private void checkAllParametersStartByDoubleDash(String[] args) throws NotValidParameterFormatException { + for (String arg : args) { + if (!arg.startsWith("--")) { + throw new NotValidParameterFormatException(arg); + } + } + } + + private String[] removeStartingHypens(String[] args) { + return Arrays.stream(args).map(arg -> arg.substring(2)).toArray(String[]::new); + } + + private Properties getJobParametersFromPropertiesFile() throws FileNotFoundException, + UnexpectedErrorReadingFileException, UnexpectedFileEncodingException { + Properties propertiesFile = new Properties(); + if (propertyFilePath == null) { + return propertiesFile; + } + try { + propertiesFile.putAll(readPropertiesFromFile(propertyFilePath)); + return propertiesFile; + } catch (FileNotFoundException e) { + throw e; + } catch (IOException e) { + throw new UnexpectedErrorReadingFileException(propertyFilePath, e); + } catch (IllegalArgumentException e) { + throw new UnexpectedFileEncodingException(propertyFilePath, e); + } + } + + private void checkIfPropertiesHaveBeenProvided(JobParameters jobParameters) + throws NoParametersHaveBeenPassedException { + if (jobParameters == null || jobParameters.isEmpty()) { + throw new NoParametersHaveBeenPassedException(); + } + } + + private void checkIfJobNameHasBeenDefined() throws NoJobToExecuteException { + if (!StringUtils.hasText(jobName)) { + throw new NoJobToExecuteException(); + } + } + + private Properties readPropertiesFromFile(String propertyFilePath) throws IOException, IllegalArgumentException { + InputStream input = new FileInputStream(propertyFilePath); + Properties properties = new Properties(); + 
properties.load(input); + return properties; + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/JobExecutionApplicationListener.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/JobExecutionApplicationListener.java new file mode 100644 index 000000000..f42d2d690 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/JobExecutionApplicationListener.java @@ -0,0 +1,41 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.runner; + +import org.springframework.batch.core.BatchStatus; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.JobExecution; +import org.springframework.boot.autoconfigure.batch.JobExecutionEvent; +import org.springframework.context.ApplicationListener; + +/** + * This class stores the last job execution state + */ +public class JobExecutionApplicationListener implements ApplicationListener { + + private JobExecution execution; + + @Override + public void onApplicationEvent(JobExecutionEvent event) { + execution = event.getJobExecution(); + } + + public boolean isJobExecutionComplete() { + return execution != null + && execution.getStatus().equals(BatchStatus.COMPLETED) + && execution.getExitStatus().compareTo(ExitStatus.COMPLETED) == 0; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/ManageJobsUtils.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/ManageJobsUtils.java new file mode 100644 index 000000000..dc5714fd4 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/ManageJobsUtils.java @@ -0,0 +1,56 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.runner; + +import org.springframework.batch.core.BatchStatus; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.core.repository.JobRepository; +import uk.ac.ebi.eva.pipeline.runner.exceptions.NoPreviousJobExecutionException; + +import java.util.Date; + +/** + * Utility class to change job / step status + */ +public class ManageJobsUtils { + + public static void markLastJobAsFailed(JobRepository jobRepository, String jobName, JobParameters + jobParameters) throws NoPreviousJobExecutionException { + JobExecution lastJobExecution = jobRepository.getLastJobExecution(jobName, jobParameters); + if (lastJobExecution == null) { + throw new NoPreviousJobExecutionException(jobName, jobParameters); + } + + Date currentTime = new Date(); + lastJobExecution.setEndTime(currentTime); + lastJobExecution.setStatus(BatchStatus.FAILED); + lastJobExecution.setExitStatus( + lastJobExecution.getExitStatus().replaceExitCode("FAILED").addExitDescription("Manually " + + "failed job") + ); + jobRepository.update(lastJobExecution); + + for (StepExecution stepExecution : lastJobExecution.getStepExecutions()) { + stepExecution.setEndTime(currentTime); + stepExecution.setStatus(BatchStatus.FAILED); + stepExecution.setExitStatus(lastJobExecution.getExitStatus().replaceExitCode("FAILED")); + jobRepository.update(stepExecution); + } + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoJobToExecuteException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoJobToExecuteException.java new file mode 100644 index 000000000..6b6e9ade9 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoJobToExecuteException.java @@ -0,0 +1,26 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +/** + * Runner throws this exception when no job argument has been provided. + */ +public class NoJobToExecuteException extends Exception { + + public NoJobToExecuteException() { + super("No job name has been provided."); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoParametersHaveBeenPassedException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoParametersHaveBeenPassedException.java new file mode 100644 index 000000000..82f2bf644 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoParametersHaveBeenPassedException.java @@ -0,0 +1,30 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +import static uk.ac.ebi.eva.pipeline.parameters.JobParametersNames.PROPERTY_FILE_PROPERTY; + +/** + * Exception thrown by the runner when no parameters have been passed to a job. + */ +public class NoParametersHaveBeenPassedException extends Exception { + + public NoParametersHaveBeenPassedException() { + super("No job parameters have been provided. Please list them as command line arguments, or in a file using " + + "the argument --" + PROPERTY_FILE_PROPERTY); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoPreviousJobExecutionException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoPreviousJobExecutionException.java new file mode 100644 index 000000000..916651b1a --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NoPreviousJobExecutionException.java @@ -0,0 +1,29 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +import org.springframework.batch.core.JobParameters; + +/** + * Exception thrown by the runner when trying to restart a job and no previous run is found. 
+ */ +public class NoPreviousJobExecutionException extends Exception { + + public NoPreviousJobExecutionException(String jobName, JobParameters jobParameters) { + super("No previous execution from job '" + jobName + "' with parameters '" + jobParameters + "'"); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NotValidParameterFormatException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NotValidParameterFormatException.java new file mode 100644 index 000000000..804fff066 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/NotValidParameterFormatException.java @@ -0,0 +1,26 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +/** + * Exception thrown where a string value argument is not recognized or valid in a function. 
+ */ +public class NotValidParameterFormatException extends Exception { + + public NotValidParameterFormatException(String arg) { + super("Parameters '" + arg + "' not valid"); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedErrorReadingFileException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedErrorReadingFileException.java new file mode 100644 index 000000000..c34b596d0 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedErrorReadingFileException.java @@ -0,0 +1,29 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +import java.io.IOException; + +/** + * Unexpected error while reading or processing a file + */ +public class UnexpectedErrorReadingFileException extends Exception { + + public UnexpectedErrorReadingFileException(String filePath, IOException e) { + super("Unexpected error reading file '" + filePath + "'", e); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedFileEncodingException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedFileEncodingException.java new file mode 100644 index 000000000..7b0078000 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnexpectedFileEncodingException.java @@ -0,0 +1,26 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +/** + * Exception used to express a codification problem while interpreting a file + */ +public class UnexpectedFileEncodingException extends Exception { + + public UnexpectedFileEncodingException(String filePath, IllegalArgumentException e) { + super("Unexpected encoding in file '" + filePath + "'", e); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnknownJobException.java b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnknownJobException.java new file mode 100644 index 000000000..bea689470 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/pipeline/runner/exceptions/UnknownJobException.java @@ -0,0 +1,27 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner.exceptions; + +/** + * Exception thrown when a job cannot be found to run it. 
+ */ +public class UnknownJobException extends Exception { + + public UnknownJobException(String jobName) { + super("Unknown job name '" + jobName + "'"); + } + +} diff --git a/src/main/java/uk/ac/ebi/eva/utils/FileUtils.java b/src/main/java/uk/ac/ebi/eva/utils/FileUtils.java index 5c69754e7..c5b315b28 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/FileUtils.java +++ b/src/main/java/uk/ac/ebi/eva/utils/FileUtils.java @@ -1,19 +1,38 @@ +/* + * Copyright 2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package uk.ac.ebi.eva.utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.Resource; - -import org.springframework.util.FileCopyUtils; import uk.ac.ebi.eva.pipeline.io.GzipLazyResource; -import java.io.*; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; -import java.util.zip.GZIPOutputStream; +import java.util.Properties; public abstract class FileUtils { + private static final Logger logger = LoggerFactory.getLogger(FileUtils.class); + public static void validateDirectoryPath(String path, boolean emptyIsValid) throws FileNotFoundException { if (emptyIsValid && (path == null || path.isEmpty())) { return; @@ -41,4 +60,18 @@ public static Resource getResource(File file) throws IOException { } return resource; } + + public static File getResource(String resourcePath) { + return new File(FileUtils.class.getResource(resourcePath).getFile()); + } + + public static InputStream getResourceAsStream(String resourcePath) { + return FileUtils.class.getResourceAsStream(resourcePath); + } + + public static Properties getPropertiesFile(InputStream propertiesInputStream) throws IOException { + Properties properties = new Properties(); + properties.load(propertiesInputStream); + return properties; + } } diff --git a/src/main/java/uk/ac/ebi/eva/utils/MongoConnection.java b/src/main/java/uk/ac/ebi/eva/utils/MongoConnection.java deleted file mode 100644 index 7408d3fd2..000000000 --- a/src/main/java/uk/ac/ebi/eva/utils/MongoConnection.java +++ /dev/null @@ -1,49 +0,0 @@ -package uk.ac.ebi.eva.utils; - -import com.mongodb.ReadPreference; - -public class MongoConnection { - - private final String hosts; - - private final String authenticationDatabase; - - private final String user; - - private final String password; - - private final 
ReadPreference readPreference; - - public MongoConnection(String hosts, String authenticationDatabase, String user, String password, - String readPreference) { - this.hosts = hosts; - this.authenticationDatabase = authenticationDatabase; - this.user = user; - this.password = password; - this.readPreference = ReadPreference.valueOf(readPreference); - } - - public String getHosts() { - return hosts; - } - - public String getAuthenticationDatabase() { - return authenticationDatabase; - } - - public String getUser() { - return user; - } - - public String getPassword() { - return password; - } - - public ReadPreference getReadPreference() { - return readPreference; - } - - public String getReadPreferenceName() { - return readPreference.getName(); - } -} diff --git a/src/main/java/uk/ac/ebi/eva/utils/TaskletUtils.java b/src/main/java/uk/ac/ebi/eva/utils/TaskletUtils.java index 5e1844fcd..835e83206 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/TaskletUtils.java +++ b/src/main/java/uk/ac/ebi/eva/utils/TaskletUtils.java @@ -5,19 +5,15 @@ import org.springframework.batch.core.step.builder.TaskletStepBuilder; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.core.step.tasklet.TaskletStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; public class TaskletUtils { public static TaskletStep generateStep(StepBuilderFactory stepBuilderFactory, String stepName, Tasklet tasklet, - JobOptions jobOptions) { + boolean allowStartIfComplete) { StepBuilder step1 = stepBuilderFactory.get(stepName); final TaskletStepBuilder taskletBuilder = step1.tasklet(tasklet); // true: every job execution will do this step, even if this step is already COMPLETED // false(default): if the job was aborted and is relaunched, this step will NOT be done again - boolean allowStartIfComplete = jobOptions.getPipelineOptions().getBoolean(JobParametersNames - .CONFIG_RESTARTABILITY_ALLOW); 
taskletBuilder.allowStartIfComplete(allowStartIfComplete); return taskletBuilder.build(); } diff --git a/src/main/java/uk/ac/ebi/eva/utils/URLHelper.java b/src/main/java/uk/ac/ebi/eva/utils/URLHelper.java index f206e1083..015f5454c 100644 --- a/src/main/java/uk/ac/ebi/eva/utils/URLHelper.java +++ b/src/main/java/uk/ac/ebi/eva/utils/URLHelper.java @@ -21,6 +21,14 @@ public class URLHelper { + private static final String VARIANT_STATS_SUFFIX = ".variants.stats.json.gz"; + + private static final String SOURCE_STATS_SUFFIX = ".source.stats.json.gz"; + + public static final String VARIANTS_TO_ANNOTATE_SUFFIX = "_variants_to_annotate.tsv"; + + public static final String ANNOTATED_VARIANTS_SUFFIX = "_vep_annotation.tsv.gz"; + public static URI createUri(String input) throws URISyntaxException { URI sourceUri = new URI(input); if (sourceUri.getScheme() == null || sourceUri.getScheme().isEmpty()) { @@ -29,4 +37,26 @@ public static URI createUri(String input) throws URISyntaxException { return sourceUri; } + public static URI getVariantsStatsUri(String outputDirStatistics, String studyId, String fileId) throws URISyntaxException { + return URLHelper.createUri( + getStatsBaseUri(outputDirStatistics, studyId, fileId).getPath() + VARIANT_STATS_SUFFIX); + } + + public static URI getSourceStatsUri(String outputDirStatistics, String studyId, String fileId) throws URISyntaxException { + return URLHelper.createUri( + getStatsBaseUri(outputDirStatistics, studyId, fileId).getPath() + SOURCE_STATS_SUFFIX); + } + + public static URI getStatsBaseUri(String outputDirStatistics, String studyId, String fileId) throws URISyntaxException { + URI outdirUri = URLHelper.createUri(outputDirStatistics); + return outdirUri.resolve(MongoDBHelper.buildStorageFileId(studyId, fileId)); + } + + public static String resolveVepInput(String outputDirAnnotation, String studyId, String vcfId){ + return outputDirAnnotation + "/" + studyId + "_" + vcfId + VARIANTS_TO_ANNOTATE_SUFFIX; + } + + public static 
String resolveVepOutput(String outputDirAnnotation, String studyId, String vcfId){ + return outputDirAnnotation + "/" + studyId + "_" + vcfId + ANNOTATED_VARIANTS_SUFFIX; + } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 08206d886..cb46494cc 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -3,32 +3,9 @@ ## "test" to use an in-memory database that will record a single run spring.profiles.active=production,mongo -# SUBMISSION FIELDS -input.vcf= -input.vcf.id= -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name= -input.study.id= -input.pedigree= -input.gtf= -input.fasta= - -output.dir= -output.dir.annotation= -output.dir.statistics= - - # EXTERNAL APPLICATIONS app.opencga.path= -app.vep.path= -app.vep.num-forks=4 -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= - - # STEPS MANAGEMENT ## Job repository database job.repository.driverClassName= @@ -40,13 +17,14 @@ job.repository.password= ## true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning ## false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again config.restartability.allow=false -config.chunk.size= # MONGO DATABASE +spring.data.mongodb.host= +spring.data.mongodb.port= +spring.data.mongodb.authentication-database= +spring.data.mongodb.username= +spring.data.mongodb.password= config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics - # LOGGING # https://docs.spring.io/spring-boot/docs/current/reference/html/boot-features-logging.html diff --git a/src/main/resources/mappings/evs-mapping.properties b/src/main/resources/mappings/evs-mapping.properties new file mode 100644 index 000000000..28692cd84 --- /dev/null +++ b/src/main/resources/mappings/evs-mapping.properties @@ -0,0 +1,15 @@ +#EA.AF=EA_AF +EA.AC=EA_AC +#EA.AN=EA_AN +EA.GTC=EA_GTC + +#AA.AF=AA_AF +AA.AC=AA_AC +#AA.AN=AA_AN +AA.GTC=AA_GTC + +#ALL.AF=AF +ALL.AC=TAC +#ALL.AN=AN +ALL.GTC=GTC +GROUPS_ORDER=EA,AA,ALL \ No newline at end of file diff --git a/src/main/resources/mappings/exac-mapping.properties b/src/main/resources/mappings/exac-mapping.properties new file mode 100644 index 000000000..b137b9dc8 --- /dev/null +++ b/src/main/resources/mappings/exac-mapping.properties @@ -0,0 +1,32 @@ +AFR.AC = AC_AFR +AFR.AN = AN_AFR +AFR.HET=Het_AFR +AFR.HOM=Hom_AFR +AMR.AC = AC_AMR +AMR.AN = AN_AMR +AMR.HET=Het_AMR +AMR.HOM=Hom_AMR +EAS.AC = AC_EAS +EAS.AN = AN_EAS +EAS.HET=Het_EAS +EAS.HOM=Hom_EAS +FIN.AC = AC_FIN +FIN.AN = AN_FIN +FIN.HET=Het_FIN +FIN.HOM=Hom_FIN +NFE.AC = AC_NFE +NFE.AN = AN_NFE +NFE.HET=Het_NFE +NFE.HOM=Hom_NFE +OTH.AC = AC_OTH +OTH.AN = AN_OTH +OTH.HET=Het_OTH +OTH.HOM=Hom_OTH +SAS.AC = AC_SAS +SAS.AN = AN_SAS +SAS.HET=Het_SAS +SAS.HOM=Hom_SAS +ALL.AC =AC_Adj +ALL.AN =AN_Adj +ALL.HET=AC_Het +ALL.HOM=AC_Hom \ No newline at end of file diff --git 
a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java index b84608ad0..4a060798f 100644 --- a/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/commons/models/converters/data/VariantToDBObjectConverterTest.java @@ -44,7 +44,7 @@ public class VariantToDBObjectConverterTest { private Variant variant; - protected VariantSourceEntry variantSourceEntry; + private VariantSourceEntry variantSourceEntry; @Before public void setUp() { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/ApplicationTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/ApplicationTest.java deleted file mode 100644 index afa91650a..000000000 --- a/src/test/java/uk/ac/ebi/eva/pipeline/ApplicationTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package uk.ac.ebi.eva.pipeline; - -import java.util.List; - -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.batch.core.ExitStatus; -import org.springframework.batch.core.JobExecution; -import org.springframework.batch.core.JobInstance; -import org.springframework.batch.core.explore.JobExplorer; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.ActiveProfiles; -import org.springframework.test.context.junit4.SpringRunner; - -import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; - -/** - * The purpose of this test is to imitate an execution made by an user through the CLI. 
- * This is needed because all the other tests just instantiate what they need (just one step, or just one job) and - * sometimes we have errors due to collisions instantiating several jobs. This test should instantiate everything - * Spring instantiates in a real execution. - */ -@RunWith(SpringRunner.class) -@SpringBootTest -@ActiveProfiles({"integrationTest,test,mongo"}) -public class ApplicationTest { - - @Autowired - JobExplorer jobExplorer; - - @Autowired - JobOptions jobOptions; - - @Rule - public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - - @Test - public void main() throws Exception { - mongoRule.getTemporaryDatabase(jobOptions.getDbName()); - - Assert.assertEquals(1, jobExplorer.getJobNames().size()); - Assert.assertEquals(BeanNames.GENOTYPED_VCF_JOB, jobExplorer.getJobNames().get(0)); - - List jobInstances = jobExplorer.getJobInstances(BeanNames.GENOTYPED_VCF_JOB, 0, 100); - Assert.assertEquals(1, jobInstances.size()); - - JobExecution jobExecution = jobExplorer.getJobExecution(jobInstances.get(0).getInstanceId()); - Assert.assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); - } -} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactoryTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactoryTest.java index fed31c299..76a3652e3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactoryTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantAggregatedVcfFactoryTest.java @@ -17,9 +17,7 @@ import org.junit.Test; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.commons.test.GenericTest; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantStats; @@ -35,7 +33,9 @@ * output: a List of Variants */ public class VariantAggregatedVcfFactoryTest extends GenericTest { - private VariantSource source = new 
VariantSource("filename.vcf", "fileId", "studyId", "studyName"); + + private static final String FILE_ID = "fileId"; + private static final String STUDY_ID = "studyId"; private VariantAggregatedVcfFactory factory = new VariantAggregatedVcfFactory(); @@ -43,13 +43,13 @@ public class VariantAggregatedVcfFactoryTest extends GenericTest { public void parseAC_AN() { String line = "1\t54722\t.\tTTC\tT,TCTC\t999\tPASS\tDP4=3122,3282,891,558;DP=22582;INDEL;IS=3,0.272727;VQSLOD=6.76;AN=3854;AC=889,61;TYPE=del,ins;HWE=0;ICF=-0.155251"; // structure like uk10k - List variants = factory.create(source, line); + List variants = factory.create(FILE_ID, STUDY_ID, line); - VariantStats stats = variants.get(0).getSourceEntry(source.getFileId(), source.getStudyId()).getStats(); + VariantStats stats = variants.get(0).getSourceEntry(FILE_ID, STUDY_ID).getStats(); assertEquals(2904, stats.getRefAlleleCount()); assertEquals(889, stats.getAltAlleleCount()); - stats = variants.get(1).getSourceEntry(source.getFileId(), source.getStudyId()).getStats(); + stats = variants.get(1).getSourceEntry(FILE_ID, STUDY_ID).getStats(); assertEquals(2904, stats.getRefAlleleCount()); assertEquals(61, stats.getAltAlleleCount()); assertEquals(0.015827711, stats.getMaf(), 0.0001); @@ -59,9 +59,9 @@ public void parseAC_AN() { public void parseGTC() { String line = "20\t61098\trs6078030\tC\tT\t51254.56\tPASS\tAC=225;AN=996;GTC=304,163,31"; // structure like gonl - List variants = factory.create(source, line); + List variants = factory.create(FILE_ID, STUDY_ID, line); - VariantStats stats = variants.get(0).getSourceEntry(source.getFileId(), source.getStudyId()).getStats(); + VariantStats stats = variants.get(0).getSourceEntry(FILE_ID, STUDY_ID).getStats(); assertEquals(new Integer(304), stats.getGenotypesCount().get(new Genotype("0/0", "C", "T"))); assertEquals(new Integer(163), stats.getGenotypesCount().get(new Genotype("0/1", "C", "T"))); assertEquals(new Integer(31), stats.getGenotypesCount().get(new 
Genotype("T/T", "C", "T"))); @@ -77,10 +77,9 @@ public void parseCustomGTC() { properties.put("ALL.AC", "AC"); properties.put("ALL.AN", "AN"); properties.put("ALL.AF", "AF"); - List variants = new VariantAggregatedVcfFactory(properties).create( - source, line); + List variants = new VariantAggregatedVcfFactory(properties).create(FILE_ID, STUDY_ID, line); - VariantStats stats = variants.get(0).getSourceEntry(source.getFileId(), source.getStudyId()).getCohortStats( + VariantStats stats = variants.get(0).getSourceEntry(FILE_ID, STUDY_ID).getCohortStats( "ALL"); assertEquals(523, stats.getRefAlleleCount()); assertEquals(3, stats.getAltAlleleCount()); @@ -92,7 +91,7 @@ public void parseCustomGTC() { assertEquals(new Integer(6), stats.getGenotypesCount().get(new Genotype("0/2", "G", "A"))); assertEquals(new Integer(0), stats.getGenotypesCount().get(new Genotype("./.", "G", "A"))); - stats = variants.get(1).getSourceEntry(source.getFileId(), source.getStudyId()).getCohortStats("ALL"); + stats = variants.get(1).getSourceEntry(FILE_ID, STUDY_ID).getCohortStats("ALL"); assertEquals(new Integer(6), stats.getGenotypesCount().get(new Genotype("0/1", "G", "C"))); } @@ -101,9 +100,9 @@ public void parseCustomGTC() { public void parseWithGTS() { String line = "1\t861255\t.\tA\tG\t.\tPASS\tAC=2;AF=0.0285714285714286;AN=70;GTS=GG,GA,AA;GTC=1,0,34"; - List variants = factory.create(source, line); + List variants = factory.create(FILE_ID, STUDY_ID, line); - VariantStats stats = variants.get(0).getSourceEntry(source.getFileId(), source.getStudyId()).getStats(); + VariantStats stats = variants.get(0).getSourceEntry(FILE_ID, STUDY_ID).getStats(); assertEquals(new Integer(34), stats.getGenotypesCount().get(new Genotype("0/0", "A", "G"))); assertEquals(new Integer(0), stats.getGenotypesCount().get(new Genotype("0/1", "A", "G"))); assertEquals(new Integer(1), stats.getGenotypesCount().get(new Genotype("G/G", "A", "G"))); diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactoryTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactoryTest.java index 63f720688..bd75cc2b5 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactoryTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfEVSFactoryTest.java @@ -17,9 +17,7 @@ import org.junit.Test; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.commons.test.GenericTest; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; @@ -42,7 +40,8 @@ */ public class VariantVcfEVSFactoryTest extends GenericTest { - private VariantSource source = new VariantSource("EVS", "EVS", "EVS", "EVS"); + private static final String FILE_ID = "EVS"; + private static final String STUDY_ID = "EVS"; private VariantVcfFactory factory = new VariantVcfEVSFactory(); @@ -51,12 +50,12 @@ public void testCreate_AA_AC_TT_GT() throws Exception { // AA,AC,TT,GT,... String line = "1\t69428\trs140739101\tT\tG\t.\tPASS\tMAF=4.5707,0.3663,3.0647;GTS=GG,GT,TT;GTC=93,141,5101"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 1); Variant v = res.get(0); - VariantSourceEntry avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry avf = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -72,12 +71,12 @@ public void testCreate_AA_AC_TT_GT() throws Exception { // AA,AC,TT,GT,... 
public void testCreate_A_C_T_G() { // A,C,T,G String line = "Y\t25375759\trs373156833\tT\tA\t.\tPASS\tMAF=0.0,0.1751,0.0409;GTS=A,T;GTC=1,2442"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 1); Variant v = res.get(0); - VariantSourceEntry avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry avf = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -92,7 +91,7 @@ public void testCreate_A_C_T_G() { // A,C,T,G public void testCreate_R_RR_A1R_A1A1() { // R, RR, A1R, A1A1 String line = "X\t100117423\t.\tAG\tA\t.\tPASS\tMAF=0.0308,0.0269,0.0294;GTS=A1A1,A1R,RR,R;GTC=1,1,3947,2306;"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 1); @@ -102,7 +101,7 @@ public void testCreate_R_RR_A1R_A1A1() { // R, RR, A1R, A1A1 assertEquals(v.getAlternate(), ""); - VariantSourceEntry avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry avf = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -119,7 +118,7 @@ public void testCreate_R_RR_A1A1_A1R_A1() { // A1,A2,A3 String line = "X\t106362078\trs3216052\tCT\tC\t.\tPASS\tMAF=18.1215,25.2889,38.7555;GTS=A1A1,A1R,A1,RR,R;GTC=960,1298,737,1691,1570"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 1); @@ -129,7 +128,7 @@ public void testCreate_R_RR_A1A1_A1R_A1() { // A1,A2,A3 assertEquals(v.getAlternate(), ""); - VariantSourceEntry avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry avf = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -145,7 +144,7 @@ public void testCreate_A1A1_A1A2_A2R_A2_RR_R() {// A1A2,A1A3... 
String line = "X\t14039552\t.\tCA\tCAA,C\t.\tPASS\tMAF=5.3453,4.2467,4.9459;GTS=A1A1,A1A2,A1R,A1,A2A2,A2R,A2,RR,R;GTC=0,0,134,162,4,92,107,3707,2027;"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 2); @@ -155,7 +154,7 @@ public void testCreate_A1A1_A1A2_A2R_A2_RR_R() {// A1A2,A1A3... assertEquals(v.getAlternate(), "A"); - VariantSourceEntry avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry avf = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -175,7 +174,7 @@ public void testCreate_A1A1_A1A2_A2R_A2_RR_R() {// A1A2,A1A3... assertEquals(v.getAlternate(), ""); - avf = v.getSourceEntry(source.getFileId(), source.getStudyId()); + avf = v.getSourceEntry(FILE_ID, STUDY_ID); genotypes = new HashMap<>(); @@ -208,7 +207,7 @@ public void testPopulation() { properties.put("GROUPS_ORDER", "EA,AA,ALL"); VariantVcfFactory evsFactory = new VariantVcfEVSFactory(properties); - List res = evsFactory.create(source, line); + List res = evsFactory.create(FILE_ID, STUDY_ID, line); // Allele count assertEquals(res.get(0).getSourceEntry("EVS", "EVS").getCohortStats("EA").getAltAlleleCount(), 1); @@ -237,7 +236,7 @@ public void testPopulation() { // -------------- SNV, (GTS are expressed in another way) line = "21\t10862547\trs373689868\tG\tA\t.\tPASS\tDBSNP=dbSNP_138;EA_AC=0,3182;AA_AC=6,1378;TAC=6,4560;MAF=0.0,0.4335,0.1314;GTS=AA,AG,GG;EA_GTC=0,0,1591;AA_GTC=0,6,686;GTC=0,6,2277;DP=93;GL=.;CP=0.0;CG=-1.5;AA=G;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=intergenic;HGVS_CDNA_VAR=.;HGVS_PROTEIN_VAR=.;CDS_SIZES=.;GS=.;PH=.;EA_AGE=.;AA_AGE=."; - res = evsFactory.create(source, line); + res = evsFactory.create(FILE_ID, STUDY_ID, line); genotypes = new LinkedList<>(); genotypes.add(new Genotype("1/1", "G", "A")); @@ -266,7 +265,7 @@ public void testPopulationMultiallelic() { properties.put("GROUPS_ORDER", "EA,AA,ALL"); VariantVcfFactory evsFactory = new 
VariantVcfEVSFactory(properties); - List res = evsFactory.create(source, line); + List res = evsFactory.create(FILE_ID, STUDY_ID, line); // testing multiallelic AC assertEquals(res.get(0).getSourceEntry("EVS", "EVS").getCohortStats("AA").getAltAlleleCount(), 172); @@ -352,7 +351,7 @@ public void testPopulationMultiallelic() { // --------------------- testing multiallelic SNV line = "9\t17579190\trs4961573\tC\tG,A\t.\tPASS\tDBSNP=dbSNP_111;EA_AC=8156,0,0;AA_AC=4110,10,0;TAC=12266,10,0;MAF=0.0,0.2427,0.0815;GTS=GG,GA,GC,AA,AC,CC;EA_GTC=1,2,3,4,5,6;AA_GTC=2050,10,0,0,0,0;GTC=6128,10,0,0,0,0;DP=6;GL=SH3GL2;CP=0.0;CG=-1.8;AA=G;CA=.;EXOME_CHIP=no;GWAS_PUBMED=.;FG=NM_003026.2:utr-5,NM_003026.2:utr-5;HGVS_CDNA_VAR=NM_003026.2:c.-51C>A,NM_003026.2:c.-51C>G;HGVS_PROTEIN_VAR=.,.;CDS_SIZES=NM_003026.2:1059,NM_003026.2:1059;GS=.,.;PH=.,.;EA_AGE=.;AA_AGE=."; - res = evsFactory.create(source, line); + res = evsFactory.create(FILE_ID, STUDY_ID, line); // testing AC assertEquals(res.get(0).getSourceEntry("EVS", "EVS").getCohortStats("AA").getAltAlleleCount(), 4110); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactoryTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactoryTest.java index 032973fd6..5b005eb13 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactoryTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfExacFactoryTest.java @@ -17,7 +17,6 @@ import org.junit.Test; import org.opencb.biodata.models.feature.Genotype; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.commons.test.GenericTest; import uk.ac.ebi.eva.commons.models.data.Variant; @@ -39,7 +38,8 @@ */ public class VariantVcfExacFactoryTest extends GenericTest { - private VariantSource source = new VariantSource("Exac", "Exac", "Exac", "Exac"); + private static final String FILE_ID = "Exac"; + private static final String STUDY_ID = "Exac"; private VariantVcfFactory factory = new 
VariantVcfExacFactory(); @@ -63,12 +63,12 @@ public void basicLine() { + "non_coding_transcript_variant|604||||||1||1|DDX11L1|HGNC|37102|transcribed_unprocessed_pseudogene|||||||||3/4|||" + "ENST00000518655.2:n.604G>T|||||||||||||||||||,T||ENSR00000528767|RegulatoryFeature|regulatory_region_variant|||||||" + "1||||||regulatory_region|||||||||||||||||||||||||||||||"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 1); Variant v = res.get(0); - VariantSourceEntry sourceEntry = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -112,12 +112,12 @@ public void multiallelicLine() { + "||regulatory_region|||||||||||||||||||||||||||||||,C||ENSR00000278218|RegulatoryFeature|regulatory_region_variant||" + "||||rs55874132|3||||||regulatory_region|||||||||||||||||||||||||||||||"; - List res = factory.create(source, line); + List res = factory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 3); Variant v = res.get(0); - VariantSourceEntry sourceEntry = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = v.getSourceEntry(FILE_ID, STUDY_ID); Map genotypes = new HashMap<>(); @@ -152,7 +152,7 @@ public void multiallelicLine() { genotypes.put(new Genotype("2/3", "G", "A"), 0); genotypes.put(new Genotype("3/3", "G", "A"), 0); - sourceEntry = res.get(1).getSourceEntry(source.getFileId(), source.getStudyId()); + sourceEntry = res.get(1).getSourceEntry(FILE_ID, STUDY_ID); assertEquals(genotypes, sourceEntry.getStats().getGenotypesCount()); assertEquals(3, sourceEntry.getStats().getAltAlleleCount()); @@ -171,7 +171,7 @@ public void multiallelicLine() { genotypes.put(new Genotype("2/3", "G", "C"), 0); genotypes.put(new Genotype("3/3", "G", "C"), 1); - sourceEntry = 
res.get(2).getSourceEntry(FILE_ID, STUDY_ID); assertEquals(genotypes, sourceEntry.getStats().getGenotypesCount()); assertEquals(0, sourceEntry.getStats().getAltAlleleCount()); @@ -245,12 +245,12 @@ public void multiallelicPopulationGenotypes() { properties.put("ALL.HET", "AC_Het"); properties.put("ALL.HOM", "AC_Hom"); VariantVcfFactory exacFactory = new VariantVcfExacFactory(properties); - List res = exacFactory.create(source, line); + List res = exacFactory.create(FILE_ID, STUDY_ID, line); assertTrue(res.size() == 2); Variant v = res.get(0); - VariantSourceEntry sourceEntry = v.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry sourceEntry = v.getSourceEntry(FILE_ID, STUDY_ID); // Allele and genotype counts assertEquals(12, sourceEntry.getCohortStats("AFR").getAltAlleleCount()); @@ -274,7 +274,7 @@ public void multiallelicPopulationGenotypes() { System.out.println("genotypes for C -> G in SAS: " + sourceEntry.getCohortStats("SAS").getGenotypesCount()); v = res.get(1); - sourceEntry = v.getSourceEntry(source.getFileId(), source.getStudyId()); + sourceEntry = v.getSourceEntry(FILE_ID, STUDY_ID); assertEquals(2, sourceEntry.getCohortStats("NFE").getAltAlleleCount()); genotype = new Genotype("0/2", v.getReference(), v.getAlternate()); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactoryTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactoryTest.java index 9449a5c18..41c1e7cc9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactoryTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/mappers/VariantVcfFactoryTest.java @@ -15,10 +15,7 @@ */ package uk.ac.ebi.eva.pipeline.io.mappers; -import org.junit.Before; import org.junit.Test; -import org.opencb.biodata.models.variant.VariantSource; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; @@ -40,15 +37,11 @@ */ public class VariantVcfFactoryTest { - private 
VariantSource source = new VariantSource("filename.vcf", "fileId", "studyId", "studyName"); + private static final String FILE_ID = "fileId"; - private VariantVcfFactory factory = new VariantVcfFactory(); + private static final String STUDY_ID = "studyId"; - @Before - public void setUp() throws Exception { - List sampleNames = Arrays.asList("NA001", "NA002", "NA003"); - source.setSamples(sampleNames); - } + private VariantVcfFactory factory = new VariantVcfFactory(); @Test public void testCreateVariantFromVcfSameLengthRefAlt() { @@ -58,7 +51,7 @@ public void testCreateVariantFromVcfSameLengthRefAlt() { List expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1005, "CACCC", "GACGG")); - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); // Test when there are not differences at the end of the sequence @@ -67,7 +60,7 @@ public void testCreateVariantFromVcfSameLengthRefAlt() { expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1004, "CACC", "GACG")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); } @@ -78,7 +71,7 @@ public void testCreateVariantFromVcfInsertionEmptyRef() { List expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000 + "TGACGC".length() - 1, "", "TGACGC")); - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); } @@ -89,7 +82,7 @@ public void testCreateVariantFromVcfDeletionEmptyAlt() { List expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000 + "TCACCC".length() - 1, "TCACCC", "")); - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); } @@ -99,76 +92,73 @@ public void testCreateVariantFromVcfIndelNotEmptyFields() { List expResult = new LinkedList<>(); 
expResult.add(new Variant("1", 1000, 1000 + "CGATT".length() - 1, "CGATT", "TAC")); - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tAT\tA\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1001, "T", "")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tGATC\tG\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1003, "ATC", "")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\t.\tATC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1002, "", "ATC")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tA\tATC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1002, "", "TC")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tAC\tACT\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1002, 1002, "", "T")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); // Printing those that are not currently managed line = "1\t1000\trs123\tAT\tT\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000, "A", "")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tATC\tTC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000, "A", "")); - result = factory.create(source, line); + result = 
factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tATC\tAC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1001, "T", "")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tAC\tATC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1001, 1001, "", "T")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); line = "1\t1000\trs123\tATC\tGC\t.\t.\t."; expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1001, "AT", "G")); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); } @Test public void testCreateVariantFromVcfCoLocatedVariants_MainFields() { - List sampleNames = Arrays.asList("NA001", "NA002", "NA003", "NA004"); - source.setSamples(sampleNames); - String line = "1\t10040\trs123\tTGACGTAACGATT\tT,TGACGTAACGGTT,TGACGTAATAC\t.\t.\t.\tGT\t0/0\t0/1\t0/2\t1/2"; // 4 samples // Check proper conversion of main fields @@ -177,7 +167,7 @@ public void testCreateVariantFromVcfCoLocatedVariants_MainFields() { expResult.add(new Variant("1", 10050, 10050 + "A".length() - 1, "A", "G")); expResult.add(new Variant("1", 10048, 10048 + "CGATT".length() - 1, "CGATT", "TAC")); - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); } @@ -187,7 +177,7 @@ public void testCreateVariant_Samples() { // Initialize expected variants Variant var0 = new Variant("1", 10041, 10041 + "C".length() - 1, "T", "C"); - VariantSourceEntry file0 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file0 = new VariantSourceEntry(FILE_ID, STUDY_ID); var0.addSourceEntry(file0); // Initialize expected samples @@ -202,19 +192,19 @@ 
public void testCreateVariant_Samples() { Map na005 = new HashMap<>(); na005.put("GT", "1/1"); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na005); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na005); // Check proper conversion of samples - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(1, result.size()); Variant getVar0 = result.get(0); - assertEquals(var0.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData(), - getVar0.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData()); + assertEquals(var0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData(), + getVar0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData()); } @Test @@ -223,11 +213,11 @@ public void testCreateVariantFromVcfMultiallelicVariants_Samples() { // Initialize expected variants Variant var0 = new Variant("1", 123456, 123456, "T", "C"); - VariantSourceEntry file0 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file0 = new VariantSourceEntry(FILE_ID, STUDY_ID); var0.addSourceEntry(file0); Variant var1 = new Variant("1", 123456, 123456, "T", "G"); - VariantSourceEntry file1 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file1 = new VariantSourceEntry(FILE_ID, STUDY_ID); var1.addSourceEntry(file1); @@ 
-257,10 +247,10 @@ public void testCreateVariantFromVcfMultiallelicVariants_Samples() { na004_C.put("GQ", "99"); na004_C.put("PL", "162,0,180"); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004_C); // Initialize expected samples in variant 2 (alt allele G) Map na001_G = new HashMap<>(); @@ -287,29 +277,27 @@ public void testCreateVariantFromVcfMultiallelicVariants_Samples() { na004_G.put("DP", "13"); na004_G.put("GQ", "99"); na004_G.put("PL", "162,0,180"); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001_G); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002_G); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003_G); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004_G); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001_G); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002_G); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003_G); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004_G); // Check proper conversion of samples and alternate alleles - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(2, result.size()); Variant getVar0 = result.get(0); assertEquals( - var0.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData(), - getVar0.getSourceEntry(source.getFileId(), 
source.getStudyId()).getSamplesData()); - assertArrayEquals(new String[]{"G"}, - getVar0.getSourceEntry(source.getFileId(), source.getStudyId()).getSecondaryAlternates()); + var0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData(), + getVar0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData()); + assertArrayEquals(new String[]{"G"}, getVar0.getSourceEntry(FILE_ID, STUDY_ID).getSecondaryAlternates()); Variant getVar1 = result.get(1); assertEquals( - var1.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData(), - getVar1.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData()); - assertArrayEquals(new String[]{"C"}, - getVar1.getSourceEntry(source.getFileId(), source.getStudyId()).getSecondaryAlternates()); + var1.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData(), + getVar1.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData()); + assertArrayEquals(new String[]{"C"}, getVar1.getSourceEntry(FILE_ID, STUDY_ID).getSecondaryAlternates()); } @Test @@ -318,11 +306,11 @@ public void testCreateVariantFromVcfCoLocatedVariants_Samples() { // Initialize expected variants Variant var0 = new Variant("1", 10041, 10041 + "C".length() - 1, "T", "C"); - VariantSourceEntry file0 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file0 = new VariantSourceEntry(FILE_ID, STUDY_ID); var0.addSourceEntry(file0); Variant var1 = new Variant("1", 10050, 10050 + "GC".length() - 1, "T", "GC"); - VariantSourceEntry file1 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file1 = new VariantSourceEntry(FILE_ID, STUDY_ID); var1.addSourceEntry(file1); // Initialize expected samples in variant 1 (alt allele C) @@ -339,12 +327,12 @@ public void testCreateVariantFromVcfCoLocatedVariants_Samples() { Map na006_C = new HashMap<>(); na006_C.put("GT", "2/2"); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001_C); - var0.getSourceEntry(source.getFileId(), 
source.getStudyId()).addSampleData(na002_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na005_C); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na006_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na005_C); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na006_C); // TODO Initialize expected samples in variant 2 (alt allele GC) Map na001_GC = new HashMap<>(); @@ -360,30 +348,30 @@ public void testCreateVariantFromVcfCoLocatedVariants_Samples() { Map na006_GC = new HashMap<>(); na006_GC.put("GT", "1/1"); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001_GC); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002_GC); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003_GC); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004_GC); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na005_GC); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na006_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na005_GC); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na006_GC); // Check proper conversion of samples - List 
result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(2, result.size()); Variant getVar0 = result.get(0); assertEquals( - var0.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData(), - getVar0.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData()); + var0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData(), + getVar0.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData()); assertArrayEquals(new String[]{"GC"}, - getVar0.getSourceEntry(source.getFileId(), source.getStudyId()).getSecondaryAlternates()); + getVar0.getSourceEntry(FILE_ID, STUDY_ID).getSecondaryAlternates()); Variant getVar1 = result.get(1); assertEquals( - var1.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData(), - getVar1.getSourceEntry(source.getFileId(), source.getStudyId()).getSamplesData()); + var1.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData(), + getVar1.getSourceEntry(FILE_ID, STUDY_ID).getSamplesData()); assertArrayEquals(new String[]{"C"}, - getVar1.getSourceEntry(source.getFileId(), source.getStudyId()).getSecondaryAlternates()); + getVar1.getSourceEntry(FILE_ID, STUDY_ID).getSecondaryAlternates()); } @Test @@ -392,7 +380,7 @@ public void testCreateVariantWithMissingGenotypes() { // Initialize expected variants Variant var0 = new Variant("1", 1407616, 1407616, "C", "G"); - VariantSourceEntry file0 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file0 = new VariantSourceEntry(FILE_ID, STUDY_ID); var0.addSourceEntry(file0); // Initialize expected samples @@ -421,18 +409,18 @@ public void testCreateVariantWithMissingGenotypes() { na004.put("GQ", "."); na004.put("PL", "."); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003); - 
var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004); // Check proper conversion of samples - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(1, result.size()); Variant getVar0 = result.get(0); - VariantSourceEntry getFile0 = getVar0.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry getFile0 = getVar0.getSourceEntry(FILE_ID, STUDY_ID); Map na001Data = getFile0.getSampleData(0); assertEquals("./.", na001Data.get("GT")); @@ -470,11 +458,11 @@ public void testParseInfo() { // Initialize expected variants Variant var0 = new Variant("1", 123456, 123456, "T", "C"); - VariantSourceEntry file0 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file0 = new VariantSourceEntry(FILE_ID, STUDY_ID); var0.addSourceEntry(file0); Variant var1 = new Variant("1", 123456, 123456, "T", "G"); - VariantSourceEntry file1 = new VariantSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry file1 = new VariantSourceEntry(FILE_ID, STUDY_ID); var1.addSourceEntry(file1); @@ -504,23 +492,23 @@ public void testParseInfo() { na004.put("GQ", "0"); na004.put("PL", "162,0,180"); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003); - var0.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002); + var0.getSourceEntry(FILE_ID, 
STUDY_ID).addSampleData(na003); + var0.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na001); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na002); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na003); - var1.getSourceEntry(source.getFileId(), source.getStudyId()).addSampleData(na004); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na001); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na002); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na003); + var1.getSourceEntry(FILE_ID, STUDY_ID).addSampleData(na004); // Check proper conversion of samples - List result = factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(2, result.size()); Variant getVar0 = result.get(0); - VariantSourceEntry getFile0 = getVar0.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry getFile0 = getVar0.getSourceEntry(FILE_ID, STUDY_ID); assertEquals(4, Integer.parseInt(getFile0.getAttribute("NS"))); // assertEquals(2, Integer.parseInt(getFile0.getAttribute("AN"))); assertEquals(1, Integer.parseInt(getFile0.getAttribute("AC"))); @@ -530,7 +518,7 @@ public void testParseInfo() { assertEquals(1, Integer.parseInt(getFile0.getAttribute("MQ0"))); Variant getVar1 = result.get(1); - VariantSourceEntry getFile1 = getVar1.getSourceEntry(source.getFileId(), source.getStudyId()); + VariantSourceEntry getFile1 = getVar1.getSourceEntry(FILE_ID, STUDY_ID); assertEquals(4, Integer.parseInt(getFile1.getAttribute("NS"))); // assertEquals(2, Integer.parseInt(getFile1.getAttribute("AN"))); assertEquals(2, Integer.parseInt(getFile1.getAttribute("AC"))); @@ -548,7 +536,7 @@ public void testVariantIds() { List expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000, "C", "T")); expResult.get(0).setIds(Collections.singleton("rs123")); - List result = 
factory.create(source, line); + List result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); assertEquals(expResult.get(0).getIds(), result.get(0).getIds()); @@ -557,7 +545,7 @@ public void testVariantIds() { expResult = new LinkedList<>(); expResult.add(new Variant("1", 1000, 1000, "C", "T")); expResult.get(0).setIds(new HashSet<>(Arrays.asList("rs123", "rs456"))); - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); assertEquals(expResult.get(0).getIds(), result.get(0).getIds()); @@ -567,7 +555,7 @@ public void testVariantIds() { expResult.add(new Variant("1", 1000, 1000, "C", "T")); expResult.get(0).setIds( Collections.emptySet()); // note!: we store a "." as an empty set, not a set with an empty string - result = factory.create(source, line); + result = factory.create(FILE_ID, STUDY_ID, line); assertEquals(expResult, result); assertEquals(expResult.get(0).getIds().size(), result.get(0).getIds().size()); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReaderTest.java index 7895cecd1..2fa826395 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AggregatedVcfReaderTest.java @@ -1,15 +1,15 @@ package uk.ac.ebi.eva.pipeline.io.readers; +import org.junit.Rule; import org.junit.Test; import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.test.MetaDataInstanceFactory; import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; 
import java.io.File; import java.io.FileInputStream; @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * {@link AggregatedVcfReader} @@ -31,13 +32,14 @@ public class AggregatedVcfReaderTest { private static final String STUDY_ID = "7"; - private static final String STUDY_NAME = "study name"; + private static final String INPUT_FILE_PATH = "/input-files/vcf/aggregated.vcf.gz"; - private static final String INPUT_FILE_PATH = "/aggregated.vcf.gz"; + private static final String INPUT_FILE_PATH_EXAC = "/input-files/vcf/aggregated.exac.vcf.gz"; - private static final String INPUT_FILE_PATH_EXAC = "/aggregated.exac.vcf.gz"; + private static final String INPUT_FILE_PATH_EVS = "/input-files/vcf/aggregated.evs.vcf.gz"; - private static final String INPUT_FILE_PATH_EVS = "/aggregated.evs.vcf.gz"; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Test public void shouldReadAllLines() throws Exception { @@ -60,13 +62,10 @@ private void shouldReadAllLinesHelper(VariantSource.Aggregation aggregationType, ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(inputFilePath); + File input = getResource(inputFilePath); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - aggregationType); - - AggregatedVcfReader vcfReader = new AggregatedVcfReader(source, input); + AggregatedVcfReader vcfReader = new AggregatedVcfReader(FILE_ID, STUDY_ID, aggregationType, null, + input); vcfReader.setSaveState(false); vcfReader.open(executionContext); @@ -78,15 +77,12 @@ public void testUncompressedVcf() throws Exception { ExecutionContext executionContext = 
MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // uncompress the input VCF into a temporal file - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - File tempFile = JobTestUtils.createTempFile(); + File input = getResource(INPUT_FILE_PATH); + File tempFile = temporaryFolderRule.newFile(); JobTestUtils.uncompress(input.getAbsolutePath(), tempFile); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.BASIC); - - AggregatedVcfReader vcfReader = new AggregatedVcfReader(source, tempFile); + AggregatedVcfReader vcfReader = new AggregatedVcfReader(FILE_ID, STUDY_ID, VariantSource.Aggregation.BASIC, + null, tempFile); vcfReader.setSaveState(false); vcfReader.open(executionContext); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java index 7f838d012..3b3df64b9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/AnnotationFlatFileReaderTest.java @@ -67,8 +67,8 @@ public void shouldReadAllLinesInVepOutput() throws Exception { assertEquals(count, consequenceTypeCount); // annotationFlatFileReader should get all the lines from the file - long actualCount = JobTestUtils.getLines(new GZIPInputStream(new FileInputStream(file))); - assertEquals(actualCount, count); + long expectedCount = JobTestUtils.getLines(new GZIPInputStream(new FileInputStream(file))); + assertEquals(expectedCount, count); } // Missing ':' in 20_63351 (should be 20:63351) diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReaderTest.java index 99531cfd6..bcfd1adc4 100644 --- 
a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/NonAnnotatedVariantsMongoReaderTest.java @@ -16,7 +16,6 @@ package uk.ac.ebi.eva.pipeline.io.readers; import com.mongodb.DBObject; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -24,15 +23,15 @@ import org.springframework.batch.test.MetaDataInstanceFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.readers.NonAnnotatedVariantsMongoReaderConfiguration; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -48,50 +47,45 @@ * output: a DBObject each time `.read()` is called, with at least: chr, start, annot */ @RunWith(SpringRunner.class) -@ActiveProfiles("variant-annotation-mongo") -@TestPropertySource("classpath:annotation.properties") -@ContextConfiguration(classes = {NonAnnotatedVariantsMongoReaderConfiguration.class, BaseTestConfiguration.class}) +@ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, 
MongoMappingContext.class}) public class NonAnnotatedVariantsMongoReaderTest { - private static final String DOC_CHR = "chr"; - private static final String DOC_START = "start"; - private static final String DOC_ANNOT = "annot"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; - @Rule - public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + private static final int EXPECTED_NON_ANNOTATED_VARIANTS = 1; @Autowired - private JobOptions jobOptions; + private MongoConnection mongoConnection; @Autowired - private MongoConfiguration mongoConfiguration; + private MongoMappingContext mongoMappingContext; - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - } + @Rule + public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @Test public void shouldReadVariantsWithoutAnnotationField() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); - String databaseName = insertDocuments(jobOptions.getDbCollectionsVariantsName()); + String databaseName = insertDocuments(COLLECTION_VARIANTS_NAME); - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations(databaseName, - jobOptions.getMongoConnection()); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); NonAnnotatedVariantsMongoReader mongoItemReader = new NonAnnotatedVariantsMongoReader( - mongoOperations, jobOptions.getDbCollectionsVariantsName()); + mongoOperations, COLLECTION_VARIANTS_NAME); mongoItemReader.open(executionContext); int itemCount = 0; - DBObject doc; - while ((doc = mongoItemReader.read()) != null) { + DBObject variantMongoDocument; + while ((variantMongoDocument = mongoItemReader.read()) != null) { itemCount++; - assertTrue(doc.containsField(DOC_CHR)); - assertTrue(doc.containsField(DOC_START)); - assertFalse(doc.containsField(DOC_ANNOT)); + 
assertTrue(variantMongoDocument.containsField(VariantToDBObjectConverter.CHROMOSOME_FIELD)); + assertTrue(variantMongoDocument.containsField(VariantToDBObjectConverter.START_FIELD)); + assertFalse(variantMongoDocument.containsField(VariantToDBObjectConverter.ANNOTATION_FIELD)); } - assertEquals(itemCount, 1); + assertEquals(EXPECTED_NON_ANNOTATED_VARIANTS, itemCount); mongoItemReader.close(); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/PedReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/PedReaderTest.java new file mode 100644 index 000000000..37dd0f766 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/PedReaderTest.java @@ -0,0 +1,75 @@ +/* + * Copyright 2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.io.readers; + +import com.google.common.collect.Sets; +import org.junit.Test; +import org.opencb.biodata.models.pedigree.Condition; +import org.opencb.biodata.models.pedigree.Individual; +import org.opencb.biodata.models.pedigree.Pedigree; +import org.opencb.biodata.models.pedigree.Sex; + +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; + +/** + * Test for {@link PedReader} + *

+ * input: a pedigree file + * output: a Pedigree when method `.read()` is called. + */ +public class PedReaderTest { + private static final String PEDIGREE_FILE = "/input-files/ped/pedigree-test-file.ped"; + + private static final String MALFORMED_PEDIGREE = "/input-files/ped/malformed-pedigree-test-file.ped"; + + @Test + public void wholePedFileShouldBeParsedIntoPedigree() throws Exception { + String pedigreePath = getResource(PEDIGREE_FILE).getAbsolutePath(); + PedReader pedReader = new PedReader(pedigreePath); + pedReader.open(null); + Pedigree pedigree = pedReader.read(); + + //check that Pedigree.Individuals is correctly populated + assertEquals(4, pedigree.getIndividuals().size()); + Individual individualNA19660 = pedigree.getIndividuals().get("NA19660"); + assertTrue(individualNA19660.getFamily().equals("FAM")); + assertTrue(individualNA19660.getSex().equals("2")); + assertEquals(Sex.FEMALE, individualNA19660.getSexCode()); + assertTrue(individualNA19660.getPhenotype().equals("1")); + assertEquals(Condition.UNAFFECTED, individualNA19660.getCondition()); + assertEquals(2, individualNA19660.getChildren().size()); + assertEquals(Sets.newHashSet("NA19600", "NA19685"), + individualNA19660.getChildren().stream().map(Individual::getId).collect(Collectors.toSet())); + + //check that Pedigree.Families is correctly populated + assertEquals(1, pedigree.getFamilies().size()); + assertTrue(pedigree.getFamilies().containsKey("FAM")); + assertEquals(4, pedigree.getFamilies().get("FAM").size()); + } + + @Test(expected = IllegalArgumentException.class) + public void missingLastColumnInPedFileShouldThrowsException() throws Exception { + String pedigreePath = getResource(MALFORMED_PEDIGREE).getAbsolutePath(); + PedReader pedReader = new PedReader(pedigreePath); + pedReader.open(null); + pedReader.read(); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemReaderTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemReaderTest.java index 3d7c2444d..0f5311e33 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemReaderTest.java @@ -18,8 +18,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.item.ItemReader; import org.springframework.batch.item.file.FlatFileParseException; @@ -27,8 +25,8 @@ import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; import java.io.File; import java.io.FileInputStream; @@ -37,34 +35,32 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; public class UnwindingItemReaderTest { @Rule public ExpectedException exception = ExpectedException.none(); - private static final String INPUT_FILE_PATH = "/small20.vcf.gz"; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + + private static final String INPUT_FILE_PATH = "/input-files/vcf/genotyped.vcf.gz"; - private static final String INPUT_WRONG_FILE_PATH = "/wrong_no_alt.vcf.gz"; + private static final String INPUT_WRONG_FILE_PATH = "/input-files/vcf/wrong_no_alt.vcf.gz"; private static final String FILE_ID = "5"; private static final String STUDY_ID = "7"; - private static final String STUDY_NAME = "study name"; - @Test public void shouldReadAllLines() throws Exception { ExecutionContext 
executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + File input = getResource(INPUT_FILE_PATH); - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); vcfReader.open(executionContext); @@ -76,13 +72,9 @@ public void invalidFileShouldFail() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_WRONG_FILE_PATH); + File input = getResource(INPUT_WRONG_FILE_PATH); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); vcfReader.open(executionContext); @@ -99,16 +91,11 @@ public void testUncompressedVcf() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // uncompress the input VCF into a temporary file - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - File tempFile = JobTestUtils.createTempFile(); // TODO replace with temporary rules + File input = getResource(INPUT_FILE_PATH); + File tempFile = temporaryFolderRule.newFile(); JobTestUtils.uncompress(input.getAbsolutePath(), tempFile); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - - VcfReader vcfReader = new VcfReader(source, tempFile); + VcfReader 
vcfReader = new VcfReader(FILE_ID, STUDY_ID, tempFile); vcfReader.setSaveState(false); vcfReader.open(executionContext); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemStreamReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemStreamReaderTest.java index 78061e798..7a6dff7bc 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemStreamReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/UnwindingItemStreamReaderTest.java @@ -18,8 +18,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.item.ItemReader; import org.springframework.batch.item.file.FlatFileParseException; @@ -27,8 +25,8 @@ import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; import java.io.File; import java.io.FileInputStream; @@ -37,35 +35,32 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; public class UnwindingItemStreamReaderTest { @Rule public ExpectedException exception = ExpectedException.none(); - private static final String INPUT_FILE_PATH = "/small20.vcf.gz"; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + + private static final String INPUT_FILE_PATH = "/input-files/vcf/genotyped.vcf.gz"; - private static final String INPUT_WRONG_FILE_PATH = "/wrong_no_alt.vcf.gz"; + private static final String INPUT_WRONG_FILE_PATH = 
"/input-files/vcf/wrong_no_alt.vcf.gz"; private static final String FILE_ID = "5"; private static final String STUDY_ID = "7"; - private static final String STUDY_NAME = "study name"; - @Test public void shouldReadAllLines() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + File input = getResource(INPUT_FILE_PATH); - - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); UnwindingItemStreamReader unwindingItemStreamReader = new UnwindingItemStreamReader<>(vcfReader); @@ -79,13 +74,9 @@ public void invalidFileShouldFail() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_WRONG_FILE_PATH); - - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + File input = getResource(INPUT_WRONG_FILE_PATH); - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); UnwindingItemStreamReader unwindingItemStreamReader = new UnwindingItemStreamReader<>(vcfReader); @@ -102,15 +93,11 @@ public void testUncompressedVcf() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // uncompress the input VCF into a temporary file - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - File tempFile = JobTestUtils.createTempFile(); // TODO replace with temporary rules + File 
input = getResource(INPUT_FILE_PATH); + File tempFile = temporaryFolderRule.newFile(); JobTestUtils.uncompress(input.getAbsolutePath(), tempFile); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - VcfReader vcfReader = new VcfReader(source, tempFile); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, tempFile); vcfReader.setSaveState(false); UnwindingItemStreamReader unwindingItemStreamReader = new UnwindingItemStreamReader<>(vcfReader); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReaderTest.java index ff72b68ef..a0c560304 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfHeaderReaderTest.java @@ -1,16 +1,17 @@ package uk.ac.ebi.eva.pipeline.io.readers; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import com.mongodb.BasicDBObject; import com.mongodb.util.JSON; +import org.junit.Rule; import org.junit.Test; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.VariantStudy; - import uk.ac.ebi.eva.commons.models.data.VariantSourceEntity; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; import java.io.File; import java.util.Arrays; @@ -22,6 +23,7 @@ import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.test.utils.JobTestUtils.checkFieldsInsideList; import static uk.ac.ebi.eva.test.utils.JobTestUtils.checkStringInsideList; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * {@link VcfHeaderReader} @@ -32,7 +34,7 @@ */ public class VcfHeaderReaderTest { - 
private static final String INPUT_FILE_PATH = "/small20.vcf.gz"; + private static final String INPUT_FILE_PATH = "/input-files/vcf/genotyped.vcf.gz"; private static final String FILE_ID = "5"; @@ -40,17 +42,21 @@ public class VcfHeaderReaderTest { private static final String STUDY_NAME = "study name"; - private static final String INPUT_AGGREGATED_FILE_PATH = "/aggregated.vcf.gz"; + private static final String INPUT_AGGREGATED_FILE_PATH = "/input-files/vcf/aggregated.vcf.gz"; + + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Test public void testRead() throws Exception { - File input = TestFileUtils.getResource(INPUT_FILE_PATH); + File input = getResource(INPUT_FILE_PATH); VariantStudy.StudyType studyType = VariantStudy.StudyType.COLLECTION; VariantSource.Aggregation aggregation = VariantSource.Aggregation.NONE; VcfHeaderReader headerReader = new VcfHeaderReader(input, FILE_ID, STUDY_ID, STUDY_NAME, - studyType, aggregation); + studyType, aggregation); + headerReader.open(null); VariantSourceEntity source = headerReader.read(); assertEquals(FILE_ID, source.getFileId()); @@ -80,24 +86,17 @@ public void testRead() throws Exception { */ @Test public void testConversion() throws Exception { - File input = TestFileUtils.getResource(INPUT_FILE_PATH); + File input = getResource(INPUT_FILE_PATH); VcfHeaderReader headerReader = new VcfHeaderReader(input, FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + VariantStudy.StudyType.COLLECTION, + VariantSource.Aggregation.NONE); + headerReader.open(null); VariantSourceEntity source = headerReader.read(); - char CHARACTER_TO_REPLACE_DOTS = (char) 163; Map meta = source.getMetadata(); - BasicDBObject metadataMongo = new BasicDBObject(); - for (Map.Entry metaEntry : meta.entrySet()) { - ObjectMapper mapper = new ObjectMapper(); - ObjectWriter writer = mapper.writer(); - String key = metaEntry.getKey().replace('.', 
CHARACTER_TO_REPLACE_DOTS); - Object value = metaEntry.getValue(); - String jsonString = writer.writeValueAsString(value); - metadataMongo.append(key, JSON.parse(jsonString)); - } + BasicDBObject metadataMongo = mapMetadataToDBObject(meta); + checkFieldsInsideList(metadataMongo, "INFO", Arrays.asList("id", "description", "number", "type")); checkFieldsInsideList(metadataMongo, "FORMAT", Arrays.asList("id", "description", "number", "type")); checkFieldsInsideList(metadataMongo, "ALT", Arrays.asList("id", "description")); @@ -108,17 +107,25 @@ public void testConversion() throws Exception { @Test public void testConversionAggregated() throws Exception { // uncompress the input VCF into a temporal file - File input = TestFileUtils.getResource(INPUT_AGGREGATED_FILE_PATH); - File tempFile = JobTestUtils.createTempFile(); // TODO replace with temporary rules + File input = getResource(INPUT_AGGREGATED_FILE_PATH); + File tempFile = temporaryFolderRule.newFile(); JobTestUtils.uncompress(input.getAbsolutePath(), tempFile); VcfHeaderReader headerReader = new VcfHeaderReader(input, FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + VariantStudy.StudyType.COLLECTION, + VariantSource.Aggregation.NONE); + headerReader.open(null); VariantSourceEntity source = headerReader.read(); - char CHARACTER_TO_REPLACE_DOTS = (char) 163; Map meta = source.getMetadata(); + BasicDBObject metadataMongo = mapMetadataToDBObject(meta); + + checkFieldsInsideList(metadataMongo, "INFO", Arrays.asList("id", "description", "number", "type")); + checkStringInsideList(metadataMongo, "contig"); + } + + private BasicDBObject mapMetadataToDBObject(Map meta) throws JsonProcessingException { + char CHARACTER_TO_REPLACE_DOTS = (char) 163; BasicDBObject metadataMongo = new BasicDBObject(); for (Map.Entry metaEntry : meta.entrySet()) { ObjectMapper mapper = new ObjectMapper(); @@ -128,9 +135,7 @@ public void testConversionAggregated() throws Exception { 
String jsonString = writer.writeValueAsString(value); metadataMongo.append(key, JSON.parse(jsonString)); } - - checkFieldsInsideList(metadataMongo, "INFO", Arrays.asList("id", "description", "number", "type")); - checkStringInsideList(metadataMongo, "contig"); + return metadataMongo; } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReaderTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReaderTest.java index 1b0be8cb3..cf8f11661 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReaderTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/readers/VcfReaderTest.java @@ -3,16 +3,13 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.item.file.FlatFileParseException; import org.springframework.batch.test.MetaDataInstanceFactory; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.commons.models.data.VariantSourceEntry; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; import java.io.File; import java.io.FileInputStream; @@ -21,6 +18,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * {@link VcfReader} @@ -34,28 +32,25 @@ public class VcfReaderTest { @Rule public ExpectedException exception = ExpectedException.none(); - private static final String INPUT_FILE_PATH = "/small20.vcf.gz"; + private static final String INPUT_FILE_PATH = "/input-files/vcf/genotyped.vcf.gz"; - private static final String INPUT_WRONG_FILE_PATH = "/wrong_no_alt.vcf.gz"; + private static final String INPUT_WRONG_FILE_PATH = "/input-files/vcf/wrong_no_alt.vcf.gz"; private static final 
String FILE_ID = "5"; private static final String STUDY_ID = "7"; - private static final String STUDY_NAME = "study name"; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Test public void shouldReadAllLines() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); + File input = getResource(INPUT_FILE_PATH); - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); vcfReader.open(executionContext); @@ -67,13 +62,9 @@ public void invalidFileShouldFail() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // input vcf - File input = TestFileUtils.getResource(INPUT_WRONG_FILE_PATH); + File input = getResource(INPUT_WRONG_FILE_PATH); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - VcfReader vcfReader = new VcfReader(source, input); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, input); vcfReader.setSaveState(false); vcfReader.open(executionContext); @@ -88,15 +79,11 @@ public void testUncompressedVcf() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); // uncompress the input VCF into a temporary file - File input = TestFileUtils.getResource(INPUT_FILE_PATH); - File tempFile = JobTestUtils.createTempFile(); // TODO replace with temporary rules + File input = getResource(INPUT_FILE_PATH); + File tempFile = temporaryFolderRule.newFile(); 
JobTestUtils.uncompress(input.getAbsolutePath(), tempFile); - VariantSource source = new VariantSource(input.getAbsolutePath(), FILE_ID, STUDY_ID, STUDY_NAME, - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - VcfReader vcfReader = new VcfReader(source, tempFile); + VcfReader vcfReader = new VcfReader(FILE_ID, STUDY_ID, tempFile); vcfReader.setSaveState(false); vcfReader.open(executionContext); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java index 1eba220f7..3c2fd5b63 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/GeneWriterTest.java @@ -24,16 +24,14 @@ import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.GeneWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.GeneLineMapper; import uk.ac.ebi.eva.pipeline.model.FeatureCoordinates; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.data.GtfStaticTestData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -49,27 +47,28 @@ * output: the FeatureCoordinates get written in mongo, with at least: chromosome, start and end. 
*/ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:initialize-database.properties"}) -@ContextConfiguration(classes = {BaseTestConfiguration.class, GeneWriterConfiguration.class}) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class GeneWriterTest { - @Rule - public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + private static final String COLLECTION_FEATURES_NAME = "features"; @Autowired - private JobOptions jobOptions; + private MongoConnection mongoConnection; @Autowired - private MongoConfiguration mongoConfiguration; + private MongoMappingContext mongoMappingContext; + + @Rule + public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @Test public void shouldWriteAllFieldsIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getMongoOperations(databaseName, - jobOptions.getMongoConnection()); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, mongoMappingContext); - GeneWriter geneWriter = new GeneWriter(mongoOperations, jobOptions.getDbCollectionsFeaturesName()); + GeneWriter geneWriter = new GeneWriter(mongoOperations, COLLECTION_FEATURES_NAME); GeneLineMapper lineMapper = new GeneLineMapper(); List genes = new ArrayList<>(); @@ -80,7 +79,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { } geneWriter.write(genes); - DBCollection genesCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsFeaturesName()); + DBCollection genesCollection = mongoRule.getCollection(databaseName, COLLECTION_FEATURES_NAME); // count documents in DB and check they have region (chr + start + end) DBCursor cursor = genesCollection.find(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java 
b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java index 3dd363add..4815053f9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/StatisticsMongoWriterTest.java @@ -26,14 +26,13 @@ import org.springframework.batch.item.file.mapping.JsonLineMapper; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.model.PopulationStatistics; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.data.VariantData; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; @@ -51,23 +50,25 @@ * {@link StatisticsMongoWriter} * input: a List of {@link PopulationStatistics} to each call of `.write()` * output: the FeatureCoordinates get written in mongo, with at least: chromosome, start and end. - * + *

* TODO Replace MongoDBHelper with StatisticsMongoWriterConfiguration in ContextConfiguration when the class exists */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:common-configuration.properties"}) -@ContextConfiguration(classes = {BaseTestConfiguration.class, MongoConfiguration.class}) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class StatisticsMongoWriterTest { + private static final String COLLECTION_STATS_NAME = "populationStatistics"; + @Autowired - private MongoConfiguration mongoConfiguration; + private MongoConnection mongoConnection; + + @Autowired + private MongoMappingContext mongoMappingContext; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobOptions jobOptions; - @Test public void shouldWriteAllFieldsIntoMongoDb() throws Exception { List populationStatisticsList = buildPopulationStatsList(); @@ -75,13 +76,13 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); StatisticsMongoWriter statisticsMongoWriter = getStatisticsMongoWriter(databaseName); - int n = 1; - for (int i = 0; i < n; i++) { + int expectedDocumentsCount = 1; + for (int i = 0; i < expectedDocumentsCount; i++) { statisticsMongoWriter.write(populationStatisticsList); } // do the checks - DBCollection statsCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsStatsName()); + DBCollection statsCollection = mongoRule.getCollection(databaseName, COLLECTION_STATS_NAME); // count documents in DB and check they have at least the index fields (vid, sid, cid) and maf and genotypeCount DBCursor cursor = statsCollection.find(); @@ -99,7 +100,7 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { assertNotNull(next.get("maf")); assertNotNull(next.get("numGt")); } - assertEquals(n, count); + 
assertEquals(expectedDocumentsCount, count); } @Test @@ -111,7 +112,7 @@ public void shouldCreateIndexesInCollection() throws Exception { statisticsMongoWriter.write(populationStatisticsList); // do the checks - DBCollection statsCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsStatsName()); + DBCollection statsCollection = mongoRule.getCollection(databaseName, COLLECTION_STATS_NAME); // check there is an index in chr + start + ref + alt + sid + cid List indexes = new ArrayList<>(); @@ -168,10 +169,9 @@ private List buildPopulationStatsList() throws Exception { } public StatisticsMongoWriter getStatisticsMongoWriter(String databaseName) throws UnknownHostException { - MongoOperations operations = mongoConfiguration.getMongoOperations( - databaseName, jobOptions.getMongoConnection()); - StatisticsMongoWriter statisticsMongoWriter = new StatisticsMongoWriter( - operations, jobOptions.getDbCollectionsStatsName()); + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + StatisticsMongoWriter statisticsMongoWriter = new StatisticsMongoWriter(operations, COLLECTION_STATS_NAME); return statisticsMongoWriter; } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java index be6342e5a..4941aad5c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantMongoWriterTest.java @@ -19,20 +19,20 @@ import com.mongodb.BulkWriteException; import com.mongodb.DBCollection; import com.mongodb.DBObject; - import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mockito; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import 
org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - import uk.ac.ebi.eva.commons.models.data.Variant; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.VariantWriterConfiguration; import uk.ac.ebi.eva.pipeline.model.converters.data.VariantToMongoDbObjectConverter; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -55,26 +55,31 @@ * Testing {@link VariantMongoWriter} */ @RunWith(SpringRunner.class) -@ContextConfiguration(classes = { VariantWriterConfiguration.class }) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class VariantMongoWriterTest { - @Autowired - private MongoConfiguration mongoConfiguration; - private static final List EMPTY_LIST = new ArrayList<>(); private VariantToMongoDbObjectConverter variantToMongoDbObjectConverter = - Mockito.mock(VariantToMongoDbObjectConverter.class); + Mockito.mock(VariantToMongoDbObjectConverter.class); private final String collectionName = "variants"; + @Autowired + private MongoConnection mongoConnection; + + @Autowired + private MongoMappingContext mongoMappingContext; + @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @Test public void noVariantsNothingShouldBeWritten() throws UnknownHostException { String dbName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(dbName); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, + mongoMappingContext); DBCollection dbCollection = 
mongoOperations.getCollection(collectionName); VariantMongoWriter variantMongoWriter = new VariantMongoWriter(collectionName, mongoOperations, @@ -90,7 +95,8 @@ public void variantsShouldBeWrittenIntoMongoDb() throws Exception { Variant variant2 = new Variant("2", 3, 4, "C", "G"); String dbName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(dbName); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, + mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(collectionName); BasicDBObject dbObject = new BasicDBObject(); @@ -108,7 +114,8 @@ public void variantsShouldBeWrittenIntoMongoDb() throws Exception { @Test public void indexesShouldBeCreatedInBackground() throws UnknownHostException { String dbName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(dbName); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, + mongoMappingContext); DBCollection dbCollection = mongoOperations.getCollection(collectionName); VariantMongoWriter variantMongoWriter = new VariantMongoWriter(collectionName, mongoOperations, @@ -133,7 +140,8 @@ public void testNoDuplicatesCanBeInserted() throws Exception { Variant variant1 = new Variant("1", 1, 2, "A", "T"); String dbName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(dbName); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(dbName, mongoConnection, + mongoMappingContext); BasicDBObject dbObject = new BasicDBObject(); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java index 70f581793..c82ddaae3 100644 --- 
a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VariantSourceEntityMongoWriterTest.java @@ -21,9 +21,9 @@ import org.junit.runner.RunWith; import org.opencb.biodata.models.variant.VariantSource; import org.opencb.biodata.models.variant.VariantStudy; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; @@ -36,8 +36,7 @@ import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; import uk.ac.ebi.eva.pipeline.io.readers.VcfHeaderReader; import uk.ac.ebi.eva.pipeline.jobs.steps.LoadFileStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -56,7 +55,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * {@link VariantSourceEntityMongoWriter} @@ -65,31 +64,44 @@ * date, aggregation. Stats are not there because those are written by the statistics job. 
*/ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:genotyped-vcf.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {BaseTestConfiguration.class, LoadFileStep.class}) public class VariantSourceEntityMongoWriterTest { - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; - @Rule - public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + private static final String COLLECTION_FILES_NAME = "files"; + + private static final String FILE_ID = "1"; + + private static final String STUDY_ID = "1"; + + private static final String STUDY_NAME = "small"; + + private static final VariantStudy.StudyType STUDY_TYPE = VariantStudy.StudyType.COLLECTION; + + private static final VariantSource.Aggregation AGGREGATION = VariantSource.Aggregation.NONE; @Autowired - private JobOptions jobOptions; + private MongoConnection mongoConnection; @Autowired - private MongoConfiguration mongoConfiguration; + private MongoMappingContext mongoMappingContext; + + @Rule + public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); private String input; @Test public void shouldWriteAllFieldsIntoMongoDb() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(databaseName); - DBCollection fileCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsFilesName()); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + DBCollection fileCollection = mongoRule.getCollection(databaseName, COLLECTION_FILES_NAME); VariantSourceEntityMongoWriter filesWriter = new VariantSourceEntityMongoWriter( - mongoOperations, jobOptions.getDbCollectionsFilesName()); + mongoOperations, 
COLLECTION_FILES_NAME); VariantSourceEntity variantSourceEntity = getVariantSourceEntity(); filesWriter.write(Collections.singletonList(variantSourceEntity)); @@ -124,11 +136,12 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { @Test public void shouldWriteSamplesWithDotsInName() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(databaseName); - DBCollection fileCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsFilesName()); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + DBCollection fileCollection = mongoRule.getCollection(databaseName, COLLECTION_FILES_NAME); VariantSourceEntityMongoWriter filesWriter = new VariantSourceEntityMongoWriter( - mongoOperations, jobOptions.getDbCollectionsFilesName()); + mongoOperations, COLLECTION_FILES_NAME); VariantSourceEntity variantSourceEntity = getVariantSourceEntity(); Map samplesPosition = new HashMap<>(); @@ -154,11 +167,12 @@ public void shouldWriteSamplesWithDotsInName() throws Exception { @Test public void shouldCreateUniqueFileIndex() throws Exception { String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - MongoOperations mongoOperations = mongoConfiguration.getDefaultMongoOperations(databaseName); - DBCollection fileCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsFilesName()); + MongoOperations mongoOperations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + DBCollection fileCollection = mongoRule.getCollection(databaseName, COLLECTION_FILES_NAME); - VariantSourceEntityMongoWriter filesWriter = new VariantSourceEntityMongoWriter( - mongoOperations, jobOptions.getDbCollectionsFilesName()); + VariantSourceEntityMongoWriter filesWriter = new VariantSourceEntityMongoWriter( mongoOperations, 
+ COLLECTION_FILES_NAME); VariantSourceEntity variantSourceEntity = getVariantSourceEntity(); filesWriter.write(Collections.singletonList(variantSourceEntity)); @@ -179,24 +193,15 @@ public void shouldCreateUniqueFileIndex() throws Exception { assertEquals("true", uniqueIndex.get(MongoDBHelper.BACKGROUND_INDEX).toString()); } - private VariantSourceEntity getVariantSourceEntity() { - VariantSource source = (VariantSource) jobOptions.getVariantOptions().get( - VariantStorageManager.VARIANT_SOURCE); - String fileId = source.getFileId(); - String studyId = source.getStudyId(); - String studyName = source.getStudyName(); - VariantStudy.StudyType studyType = source.getType(); - VariantSource.Aggregation aggregation = source.getAggregation(); - - VcfHeaderReader headerReader = new VcfHeaderReader(new File(input), fileId, studyId, studyName, - studyType, aggregation); - + private VariantSourceEntity getVariantSourceEntity() throws Exception { + VcfHeaderReader headerReader = new VcfHeaderReader(new File(input), FILE_ID, STUDY_ID, STUDY_NAME, + STUDY_TYPE, AGGREGATION); + headerReader.open(null); return headerReader.read(); } @Before public void setUp() throws Exception { input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, input); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java index fa991024d..e07be440d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepAnnotationMongoWriterTest.java @@ -27,16 +27,16 @@ import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.mongodb.core.MongoOperations; +import 
org.springframework.data.mongodb.core.mapping.MongoMappingContext; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; -import uk.ac.ebi.eva.pipeline.configuration.writers.VariantAnnotationWriterConfiguration; import uk.ac.ebi.eva.pipeline.io.mappers.AnnotationLineMapper; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.utils.MongoDBHelper; @@ -58,20 +58,22 @@ * "consequence types" annotations set */ @RunWith(SpringRunner.class) -@ActiveProfiles("variant-annotation-mongo") -@TestPropertySource("classpath:annotation.properties") -@ContextConfiguration(classes = {BaseTestConfiguration.class, VariantAnnotationWriterConfiguration.class}) +@ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) +@TestPropertySource({"classpath:test-mongo.properties"}) +@ContextConfiguration(classes = {MongoConnection.class, MongoMappingContext.class}) public class VepAnnotationMongoWriterTest { + private static final String COLLECTION_VARIANTS_NAME = "variants"; + + @Autowired + private MongoConnection mongoConnection; + @Autowired - private MongoConfiguration mongoConfiguration; + private MongoMappingContext mongoMappingContext; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobOptions jobOptions; - private DBObjectToVariantAnnotationConverter converter; private VepAnnotationMongoWriter annotationWriter; private AnnotationLineMapper AnnotationLineMapper; @@ -85,30 
+87,30 @@ public void shouldWriteAllFieldsIntoMongoDb() throws Exception { annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); } - String dbCollectionVariantsName = jobOptions.getDbCollectionsVariantsName(); - DBCollection variants = mongoRule.getCollection(databaseName, dbCollectionVariantsName); + DBCollection variants = mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME); // first do a mock of a "variants" collection, with just the _id writeIdsIntoMongo(annotations, variants); // now, load the annotation - MongoOperations operations = mongoConfiguration.getMongoOperations( - databaseName, jobOptions.getMongoConnection()); - annotationWriter = new VepAnnotationMongoWriter(operations, dbCollectionVariantsName); + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); + annotationWriter = new VepAnnotationMongoWriter(operations, COLLECTION_VARIANTS_NAME); annotationWriter.write(annotations); // and finally check that documents in DB have annotation (only consequence type) DBCursor cursor = variants.find(); - int cnt = 0; + int count = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { - cnt++; - VariantAnnotation annot = converter.convertToDataModelType((DBObject) cursor.next().get("annot")); + count++; + VariantAnnotation annot = converter.convertToDataModelType( + (DBObject) cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD)); assertNotNull(annot.getConsequenceTypes()); consequenceTypeCount += annot.getConsequenceTypes().size(); } - assertTrue(cnt > 0); + assertTrue(count > 0); assertEquals(annotations.size(), consequenceTypeCount); } @@ -125,7 +127,7 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce for (String annotLine : vepOutputContent.split("\n")) { annotations.add(AnnotationLineMapper.mapLine(annotLine, 0)); } - String dbCollectionVariantsName = jobOptions.getDbCollectionsVariantsName(); + String 
dbCollectionVariantsName = COLLECTION_VARIANTS_NAME; DBCollection variants = mongoRule.getCollection(databaseName, dbCollectionVariantsName); // first do a mock of a "variants" collection, with just the _id @@ -151,8 +153,8 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce } // now, load the annotation - MongoOperations operations = mongoConfiguration.getMongoOperations( - databaseName, jobOptions.getMongoConnection()); + MongoOperations operations = MongoConfiguration.getMongoOperations(databaseName, mongoConnection, + mongoMappingContext); annotationWriter = new VepAnnotationMongoWriter(operations, dbCollectionVariantsName); annotationWriter.write(annotationSet1); @@ -166,7 +168,8 @@ public void shouldWriteAllFieldsIntoMongoDbMultipleSetsAnnotations() throws Exce DBObject dbObject = cursor.next(); String id = dbObject.get("_id").toString(); - VariantAnnotation annot = converter.convertToDataModelType((DBObject) dbObject.get("annot")); + VariantAnnotation annot = converter.convertToDataModelType( + (DBObject) dbObject.get(VariantToDBObjectConverter.ANNOTATION_FIELD)); if (id.equals("20_63360_C_T") || id.equals("20_63399_G_A") || id.equals("20_63426_G_T")) { assertEquals(2, annot.getConsequenceTypes().size()); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepInputFlatFileWriterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepInputFlatFileWriterTest.java index 35a155914..fae92723a 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepInputFlatFileWriterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/io/writers/VepInputFlatFileWriterTest.java @@ -21,6 +21,7 @@ import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter; import org.springframework.batch.item.ExecutionContext; import org.springframework.batch.test.MetaDataInstanceFactory; + import uk.ac.ebi.eva.pipeline.model.VariantWrapper; import uk.ac.ebi.eva.test.data.VariantData; import 
uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java index d89e3b27b..10763bdcb 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AggregatedVcfJobTest.java @@ -16,12 +16,11 @@ package uk.ac.ebi.eva.pipeline.jobs; import org.junit.Assert; -import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.datastore.core.QueryOptions; import org.opencb.opencga.lib.common.Config; import org.opencb.opencga.storage.core.StorageManagerFactory; @@ -31,21 +30,23 @@ import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.StepExecution; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import 
uk.ac.ebi.eva.test.utils.JobTestUtils; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import java.io.File; import java.io.FileInputStream; import java.util.ArrayList; import java.util.Arrays; @@ -57,35 +58,31 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link AggregatedVcfJob} */ @RunWith(SpringRunner.class) -@SpringBootTest -@TestPropertySource({"classpath:variant-aggregated.properties"}) +@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) +@TestPropertySource({"classpath:variant-aggregated.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {AggregatedVcfJob.class, BatchTestConfiguration.class}) public class AggregatedVcfJobTest { + public static final String INPUT = "/input-files/vcf/aggregated.vcf.gz"; + + private static final String COLLECTION_VARIANTS_NAME = "variants"; - // TODO this test can't be modified to use fully the temporary folder rule / mongo rule. + private static final String COLLECTION_FILES_NAME = "files"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Autowired - private JobOptions jobOptions; - - private String input; - - private String outputDir; - - private String compressExtension; - - private String dbName; + private JobLauncherTestUtils jobLauncherTestUtils; private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? 
System .getenv("OPENCGA_HOME") : "/opt/opencga"; @@ -96,9 +93,22 @@ public class AggregatedVcfJobTest { @Test public void aggregatedTransformAndLoadShouldBeExecuted() throws Exception { Config.setOpenCGAHome(opencgaHome); - mongoRule.getTemporaryDatabase(dbName); - - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + String dbName = mongoRule.getRandomTemporaryDatabaseName(); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName(COLLECTION_FILES_NAME) + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(dbName) + .inputStudyId("aggregated-job") + .inputStudyName("inputStudyName") + .inputStudyType("COLLECTION") + .inputVcf(getResource(INPUT).getAbsolutePath()) + .inputVcfAggregation("BASIC") + .inputVcfId("1") + .timestamp() + .annotationSkip(true) + .toJobParameters(); + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); @@ -108,7 +118,7 @@ public void aggregatedTransformAndLoadShouldBeExecuted() throws Exception { Collection stepExecutions = jobExecution.getStepExecutions(); Set names = stepExecutions.stream().map(StepExecution::getStepName) - .collect(Collectors.toSet()); + .collect(Collectors.toSet()); assertEquals(EXPECTED_REQUIRED_STEP_NAMES, names); @@ -120,7 +130,7 @@ public void aggregatedTransformAndLoadShouldBeExecuted() throws Exception { VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); VariantDBIterator iterator = variantDBAdaptor.iterator(new QueryOptions()); - String file = jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF); + File file = getResource(INPUT); long lines = JobTestUtils.getLines(new GZIPInputStream(new FileInputStream(file))); Assert.assertEquals(lines, JobTestUtils.count(iterator)); @@ -129,39 +139,28 @@ public void aggregatedTransformAndLoadShouldBeExecuted() throws 
Exception { assertFalse(variant.getSourceEntries().values().iterator().next().getCohortStats().isEmpty()); } +// TODO This test needs to be refactored, as right the pipeline will handle the injection of the appropriate VcfReader +// even if the aggregated job has been selected. Maybe we should check this with jobParametersValidator? + @Ignore @Test public void aggregationNoneIsNotAllowed() throws Exception { + String dbName = mongoRule.getRandomTemporaryDatabaseName(); mongoRule.getTemporaryDatabase(dbName); - VariantSource source = - (VariantSource) jobOptions.getVariantOptions().get(VariantStorageManager.VARIANT_SOURCE); - jobOptions.getVariantOptions().put( - VariantStorageManager.VARIANT_SOURCE, new VariantSource( - input, - source.getFileId(), - source.getStudyId(), - source.getStudyName(), - source.getType(), - VariantSource.Aggregation.NONE)); - Config.setOpenCGAHome(opencgaHome); - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName(COLLECTION_FILES_NAME) + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(dbName) + .inputVcf(getResource(INPUT).getAbsolutePath()) + .inputVcfId("1") + .inputStudyId("aggregated-job") + .inputVcfAggregation("NONE") + .timestamp() + .toJobParameters(); + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.FAILED, jobExecution.getExitStatus()); assertEquals(BatchStatus.FAILED, jobExecution.getStatus()); } - - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - - input = jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF); - outputDir = jobOptions.getOutputDir(); - compressExtension = jobOptions.getPipelineOptions().getString("compressExtension"); - dbName = jobOptions.getPipelineOptions().getString(JobParametersNames.DB_NAME); - - String inputFile = AggregatedVcfJobTest.class.getResource(input).getFile(); - 
jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, inputFile); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java index f92f4132f..1869361e7 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/AnnotationJobTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ import com.mongodb.DBCursor; import com.mongodb.DBObject; -import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -28,6 +27,7 @@ import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.StepExecution; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; @@ -35,12 +35,15 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.steps.GenerateVepAnnotationStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; 
+import uk.ac.ebi.eva.utils.URLHelper; import java.io.File; import java.nio.file.Files; @@ -52,38 +55,64 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link AnnotationJob} + * + * TODO The test should fail when we will integrate the JobParameter validation since there are empty parameters for VEP */ @RunWith(SpringRunner.class) @SpringBootTest -@TestPropertySource({"classpath:annotation-job.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) public class AnnotationJobTest { private static final String MOCK_VEP = "/mockvep.pl"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; - //TODO check later to substitute files for temporary ones / pay attention to vep Input file + private static final String INPUT_STUDY_ID = "annotation-job"; + private static final String INPUT_VCF_ID = "1"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Autowired - private JobOptions jobOptions; + private JobLauncherTestUtils jobLauncherTestUtils; - private File vepInputFile; private DBObjectToVariantAnnotationConverter converter; @Test public void allAnnotationStepsShouldBeExecuted() throws Exception { - mongoRule.restoreDump(getResourceUrl(MONGO_DUMP), jobOptions.getDbName()); - - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + String dbName = 
mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String outputDirAnnot = temporaryFolderRule.getRoot().getAbsolutePath(); + + File vepInput = new File(URLHelper.resolveVepInput(outputDirAnnot, INPUT_STUDY_ID, INPUT_VCF_ID)); + String vepInputName = vepInput.getName(); + temporaryFolderRule.newFile(vepInputName); + + File vepOutput = new File(URLHelper.resolveVepOutput(outputDirAnnot, INPUT_STUDY_ID, INPUT_VCF_ID)); + String vepOutputName = vepOutput.getName(); + temporaryFolderRule.newFile(vepOutputName); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(dbName) + .inputFasta("") + .inputStudyId(INPUT_STUDY_ID) + .inputVcfId(INPUT_VCF_ID) + .outputDirAnnotation(outputDirAnnot) + .vepCachePath("") + .vepCacheSpecies("") + .vepCacheVersion("") + .vepNumForks("") + .vepPath(getResource(MOCK_VEP).getPath()) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); @@ -99,18 +128,17 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { assertEquals(BeanNames.LOAD_VEP_ANNOTATION_STEP, loadVepAnnotationsStep.getStepName()); //check list of variants without annotation output file - assertTrue(vepInputFile.exists()); - assertEquals("20\t60343\t60343\tG/A\t+", JobTestUtils.readFirstLine(vepInputFile)); + assertTrue(vepInput.exists()); + assertEquals("20\t60343\t60343\tG/A\t+", JobTestUtils.readFirstLine(vepInput)); //check that documents have the annotation - DBCursor cursor = mongoRule.getCollection(jobOptions.getDbName(), jobOptions.getDbCollectionsVariantsName()) - .find(); + DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_VARIANTS_NAME).find(); - int cnt = 0; + int count = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { - cnt++; - DBObject dbObject = 
(DBObject) cursor.next().get("annot"); + count++; + DBObject dbObject = (DBObject) cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD); if (dbObject != null) { VariantAnnotation annot = converter.convertToDataModelType(dbObject); assertNotNull(annot.getConsequenceTypes()); @@ -118,7 +146,7 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { } } - assertEquals(300, cnt); + assertEquals(300, count); assertEquals(536, consequenceTypeCount); //check that one line is skipped because malformed @@ -130,7 +158,28 @@ public void allAnnotationStepsShouldBeExecuted() throws Exception { @Test public void noVariantsToAnnotateOnlyFindVariantsToAnnotateStepShouldRun() throws Exception { - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + String dbName = mongoRule.getRandomTemporaryDatabaseName(); + String outputDirAnnot = temporaryFolderRule.getRoot().getAbsolutePath(); + + File vepInput = new File(URLHelper.resolveVepInput(outputDirAnnot, INPUT_STUDY_ID, INPUT_VCF_ID)); + String vepInputName = vepInput.getName(); + temporaryFolderRule.newFile(vepInputName); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(dbName) + .inputFasta("") + .inputStudyId(INPUT_STUDY_ID) + .inputVcfId(INPUT_VCF_ID) + .outputDirAnnotation(outputDirAnnot) + .vepCachePath("") + .vepCacheSpecies("") + .vepCacheVersion("") + .vepNumForks("") + .vepPath(getResource(MOCK_VEP).getPath()) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); @@ -140,27 +189,13 @@ public void noVariantsToAnnotateOnlyFindVariantsToAnnotateStepShouldRun() throws assertEquals(BeanNames.GENERATE_VEP_INPUT_STEP, findVariantsToAnnotateStep.getStepName()); - assertTrue(vepInputFile.exists()); - 
assertTrue(Files.size(Paths.get(vepInputFile.toPath().toUri())) == 0); + assertTrue(vepInput.exists()); + assertTrue(Files.size(Paths.get(vepInput.toPath().toUri())) == 0); } @Before public void setUp() throws Exception { - jobOptions.loadArgs(); - - vepInputFile = new File(jobOptions.getVepInput()); - jobOptions.setAppVepPath(getResource(MOCK_VEP)); - converter = new DBObjectToVariantAnnotationConverter(); } - /** - * Release resources and delete the temporary output file - */ - @After - public void tearDown() throws Exception { - vepInputFile.delete(); - new File(jobOptions.getVepOutput()).delete(); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java index 71882917b..545ffdfb5 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,222 +16,150 @@ package uk.ac.ebi.eva.pipeline.jobs; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.datastore.core.QueryOptions; import org.opencb.opencga.lib.common.Config; -import org.opencb.opencga.storage.core.StorageManagerException; -import org.opencb.opencga.storage.core.StorageManagerFactory; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; -import org.springframework.batch.core.StepExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.pipeline.configuration.BeanNames; -import uk.ac.ebi.eva.pipeline.jobs.steps.AnnotationLoaderStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; -import 
java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import java.util.zip.GZIPInputStream; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; -import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; /** * Test for {@link GenotypedVcfJob} *

- * JobLauncherTestUtils is initialized in @Before because in GenotypedVcfJob there are two Job beans: - * genotypedVcfJob and annotateVariantsJob (used by test). In this way it is possible to specify the Job to run - * and avoid NoUniqueBeanDefinitionException. There are also other solutions like: - * - http://stackoverflow.com/questions/29655796/how-can-i-qualify-an-autowired-setter-that-i-dont-own - * - https://jira.spring.io/browse/BATCH-2366 - *

- * TODO: - * FILE_WRONG_NO_ALT should be renamed because the alt allele is not missing but is the same as the reference + * TODO: FILE_WRONG_NO_ALT should be renamed because the alt allele is not missing but is the same as the reference */ @RunWith(SpringRunner.class) -@SpringBootTest -@TestPropertySource({"classpath:genotyped-vcf.properties"}) +@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE,Application.VARIANT_ANNOTATION_MONGO_PROFILE}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) public class GenotypedVcfJobTest { - //TODO check later to substitute files for temporary ones / pay attention to vep Input file - - private static final String MOCK_VEP = "/mockvep.pl"; - - private static final int EXPECTED_VALID_ANNOTATIONS = 536; - - private static final int EXPECTED_ANNOTATIONS = 537; - - private static final int EXPECTED_VARIANTS = 300; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Autowired - private JobOptions jobOptions; - private String input; - private String outputDir; - private String compressExtension; - private String dbName; - private String vepInput; - - private String vepOutput; + private JobLauncherTestUtils jobLauncherTestUtils; - private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? System.getenv("OPENCGA_HOME") : "/opt/opencga"; + private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? 
System + .getenv("OPENCGA_HOME") : "/opt/opencga"; @Test public void fullGenotypedVcfJob() throws Exception { - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, getResource(input).getAbsolutePath()); - jobOptions.getPipelineOptions().put(JobParametersNames.APP_VEP_PATH, getResource(MOCK_VEP).getAbsolutePath()); - Config.setOpenCGAHome(opencgaHome); - mongoRule.getTemporaryDatabase(dbName); + File inputFile = GenotypedVcfJobTestUtils.getInputFile(); + File mockVep = GenotypedVcfJobTestUtils.getMockVep(); + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - //stats file init - VariantSource source = (VariantSource) jobOptions.getVariantOptions().get(VariantStorageManager.VARIANT_SOURCE); - File statsFile = new File(Paths.get(outputDir).resolve(VariantStorageManager.buildFilename(source)) - + ".variants.stats.json.gz"); - statsFile.delete(); - assertFalse(statsFile.exists()); // ensure the stats file doesn't exist from previous executions + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); - // annotation files init - File vepInputFile = new File(vepInput); - vepInputFile.delete(); - assertFalse(vepInputFile.exists()); + File variantsStatsFile = GenotypedVcfJobTestUtils.getVariantsStatsFile(outputDirStats); + File sourceStatsFile = GenotypedVcfJobTestUtils.getSourceStatsFile(outputDirStats); - File vepOutputFile = new File(vepOutput); - vepOutputFile.delete(); - assertFalse(vepOutputFile.exists()); + File vepInputFile = GenotypedVcfJobTestUtils.getVepInputFile(outputDirAnnotation); + File vepOutputFile = GenotypedVcfJobTestUtils.getVepOutputFile(outputDirAnnotation); - VariantDBIterator iterator; + File fasta = temporaryFolderRule.newFile(); // Run the Job - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + JobParameters jobParameters = new EvaJobParameterBuilder() + 
.collectionFilesName(GenotypedVcfJobTestUtils.COLLECTION_FILES_NAME) + .collectionVariantsName(GenotypedVcfJobTestUtils.COLLECTION_VARIANTS_NAME) + .databaseName(databaseName) + .inputFasta(fasta.getAbsolutePath()) + .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) + .inputStudyName("inputStudyName") + .inputStudyType("COLLECTION") + .inputVcf(inputFile.getAbsolutePath()) + .inputVcfAggregation("NONE") + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) + .outputDirAnnotation(outputDirAnnotation) + .outputDirStats(outputDirStats) + .vepCachePath("") + .vepCacheSpecies("human") + .vepCacheVersion("1") + .vepNumForks("1") + .vepPath(mockVep.getPath()) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - // 1 load step: check ((documents in DB) == (lines in transformed file)) - //variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - //variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); - iterator = getVariantDBIterator(); - assertEquals(EXPECTED_VARIANTS, count(iterator)); - - // 2 create stats step - assertTrue(statsFile.exists()); - - // 3 load stats step: check ((documents in DB) == (lines in transformed file)) - //variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - //variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); - iterator = getVariantDBIterator(); - assertEquals(EXPECTED_VARIANTS, count(iterator)); - - // check the DB docs have the field "st" - iterator = getVariantDBIterator(); - - assertEquals(1, iterator.next().getSourceEntries().values().iterator().next().getCohortStats().size()); - - // 4 annotation flow - // annotation input vep generate step - BufferedReader testReader = new BufferedReader(new InputStreamReader(new FileInputStream( - getResource("/preannot.sorted")))); - BufferedReader 
actualReader = new BufferedReader(new InputStreamReader(new FileInputStream( - vepInputFile.toString()))); - - ArrayList rows = new ArrayList<>(); - - String s; - while ((s = actualReader.readLine()) != null) { - rows.add(s); - } - Collections.sort(rows); - - String testLine = testReader.readLine(); - for (String row : rows) { - assertEquals(testLine, row); - testLine = testReader.readLine(); - } - assertNull(testLine); // if both files have the same length testReader should be after the last line - - // 5 annotation create step - assertTrue(vepInputFile.exists()); - assertTrue(vepOutputFile.exists()); - - // Check output file length - assertEquals(EXPECTED_ANNOTATIONS, getLines(new GZIPInputStream(new FileInputStream(vepOutput)))); - - // 6 Annotation load step: check documents in DB have annotation (only consequence type) - iterator = getVariantDBIterator(); - - int cnt = 0; - int consequenceTypeCount = 0; - while (iterator.hasNext()) { - cnt++; - Variant next = iterator.next(); - if (next.getAnnotation().getConsequenceTypes() != null) { - consequenceTypeCount += next.getAnnotation().getConsequenceTypes().size(); - } - } - - assertEquals(EXPECTED_VARIANTS, cnt); - assertEquals(EXPECTED_VALID_ANNOTATIONS, consequenceTypeCount); - - //check that one line is skipped because malformed - List variantAnnotationLoadStepExecution = jobExecution.getStepExecutions().stream() - .filter(stepExecution -> stepExecution.getStepName().equals(BeanNames.LOAD_VEP_ANNOTATION_STEP)) - .collect(Collectors.toList()); - assertEquals(1, variantAnnotationLoadStepExecution.get(0).getReadSkipCount()); + GenotypedVcfJobTestUtils.checkLoadStep(databaseName); - } + GenotypedVcfJobTestUtils.checkCreateStatsStep(variantsStatsFile, sourceStatsFile); - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); + GenotypedVcfJobTestUtils.checkLoadStatsStep(databaseName); - input = jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF); - outputDir = 
jobOptions.getOutputDir(); - compressExtension = jobOptions.getPipelineOptions().getString("compressExtension"); - dbName = jobOptions.getPipelineOptions().getString(JobParametersNames.DB_NAME); - vepInput = jobOptions.getPipelineOptions().getString(JobOptions.VEP_INPUT); - vepOutput = jobOptions.getPipelineOptions().getString(JobOptions.VEP_OUTPUT); - } + GenotypedVcfJobTestUtils.checkAnnotationInput(vepInputFile); + + GenotypedVcfJobTestUtils.checkAnnotationCreateStep(vepInputFile,vepOutputFile); + + GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); + + GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + + GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); - private VariantDBIterator getVariantDBIterator() throws IllegalAccessException, - ClassNotFoundException, InstantiationException, StorageManagerException { - VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); - return variantDBAdaptor.iterator(new QueryOptions()); } + @Test + public void aggregationIsNotAllowed() throws Exception { + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + mongoRule.getTemporaryDatabase(databaseName); + Config.setOpenCGAHome(opencgaHome); + File mockVep = GenotypedVcfJobTestUtils.getMockVep(); + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); + + File fasta = temporaryFolderRule.newFile(); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName(GenotypedVcfJobTestUtils.COLLECTION_FILES_NAME) + .collectionVariantsName(GenotypedVcfJobTestUtils.COLLECTION_VARIANTS_NAME) + .databaseName(databaseName) + .inputFasta(fasta.getAbsolutePath()) + .inputVcf(GenotypedVcfJobTestUtils.getInputFile().getAbsolutePath()) + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) 
+ .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) + .inputStudyName("inputStudyName") + .inputStudyType("COLLECTION") + .inputVcfAggregation("BASIC") + .outputDirAnnotation(outputDirAnnotation) + .outputDirStats(outputDirStats) + .vepCachePath("") + .vepCacheSpecies("human") + .vepCacheVersion("1") + .vepNumForks("1") + .vepPath(mockVep.getPath()) + .timestamp() + .toJobParameters(); + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); + + assertEquals(ExitStatus.FAILED, jobExecution.getExitStatus()); + assertEquals(BatchStatus.FAILED, jobExecution.getStatus()); + } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java index 0a1dd839c..241a3f59b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/GenotypedVcfJobWorkflowTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,37 +16,33 @@ package uk.ac.ebi.eva.pipeline.jobs; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; import org.opencb.opencga.lib.common.Config; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.StepExecution; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; - +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import java.io.File; -import java.nio.file.Paths; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; @@ -54,44 +50,38 @@ import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; +import static 
uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Workflow test for {@link GenotypedVcfJob} + *

+ * TODO The test should fail when we will integrate the JobParameter validation since there are empty parameters for VEP */ @RunWith(SpringRunner.class) -@SpringBootTest -@TestPropertySource({"classpath:genotyped-vcf-workflow.properties"}) +@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) public class GenotypedVcfJobWorkflowTest { private static final String MOCK_VEP = "/mockvep.pl"; - //TODO check later to substitute files for temporary ones / pay attention to vep Input file + private static final String INPUT_FILE = "/input-files/vcf/genotyped.vcf.gz"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - private JobLauncherTestUtils jobLauncherTestUtils; + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Autowired - private JobOptions jobOptions; - - private String inputFileResouce; - - private String outputDir; - - private String compressExtension; - - private String vepInput; + private JobLauncherTestUtils jobLauncherTestUtils; - private String vepOutput; + private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? System + .getenv("OPENCGA_HOME") : "/opt/opencga"; - private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? 
System.getenv("OPENCGA_HOME") : - "/opt/opencga"; + @Autowired + private JobOptions jobOptions; // we need this for stats.skip and annot.skip public static final Set EXPECTED_REQUIRED_STEP_NAMES = new TreeSet<>( Arrays.asList(BeanNames.LOAD_VARIANTS_STEP, BeanNames.LOAD_FILE_STEP)); @@ -101,13 +91,14 @@ public class GenotypedVcfJobWorkflowTest { public static final Set EXPECTED_ANNOTATION_STEP_NAMES = new TreeSet<>( Arrays.asList(BeanNames.GENERATE_VEP_INPUT_STEP, BeanNames.GENERATE_VEP_ANNOTATION_STEP, - BeanNames.LOAD_VEP_ANNOTATION_STEP)); + BeanNames.LOAD_VEP_ANNOTATION_STEP)); @Test public void allStepsShouldBeExecuted() throws Exception { - initVariantConfigurationJob(); + EvaJobParameterBuilder builder = initVariantConfigurationJob(); + JobParameters jobParameters = builder.toJobParameters(); - JobExecution execution = jobLauncherTestUtils.launchJob(); + JobExecution execution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); @@ -141,27 +132,25 @@ public void allStepsShouldBeExecuted() throws Exception { @Test public void optionalStepsShouldBeSkipped() throws Exception { - initVariantConfigurationJob(); + EvaJobParameterBuilder builder = initVariantConfigurationJob(); + JobParameters jobParameters = builder.annotationSkip(true).statisticsSkip(true).toJobParameters(); - jobOptions.getPipelineOptions().put(JobParametersNames.ANNOTATION_SKIP, true); - jobOptions.getPipelineOptions().put(JobParametersNames.STATISTICS_SKIP, true); - - JobExecution execution = jobLauncherTestUtils.launchJob(); + JobExecution execution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); Set names = execution.getStepExecutions().stream().map(StepExecution::getStepName) - .collect(Collectors.toSet()); + .collect(Collectors.toSet()); assertEquals(EXPECTED_REQUIRED_STEP_NAMES, names); } @Test public void statsStepsShouldBeSkipped() throws Exception { - 
initVariantConfigurationJob(); - jobOptions.getPipelineOptions().put(JobParametersNames.STATISTICS_SKIP, true); + EvaJobParameterBuilder builder = initVariantConfigurationJob(); + JobParameters jobParameters = builder.statisticsSkip(true).toJobParameters(); - JobExecution execution = jobLauncherTestUtils.launchJob(); + JobExecution execution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); @@ -190,10 +179,10 @@ public void statsStepsShouldBeSkipped() throws Exception { @Test public void annotationStepsShouldBeSkipped() throws Exception { - initVariantConfigurationJob(); - jobOptions.getPipelineOptions().put(JobParametersNames.ANNOTATION_SKIP, true); + EvaJobParameterBuilder builder = initVariantConfigurationJob(); + JobParameters jobParameters = builder.annotationSkip(true).toJobParameters(); - JobExecution execution = jobLauncherTestUtils.launchJob(); + JobExecution execution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, execution.getExitStatus()); @@ -220,52 +209,35 @@ public void annotationStepsShouldBeSkipped() throws Exception { .before(nameToStepExecution.get(BeanNames.LOAD_STATISTICS_STEP).getStartTime())); } - /** - * JobLauncherTestUtils is initialized here because in GenotypedVcfJob there are two Job beans - * in this way it is possible to specify the Job to run (and avoid NoUniqueBeanDefinitionException) - * - * @throws Exception - */ - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - - inputFileResouce = jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF); - outputDir = jobOptions.getOutputDir(); - compressExtension = jobOptions.getPipelineOptions().getString("compressExtension"); - vepInput = jobOptions.getPipelineOptions().getString(JobOptions.VEP_INPUT); - vepOutput = jobOptions.getPipelineOptions().getString(JobOptions.VEP_OUTPUT); - } - - private void initVariantConfigurationJob() { - 
mongoRule.getTemporaryDatabase(jobOptions.getDbName()); - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, - getResource(inputFileResouce).getAbsolutePath()); - jobOptions.getPipelineOptions().put(JobParametersNames.APP_VEP_PATH, getResource(MOCK_VEP).getAbsolutePath()); - + private EvaJobParameterBuilder initVariantConfigurationJob() throws IOException { Config.setOpenCGAHome(opencgaHome); - - // transformedVcf file init - String transformedVcf = outputDir + inputFileResouce + ".variants.json" + compressExtension; - File transformedVcfFile = new File(transformedVcf); - transformedVcfFile.delete(); - assertFalse(transformedVcfFile.exists()); - - //stats file init - VariantSource source = (VariantSource) jobOptions.getVariantOptions().get(VariantStorageManager.VARIANT_SOURCE); - File statsFile = new File(Paths.get(outputDir).resolve(VariantStorageManager.buildFilename(source)) - + ".variants.stats.json.gz"); - statsFile.delete(); - assertFalse(statsFile.exists()); // ensure the stats file doesn't exist from previous executions - - // annotation files init - File vepInputFile = new File(vepInput); - vepInputFile.delete(); - assertFalse(vepInputFile.exists()); - - File vepOutputFile = new File(vepOutput); - vepOutputFile.delete(); - assertFalse(vepOutputFile.exists()); + File inputFile = getResource(INPUT_FILE); + String dbName = mongoRule.getRandomTemporaryDatabaseName(); + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); + File fasta = temporaryFolderRule.newFile(); + + EvaJobParameterBuilder evaJobParameterBuilder = new EvaJobParameterBuilder() + .collectionFilesName("files") + .collectionVariantsName("variants") + .databaseName(dbName) + .inputFasta(fasta.getAbsolutePath()) + .inputStudyId("genotyped-job-workflow") + .inputStudyName("inputStudyName") + .inputStudyType("COLLECTION") + .inputVcf(inputFile.getAbsolutePath()) + 
.inputVcfAggregation("NONE") + .inputVcfId("1") + .outputDirAnnotation(outputDirAnnotation) + .outputDirStats(outputDirStats) + .timestamp() + .vepCachePath("") + .vepCacheSpecies("human") + .vepCacheVersion("1") + .vepNumForks("1") + .vepPath(getResource(MOCK_VEP).getPath()); + + return evaJobParameterBuilder; } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java index 04c80adad..2fd0e07b2 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/PopulationStatisticsJobTest.java @@ -15,13 +15,9 @@ */ package uk.ac.ebi.eva.pipeline.jobs; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; -import org.opencb.datastore.core.ObjectMap; import org.opencb.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.StorageManagerFactory; import org.opencb.opencga.storage.core.variant.VariantStorageManager; @@ -30,39 +26,36 @@ import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import 
uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import uk.ac.ebi.eva.utils.URLHelper; import java.io.File; -import java.io.IOException; -import java.nio.file.Paths; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.opencb.opencga.storage.core.variant.VariantStorageManager.VARIANT_SOURCE; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.copyResource; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link PopulationStatisticsJob} */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:common-configuration.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) public class PopulationStatisticsJobTest { - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; + private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; - private static final String VARIANTS_FILE_NAME = "/1_1.variants.stats.json.gz"; - private static final String SOURCE_FILE_NAME = "/1_1.source.stats.json.gz"; - private static final String VCF_FILE_NAME = "/small20.vcf.gz.variants.json.gz"; @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -72,71 +65,44 @@ public class PopulationStatisticsJobTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; - - private ObjectMap variantOptions; - private ObjectMap pipelineOptions; @Test public void fullPopulationStatisticsJob() throws Exception { //Given a valid VCF input file String input = SMALL_VCF_FILE; - - pipelineOptions.put(JobParametersNames.INPUT_VCF, input); - - 
VariantSource source = new VariantSource( - input, - "1", - "1", - "studyName", - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - - variantOptions.put(VARIANT_SOURCE, source); - - initStatsLoadStepFiles(); - - JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + String statsDir = temporaryFolderRule.getRoot().getPath(); + String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String fileId = "1"; + String studyId = "1"; + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName("files") + .collectionVariantsName("variants") + .databaseName(dbName) + .inputStudyId(studyId) + .inputVcf(getResource(input).getAbsolutePath()) + .inputVcfAggregation("BASIC") + .inputVcfId(fileId) + .outputDirStats(statsDir) + .timestamp() + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchJob(jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); //and the file containing statistics should exist - File statsFile = new File(Paths.get(pipelineOptions.getString(JobParametersNames.OUTPUT_DIR_STATISTICS)) - .resolve(VariantStorageManager.buildFilename(source)) + ".variants.stats.json.gz"); + File statsFile = new File(URLHelper.getVariantsStatsUri(statsDir, studyId, fileId)); assertTrue(statsFile.exists()); + File sourceStatsFile = new File(URLHelper.getSourceStatsUri(statsDir, studyId, fileId)); + assertTrue(sourceStatsFile.exists()); // The DB docs should have the field "st" VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(jobOptions.getDbName(), null); + VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); VariantDBIterator iterator = variantDBAdaptor.iterator(new QueryOptions()); assertEquals(1, 
iterator.next().getSourceEntries().values().iterator().next().getCohortStats().size()); } - private void initStatsLoadStepFiles() throws IOException, InterruptedException { - String mongoDatabase = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); - jobOptions.setDbName(mongoDatabase); - - String outputDir = temporaryFolderRule.getRoot().getAbsolutePath(); - pipelineOptions.put(JobParametersNames.OUTPUT_DIR_STATISTICS, outputDir); - - // copy stat file to load - copyResource(VARIANTS_FILE_NAME, outputDir); - - // copy source file to load - copyResource(SOURCE_FILE_NAME, outputDir); - - // copy transformed vcf - copyResource(VCF_FILE_NAME, outputDir); - } - - @Before - public void setUp() throws Exception { - //re-initialize common config before each test - jobOptions.loadArgs(); - pipelineOptions = jobOptions.getPipelineOptions(); - variantOptions = jobOptions.getVariantOptions(); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java index f257bdc89..6bcffd05c 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/AnnotationLoaderStepTest.java @@ -26,27 +26,30 @@ import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + +import uk.ac.ebi.eva.commons.models.converters.data.VariantToDBObjectConverter; import 
uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.data.VepOutputContent; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import uk.ac.ebi.eva.utils.URLHelper; -import java.io.File; +import java.nio.file.Paths; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.makeGzipFile; /** @@ -56,13 +59,9 @@ */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) -@TestPropertySource({"classpath:annotation-loader-step.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) public class AnnotationLoaderStepTest { - // TODO vep Output must be passed as a job parameter to allow temporary files. Database name can't be changed to a - // random one. 
- - private static final String DATABASE_NAME = AnnotationLoaderStepTest.class.getSimpleName(); private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; @Rule @@ -73,29 +72,41 @@ public class AnnotationLoaderStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; @Test public void shouldLoadAllAnnotations() throws Exception { - setUp(); - - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VEP_ANNOTATION_STEP); + String annotationFolder = temporaryFolderRule.getRoot().getAbsolutePath(); + String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String collectionVariantsName = "variants"; + String studyId = "1"; + String fileId = "1"; + String vepOutput = URLHelper.resolveVepOutput(annotationFolder, studyId, fileId); + String vepOutputName = Paths.get(vepOutput).getFileName().toString(); + temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContent, vepOutputName); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionVariantsName(collectionVariantsName) + .databaseName(dbName) + .inputStudyId(studyId) + .inputVcfId(fileId) + .outputDirAnnotation(annotationFolder) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VEP_ANNOTATION_STEP, jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); //check that documents have the annotation - DBCursor cursor = mongoRule.getCollection(jobOptions.getDbName(), jobOptions.getDbCollectionsVariantsName()) - .find(); + DBCursor cursor = mongoRule.getCollection(dbName, collectionVariantsName).find(); DBObjectToVariantAnnotationConverter converter = new DBObjectToVariantAnnotationConverter(); - int cnt = 0; + int count = 0; int consequenceTypeCount = 0; while (cursor.hasNext()) { - cnt++; - DBObject dbObject = (DBObject) 
cursor.next().get("annot"); + count++; + DBObject dbObject = (DBObject) cursor.next().get(VariantToDBObjectConverter.ANNOTATION_FIELD); if (dbObject != null) { VariantAnnotation annot = converter.convertToDataModelType(dbObject); Assert.assertNotNull(annot.getConsequenceTypes()); @@ -103,21 +114,8 @@ public void shouldLoadAllAnnotations() throws Exception { } } - assertEquals(300, cnt); + assertEquals(300, count); assertTrue("Annotations not found", consequenceTypeCount > 0); } - private void setUp() throws Exception { - jobOptions.loadArgs(); - jobOptions.setDbName(DATABASE_NAME); - - mongoRule.restoreDump(getResourceUrl(MONGO_DUMP), jobOptions.getDbName()); - - //TODO change for commented lines when vep output file can be passed as a job parameter - //File file = temporaryFolderRule.newGzipFile(VepOutputContent.vepOutputContent); - //jobOptions.setVepOutput(file.getAbsolutePath()); - File file = makeGzipFile(VepOutputContent.vepOutputContent, jobOptions.getVepOutput()); - - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java index 5fc6dee25..0efa023e9 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/IndexesGeneratorStepTest.java @@ -18,24 +18,25 @@ import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; import com.mongodb.DuplicateKeyException; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ContextConfiguration; 
import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.DatabaseInitializationJob; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.IndexesGeneratorStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import static org.junit.Assert.assertEquals; @@ -44,52 +45,55 @@ * Test {@link IndexesGeneratorStep} */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:initialize-database.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {DatabaseInitializationJob.class, BatchTestConfiguration.class}) public class IndexesGeneratorStepTest { + private static final String COLLECTION_FEATURES_NAME = "features"; + @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); - @Autowired - public JobOptions jobOptions; - @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - } - @Test public void testIndexesAreCreated() throws Exception { - jobOptions.setDbName(mongoRule.getRandomTemporaryDatabaseName()); + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + JobParameters jobParameters = new EvaJobParameterBuilder() + .databaseName(databaseName) + .collectionFeaturesName(COLLECTION_FEATURES_NAME) + .toJobParameters(); - String dbCollectionGenesName = jobOptions.getDbCollectionsFeaturesName(); - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CREATE_DATABASE_INDEXES_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CREATE_DATABASE_INDEXES_STEP, + jobParameters); 
assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - DBCollection genesCollection = mongoRule.getCollection(jobOptions.getDbName(), dbCollectionGenesName); + DBCollection genesCollection = mongoRule.getCollection(databaseName, COLLECTION_FEATURES_NAME); assertEquals("[{ \"v\" : 1 , \"key\" : { \"_id\" : 1} , \"name\" : \"_id_\" , \"ns\" : \"" + - jobOptions.getDbName() + "." + dbCollectionGenesName + + databaseName + "." + COLLECTION_FEATURES_NAME + "\"}, { \"v\" : 1 , \"key\" : { \"name\" : 1} , \"name\" : \"name_1\" , \"ns\" : \"" + - jobOptions.getDbName() + "." + dbCollectionGenesName + "\" , \"sparse\" : true , \"background\" : true}]", + databaseName + "." + COLLECTION_FEATURES_NAME + "\" , \"sparse\" : true , \"background\" : true}]", genesCollection.getIndexInfo().toString()); } @Test(expected = DuplicateKeyException.class) public void testNoDuplicatesCanBeInserted() throws Exception { - jobOptions.setDbName(mongoRule.getRandomTemporaryDatabaseName()); - String dbCollectionGenesName = jobOptions.getDbCollectionsFeaturesName(); - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CREATE_DATABASE_INDEXES_STEP); + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + JobParameters jobParameters = new EvaJobParameterBuilder() + .databaseName(databaseName) + .collectionFeaturesName(COLLECTION_FEATURES_NAME) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CREATE_DATABASE_INDEXES_STEP, + jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - DBCollection genesCollection = mongoRule.getCollection(jobOptions.getDbName(), dbCollectionGenesName); + DBCollection genesCollection = mongoRule.getCollection(databaseName, COLLECTION_FEATURES_NAME); genesCollection.insert(new BasicDBObject("_id", "example_id")); 
genesCollection.insert(new BasicDBObject("_id", "example_id")); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java index cfaad7a1a..6650a08ef 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/LoadFileStepTest.java @@ -17,15 +17,13 @@ import com.mongodb.DBCollection; import com.mongodb.DBCursor; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ActiveProfiles; @@ -36,32 +34,28 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; -import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import static org.junit.Assert.assertEquals; -import static org.opencb.opencga.storage.core.variant.VariantStorageManager.VARIANT_SOURCE; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link LoadFileStep} */ @RunWith(SpringRunner.class) 
@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) -@TestPropertySource({"classpath:genotyped-vcf.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) public class LoadFileStepTest { private static final int EXPECTED_FILES = 1; - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; - @Rule - public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + private static final String COLLECTION_FILES_NAME = "files"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -69,43 +63,32 @@ public class LoadFileStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; - - private String input; - @Test public void loaderStepShouldLoadAllFiles() throws Exception { - String outputDir = temporaryFolderRule.getRoot().getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.OUTPUT_DIR, outputDir); + String input = getResource(SMALL_VCF_FILE).getAbsolutePath(); String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - jobOptions.setDbName(databaseName); - jobOptions.getVariantOptions().put(VARIANT_SOURCE, new VariantSource( - input, - "1", - "1", - "studyName", - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE)); + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName(COLLECTION_FILES_NAME) + .collectionVariantsName("variants") + .databaseName(databaseName) + .inputStudyId("1") + .inputVcf(input) + .inputVcfAggregation("NONE") + .inputVcfId("1") + .toJobParameters(); // When the execute method in variantsLoad is executed - JobExecution jobExecution = 
jobLauncherTestUtils.launchStep(BeanNames.LOAD_FILE_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_FILE_STEP, jobParameters); //Then variantsLoad step should complete correctly assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); // And the number of documents in the DB should be equals to the number of VCF files loaded - DBCollection fileCollection = mongoRule.getCollection(databaseName, jobOptions.getDbCollectionsFilesName()); + DBCollection fileCollection = mongoRule.getCollection(databaseName, COLLECTION_FILES_NAME); DBCursor cursor = fileCollection.find(); assertEquals(EXPECTED_FILES, count(cursor)); } - @Before - public void setUp() throws Exception { - input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, input); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PedLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PedLoaderStepTest.java deleted file mode 100644 index 88bac2e99..000000000 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PedLoaderStepTest.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2016 EMBL - European Bioinformatics Institute - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package uk.ac.ebi.eva.pipeline.jobs.steps; - -import com.google.common.collect.Sets; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.opencb.biodata.models.pedigree.Condition; -import org.opencb.biodata.models.pedigree.Individual; -import org.opencb.biodata.models.pedigree.Pedigree; -import org.opencb.biodata.models.pedigree.Sex; -import org.springframework.batch.core.StepContribution; -import org.springframework.batch.core.StepExecution; -import org.springframework.batch.core.scope.context.ChunkContext; -import org.springframework.batch.repeat.RepeatStatus; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.TestPropertySource; -import org.springframework.test.context.junit4.SpringRunner; -import org.springframework.test.util.ReflectionTestUtils; -import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PedLoaderStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; -import uk.ac.ebi.eva.test.configuration.BaseTestConfiguration; - -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; - -/** - * Test for {@link PedLoaderStep} - *

- * Using reflections to inject jobOptions into the tasklet. There are other ways to do this but a been is required. - *

- * TODO: This could be simplified by using StepRunner. That requires using JobParametersinstead of our ownJobOptions - * http://docs.spring.io/spring-batch/apidocs/org/springframework/batch/test/StepRunner.html - */ -@RunWith(SpringRunner.class) -@TestPropertySource({"classpath:common-configuration.properties"}) -@ContextConfiguration(classes = {BaseTestConfiguration.class}) -public class PedLoaderStepTest { - - private static final String PEDIGREE_FILE = "/ped/pedigree-test-file.ped"; - private static final String MALFORMED_PEDIGREE = "/ped/malformed-pedigree-test-file.ped"; - - @Autowired - private JobOptions jobOptions; - - private PedLoaderStep pedLoaderStep; - private ChunkContext chunkContext; - private StepContribution stepContribution; - - @Test - public void allPedFileShouldBeParsedIntoPedigree() throws Exception { - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_PEDIGREE, getResource(PEDIGREE_FILE) - .getAbsolutePath()); - - ReflectionTestUtils.setField(pedLoaderStep, "jobOptions", jobOptions); - RepeatStatus status = pedLoaderStep.execute(stepContribution, chunkContext); - - Pedigree pedigree = pedLoaderStep.getPedigree(); - - assertTrue(status.equals(RepeatStatus.FINISHED)); - - //check that Pedigree.Individuals is correctly populated - assertEquals(4, pedigree.getIndividuals().size()); - Individual individualNA19660 = pedigree.getIndividuals().get("NA19660"); - assertTrue(individualNA19660.getFamily().equals("FAM")); - assertTrue(individualNA19660.getSex().equals("2")); - assertEquals(Sex.FEMALE, individualNA19660.getSexCode()); - assertTrue(individualNA19660.getPhenotype().equals("1")); - assertEquals(Condition.UNAFFECTED, individualNA19660.getCondition()); - assertEquals(2, individualNA19660.getChildren().size()); - assertEquals(Sets.newHashSet("NA19600", "NA19685"), - individualNA19660.getChildren().stream().map(Individual::getId).collect(Collectors.toSet())); - - //check that Pedigree.Families is correctly populated - assertEquals(1, 
pedigree.getFamilies().size()); - assertTrue(pedigree.getFamilies().containsKey("FAM")); - assertEquals(4, pedigree.getFamilies().get("FAM").size()); - } - - @Test(expected = IllegalArgumentException.class) - public void missingLastColumnInPedFileShouldThrowsException() throws Exception { - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_PEDIGREE, getResource(MALFORMED_PEDIGREE) - .getAbsolutePath()); - - ReflectionTestUtils.setField(pedLoaderStep, "jobOptions", jobOptions); - pedLoaderStep.execute(stepContribution, chunkContext); - } - - @Before - public void setUp() throws Exception { - pedLoaderStep = new PedLoaderStep(); - chunkContext = new ChunkContext(null); - stepContribution = new StepContribution(new StepExecution("PedLoader", null)); - - jobOptions.loadArgs(); - } - -} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java index 95b86fcad..db55ebc04 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsGeneratorStepTest.java @@ -15,50 +15,46 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; -import org.opencb.opencga.storage.core.variant.VariantStorageManager; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ContextConfiguration; import 
org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsGeneratorStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import uk.ac.ebi.eva.utils.URLHelper; import java.io.File; import java.io.IOException; -import java.nio.file.Paths; +import java.net.URISyntaxException; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.opencb.opencga.storage.core.variant.VariantStorageManager.VARIANT_SOURCE; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; /** * Test for {@link PopulationStatisticsGeneratorStep} */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:common-configuration.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) public class PopulationStatisticsGeneratorStepTest { + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; - private static final String STATS_FILE_SUFFIX = ".variants.stats.json.gz"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; @Rule @@ -69,24 +65,31 @@ public class PopulationStatisticsGeneratorStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private 
JobOptions jobOptions; @Test - public void statisticsGeneratorStepShouldCalculateStats() throws IOException, InterruptedException { + public void statisticsGeneratorStepShouldCalculateStats() throws IOException, InterruptedException, URISyntaxException { //Given a valid VCF input file - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, SMALL_VCF_FILE); - //and a valid variants load step already completed - mongoRule.restoreDump(getResourceUrl(MONGO_DUMP), jobOptions.getDbName()); - - VariantSource source = configureVariantSource(); - configureTempOutput(); - - File statsFile = getStatsFile(source); - assertFalse(statsFile.exists()); // ensure the stats file doesn't exist from previous executions + String databaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String statsDir = temporaryFolderRule.newFolder().getAbsolutePath(); + String studyId = "1"; + String fileId = "1"; + + JobParameters jobParameters = new EvaJobParameterBuilder() + .databaseName(databaseName) + .inputVcf(SMALL_VCF_FILE) + .inputStudyId(studyId) + .inputVcfId(fileId) + .outputDirStats(statsDir) + .toJobParameters(); + + // and non-existent variants stats file and variantSource stats file + File statsFile = new File(URLHelper.getVariantsStatsUri(statsDir, studyId, fileId)); + assertFalse(statsFile.exists()); + File sourceStatsFile = new File(URLHelper.getSourceStatsUri(statsDir, studyId, fileId)); + assertFalse(sourceStatsFile.exists()); // When the execute method in variantsStatsCreate is executed - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CALCULATE_STATISTICS_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CALCULATE_STATISTICS_STEP, jobParameters); //Then variantsStatsCreate step should complete correctly assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); @@ -94,6 +97,7 @@ public void statisticsGeneratorStepShouldCalculateStats() throws IOException, In //and the file 
containing statistics should exist assertTrue(statsFile.exists()); + assertTrue(sourceStatsFile.exists()); } /** @@ -103,47 +107,27 @@ public void statisticsGeneratorStepShouldCalculateStats() throws IOException, In @Test public void statisticsGeneratorStepShouldFailIfVariantLoadStepIsNotCompleted() throws Exception { //Given a valid VCF input file - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, SMALL_VCF_FILE); - - VariantSource source = configureVariantSource(); - configureTempOutput(); - - File statsFile = getStatsFile(source); - assertFalse(statsFile.exists()); // ensure the stats file doesn't exist from previous executions + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + String statsDir = temporaryFolderRule.newFolder().getAbsolutePath(); + String wrongId = "non-existent-id"; + + JobParameters jobParameters = new EvaJobParameterBuilder() + .databaseName(databaseName) + .inputVcf(SMALL_VCF_FILE) + .inputStudyId(wrongId) + .inputVcfId(wrongId) + .outputDirStats(statsDir) + .toJobParameters(); + + // and non-existent variants stats file and variantSource stats file + File statsFile = new File(URLHelper.getVariantsStatsUri(statsDir, wrongId, wrongId)); + assertFalse(statsFile.exists()); + File sourceStatsFile = new File(URLHelper.getSourceStatsUri(statsDir, wrongId, wrongId)); + assertFalse(sourceStatsFile.exists()); // When the execute method in variantsStatsCreate is executed - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CALCULATE_STATISTICS_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.CALCULATE_STATISTICS_STEP, jobParameters); assertEquals(ExitStatus.FAILED.getExitCode(), jobExecution.getExitStatus().getExitCode()); } - private void configureTempOutput() throws IOException { - String tempFolder = temporaryFolderRule.newFolder().getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.OUTPUT_DIR_STATISTICS, tempFolder); - } - - private 
VariantSource configureVariantSource() { - VariantSource source = new VariantSource( - SMALL_VCF_FILE, - "1", - "1", - "studyName", - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE); - jobOptions.getVariantOptions().put(VARIANT_SOURCE, source); - return source; - } - - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - jobOptions.setDbName(getClass().getSimpleName()); - } - - private File getStatsFile(VariantSource source) { - return new File( - Paths.get(jobOptions.getPipelineOptions().getString(JobParametersNames.OUTPUT_DIR_STATISTICS)) - .resolve(VariantStorageManager.buildFilename(source)) - + STATS_FILE_SUFFIX - ); - } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java index 136831ea2..d2699e1a8 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/PopulationStatisticsLoaderStepTest.java @@ -1,60 +1,63 @@ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.junit.Before; +import com.mongodb.DBCursor; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.datastore.core.QueryOptions; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.opencga.storage.core.StorageManagerException; -import org.opencb.opencga.storage.core.StorageManagerFactory; import org.opencb.opencga.storage.core.variant.VariantStorageManager; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator; +import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter; +import 
org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantSourceEntryConverter; +import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantStatsConverter; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobExecutionException; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.rule.OutputCapture; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.PopulationStatisticsJob; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.PopulationStatisticsLoaderStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; -import java.io.File; import java.io.IOException; +import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static uk.ac.ebi.eva.test.utils.TestFileUtils.copyResource; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link PopulationStatisticsLoaderStep} */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:common-configuration.properties"}) 
+@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {PopulationStatisticsJob.class, BatchTestConfiguration.class}) public class PopulationStatisticsLoaderStepTest { - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; - private static final String SOURCE_FILE_NAME = "/1_1.source.stats.json.gz"; - private static final String VARIANTS_FILE_NAME = "/1_1.variants.stats.json.gz"; - private static final String VCF_FILE_NAME = "/small20.vcf.gz.variants.json.gz"; + private static final String SOURCE_FILE_NAME = "/input-files/statistics/1_1.source.stats.json.gz"; + private static final String VARIANTS_FILE_NAME = "/input-files/statistics/1_1.variants.stats.json.gz"; private static final String FILE_NOT_FOUND_EXCEPTION = "java.io.FileNotFoundException:"; + private static final String COLLECTION_FILES_NAME = "files"; + private static final String COLLECTION_VARIANTS_NAME = "variants"; + @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @@ -63,8 +66,6 @@ public class PopulationStatisticsLoaderStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; //Capture error output @Rule @@ -75,28 +76,46 @@ public void statisticsLoaderStepShouldLoadStatsIntoDb() throws StorageManagerExc ClassNotFoundException, InstantiationException, IOException, InterruptedException { //Given a valid VCF input file String input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - VariantSource source = new VariantSource(input, "1", "1", "studyName"); - - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, input); - jobOptions.getVariantOptions().put(VariantStorageManager.VARIANT_SOURCE, source); + String fileId = "1"; + 
String studyId = "1"; + String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String statsDir = temporaryFolderRule.newFolder().getAbsolutePath(); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionFilesName(COLLECTION_FILES_NAME) + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(dbName) + .inputStudyId(studyId) + .inputVcf(input) + .inputVcfId(fileId) + .outputDirStats(statsDir) + .toJobParameters(); //and a valid variants load and stats create steps already completed - jobOptions.setDbName(mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP))); - - copyFilesToOutpurDir(createTempDirectoryForStatistics()); + copyFilesToOutpurDir(statsDir); // When the execute method in variantsStatsLoad is executed - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_STATISTICS_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_STATISTICS_STEP, jobParameters); // Then variantsStatsLoad step should complete correctly assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); // The DB docs should have the field "st" - VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); - VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(jobOptions.getDbName(), null); - VariantDBIterator iterator = variantDBAdaptor.iterator(new QueryOptions()); - assertEquals(1, iterator.next().getSourceEntries().values().iterator().next().getCohortStats().size()); + DBCursor cursor = mongoRule.getCollection(dbName, COLLECTION_VARIANTS_NAME).find(); + assertEquals(1, getCohortStatsFromFirstVariant(cursor).size()); + } + + private Map getCohortStatsFromFirstVariant(DBCursor cursor) { + DBObjectToVariantConverter variantConverter = getVariantConverter(); + Variant variant = variantConverter.convertToDataModelType(cursor.iterator().next()); 
+ return variant.getSourceEntries().values().iterator().next().getCohortStats(); + } + + private DBObjectToVariantConverter getVariantConverter() { + return new DBObjectToVariantConverter( + new DBObjectToVariantSourceEntryConverter(VariantStorageManager.IncludeSrc.FIRST_8_COLUMNS), + new DBObjectToVariantStatsConverter()); } private void copyFilesToOutpurDir(String outputDir) throws IOException { @@ -104,36 +123,31 @@ private void copyFilesToOutpurDir(String outputDir) throws IOException { copyResource(VARIANTS_FILE_NAME, outputDir); // copy source file to load copyResource(SOURCE_FILE_NAME, outputDir); - // copy transformed vcf - copyResource(VCF_FILE_NAME, outputDir); - } - - private String createTempDirectoryForStatistics() { - File temporaryFolder = temporaryFolderRule.getRoot(); - jobOptions.getPipelineOptions().put(JobParametersNames.OUTPUT_DIR_STATISTICS, temporaryFolder); - String outputDir = temporaryFolder.getAbsolutePath(); - return outputDir; } @Test - public void statisticsLoaderStepShouldFaildBecauseVariantStatsFileIsMissing() throws JobExecutionException { + public void statisticsLoaderStepShouldFaildBecauseVariantStatsFileIsMissing() + throws JobExecutionException, IOException, InterruptedException { String input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - VariantSource source = new VariantSource(input, "4", "1", "studyName"); - - jobOptions.setDbName(mongoRule.getRandomTemporaryDatabaseName()); - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, input); - jobOptions.getVariantOptions().put(VariantStorageManager.VARIANT_SOURCE, source); - - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_STATISTICS_STEP); + String fileId = "1"; + String studyId = "1"; + String dbName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String statsDir = temporaryFolderRule.newFolder().getAbsolutePath(); + + JobParameters jobParameters = new EvaJobParameterBuilder() + 
.collectionFilesName("files") + .collectionVariantsName("variants") + .databaseName(dbName) + .inputStudyId(studyId) + .inputVcf(input) + .inputVcfId(fileId) + .outputDirStats(statsDir) + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_STATISTICS_STEP, jobParameters); assertThat(capture.toString(), containsString(FILE_NOT_FOUND_EXCEPTION)); - assertEquals(input, jobOptions.getPipelineOptions().getString(JobParametersNames.INPUT_VCF)); assertEquals(ExitStatus.FAILED.getExitCode(), jobExecution.getExitStatus().getExitCode()); } - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - } - } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java index 3c27b272f..3bceb2b7b 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VariantLoaderStepTest.java @@ -19,8 +19,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.opencb.biodata.models.variant.VariantSource; -import org.opencb.biodata.models.variant.VariantStudy; import org.opencb.datastore.core.QueryOptions; import org.opencb.opencga.lib.common.Config; import org.opencb.opencga.storage.core.StorageManagerFactory; @@ -30,6 +28,7 @@ import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ActiveProfiles; @@ -40,32 +39,26 @@ import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob; -import 
uk.ac.ebi.eva.pipeline.parameters.JobOptions; -import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; -import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; import static org.junit.Assert.assertEquals; -import static org.opencb.opencga.storage.core.variant.VariantStorageManager.VARIANT_SOURCE; import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link VariantLoaderStep} */ @RunWith(SpringRunner.class) @ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE}) -@TestPropertySource({"classpath:genotyped-vcf.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) @ContextConfiguration(classes = {GenotypedVcfJob.class, BatchTestConfiguration.class}) public class VariantLoaderStepTest { private static final int EXPECTED_VARIANTS = 300; - private static final String SMALL_VCF_FILE = "/small20.vcf.gz"; - - @Rule - public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + private static final String SMALL_VCF_FILE = "/input-files/vcf/genotyped.vcf.gz"; @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); @@ -73,9 +66,6 @@ public class VariantLoaderStepTest { @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; - private String input; private static String opencgaHome = System.getenv("OPENCGA_HOME") != null ? 
System.getenv( @@ -83,23 +73,20 @@ public class VariantLoaderStepTest { @Test public void loaderStepShouldLoadAllVariants() throws Exception { - String outputDir = temporaryFolderRule.getRoot().getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.OUTPUT_DIR, outputDir); - Config.setOpenCGAHome(opencgaHome); - String databaseName = mongoRule.getRandomTemporaryDatabaseName(); - jobOptions.setDbName(databaseName); - jobOptions.getVariantOptions().put(VARIANT_SOURCE, new VariantSource( - input, - "1", - "1", - "studyName", - VariantStudy.StudyType.COLLECTION, - VariantSource.Aggregation.NONE)); // When the execute method in variantsLoad is executed - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VARIANTS_STEP); + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionVariantsName("variants") + .databaseName(databaseName) + .inputStudyId("1") + .inputVcf(input) + .inputVcfAggregation("NONE") + .inputVcfId("1") + .toJobParameters(); + + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VARIANTS_STEP, jobParameters); //Then variantsLoad step should complete correctly assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); @@ -116,7 +103,6 @@ public void loaderStepShouldLoadAllVariants() throws Exception { @Before public void setUp() throws Exception { input = getResource(SMALL_VCF_FILE).getAbsolutePath(); - jobOptions.getPipelineOptions().put(JobParametersNames.INPUT_VCF, input); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepAnnotationGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepAnnotationGeneratorStepTest.java index 2f84629c0..0faa9e526 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepAnnotationGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepAnnotationGeneratorStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 
2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,14 +15,13 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.junit.Assert; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ActiveProfiles; @@ -33,10 +32,11 @@ import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; import uk.ac.ebi.eva.pipeline.jobs.steps.tasklets.VepAnnotationGeneratorStep; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.utils.JobTestUtils; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import uk.ac.ebi.eva.utils.URLHelper; import java.io.File; import java.io.FileInputStream; @@ -44,49 +44,58 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResource; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; /** * Test for {@link VepAnnotationGeneratorStep} */ @RunWith(SpringRunner.class) @ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) -@TestPropertySource("classpath:annotation.properties") +@TestPropertySource("classpath:common-configuration.properties") @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) public class VepAnnotationGeneratorStepTest { - private static final 
String VEP_INPUT_CONTENT = "20\t60343\t60343\tG/A\t+"; private static final String MOCKVEP = "/mockvep.pl"; + + private static final String STUDY_ID = "7"; + + private static final String FILE_ID = "5"; + @Rule public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; - - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - jobOptions.setAppVepPath(getResource(MOCKVEP)); - } @Test public void shouldGenerateVepAnnotations() throws Exception { - jobOptions.setVepInputFile(temporaryFolderRule.newGzipFile(VEP_INPUT_CONTENT).getAbsolutePath()); - File vepOutputFile = temporaryFolderRule.newFile(); - jobOptions.setVepOutput(vepOutputFile.getAbsolutePath()); + File vepOutputFolder = temporaryFolderRule.newFolder(); + + JobParameters jobParameters = new EvaJobParameterBuilder() + .inputFasta("") + .inputStudyId(STUDY_ID) + .inputVcfId(FILE_ID) + .outputDirAnnotation(vepOutputFolder.getAbsolutePath()) + .vepCachePath("") + .vepCacheSpecies("") + .vepCacheVersion("") + .vepNumForks("") + .vepPath(getResource(MOCKVEP).getPath()) + .toJobParameters(); // When the execute method in variantsAnnotCreate is executed - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.GENERATE_VEP_ANNOTATION_STEP); + JobExecution jobExecution = jobLauncherTestUtils + .launchStep(BeanNames.GENERATE_VEP_ANNOTATION_STEP, jobParameters); //Then variantsAnnotCreate step should complete correctly assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); // And VEP output should exist and annotations should be in the file + File vepOutputFile = new File(URLHelper.resolveVepOutput(vepOutputFolder.getAbsolutePath(), STUDY_ID, FILE_ID)); + assertTrue(vepOutputFile.exists()); - Assert.assertEquals(537, JobTestUtils.getLines(new GZIPInputStream(new 
FileInputStream(vepOutputFile)))); + assertEquals(537, JobTestUtils.getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStepTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStepTest.java index fa1d8cdf2..43e1862df 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStepTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/VepInputGeneratorStepTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2016 EMBL - European Bioinformatics Institute + * Copyright 2016-2017 EMBL - European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,28 +15,32 @@ */ package uk.ac.ebi.eva.pipeline.jobs.steps; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestPropertySource; import org.springframework.test.context.junit4.SpringRunner; + +import uk.ac.ebi.eva.pipeline.Application; import uk.ac.ebi.eva.pipeline.configuration.BeanNames; import uk.ac.ebi.eva.pipeline.jobs.AnnotationJob; -import uk.ac.ebi.eva.pipeline.parameters.JobOptions; import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.utils.EvaJobParameterBuilder; +import 
uk.ac.ebi.eva.utils.URLHelper; import java.io.File; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static uk.ac.ebi.eva.test.utils.JobTestUtils.readFirstLine; import static uk.ac.ebi.eva.test.utils.TestFileUtils.getResourceUrl; @@ -45,42 +49,49 @@ * Test {@link VepInputGeneratorStep} */ @RunWith(SpringRunner.class) -@TestPropertySource({"classpath:vep-input-generator-step.properties"}) +@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"}) +@ActiveProfiles(Application.VARIANT_ANNOTATION_MONGO_PROFILE) @ContextConfiguration(classes = {AnnotationJob.class, BatchTestConfiguration.class}) public class VepInputGeneratorStepTest { private static final String MONGO_DUMP = "/dump/VariantStatsConfigurationTest_vl"; + + private static final String STUDY_ID = "7"; + + private static final String FILE_ID = "5"; + + private static final String COLLECTION_VARIANTS_NAME = "variants"; + @Rule public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + @Autowired private JobLauncherTestUtils jobLauncherTestUtils; - @Autowired - private JobOptions jobOptions; - - @Before - public void setUp() throws Exception { - jobOptions.loadArgs(); - } @Test public void shouldGenerateVepInput() throws Exception { - // TODO This test can't be changed to use temporary directory right now, as vepInput is a composite parameter - // that is not being recalculated at execution time. 
- mongoRule.restoreDump(getResourceUrl(MONGO_DUMP), jobOptions.getDbName()); - File vepInputFile = new File(jobOptions.getVepInput()); - - if (vepInputFile.exists()) - vepInputFile.delete(); + String randomTemporaryDatabaseName = mongoRule.restoreDumpInTemporaryDatabase(getResourceUrl(MONGO_DUMP)); + String outputDirAnnot = temporaryFolderRule.getRoot().getAbsolutePath(); + File vepInput = new File(URLHelper.resolveVepInput(outputDirAnnot, STUDY_ID, FILE_ID)); + temporaryFolderRule.newFile(vepInput.getName()); - assertFalse(vepInputFile.exists()); + JobParameters jobParameters = new EvaJobParameterBuilder() + .collectionVariantsName(COLLECTION_VARIANTS_NAME) + .databaseName(randomTemporaryDatabaseName) + .inputStudyId(STUDY_ID) + .inputVcfId(FILE_ID) + .outputDirAnnotation(outputDirAnnot) + .toJobParameters(); - JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.GENERATE_VEP_INPUT_STEP); + JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.GENERATE_VEP_INPUT_STEP, jobParameters); assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); - assertTrue(vepInputFile.exists()); - assertEquals("20\t60343\t60343\tG/A\t+", readFirstLine(vepInputFile)); + assertTrue(vepInput.exists()); + assertEquals("20\t60343\t60343\tG/A\t+", readFirstLine(vepInput)); } } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/processor/GeneFilterProcessorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/processor/GeneFilterProcessorTest.java index 5b3977d51..777b48201 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/processor/GeneFilterProcessorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/jobs/steps/processor/GeneFilterProcessorTest.java @@ -45,7 +45,6 @@ public void shouldKeepGenesAndTranscripts() throws Exception { ExecutionContext executionContext = MetaDataInstanceFactory.createStepExecution().getExecutionContext(); GeneFilterProcessor 
geneFilterProcessor = new GeneFilterProcessor(); - //simulate VEP output file File file = temporaryFolderRule.newGzipFile(GtfStaticTestData.GTF_CONTENT); GeneReader geneReader = new GeneReader(file); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverterTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverterTest.java index 4f81c92e7..7ce1d63d1 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/model/converters/data/VariantToMongoDbObjectConverterTest.java @@ -33,7 +33,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; /** * Test {@link VariantToMongoDbObjectConverter} @@ -52,7 +51,7 @@ public class VariantToMongoDbObjectConverterTest { @Test(expected = IllegalArgumentException.class) public void convertNullVariantShouldThrowAnException() { - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, false, true, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, true); variantToMongoDbObjectConverter.convert(null); } @@ -66,7 +65,7 @@ public void allFieldsOfVariantShouldBeConverted() { Variant variant = buildVariant(chromosome, start, end, reference, alternate, fileId, studyId); - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, false, true, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, true); conversionService.addConverter(variantToMongoDbObjectConverter); DBObject dbObject = conversionService.convert(variant, DBObject.class); @@ -90,7 +89,7 @@ public void allFieldsOfVariantShouldBeConverted() { public void includeStatsTrueShouldIncludeStatistics() { Variant variant = buildVariant("12", 3, 4, "A", "T", 
fileId, studyId); - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(true, true, false, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(true, false); conversionService.addConverter(variantToMongoDbObjectConverter); DBObject dbObject = conversionService.convert(variant, DBObject.class); @@ -105,7 +104,7 @@ public void includeStatsTrueShouldIncludeStatistics() { public void includeStatsFalseShouldNotIncludeStatistics() { Variant variant = buildVariant("12", 3, 4, "A", "T", fileId, studyId); - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, false, true, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, true); conversionService.addConverter(variantToMongoDbObjectConverter); DBObject dbObject = conversionService.convert(variant, DBObject.class); @@ -121,7 +120,7 @@ public void idsIfPresentShouldBeWrittenIntoTheVariant() { Variant variant = buildVariant("12", 3, 4, "A", "T", fileId, studyId); variant.setIds(Sets.newHashSet("a", "b", "c")); - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, false, true, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, true); conversionService.addConverter(variantToMongoDbObjectConverter); DBObject dbObject = conversionService.convert(variant, DBObject.class); @@ -136,7 +135,7 @@ public void idsIfPresentShouldBeWrittenIntoTheVariant() { public void idsIfNotPresentShouldNotBeWrittenIntoTheVariant() { Variant variant = buildVariant("12", 3, 4, "A", "T", fileId, studyId); - variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, false, true, includeSrc); + variantToMongoDbObjectConverter = new VariantToMongoDbObjectConverter(false, true); conversionService.addConverter(variantToMongoDbObjectConverter); DBObject dbObject = conversionService.convert(variant, DBObject.class); diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidatorTest.java new file mode 100644 index 000000000..bc6c46af6 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/DbCollectionsFeaturesNameValidatorTest.java @@ -0,0 +1,63 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +public class DbCollectionsFeaturesNameValidatorTest { + + private DbCollectionsFeaturesNameValidator validator; + + private JobParametersBuilder jobParametersBuilder; + + @Before + public void setUp() throws Exception { + validator = new DbCollectionsFeaturesNameValidator(); + } + + @Test + public void collectionsFeaturesNameIsValid() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, "collectionsFeaturesName"); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsFeaturesNameIsEmpty() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, ""); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsFeaturesNameIsWhitespace() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, " "); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void collectionsFeaturesNameIsNull() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, null); + validator.validate(jobParametersBuilder.toJobParameters()); + } +} diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidatorTest.java new file mode 100644 index 000000000..6dcfc6d95 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputGtfValidatorTest.java @@ -0,0 +1,76 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; + +import java.io.File; +import java.io.IOException; + +public class InputGtfValidatorTest { + + private InputGtfValidator validator; + + private JobParametersBuilder jobParametersBuilder; + + @Rule + public PipelineTemporaryFolderRule temporaryFolder = new PipelineTemporaryFolderRule(); + + @Before + public void setUp() throws Exception { + validator = new InputGtfValidator(); + } + + @Test + public void inputGtfIsValid() throws JobParametersInvalidException, IOException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_GTF, + 
temporaryFolder.newFile().getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputGtfNotExist() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_GTF, "file://path/to/file.vcf"); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputGtfNotReadable() throws JobParametersInvalidException, IOException { + File file = temporaryFolder.newFile("not_readable.fa"); + file.setReadable(false); + + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_GTF, file.getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputGtfIsADirectory() throws JobParametersInvalidException, IOException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_GTF, + temporaryFolder.getRoot().getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidatorTest.java new file mode 100644 index 000000000..9d609ac8f --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/InputVcfAggregationMappingPathValidatorTest.java @@ -0,0 +1,76 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; + +import java.io.File; +import java.io.IOException; + +public class InputVcfAggregationMappingPathValidatorTest { + + private InputVcfAggregationMappingPathValidator validator; + + private JobParametersBuilder jobParametersBuilder; + + @Rule + public PipelineTemporaryFolderRule temporaryFolder = new PipelineTemporaryFolderRule(); + + @Before + public void setUp() throws Exception { + validator = new InputVcfAggregationMappingPathValidator(); + } + + @Test + public void inputVcfAggregationMappingPathIsValid() throws JobParametersInvalidException, IOException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH, + temporaryFolder.newFile().getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputVcfAggregationMappingPathNotExist() throws JobParametersInvalidException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH, "file://path/to/file"); + 
validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputVcfAggregationMappingPathNotReadable() throws JobParametersInvalidException, IOException { + File file = temporaryFolder.newFile("not_readable"); + file.setReadable(false); + + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH, file.getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputVcfAggregationMappingPathIsADirectory() throws JobParametersInvalidException, IOException { + jobParametersBuilder = new JobParametersBuilder(); + jobParametersBuilder.addString(JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH, + temporaryFolder.getRoot().getCanonicalPath()); + validator.validate(jobParametersBuilder.toJobParameters()); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtilTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtilTest.java index d1e6614ba..bdacfd70d 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtilTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/ParametersValidatorUtilTest.java @@ -36,22 +36,78 @@ public class ParametersValidatorUtilTest { @Test public void validString() throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString("any string", JOB_PARAMETER_NAME); + ParametersValidatorUtil.checkIsValidString("any string", JOB_PARAMETER_NAME); } @Test(expected = JobParametersInvalidException.class) - public void stringIsEmpty() throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString("", JOB_PARAMETER_NAME); + public void stringIsNull() throws JobParametersInvalidException { + 
ParametersValidatorUtil.checkIsNotNullString(null, JOB_PARAMETER_NAME); } @Test(expected = JobParametersInvalidException.class) - public void stringIsAWhitespace() throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(" ", JOB_PARAMETER_NAME); + public void stringWithNonPrintableCharacter() throws JobParametersInvalidException { + ParametersValidatorUtil.checkDoesNotContainPrintableCharacters("R\0al", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithAccentCharacter() throws JobParametersInvalidException { + ParametersValidatorUtil.checkDoesNotContainPrintableCharacters("Réal", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithDieresisCharacter() throws JobParametersInvalidException { + ParametersValidatorUtil.checkDoesNotContainPrintableCharacters("RÜal", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithTildeCharacter() throws JobParametersInvalidException { + ParametersValidatorUtil.checkDoesNotContainPrintableCharacters("R Ã al", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithStandardLineSeparator() throws JobParametersInvalidException { + ParametersValidatorUtil + .checkDoesNotContainPrintableCharacters("1000 Genomes Phase 3 \n Version 5", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithOsXLineSeparator() throws JobParametersInvalidException { + ParametersValidatorUtil + .checkDoesNotContainPrintableCharacters("1000 Genomes Phase 3 \r Version 5", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithWindowsLineSeparator() throws JobParametersInvalidException { + ParametersValidatorUtil + .checkDoesNotContainPrintableCharacters("1000 Genomes Phase 3 \r\n Version 5", JOB_PARAMETER_NAME); + } + + @Test + public void stringWithAllPrintableCharacters() throws JobParametersInvalidException { + ParametersValidatorUtil + .checkDoesNotContainPrintableCharacters("1000 Genomes Phase 3 Version 5", JOB_PARAMETER_NAME); + } + + @Test + public void stringSmallerThan250Characters() 
throws JobParametersInvalidException { + ParametersValidatorUtil.checkLength("1000 Genomes Phase 3 Version 5", JOB_PARAMETER_NAME); } @Test(expected = JobParametersInvalidException.class) - public void stringIsNull() throws JobParametersInvalidException { - ParametersValidatorUtil.checkIsNotNullOrEmptyString(null, JOB_PARAMETER_NAME); + public void stringBiggerThan250Characters() throws JobParametersInvalidException { + ParametersValidatorUtil.checkLength( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.", + JOB_PARAMETER_NAME); + } + + @Test(expected = JobParametersInvalidException.class) + public void stringIsEmpty() throws JobParametersInvalidException { + ParametersValidatorUtil.checkLength("", JOB_PARAMETER_NAME); + } + + @Test(expected = JobParametersInvalidException.class) + public void stringIsAWhitespace() throws JobParametersInvalidException { + ParametersValidatorUtil.checkLength(" ", JOB_PARAMETER_NAME); } diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java index 77e5cdc6b..278f36f01 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/job/GenotypedVcfJobParametersValidatorTest.java @@ -29,7 +29,7 @@ import java.util.TreeMap; /** - * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.AggregatedVcfJob} are + * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.GenotypedVcfJob} are * correctly validated */ public 
class GenotypedVcfJobParametersValidatorTest { diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java index b38a569f3..fc39a8197 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/AnnotationLoaderStepParametersValidatorTest.java @@ -53,6 +53,8 @@ public void setUp() throws Exception { new JobParameter("dbCollectionsVariantName")); requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); requiredParameters.put(JobParametersNames.OUTPUT_DIR_ANNOTATION, new JobParameter(dir)); + requiredParameters.put(JobParametersNames.INPUT_VCF_ID, new JobParameter("fid")); + requiredParameters.put(JobParametersNames.INPUT_STUDY_ID, new JobParameter("sid")); optionalParameters = new TreeMap<>(); optionalParameters.put(JobParametersNames.CONFIG_CHUNK_SIZE, new JobParameter("100")); @@ -89,4 +91,16 @@ public void outputDirAnnotationIsRequired() throws JobParametersInvalidException requiredParameters.remove(JobParametersNames.OUTPUT_DIR_ANNOTATION); validator.validate(new JobParameters(requiredParameters)); } + + @Test(expected = JobParametersInvalidException.class) + public void inputVcfIdIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.INPUT_VCF_ID); + validator.validate(new JobParameters(requiredParameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputStudyIdIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.INPUT_STUDY_ID); + validator.validate(new JobParameters(requiredParameters)); + } } diff --git 
a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java new file mode 100644 index 000000000..be1318de9 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/GeneLoaderStepParametersValidatorTest.java @@ -0,0 +1,93 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.pipeline.parameters.validation.step; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.springframework.batch.core.JobParameter; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; + +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; + +/** + * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.GeneLoaderStep} are + * correctly validated + */ +public class GeneLoaderStepParametersValidatorTest { + private GeneLoaderStepParametersValidator validator; + + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + + private Map requiredParameters; + + private Map optionalParameters; + + @Before + public void setUp() throws Exception { + validator = new GeneLoaderStepParametersValidator(); + + requiredParameters = new TreeMap<>(); + requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); + requiredParameters.put(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, + new JobParameter("dbCollectionsFeaturesName")); + requiredParameters.put(JobParametersNames.INPUT_GTF, + new JobParameter(temporaryFolderRule.newFile().getCanonicalPath())); + + optionalParameters = new TreeMap<>(); + optionalParameters.put(JobParametersNames.CONFIG_CHUNK_SIZE, new JobParameter("100")); + optionalParameters.put(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW, new JobParameter("true")); + } + + @Test + public void allJobParametersAreValid() throws JobParametersInvalidException, IOException { + validator.validate(new JobParameters(requiredParameters)); + } + + @Test + public void allJobParametersIncludingOptionalAreValid() throws JobParametersInvalidException, IOException { + Map parameters = new 
TreeMap<>(); + parameters.putAll(requiredParameters); + parameters.putAll(optionalParameters); + validator.validate(new JobParameters(parameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void dbCollectionsFeaturesNameIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME); + validator.validate(new JobParameters(requiredParameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void dbNameIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.DB_NAME); + validator.validate(new JobParameters(requiredParameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void inputGtfIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.INPUT_GTF); + validator.validate(new JobParameters(requiredParameters)); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java new file mode 100644 index 000000000..fe0ad7865 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/IndexesGeneratorStepParametersValidatorTest.java @@ -0,0 +1,79 @@ +/* + * Copyright 2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.parameters.validation.step; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.batch.core.JobParameter; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersInvalidException; + +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; + +/** + * Tests that the arguments necessary to run a {@link uk.ac.ebi.eva.pipeline.jobs.steps.IndexesGeneratorStep} are + * correctly validated + */ +public class IndexesGeneratorStepParametersValidatorTest { + private IndexesGeneratorStepParametersValidator validator; + + private Map requiredParameters; + + private Map optionalParameters; + + @Before + public void setUp() throws Exception { + validator = new IndexesGeneratorStepParametersValidator(); + + requiredParameters = new TreeMap<>(); + requiredParameters.put(JobParametersNames.DB_NAME, new JobParameter("dbName")); + requiredParameters.put(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, + new JobParameter("dbCollectionsFeaturesName")); + + optionalParameters = new TreeMap<>(); + optionalParameters.put(JobParametersNames.CONFIG_RESTARTABILITY_ALLOW, new JobParameter("true")); + } + + @Test + public void allJobParametersAreValid() throws JobParametersInvalidException, IOException { + validator.validate(new JobParameters(requiredParameters)); + } + + @Test + public void allJobParametersIncludingOptionalAreValid() throws JobParametersInvalidException, IOException { + Map parameters = new TreeMap<>(); + parameters.putAll(requiredParameters); + parameters.putAll(optionalParameters); + validator.validate(new JobParameters(parameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void dbCollectionsFeaturesNameIsRequired() throws 
JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME); + validator.validate(new JobParameters(requiredParameters)); + } + + @Test(expected = JobParametersInvalidException.class) + public void dbNameIsRequired() throws JobParametersInvalidException, IOException { + requiredParameters.remove(JobParametersNames.DB_NAME); + validator.validate(new JobParameters(requiredParameters)); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java index 7c51958aa..1f9ba1db3 100644 --- a/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java +++ b/src/test/java/uk/ac/ebi/eva/pipeline/parameters/validation/step/VariantLoaderStepParametersValidatorTest.java @@ -57,6 +57,8 @@ public void setUp() throws IOException { requiredParameters.put(JobParametersNames.INPUT_VCF_AGGREGATION, new JobParameter("NONE")); requiredParameters.put(JobParametersNames.INPUT_VCF, new JobParameter(temporaryFolderRule.newFile().getCanonicalPath())); + requiredParameters.put(JobParametersNames.INPUT_VCF_AGGREGATION_MAPPING_PATH, + new JobParameter(temporaryFolderRule.newFile().getCanonicalPath())); optionalParameters = new TreeMap<>(); optionalParameters.put(JobParametersNames.CONFIG_CHUNK_SIZE, new JobParameter("100")); diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java new file mode 100644 index 000000000..193c040bb --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/runner/EvaPipelineJobLauncherCommandLineRunnerTest.java @@ -0,0 +1,262 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.pipeline.runner; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.opencb.opencga.storage.core.StorageManagerException; +import org.springframework.batch.core.BatchStatus; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobExecutionException; +import org.springframework.batch.core.explore.JobExplorer; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.rule.OutputCapture; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; + +import uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule; +import uk.ac.ebi.eva.test.rules.TemporaryMongoRule; +import uk.ac.ebi.eva.test.utils.GenotypedVcfJobTestUtils; +import uk.ac.ebi.eva.utils.EvaCommandLineBuilder; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; + +import static org.hamcrest.core.StringContains.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static uk.ac.ebi.eva.pipeline.runner.EvaPipelineJobLauncherCommandLineRunner.SPRING_BATCH_JOB_NAME_PROPERTY; 
+import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.GENOTYPED_VCF_JOB; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; + +/** + * This suite of tests checks the behaviour of the EvaPipelineJobLauncherCommandLineRunner and launches a full execution of the + * genotype vcf test. + */ +@RunWith(SpringRunner.class) +@SpringBootTest() +@ActiveProfiles({"test,mongo"}) +@TestPropertySource(value = {"classpath:test-mongo.properties"}, properties = "debug=true") +public class EvaPipelineJobLauncherCommandLineRunnerTest { + + private static final String GENOTYPED_PROPERTIES_FILE = "/genotype-test.properties"; + private static final String NO_JOB_NAME_HAS_BEEN_PROVIDED = "No job name has been provided"; + private static final String NO_JOB_PARAMETERS_HAVE_BEEN_PROVIDED = "No job parameters have been provided"; + + @Autowired + private JobExplorer jobExplorer; + + @Autowired + private EvaPipelineJobLauncherCommandLineRunner evaPipelineJobLauncherCommandLineRunner; + + @Rule + public OutputCapture capture = new OutputCapture(); + + @Rule + public TemporaryMongoRule mongoRule = new TemporaryMongoRule(); + + @Rule + public PipelineTemporaryFolderRule temporaryFolderRule = new PipelineTemporaryFolderRule(); + + @Test + public void noJobParametersHaveBeenProvided() throws JobExecutionException { + evaPipelineJobLauncherCommandLineRunner.run(); + assertThat(capture.toString(), containsString(NO_JOB_PARAMETERS_HAVE_BEEN_PROVIDED)); + } + + @Test + public void jobProvidedButNoParameters() throws JobExecutionException { + evaPipelineJobLauncherCommandLineRunner.setJobNames(GENOTYPED_VCF_JOB); + evaPipelineJobLauncherCommandLineRunner.run("--" + SPRING_BATCH_JOB_NAME_PROPERTY + "=" + GENOTYPED_VCF_JOB); + assertThat(capture.toString(), containsString(NO_JOB_PARAMETERS_HAVE_BEEN_PROVIDED)); + } + + @Test + public void noJobNameProvidedAndAParameter() throws JobExecutionException { + evaPipelineJobLauncherCommandLineRunner.run("--dummy=true"); + 
assertThat(capture.toString(), containsString(NO_JOB_NAME_HAS_BEEN_PROVIDED)); + } + + @Test + public void genotypedVcfJobTest() throws JobExecutionException, IOException, URISyntaxException, + ClassNotFoundException, StorageManagerException, InstantiationException, IllegalAccessException { + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + File inputFile = GenotypedVcfJobTestUtils.getInputFile(); + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); + + File variantsStatsFile = GenotypedVcfJobTestUtils.getVariantsStatsFile(outputDirStats); + File sourceStatsFile = GenotypedVcfJobTestUtils.getSourceStatsFile(outputDirStats); + + File vepInputFile = GenotypedVcfJobTestUtils.getVepInputFile(outputDirAnnotation); + File vepOutputFile = GenotypedVcfJobTestUtils.getVepOutputFile(outputDirAnnotation); + + File fasta = temporaryFolderRule.newFile(); + + evaPipelineJobLauncherCommandLineRunner.setJobNames(GENOTYPED_VCF_JOB); + evaPipelineJobLauncherCommandLineRunner.run(new EvaCommandLineBuilder() + .inputVcf(inputFile.getAbsolutePath()) + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) + .inputVcfAggregation("NONE") + .inputStudyName("small vcf") + .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) + .inputStudyType("COLLECTION") + .outputDirAnnotation(outputDirAnnotation) + .outputDirStatistics(outputDirStats) + .databaseName(databaseName) + .appVepPath(GenotypedVcfJobTestUtils.getMockVep().getPath()) + .vepCachePath("") + .vepCacheSpecies("human") + .vepCacheVersion("1") + .vepNumForks("1") + .inputFasta(fasta.getAbsolutePath()) + .configDbReadPreference("secondary") + .dbCollectionsVariantsName("variants") + .dbCollectionsFilesName("files") + .dbCollectionsFeaturesName("features") + .dbCollectionsStatisticsName("populationStatistics").build() + ); + + assertEquals(EvaPipelineJobLauncherCommandLineRunner.EXIT_WITHOUT_ERRORS, + 
evaPipelineJobLauncherCommandLineRunner.getExitCode()); + + assertFalse(jobExplorer.getJobInstances(GENOTYPED_VCF_JOB, 0, 1).isEmpty()); + JobExecution jobExecution = jobExplorer.getJobExecution(jobExplorer.getJobInstances(GENOTYPED_VCF_JOB, 0, 1) + .get(0).getInstanceId()); + + assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); + assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); + + assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); + assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); + + GenotypedVcfJobTestUtils.checkLoadStep(databaseName); + + GenotypedVcfJobTestUtils.checkCreateStatsStep(variantsStatsFile, sourceStatsFile); + + GenotypedVcfJobTestUtils.checkLoadStatsStep(databaseName); + + GenotypedVcfJobTestUtils.checkAnnotationInput(vepInputFile); + + GenotypedVcfJobTestUtils.checkAnnotationCreateStep(vepInputFile, vepOutputFile); + + GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); + + GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + + GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); + } + + @Test + public void genotypedVcfJobTestWithParametersFileAndCommandLineParameters() throws JobExecutionException, + IOException, URISyntaxException, ClassNotFoundException, StorageManagerException, InstantiationException, + IllegalAccessException { + + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + File inputFile = GenotypedVcfJobTestUtils.getInputFile(); + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); + + File variantsStatsFile = GenotypedVcfJobTestUtils.getVariantsStatsFile(outputDirStats); + File sourceStatsFile = GenotypedVcfJobTestUtils.getSourceStatsFile(outputDirStats); + + File vepInputFile = GenotypedVcfJobTestUtils.getVepInputFile(outputDirAnnotation); + File vepOutputFile = 
GenotypedVcfJobTestUtils.getVepOutputFile(outputDirAnnotation); + + File fasta = temporaryFolderRule.newFile(); + + //Set properties file to read + evaPipelineJobLauncherCommandLineRunner.setPropertyFilePath( + getResource(GENOTYPED_PROPERTIES_FILE).getAbsolutePath()); + + evaPipelineJobLauncherCommandLineRunner.run(new EvaCommandLineBuilder() + .inputVcf(inputFile.getAbsolutePath()) + .inputVcfId(GenotypedVcfJobTestUtils.INPUT_VCF_ID) + .inputStudyId(GenotypedVcfJobTestUtils.INPUT_STUDY_ID) + .outputDirAnnotation(outputDirAnnotation) + .outputDirStatistics(outputDirStats) + .databaseName(databaseName) + .appVepPath(GenotypedVcfJobTestUtils.getMockVep().getPath()) + .inputFasta(fasta.getAbsolutePath()) + .build() + ); + + assertEquals(EvaPipelineJobLauncherCommandLineRunner.EXIT_WITHOUT_ERRORS, + evaPipelineJobLauncherCommandLineRunner.getExitCode()); + + assertFalse(jobExplorer.getJobInstances(GENOTYPED_VCF_JOB, 0, 1).isEmpty()); + JobExecution jobExecution = jobExplorer.getJobExecution(jobExplorer.getJobInstances(GENOTYPED_VCF_JOB, 0, 1) + .get(0).getInstanceId()); + + assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); + assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); + + assertEquals(ExitStatus.COMPLETED, jobExecution.getExitStatus()); + assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); + + GenotypedVcfJobTestUtils.checkLoadStep(databaseName); + + GenotypedVcfJobTestUtils.checkCreateStatsStep(variantsStatsFile, sourceStatsFile); + + GenotypedVcfJobTestUtils.checkLoadStatsStep(databaseName); + + GenotypedVcfJobTestUtils.checkAnnotationInput(vepInputFile); + + GenotypedVcfJobTestUtils.checkAnnotationCreateStep(vepInputFile, vepOutputFile); + + GenotypedVcfJobTestUtils.checkOutputFileLength(vepOutputFile); + + GenotypedVcfJobTestUtils.checkLoadedAnnotation(databaseName); + + GenotypedVcfJobTestUtils.checkSkippedOneMalformedLine(jobExecution); + } + + @Test + public void onlyFileWithoutParametersFailsValidation() throws 
JobExecutionException, IOException, + URISyntaxException, + ClassNotFoundException, StorageManagerException, InstantiationException, IllegalAccessException { + String databaseName = mongoRule.getRandomTemporaryDatabaseName(); + File inputFile = GenotypedVcfJobTestUtils.getInputFile(); + String outputDirStats = temporaryFolderRule.newFolder().getAbsolutePath(); + String outputDirAnnotation = temporaryFolderRule.newFolder().getAbsolutePath(); + + File variantsStatsFile = GenotypedVcfJobTestUtils.getVariantsStatsFile(outputDirStats); + File sourceStatsFile = GenotypedVcfJobTestUtils.getSourceStatsFile(outputDirStats); + + File vepInputFile = GenotypedVcfJobTestUtils.getVepInputFile(outputDirAnnotation); + File vepOutputFile = GenotypedVcfJobTestUtils.getVepOutputFile(outputDirAnnotation); + + File fasta = temporaryFolderRule.newFile(); + + //Set properties file to read + evaPipelineJobLauncherCommandLineRunner.setPropertyFilePath( + getResource(GENOTYPED_PROPERTIES_FILE).getAbsolutePath()); + + evaPipelineJobLauncherCommandLineRunner.setJobNames(GENOTYPED_VCF_JOB); + evaPipelineJobLauncherCommandLineRunner.run(new EvaCommandLineBuilder() + .build() + ); + + assertEquals(EvaPipelineJobLauncherCommandLineRunner.EXIT_WITH_ERRORS, + evaPipelineJobLauncherCommandLineRunner.getExitCode()); + } +} diff --git a/src/test/java/uk/ac/ebi/eva/runner/JobRestartAsynchronousTest.java b/src/test/java/uk/ac/ebi/eva/runner/JobRestartAsynchronousTest.java new file mode 100644 index 000000000..becce0c4a --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/runner/JobRestartAsynchronousTest.java @@ -0,0 +1,110 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.runner; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.launch.JobOperator; +import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; +import org.springframework.batch.test.JobLauncherTestUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.annotation.DirtiesContext; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.test.configuration.AsynchronousBatchTestConfiguration; +import uk.ac.ebi.eva.test.utils.AbstractJobRestartUtils; + +/** + * Test to check launcher behaviour in Asynchronous cases. + */ +@RunWith(SpringRunner.class) +@ContextConfiguration(classes = {AsynchronousBatchTestConfiguration.class}) +@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD) +public class JobRestartAsynchronousTest extends AbstractJobRestartUtils { + + // Wait until the job has been launched properly. The launch operation is not transactional, and other + // instances of the same job with the same parameter can throw exceptions in this interval. 
+ public static final int INITIALIZE_JOB_SLEEP = 100; + public static final int STEP_TIME_DURATION = 1000; + public static final int WAIT_FOR_JOB_TO_END = 2000; + + @Autowired + private JobOperator jobOperator; + + @Test(expected = JobExecutionAlreadyRunningException.class) + public void runSameJobWhileExecutingThrowsException() throws Exception { + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(getTestJob( + getWaitingStep(false, STEP_TIME_DURATION))); + launchJob(jobLauncherTestUtils); + launchJob(jobLauncherTestUtils); + Thread.sleep(WAIT_FOR_JOB_TO_END); + } + + @Test(expected = JobExecutionAlreadyRunningException.class) + public void cantRunSecondJobEvenIfFirstIsStopped() throws Exception { + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(getTestJob( + getWaitingStep(false, STEP_TIME_DURATION))); + JobExecution jobExecution = launchJob(jobLauncherTestUtils); + + jobOperator.stop(jobExecution.getJobId()); + jobLauncherTestUtils.launchJob(new JobParameters()); + Thread.sleep(WAIT_FOR_JOB_TO_END); + } + + @Test + public void jobStoppedAndAbandonedCanBeStarted() throws Exception { + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(getTestJob( + getWaitingStep(false, STEP_TIME_DURATION))); + JobExecution jobExecution = launchJob(jobLauncherTestUtils); + jobOperator.stop(jobExecution.getJobId()); + jobOperator.abandon(jobExecution.getJobId()); + jobLauncherTestUtils.launchJob(new JobParameters()); + Thread.sleep(WAIT_FOR_JOB_TO_END); + } + + @Test + public void abandonedJobsRequireStepsMarkedAsRestartable() throws Exception { + Job job = getTestJob(getQuickStep(false), getWaitingStep(false, STEP_TIME_DURATION)); + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(job); + JobExecution jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(INITIALIZE_JOB_SLEEP); + jobOperator.stop(jobExecution.getJobId()); + jobOperator.abandon(jobExecution.getJobId()); + 
Thread.sleep(WAIT_FOR_JOB_TO_END); + jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(WAIT_FOR_JOB_TO_END); + Assert.assertTrue(jobExecution.getStepExecutions().isEmpty()); + } + + @Test + public void abandonJobsMarkedAsRestartableRestartAllSteps() throws Exception { + Job job = getTestJob(getQuickStep(true), getWaitingStep(true, STEP_TIME_DURATION)); + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(job); + JobExecution jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(INITIALIZE_JOB_SLEEP); + jobOperator.stop(jobExecution.getJobId()); + jobOperator.abandon(jobExecution.getJobId()); + Thread.sleep(WAIT_FOR_JOB_TO_END); + jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(WAIT_FOR_JOB_TO_END); + Assert.assertFalse(jobExecution.getStepExecutions().isEmpty()); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/runner/JobRestartForceTest.java b/src/test/java/uk/ac/ebi/eva/runner/JobRestartForceTest.java new file mode 100644 index 000000000..0c096d800 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/runner/JobRestartForceTest.java @@ -0,0 +1,65 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.runner; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.launch.JobOperator; +import org.springframework.batch.test.JobLauncherTestUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.test.annotation.DirtiesContext; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.pipeline.runner.ManageJobsUtils; +import uk.ac.ebi.eva.test.configuration.AsynchronousBatchTestConfiguration; +import uk.ac.ebi.eva.test.utils.AbstractJobRestartUtils; + +/** + * Test to check if the ManageJobUtils.markLastJobAsFailed let us restart a job redoing all the steps. + */ +@RunWith(SpringRunner.class) +@ContextConfiguration(classes = {AsynchronousBatchTestConfiguration.class}) +@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD) +public class JobRestartForceTest extends AbstractJobRestartUtils { + + // Wait until the job has been launched properly. The launch operation is not transactional, and other + // instances of the same job with the same parameter can throw exceptions in this interval. 
+ public static final int INITIALIZE_JOB_SLEEP = 100; + public static final int STEP_TIME_DURATION = 1000; + public static final int WAIT_FOR_JOB_TO_END = 2000; + + @Autowired + private JobOperator jobOperator; + + @Test + public void forceJobFailureEnsuresCleanRunEvenIfStepsNotRestartables() throws Exception { + Job job = getTestJob(getQuickStep(false), getWaitingStep(false, STEP_TIME_DURATION)); + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(job); + JobExecution jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(INITIALIZE_JOB_SLEEP); + jobOperator.stop(jobExecution.getJobId()); + Thread.sleep(WAIT_FOR_JOB_TO_END); + ManageJobsUtils.markLastJobAsFailed(getJobRepository(), job.getName(), new JobParameters()); + jobExecution = launchJob(jobLauncherTestUtils); + Thread.sleep(WAIT_FOR_JOB_TO_END); + Assert.assertFalse(jobExecution.getStepExecutions().isEmpty()); + } + +} \ No newline at end of file diff --git a/src/test/java/uk/ac/ebi/eva/runner/JobRestartSynchronousTest.java b/src/test/java/uk/ac/ebi/eva/runner/JobRestartSynchronousTest.java new file mode 100644 index 000000000..9f92b256b --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/runner/JobRestartSynchronousTest.java @@ -0,0 +1,64 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package uk.ac.ebi.eva.runner; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.StepContribution; +import org.springframework.batch.core.scope.context.ChunkContext; +import org.springframework.batch.core.step.tasklet.Tasklet; +import org.springframework.batch.repeat.RepeatStatus; +import org.springframework.batch.test.JobLauncherTestUtils; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringRunner; +import uk.ac.ebi.eva.test.configuration.SynchronousBatchTestConfiguration; +import uk.ac.ebi.eva.test.utils.AbstractJobRestartUtils; + +import java.util.UUID; + +/** + * Test to check launcher behaviour in synchronous cases. + */ +@RunWith(SpringRunner.class) +@ContextConfiguration(classes = {SynchronousBatchTestConfiguration.class}) +public class JobRestartSynchronousTest extends AbstractJobRestartUtils { + + @Test + public void runCompleteJobTwiceWithSameParameters() throws Exception { + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(getTestJob(getQuickStep(false))); + jobLauncherTestUtils.launchJob(new JobParameters()); + jobLauncherTestUtils.launchJob(new JobParameters()); + } + + @Test + public void runFailedJobTwiceWithSameParameters() throws Exception { + JobLauncherTestUtils jobLauncherTestUtils = getJobLauncherTestUtils(getTestJob(getTestExceptionStep())); + jobLauncherTestUtils.launchJob(new JobParameters()); + jobLauncherTestUtils.launchJob(new JobParameters()); + } + + private Step getTestExceptionStep() { + return getStepBuilderFactory().get(UUID.randomUUID().toString()).tasklet(new Tasklet() { + @Override + public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { + throw new RuntimeException("THIS IS A TEST EXCEPTION"); + } + }).build(); + } + +} diff --git 
a/src/test/java/uk/ac/ebi/eva/test/configuration/AsynchronousBatchTestConfiguration.java b/src/test/java/uk/ac/ebi/eva/test/configuration/AsynchronousBatchTestConfiguration.java new file mode 100644 index 000000000..b0710e1b8 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/test/configuration/AsynchronousBatchTestConfiguration.java @@ -0,0 +1,78 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.test.configuration; + +import org.springframework.batch.core.configuration.JobRegistry; +import org.springframework.batch.core.configuration.StepRegistry; +import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; +import org.springframework.batch.core.configuration.support.MapStepRegistry; +import org.springframework.batch.core.explore.JobExplorer; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.batch.core.launch.JobOperator; +import org.springframework.batch.core.launch.support.SimpleJobLauncher; +import org.springframework.batch.core.launch.support.SimpleJobOperator; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +/** + * Specific 
configuration to use a spring batch context with an asynchronous thread pool executor. The standard base + * batch configuration injects a synchronous task executor. + */ +@Configuration +@EnableBatchProcessing +public class AsynchronousBatchTestConfiguration { + + @Autowired + private JobRepository jobRepository; + + @Autowired + private JobExplorer jobExplorer; + + @Autowired + private JobRegistry jobRegistry; + + @Bean + public JobOperator jobOperator() { + SimpleJobOperator jobOperator = new SimpleJobOperator(); + jobOperator.setJobRepository(jobRepository); + jobOperator.setJobLauncher(jobLauncher()); + jobOperator.setJobExplorer(jobExplorer); + jobOperator.setJobRegistry(jobRegistry); + return jobOperator; + } + + @Bean + public StepRegistry stepRegistry() { + return new MapStepRegistry(); + } + + @Bean + public JobLauncher jobLauncher() { + SimpleJobLauncher jobLauncher = new SimpleJobLauncher(); + jobLauncher.setTaskExecutor(threadPoolTaskExecutor()); + jobLauncher.setJobRepository(jobRepository); + return jobLauncher; + } + + @Bean + public ThreadPoolTaskExecutor threadPoolTaskExecutor() { + return new ThreadPoolTaskExecutor(); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/test/configuration/BaseTestConfiguration.java b/src/test/java/uk/ac/ebi/eva/test/configuration/BaseTestConfiguration.java index 0e5f7e936..2db4952a2 100644 --- a/src/test/java/uk/ac/ebi/eva/test/configuration/BaseTestConfiguration.java +++ b/src/test/java/uk/ac/ebi/eva/test/configuration/BaseTestConfiguration.java @@ -18,7 +18,9 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.data.mongodb.core.mapping.MongoMappingContext; import uk.ac.ebi.eva.pipeline.parameters.JobOptions; +import uk.ac.ebi.eva.pipeline.parameters.MongoConnection; @Configuration public class BaseTestConfiguration { @@ -28,4 +30,13 @@ public JobOptions jobOptions() { return new JobOptions(); } + @Bean + public 
MongoConnection mongoConnection() { + return new MongoConnection(); + } + + @Bean + public MongoMappingContext mongoMappingContext() { + return new MongoMappingContext(); + } } diff --git a/src/test/java/uk/ac/ebi/eva/test/configuration/BatchTestConfiguration.java b/src/test/java/uk/ac/ebi/eva/test/configuration/BatchTestConfiguration.java index d215be515..b1c5950e9 100644 --- a/src/test/java/uk/ac/ebi/eva/test/configuration/BatchTestConfiguration.java +++ b/src/test/java/uk/ac/ebi/eva/test/configuration/BatchTestConfiguration.java @@ -3,10 +3,15 @@ import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; import org.springframework.batch.test.JobLauncherTestUtils; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import uk.ac.ebi.eva.pipeline.configuration.MongoConfiguration; @Configuration @EnableBatchProcessing +@ComponentScan(basePackages = {"uk.ac.ebi.eva.pipeline.parameters"}) +@Import({MongoConfiguration.class}) public class BatchTestConfiguration extends BaseTestConfiguration { @Bean diff --git a/src/test/java/uk/ac/ebi/eva/test/configuration/SynchronousBatchTestConfiguration.java b/src/test/java/uk/ac/ebi/eva/test/configuration/SynchronousBatchTestConfiguration.java new file mode 100644 index 000000000..2bb1e0b9a --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/test/configuration/SynchronousBatchTestConfiguration.java @@ -0,0 +1,40 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.test.configuration; + +import org.springframework.batch.core.configuration.StepRegistry; +import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; +import org.springframework.batch.core.configuration.annotation.SimpleBatchConfiguration; +import org.springframework.batch.core.configuration.support.MapStepRegistry; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; + +/** + * Configuration for simple spring batch tests without any database that also require a step registry to inject steps + * manually. 
+ */ +@Configuration +@EnableBatchProcessing +@Import({SimpleBatchConfiguration.class}) +public class SynchronousBatchTestConfiguration { + + @Bean + public StepRegistry stepRegistry() { + return new MapStepRegistry(); + } + +} diff --git a/src/test/java/uk/ac/ebi/eva/test/data/VariantData.java b/src/test/java/uk/ac/ebi/eva/test/data/VariantData.java index 0a72f53bc..33bf7ee51 100644 --- a/src/test/java/uk/ac/ebi/eva/test/data/VariantData.java +++ b/src/test/java/uk/ac/ebi/eva/test/data/VariantData.java @@ -17,25 +17,25 @@ import org.apache.commons.io.FileUtils; -import uk.ac.ebi.eva.test.utils.TestFileUtils; - import java.io.IOException; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; + public class VariantData { - private static final String VARIANT_WITHOUT_ANNOTATION_PATH = "/annotation/VariantWithOutAnnotation"; - private static final String VARIANT_WITH_ANNOTATION_PATH = "/annotation/VariantWithAnnotation"; - private static final String POPULATION_STATS_PATH = "/statistics/PopulationStatistics.json"; + private static final String VARIANT_WITHOUT_ANNOTATION_PATH = "/input-files/annotation/VariantWithOutAnnotation"; + private static final String VARIANT_WITH_ANNOTATION_PATH = "/input-files/annotation/VariantWithAnnotation"; + private static final String POPULATION_STATS_PATH = "/input-files/statistics/PopulationStatistics.json"; public static String getVariantWithoutAnnotation() throws IOException { - return FileUtils.readFileToString(TestFileUtils.getResource(VARIANT_WITHOUT_ANNOTATION_PATH)); + return FileUtils.readFileToString(getResource(VARIANT_WITHOUT_ANNOTATION_PATH)); } public static String getVariantWithAnnotation() throws IOException { - return FileUtils.readFileToString(TestFileUtils.getResource(VARIANT_WITH_ANNOTATION_PATH)); + return FileUtils.readFileToString(getResource(VARIANT_WITH_ANNOTATION_PATH)); } public static String getPopulationStatistics() throws IOException { - return 
FileUtils.readFileToString(TestFileUtils.getResource(POPULATION_STATS_PATH)); + return FileUtils.readFileToString(getResource(POPULATION_STATS_PATH)); } } diff --git a/src/test/java/uk/ac/ebi/eva/test/rules/PipelineTemporaryFolderRule.java b/src/test/java/uk/ac/ebi/eva/test/rules/PipelineTemporaryFolderRule.java index 2cc93e8c1..e19d19556 100644 --- a/src/test/java/uk/ac/ebi/eva/test/rules/PipelineTemporaryFolderRule.java +++ b/src/test/java/uk/ac/ebi/eva/test/rules/PipelineTemporaryFolderRule.java @@ -46,4 +46,22 @@ public File newGzipFile(String content) throws IOException { } return tempFile; } + + /** + * Creates a temporary GzipFile withe the content at {@param content}. This file is marked to be deleted by java + * after finishing the test process. + * @param content + * @param name how the temporal file will be called under the temporal folder + * @return + * @throws IOException + */ + public File newGzipFile(String content, String name) throws IOException { + File tempFile = newFile(name); + try (FileOutputStream output = new FileOutputStream(tempFile)) { + try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(output), "UTF-8")) { + writer.write(content); + } + } + return tempFile; + } } diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/AbstractJobRestartUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/AbstractJobRestartUtils.java new file mode 100644 index 000000000..57bec2027 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/test/utils/AbstractJobRestartUtils.java @@ -0,0 +1,129 @@ +/* + * Copyright 2015-2016 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.test.utils; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.StepContribution; +import org.springframework.batch.core.configuration.DuplicateJobException; +import org.springframework.batch.core.configuration.JobRegistry; +import org.springframework.batch.core.configuration.StepRegistry; +import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; +import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; +import org.springframework.batch.core.configuration.support.ReferenceJobFactory; +import org.springframework.batch.core.job.builder.FlowBuilder; +import org.springframework.batch.core.job.flow.Flow; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.batch.core.launch.support.RunIdIncrementer; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.scope.context.ChunkContext; +import org.springframework.batch.core.step.tasklet.Tasklet; +import org.springframework.batch.repeat.RepeatStatus; +import org.springframework.batch.test.JobLauncherTestUtils; +import org.springframework.beans.factory.annotation.Autowired; +import uk.ac.ebi.eva.runner.JobRestartAsynchronousTest; + +import java.util.Arrays; +import java.util.UUID; + +/** + * Base class for the jobs to check the behaviour of spring boot. 
This class has all the utility methods to create + * jobs and steps on demand in the tests without any need to generate or inject them from specific configuration classes + */ +public abstract class AbstractJobRestartUtils { + + @Autowired + private StepRegistry stepRegistry; + + @Autowired + private JobRegistry jobRegistry; + + @Autowired + private JobRepository jobRepository; + + @Autowired + private JobLauncher jobLauncher; + + @Autowired + private StepBuilderFactory stepBuilderFactory; + + @Autowired + private JobBuilderFactory jobBuilderFactory; + + public JobRepository getJobRepository() { + return jobRepository; + } + + public StepBuilderFactory getStepBuilderFactory() { + return stepBuilderFactory; + } + + protected JobExecution launchJob(JobLauncherTestUtils jobLauncherTestUtils) throws Exception { + JobExecution jobExecution; + jobExecution = jobLauncherTestUtils.launchJob(new JobParameters()); + Thread.sleep(JobRestartAsynchronousTest.INITIALIZE_JOB_SLEEP); + return jobExecution; + } + + protected JobLauncherTestUtils getJobLauncherTestUtils(Job job) { + JobLauncherTestUtils jobLauncherTestUtils = new JobLauncherTestUtils(); + jobLauncherTestUtils.setJobLauncher(jobLauncher); + jobLauncherTestUtils.setJobRepository(jobRepository); + jobLauncherTestUtils.setJob(job); + return jobLauncherTestUtils; + } + + protected Job getTestJob(Step step, Step... 
steps) throws DuplicateJobException { + Job job; + if (steps == null) { + job = jobBuilderFactory.get(UUID.randomUUID().toString()).incrementer(new RunIdIncrementer()).start(step) + .build(); + } else { + FlowBuilder builder = new FlowBuilder(UUID.randomUUID().toString()).start(step); + for (Step arrayStep : steps) { + builder = builder.next(arrayStep); + } + Flow flow = builder.build(); + job = jobBuilderFactory.get(UUID.randomUUID().toString()).incrementer(new RunIdIncrementer()).start(flow) + .build().build(); + } + jobRegistry.register(new ReferenceJobFactory(job)); + stepRegistry.register(job.getName(), Arrays.asList(step)); + return job; + } + + protected Step getQuickStep(boolean restartable) { + return getWaitingStep(restartable, 0L); + } + + protected Step getWaitingStep(boolean restartable, final long waitTime) { + return stepBuilderFactory.get(UUID.randomUUID().toString()).tasklet(new Tasklet() { + @Override + public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { + if (waitTime > 0) { + try { + Thread.sleep(waitTime); + } catch (Exception e) { + //Do nothing + } + } + return RepeatStatus.FINISHED; + } + }).allowStartIfComplete(restartable).build(); + } +} diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java new file mode 100644 index 000000000..a9dcd5a46 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/test/utils/GenotypedVcfJobTestUtils.java @@ -0,0 +1,196 @@ +package uk.ac.ebi.eva.test.utils; + +import org.opencb.biodata.models.variant.Variant; +import org.opencb.datastore.core.QueryOptions; +import org.opencb.opencga.storage.core.StorageManagerException; +import org.opencb.opencga.storage.core.StorageManagerFactory; +import org.opencb.opencga.storage.core.variant.VariantStorageManager; +import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import 
org.opencb.opencga.storage.core.variant.adaptors.VariantDBIterator; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.StepExecution; +import uk.ac.ebi.eva.pipeline.configuration.BeanNames; +import uk.ac.ebi.eva.utils.URLHelper; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static uk.ac.ebi.eva.test.utils.JobTestUtils.count; +import static uk.ac.ebi.eva.test.utils.JobTestUtils.getLines; +import static uk.ac.ebi.eva.utils.FileUtils.getResource; + +/** + * GenotypedVcfJob test assertion functions and constants for testing. 
+ */ +public class GenotypedVcfJobTestUtils { + + private static final String MOCK_VEP = "/mockvep.pl"; + + public static final String INPUT_VCF_ID = "1"; + + public static final String INPUT_STUDY_ID = "genotyped-job"; + + private static final String INPUT_FILE = "/input-files/vcf/genotyped.vcf.gz"; + + public static final String COLLECTION_FILES_NAME = "files"; + + public static final String COLLECTION_VARIANTS_NAME = "variants"; + + private static final int EXPECTED_ANNOTATIONS = 537; + + private static final int EXPECTED_VARIANTS = 300; + + private static final int EXPECTED_VALID_ANNOTATIONS = 536; + + public static VariantDBIterator getVariantDBIterator(String dbName) throws IllegalAccessException, + ClassNotFoundException, InstantiationException, StorageManagerException { + VariantStorageManager variantStorageManager = StorageManagerFactory.getVariantStorageManager(); + VariantDBAdaptor variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); + return variantDBAdaptor.iterator(new QueryOptions()); + } + + /** + * 4 annotation flow annotation input vep generate step + * + * @param vepInputFile + * @throws IOException + */ + public static void checkAnnotationInput(File vepInputFile) throws IOException { + BufferedReader testReader = new BufferedReader(new InputStreamReader(new FileInputStream( + getResource("/expected-output/preannot.sorted")))); + BufferedReader actualReader = new BufferedReader(new InputStreamReader(new FileInputStream( + vepInputFile.toString()))); + + ArrayList rows = new ArrayList<>(); + + String s; + while ((s = actualReader.readLine()) != null) { + rows.add(s); + } + Collections.sort(rows); + + String testLine = testReader.readLine(); + for (String row : rows) { + assertEquals(testLine, row); + testLine = testReader.readLine(); + } + assertNull(testLine); // if both files have the same length testReader should be after the last line + } + + + /** + * Annotation load step: check documents in DB have annotation (only 
consequence type) + * + * @param dbName + * @throws IllegalAccessException + * @throws ClassNotFoundException + * @throws InstantiationException + * @throws StorageManagerException + */ + public static void checkLoadedAnnotation(String dbName) throws IllegalAccessException, ClassNotFoundException, + InstantiationException, StorageManagerException { + VariantDBIterator iterator; + iterator = getVariantDBIterator(dbName); + + int count = 0; + int consequenceTypeCount = 0; + while (iterator.hasNext()) { + count++; + Variant next = iterator.next(); + if (next.getAnnotation().getConsequenceTypes() != null) { + consequenceTypeCount += next.getAnnotation().getConsequenceTypes().size(); + } + } + + assertEquals(EXPECTED_VARIANTS, count); + assertEquals(EXPECTED_VALID_ANNOTATIONS, consequenceTypeCount); + } + + public static void checkOutputFileLength(File vepOutputFile) throws IOException { + assertEquals(EXPECTED_ANNOTATIONS, getLines(new GZIPInputStream(new FileInputStream(vepOutputFile)))); + } + + public static void checkAnnotationCreateStep(File vepInputFile, File vepOutputFile) { + assertTrue(vepInputFile.exists()); + assertTrue(vepOutputFile.exists()); + } + + /** + * load stats step: check the DB docs have the field "st" + * + * @param dbName + */ + public static void checkLoadStatsStep(String dbName) throws ClassNotFoundException, StorageManagerException, + InstantiationException, IllegalAccessException { + VariantDBIterator iterator = GenotypedVcfJobTestUtils.getVariantDBIterator(dbName); + assertEquals(1, iterator.next().getSourceEntries().values().iterator().next().getCohortStats().size()); + } + + /** + * 1 load step: check ((documents in DB) == (lines in transformed file)) + * variantStorageManager = StorageManagerFactory.getVariantStorageManager(); + * variantDBAdaptor = variantStorageManager.getDBAdaptor(dbName, null); + * + * @param dbName + */ + public static void checkLoadStep(String dbName) throws ClassNotFoundException, StorageManagerException, + 
InstantiationException, IllegalAccessException { + VariantDBIterator iterator = GenotypedVcfJobTestUtils.getVariantDBIterator(dbName); + assertEquals(EXPECTED_VARIANTS, count(iterator)); + } + + /** + * 2 create stats step + * + * @param variantsStatsFile + * @param sourceStatsFile + */ + public static void checkCreateStatsStep(File variantsStatsFile, File sourceStatsFile) { + assertTrue(variantsStatsFile.exists()); + assertTrue(sourceStatsFile.exists()); + } + + public static void checkSkippedOneMalformedLine(JobExecution jobExecution) { + //check that one line is skipped because malformed + List variantAnnotationLoadStepExecution = jobExecution.getStepExecutions().stream() + .filter(stepExecution -> stepExecution.getStepName().equals(BeanNames.LOAD_VEP_ANNOTATION_STEP)) + .collect(Collectors.toList()); + assertEquals(1, variantAnnotationLoadStepExecution.get(0).getReadSkipCount()); + } + + public static File getVariantsStatsFile(String outputDirStats) throws URISyntaxException { + return new File(URLHelper.getVariantsStatsUri(outputDirStats, INPUT_STUDY_ID, INPUT_VCF_ID)); + } + + public static File getSourceStatsFile(String outputDirStats) throws URISyntaxException { + return new File(URLHelper.getSourceStatsUri(outputDirStats, INPUT_STUDY_ID, INPUT_VCF_ID)); + } + + public static File getVepInputFile(String outputDirAnnotation) { + return new File(URLHelper.resolveVepInput(outputDirAnnotation, INPUT_STUDY_ID, INPUT_VCF_ID)); + } + + public static File getVepOutputFile(String outputDirAnnotation) { + return new File(URLHelper.resolveVepOutput(outputDirAnnotation, INPUT_STUDY_ID, INPUT_VCF_ID)); + } + + public static File getInputFile() { + return getResource(INPUT_FILE); + } + + public static File getMockVep() { + return getResource(MOCK_VEP); + } +} diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java index 15a9de694..3435b1559 100644 --- 
a/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/JobTestUtils.java @@ -32,8 +32,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -47,10 +45,6 @@ public abstract class JobTestUtils { private static final Logger logger = LoggerFactory.getLogger(JobTestUtils.class); - private static final String EVA_PIPELINE_TEMP_PREFIX = "eva-pipeline-test"; - - private static final java.lang.String EVA_PIPELINE_TEMP_POSTFIX = ".tmp"; - /** * reads the file and sorts it in memory to return the first ordered line. Don't use for big files! * @@ -95,21 +89,11 @@ public static long count(Iterator iterator) { return rows; } - public static String getTransformedOutputPath(Path input, String compressExtension, String outputDir) { - return Paths.get(outputDir).resolve(input) + ".variants.json" + compressExtension; - } - public static JobParameters getJobParameters() { return new JobParametersBuilder() .addLong("time", System.currentTimeMillis()).toJobParameters(); } - public static File createTempFile() throws IOException { - File tempFile = File.createTempFile(EVA_PIPELINE_TEMP_PREFIX, EVA_PIPELINE_TEMP_POSTFIX); - tempFile.deleteOnExit(); - return tempFile; - } - /** * Returns a DBObject obtained by parsing a given string * @@ -147,16 +131,17 @@ public static void checkFieldsInsideList(BasicDBObject metadataMongo, String fie } public static void uncompress(String inputCompressedFile, File outputFile) throws IOException { - GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inputCompressedFile)); - FileOutputStream out = new FileOutputStream(outputFile); + GZIPInputStream gzipInputStream = new GZIPInputStream(new FileInputStream(inputCompressedFile)); + FileOutputStream fileOutputStream = new FileOutputStream(outputFile); byte[] buffer = new 
byte[1024]; - int len; - while ((len = gzis.read(buffer)) > 0) { - out.write(buffer, 0, len); + final int offset = 0; + int length; + while ((length = gzipInputStream.read(buffer)) > 0) { + fileOutputStream.write(buffer, offset, length); } - gzis.close(); - out.close(); + gzipInputStream.close(); + fileOutputStream.close(); } } diff --git a/src/test/java/uk/ac/ebi/eva/test/utils/TestFileUtils.java b/src/test/java/uk/ac/ebi/eva/test/utils/TestFileUtils.java index e66106eb4..1543b504b 100644 --- a/src/test/java/uk/ac/ebi/eva/test/utils/TestFileUtils.java +++ b/src/test/java/uk/ac/ebi/eva/test/utils/TestFileUtils.java @@ -8,6 +8,7 @@ public abstract class TestFileUtils { + /** use {@link uk.ac.ebi.eva.test.rules.PipelineTemporaryFolderRule#newGzipFile(java.lang.String)} instead */ @Deprecated public static File makeGzipFile(String content, String vepOutput) throws IOException { File tempFile = new File(vepOutput); @@ -21,11 +22,7 @@ public static File makeGzipFile(String content, String vepOutput) throws IOExcep public static void copyResource(String resourcePath, String outputDir) throws IOException { File vcfFile = new File(TestFileUtils.class.getResource(resourcePath).getFile()); - FileCopyUtils.copy(vcfFile, new File(outputDir, resourcePath)); - } - - public static File getResource(String resourcePath) { - return new File(TestFileUtils.class.getResource(resourcePath).getFile()); + FileCopyUtils.copy(vcfFile, new File(outputDir, vcfFile.getName())); } public static URL getResourceUrl(String resourcePath) { diff --git a/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java b/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java new file mode 100644 index 000000000..3d98dee40 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/utils/EvaCommandLineBuilder.java @@ -0,0 +1,173 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.utils; + +import org.springframework.util.Assert; +import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Builder to generate the command line array that is accepted by the EvaPipelineCommandLineRunner. + */ +public class EvaCommandLineBuilder { + + private final Map parameterMap; + + public EvaCommandLineBuilder() { + this.parameterMap = new LinkedHashMap<>(); + } + + protected EvaCommandLineBuilder addString(String key, String parameter) { + Assert.notNull(parameter); + parameterMap.put(key, new String(parameter)); + return this; + } + + private EvaCommandLineBuilder addBoolean(String key, boolean value) { + parameterMap.put(key, Boolean.toString(value)); + return this; + } + + public EvaCommandLineBuilder inputStudyId(String inputStudyId) { + return addString(JobParametersNames.INPUT_STUDY_ID, inputStudyId); + } + + public EvaCommandLineBuilder inputVcfId(String inputVcfId) { + return addString(JobParametersNames.INPUT_VCF_ID, inputVcfId); + } + + public EvaCommandLineBuilder inputVcf(String inputVcf) { + return addString(JobParametersNames.INPUT_VCF, inputVcf); + } + + public EvaCommandLineBuilder inputVcfAggregation(String inputVcfAggregation) { + return addString(JobParametersNames.INPUT_VCF_AGGREGATION, inputVcfAggregation); + } + + public EvaCommandLineBuilder databaseName(String databaseName) { + return addString(JobParametersNames.DB_NAME, databaseName); + 
} + + public EvaCommandLineBuilder collectionVariantsName(String collectionVariantsName) { + return addString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, collectionVariantsName); + } + + public EvaCommandLineBuilder collectionFilesName(String collectionFilesName) { + return addString(JobParametersNames.DB_COLLECTIONS_FILES_NAME, collectionFilesName); + } + + public EvaCommandLineBuilder collectionFeaturesName(String collectionFeaturesName) { + return addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, collectionFeaturesName); + } + + public EvaCommandLineBuilder vepPath(String vepPath) { + return addString(JobParametersNames.APP_VEP_PATH, vepPath); + } + + public EvaCommandLineBuilder vepCacheVersion(String vepCacheVersion) { + return addString(JobParametersNames.APP_VEP_CACHE_VERSION, vepCacheVersion); + } + + public EvaCommandLineBuilder vepCachePath(String vepCachePath) { + return addString(JobParametersNames.APP_VEP_CACHE_PATH, vepCachePath); + } + + public EvaCommandLineBuilder vepCacheSpecies(String vepCacheSpecies) { + return addString(JobParametersNames.APP_VEP_CACHE_SPECIES, vepCacheSpecies); + } + + public EvaCommandLineBuilder vepNumForks(String vepNumForks) { + return addString(JobParametersNames.APP_VEP_NUMFORKS, vepNumForks); + } + + public EvaCommandLineBuilder inputFasta(String inputFasta) { + return addString(JobParametersNames.INPUT_FASTA, inputFasta); + } + + public EvaCommandLineBuilder outputDirAnnotation(String outputDirAnnotation) { + return addString(JobParametersNames.OUTPUT_DIR_ANNOTATION, outputDirAnnotation); + } + + public EvaCommandLineBuilder outputDirStatistics(String outputDirStats) { + return addString(JobParametersNames.OUTPUT_DIR_STATISTICS, outputDirStats); + } + + public EvaCommandLineBuilder annotationSkip(boolean annotationSkip) { + return addBoolean(JobParametersNames.ANNOTATION_SKIP, annotationSkip); + } + + public EvaCommandLineBuilder statisticsSkip(boolean statisticsSkip) { + return 
addBoolean(JobParametersNames.STATISTICS_SKIP, statisticsSkip); + } + + public String[] build() { + List parameters = new ArrayList<>(); + parameterMap.forEach((key, value) -> parameters.add("--" + key + "=" + value)); + return parameters.toArray(new String[parameters.size()]); + } + + public EvaCommandLineBuilder inputStudyName(String studyName) { + return addString(JobParametersNames.INPUT_STUDY_NAME, studyName); + } + + public EvaCommandLineBuilder inputStudyType(String studyType) { + return addString(JobParametersNames.INPUT_STUDY_TYPE, studyType); + } + + public EvaCommandLineBuilder appVepPath(String appVepPath) { + return addString(JobParametersNames.APP_VEP_PATH, appVepPath); + } + + public EvaCommandLineBuilder appVepNumForks(String appVepNumForks) { + return addString(JobParametersNames.APP_VEP_NUMFORKS, appVepNumForks); + } + + public EvaCommandLineBuilder appVepCachePath(String appVepCachePath) { + return addString(JobParametersNames.APP_VEP_CACHE_PATH, appVepCachePath); + } + + public EvaCommandLineBuilder appVepCacheVersion(String appVepCacheVersion) { + return addString(JobParametersNames.APP_VEP_CACHE_VERSION, appVepCacheVersion); + } + + public EvaCommandLineBuilder appVepCacheSpecies(String appVepCacheSpecies) { + return addString(JobParametersNames.APP_VEP_CACHE_SPECIES, appVepCacheSpecies); + } + + public EvaCommandLineBuilder configDbReadPreference(String preference) { + return addString(JobParametersNames.CONFIG_DB_READPREFERENCE, preference); + } + + public EvaCommandLineBuilder dbCollectionsVariantsName(String name) { + return addString(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, name); + } + + public EvaCommandLineBuilder dbCollectionsFeaturesName(String name) { + return addString(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, name); + } + + public EvaCommandLineBuilder dbCollectionsFilesName(String name) { + return addString(JobParametersNames.DB_COLLECTIONS_FILES_NAME, name); + } + + public EvaCommandLineBuilder 
dbCollectionsStatisticsName(String name) { + return addString(JobParametersNames.DB_COLLECTIONS_STATISTICS_NAME, name); + } +} diff --git a/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java b/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java index b771ce212..71dcd37cf 100644 --- a/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java +++ b/src/test/java/uk/ac/ebi/eva/utils/EvaJobParameterBuilder.java @@ -16,34 +16,41 @@ package uk.ac.ebi.eva.utils; import org.springframework.batch.core.JobParameter; -import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersBuilder; + import uk.ac.ebi.eva.pipeline.parameters.JobParametersNames; import java.sql.Timestamp; import java.util.Date; -import java.util.HashMap; -import java.util.Map; - -public class EvaJobParameterBuilder extends JobParametersBuilder{ +public class EvaJobParameterBuilder extends JobParametersBuilder { - public EvaJobParameterBuilder inputStudyId(String inputStudyId){ + public EvaJobParameterBuilder inputStudyId(String inputStudyId) { addParameter(JobParametersNames.INPUT_STUDY_ID, new JobParameter(inputStudyId)); return this; } - public EvaJobParameterBuilder inputVcfId(String inputVcfId){ + public EvaJobParameterBuilder inputStudyType(String inputStudyType) { + addParameter(JobParametersNames.INPUT_STUDY_TYPE, new JobParameter(inputStudyType)); + return this; + } + + public EvaJobParameterBuilder inputStudyName(String inputStudyName) { + addParameter(JobParametersNames.INPUT_STUDY_NAME, new JobParameter(inputStudyName)); + return this; + } + + public EvaJobParameterBuilder inputVcfId(String inputVcfId) { addParameter(JobParametersNames.INPUT_VCF_ID, new JobParameter(inputVcfId)); return this; } - public EvaJobParameterBuilder inputVcf(String inputVcf){ + public EvaJobParameterBuilder inputVcf(String inputVcf) { addParameter(JobParametersNames.INPUT_VCF, new JobParameter(inputVcf)); return this; } - public EvaJobParameterBuilder 
inputVcfAggregation(String inputVcfAggregation){ + public EvaJobParameterBuilder inputVcfAggregation(String inputVcfAggregation) { addParameter(JobParametersNames.INPUT_VCF_AGGREGATION, new JobParameter(inputVcfAggregation)); return this; } @@ -62,5 +69,63 @@ public EvaJobParameterBuilder collectionVariantsName(String collectionVariantsNa addParameter(JobParametersNames.DB_COLLECTIONS_VARIANTS_NAME, new JobParameter(collectionVariantsName)); return this; } + public EvaJobParameterBuilder collectionFilesName(String collectionFilesName) { + addParameter(JobParametersNames.DB_COLLECTIONS_FILES_NAME, new JobParameter(collectionFilesName)); + return this; + } + + public EvaJobParameterBuilder collectionFeaturesName(String collectionFeaturesName) { + addParameter(JobParametersNames.DB_COLLECTIONS_FEATURES_NAME, new JobParameter(collectionFeaturesName)); + return this; + } + + public EvaJobParameterBuilder vepPath(String vepPath) { + addParameter(JobParametersNames.APP_VEP_PATH, new JobParameter(vepPath)); + return this; + } + + public EvaJobParameterBuilder vepCacheVersion(String vepCacheVersion) { + addParameter(JobParametersNames.APP_VEP_CACHE_VERSION, new JobParameter(vepCacheVersion)); + return this; + } + + public EvaJobParameterBuilder vepCachePath(String vepCachePath) { + addParameter(JobParametersNames.APP_VEP_CACHE_PATH, new JobParameter(vepCachePath)); + return this; + } + + public EvaJobParameterBuilder vepCacheSpecies(String vepCacheSpecies) { + addParameter(JobParametersNames.APP_VEP_CACHE_SPECIES, new JobParameter(vepCacheSpecies)); + return this; + } + public EvaJobParameterBuilder vepNumForks(String vepNumForks) { + addParameter(JobParametersNames.APP_VEP_NUMFORKS, new JobParameter(vepNumForks)); + return this; + } + + public EvaJobParameterBuilder inputFasta(String inputFasta) { + addParameter(JobParametersNames.INPUT_FASTA, new JobParameter(inputFasta)); + return this; + } + + public EvaJobParameterBuilder outputDirAnnotation(String outputDirAnnotation) 
{ + addParameter(JobParametersNames.OUTPUT_DIR_ANNOTATION, new JobParameter(outputDirAnnotation)); + return this; + } + + public EvaJobParameterBuilder outputDirStats(String outputDirStats) { + addParameter(JobParametersNames.OUTPUT_DIR_STATISTICS, new JobParameter(outputDirStats)); + return this; + } + + public EvaJobParameterBuilder annotationSkip(boolean annotationSkip) { + addParameter(JobParametersNames.ANNOTATION_SKIP, new JobParameter(Boolean.toString(annotationSkip))); + return this; + } + + public EvaJobParameterBuilder statisticsSkip(boolean statisticsSkip) { + addParameter(JobParametersNames.STATISTICS_SKIP, new JobParameter(Boolean.toString(statisticsSkip))); + return this; + } } diff --git a/src/test/resources/1_4.source.stats.json.gz b/src/test/resources/1_4.source.stats.json.gz deleted file mode 100644 index 18dd1502c..000000000 Binary files a/src/test/resources/1_4.source.stats.json.gz and /dev/null differ diff --git a/src/test/resources/1_4.variants.stats.json.gz b/src/test/resources/1_4.variants.stats.json.gz deleted file mode 100644 index 87186fdb7..000000000 Binary files a/src/test/resources/1_4.variants.stats.json.gz and /dev/null differ diff --git a/src/test/resources/annot.tsv.gz b/src/test/resources/annot.tsv.gz deleted file mode 100644 index 688a5909b..000000000 Binary files a/src/test/resources/annot.tsv.gz and /dev/null differ diff --git a/src/test/resources/annotation-job.properties b/src/test/resources/annotation-job.properties deleted file mode 100644 index b2cac0e13..000000000 --- a/src/test/resources/annotation-job.properties +++ /dev/null @@ -1,34 +0,0 @@ -input.vcf=/small20.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=ELOAD-58 -input.study.id=annotation-job -input.pedigree= -input.gtf= -input.fasta= - -output.dir= -output.dir.annotation=/tmp/ -output.dir.statistics=/tmp/ - -app.opencga.path= -app.vep.path=/tmp/mockvep.pl - -# VEP -app.vep.cache.path= -app.vep.cache.version= 
-app.vep.cache.species= -app.vep.num-forks= - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary - -db.collections.features.name=features -db.collections.stats.name=populationStatistics -db.name=AnnotationJobTest diff --git a/src/test/resources/annotation-loader-step.properties b/src/test/resources/annotation-loader-step.properties deleted file mode 100644 index 37dea05a9..000000000 --- a/src/test/resources/annotation-loader-step.properties +++ /dev/null @@ -1,34 +0,0 @@ -input.vcf=/small20.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=ELOAD-58 -input.study.id=annotation-loader-step -input.pedigree= -input.gtf= -input.fasta= - -output.dir= -output.dir.annotation=/tmp/ -output.dir.statistics=/tmp/ - -app.opencga.path= -app.vep.path=/tmp/mockvep.pl - -# VEP -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= -app.vep.num-forks= - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary - -db.collections.features.name=features -db.collections.stats.name=populationStatistics -spring.data.mongodb.database=AnnotationLoaderStepTest diff --git a/src/test/resources/annotation.properties b/src/test/resources/annotation.properties deleted file mode 100644 index b479c2fb7..000000000 --- a/src/test/resources/annotation.properties +++ /dev/null @@ -1,33 +0,0 @@ -input.vcf=/small20.vcf.gz -input.vcf.id=5 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=ELOAD-58 -input.study.id=7 -input.pedigree= -input.gtf= -input.fasta= - -output.dir= -output.dir.annotation=/tmp/ -output.dir.statistics=/tmp/ - -app.opencga.path= -app.vep.path=/tmp/mockvep.pl - -# VEP -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= -app.vep.num-forks= - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -spring.data.mongodb.database=VariantAnnotation -db.collections.features.name=features -db.collections.stats.name=populationStatistics diff --git a/src/test/resources/application-integrationTest.properties b/src/test/resources/application-integrationTest.properties deleted file mode 100644 index 9fe0c55c6..000000000 --- a/src/test/resources/application-integrationTest.properties +++ /dev/null @@ -1,51 +0,0 @@ -# JOB -spring.batch.job.names=genotyped-vcf-job -spring.profiles.active=test,mongo - -# SUBMISSION FIELDS -input.vcf=target/test-classes/small20.vcf.gz -input.vcf.id=5 -input.vcf.aggregation=NONE - -input.study.name=small vcf -input.study.id=small_load_2 -input.study.type=COLLECTION - -input.pedigree= -input.gtf=/path/to/gtf/sample.gtf.gz -input.fasta=/path/to/homo_sapiens/sequence.fa - -output.dir=/tmp -output.dir.annotation=/tmp -output.dir.statistics=/tmp - -db.name=test_full_integration -spring.data.mongodb.database=${db.name} -#db.collections.variants.name=variants_name_app -#db.collections.files.name=files_name_app - -# EXTERNAL APPLICATIONS -app.opencga.path= - -app.vep.path=/path/to/vep/executable -app.vep.num-forks=4 -app.vep.cache.path=/path/to/vep/cache_folder -app.vep.cache.version=82 -app.vep.cache.species=homo_sapiens - -# MONGO DATABASE -config.db.read-preference=secondary -db.collections.features.name=features -db.collections.stats.name=populationStatistics - -# STEPS MANAGEMENT -## Job repository database - -## Skip steps -statistics.skip=false -annotation.skip=true - -## Repeat steps -## true: The already COMPLETED steps will be rerun. 
This is restarting the job from the beginning -## false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false diff --git a/src/test/resources/common-configuration.properties b/src/test/resources/common-configuration.properties index cc96c3aa6..df2237ae9 100644 --- a/src/test/resources/common-configuration.properties +++ b/src/test/resources/common-configuration.properties @@ -1,43 +1 @@ -input.vcf= -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=input.study.name -input.study.id=1 -input.pedigree= -input.gtf= -input.fasta= - -output.dir=/tmp -output.dir.annotation= -output.dir.statistics=/tmp - -statistics.overwrite=false - -spring.data.mongodb.host=localhost:27017 -#spring.data.mongodb.authentication-database -#spring.data.mongodb.username -#spring.data.mongodb.password -#config.db.read-preference - -db.collections.variants.name=variants -db.collections.files.name=files -db.collections.features.name=feature -db.collections.stats.name=populationStatistics - app.opencga.path= -app.vep.path= - -# VEP -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= -app.vep.num-forks=3 - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -spring.data.mongodb.database=VariantAnnotation diff --git a/src/test/resources/preannot.sorted b/src/test/resources/expected-output/preannot.sorted similarity index 100% rename from src/test/resources/preannot.sorted rename to src/test/resources/expected-output/preannot.sorted diff --git a/src/test/resources/genotype-test.properties b/src/test/resources/genotype-test.properties new file mode 100644 index 000000000..e175e66bd --- /dev/null +++ b/src/test/resources/genotype-test.properties @@ -0,0 +1,14 @@ +spring.batch.job.names=genotyped-vcf-job + +app.vep.num-forks=1 +app.vep.cache.path= +app.vep.cache.version=1 +app.vep.cache.species=human +input.vcf.aggregation=NONE +input.study.name=small vcf +input.study.type=COLLECTION +config.db.read-preference=secondary +db.collections.variants.name=variants +db.collections.files.name=files +db.collections.features.name=features +db.collections.stats.name=populationStatistics \ No newline at end of file diff --git a/src/test/resources/genotyped-vcf-workflow.properties b/src/test/resources/genotyped-vcf-workflow.properties deleted file mode 100644 index c333c1836..000000000 --- a/src/test/resources/genotyped-vcf-workflow.properties +++ /dev/null @@ -1,32 +0,0 @@ -input.vcf=/small20.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=studyName -input.study.id=genotyped-job-workflow -input.pedigree= -input.gtf= -input.fasta=/path/to/file.fa - -output.dir=/tmp -output.dir.annotation=/tmp -output.dir.statistics=/tmp - -app.opencga.path= -app.vep.path= - -# VEP -app.vep.cache.path=/path/to/cache -app.vep.cache.version=79 -app.vep.cache.species=homo_sapiens -app.vep.num-forks=4 - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics diff --git a/src/test/resources/genotyped-vcf.properties b/src/test/resources/genotyped-vcf.properties deleted file mode 100644 index d61557b00..000000000 --- a/src/test/resources/genotyped-vcf.properties +++ /dev/null @@ -1,34 +0,0 @@ -spring.profiles.active=variant-writer-mongo, variant-annotation-mongo -input.vcf=/small20.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=studyName -input.study.id=genotyped-job -input.pedigree= -input.gtf= -input.fasta=/path/to/file.fa - -output.dir=/tmp -output.dir.annotation=/tmp -output.dir.statistics=/tmp - -app.opencga.path= -app.vep.path= - -# VEP -app.vep.cache.path=/path/to/cache -app.vep.cache.version=79 -app.vep.cache.species=homo_sapiens -app.vep.num-forks=4 - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics -spring.data.mongodb.database=GenotypedVcfJobTest diff --git a/src/test/resources/initialize-database.properties b/src/test/resources/initialize-database.properties deleted file mode 100644 index e0cf2d773..000000000 --- a/src/test/resources/initialize-database.properties +++ /dev/null @@ -1,32 +0,0 @@ -input.vcf=/small20.vcf.gz -input.vcf.id=5 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=ELOAD-58 -input.study.id=7 -input.pedigree= -input.gtf=features.gtf.gz -input.fasta= - -output.dir= -output.dir.annotation=/tmp/ -output.dir.statistics=/tmp/ - -app.opencga.path= -app.vep.path= - -# VEP -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= -app.vep.num-forks= - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. 
This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics diff --git a/src/test/resources/annotation/VariantWithAnnotation b/src/test/resources/input-files/annotation/VariantWithAnnotation similarity index 100% rename from src/test/resources/annotation/VariantWithAnnotation rename to src/test/resources/input-files/annotation/VariantWithAnnotation diff --git a/src/test/resources/annotation/VariantWithOutAnnotation b/src/test/resources/input-files/annotation/VariantWithOutAnnotation similarity index 100% rename from src/test/resources/annotation/VariantWithOutAnnotation rename to src/test/resources/input-files/annotation/VariantWithOutAnnotation diff --git a/src/test/resources/ped/integrated_call_samples.20101123.ped b/src/test/resources/input-files/ped/integrated_call_samples.20101123.ped similarity index 100% rename from src/test/resources/ped/integrated_call_samples.20101123.ped rename to src/test/resources/input-files/ped/integrated_call_samples.20101123.ped diff --git a/src/test/resources/ped/malformed-pedigree-test-file.ped b/src/test/resources/input-files/ped/malformed-pedigree-test-file.ped similarity index 100% rename from src/test/resources/ped/malformed-pedigree-test-file.ped rename to src/test/resources/input-files/ped/malformed-pedigree-test-file.ped diff --git a/src/test/resources/ped/pedigree-test-file.ped b/src/test/resources/input-files/ped/pedigree-test-file.ped similarity index 100% rename from src/test/resources/ped/pedigree-test-file.ped rename to src/test/resources/input-files/ped/pedigree-test-file.ped diff --git a/src/test/resources/1_1.source.stats.json.gz b/src/test/resources/input-files/statistics/1_1.source.stats.json.gz similarity index 100% rename from src/test/resources/1_1.source.stats.json.gz 
rename to src/test/resources/input-files/statistics/1_1.source.stats.json.gz diff --git a/src/test/resources/1_1.variants.stats.json.gz b/src/test/resources/input-files/statistics/1_1.variants.stats.json.gz similarity index 100% rename from src/test/resources/1_1.variants.stats.json.gz rename to src/test/resources/input-files/statistics/1_1.variants.stats.json.gz diff --git a/src/test/resources/statistics/PopulationStatistics.json b/src/test/resources/input-files/statistics/PopulationStatistics.json similarity index 100% rename from src/test/resources/statistics/PopulationStatistics.json rename to src/test/resources/input-files/statistics/PopulationStatistics.json diff --git a/src/test/resources/aggregated.evs.vcf.gz b/src/test/resources/input-files/vcf/aggregated.evs.vcf.gz similarity index 100% rename from src/test/resources/aggregated.evs.vcf.gz rename to src/test/resources/input-files/vcf/aggregated.evs.vcf.gz diff --git a/src/test/resources/aggregated.exac.vcf.gz b/src/test/resources/input-files/vcf/aggregated.exac.vcf.gz similarity index 100% rename from src/test/resources/aggregated.exac.vcf.gz rename to src/test/resources/input-files/vcf/aggregated.exac.vcf.gz diff --git a/src/test/resources/aggregated.vcf.gz b/src/test/resources/input-files/vcf/aggregated.vcf.gz similarity index 100% rename from src/test/resources/aggregated.vcf.gz rename to src/test/resources/input-files/vcf/aggregated.vcf.gz diff --git a/src/test/resources/small20.vcf.gz b/src/test/resources/input-files/vcf/genotyped.vcf.gz similarity index 100% rename from src/test/resources/small20.vcf.gz rename to src/test/resources/input-files/vcf/genotyped.vcf.gz diff --git a/src/test/resources/wrong_no_alt.vcf.gz b/src/test/resources/input-files/vcf/wrong_no_alt.vcf.gz similarity index 100% rename from src/test/resources/wrong_no_alt.vcf.gz rename to src/test/resources/input-files/vcf/wrong_no_alt.vcf.gz diff --git a/src/test/resources/preannot.sorted.gz b/src/test/resources/preannot.sorted.gz 
deleted file mode 100644 index 83672ce93..000000000 Binary files a/src/test/resources/preannot.sorted.gz and /dev/null differ diff --git a/src/test/resources/small20.vcf.gz.file.json.gz b/src/test/resources/small20.vcf.gz.file.json.gz deleted file mode 100644 index 9cfe1665f..000000000 Binary files a/src/test/resources/small20.vcf.gz.file.json.gz and /dev/null differ diff --git a/src/test/resources/small20.vcf.gz.variants.json.gz b/src/test/resources/small20.vcf.gz.variants.json.gz deleted file mode 100644 index ebec04c37..000000000 Binary files a/src/test/resources/small20.vcf.gz.variants.json.gz and /dev/null differ diff --git a/src/test/resources/small22.vcf.gz b/src/test/resources/small22.vcf.gz deleted file mode 100644 index d7918f318..000000000 Binary files a/src/test/resources/small22.vcf.gz and /dev/null differ diff --git a/src/test/resources/test-mongo.properties b/src/test/resources/test-mongo.properties new file mode 100644 index 000000000..60416a88d --- /dev/null +++ b/src/test/resources/test-mongo.properties @@ -0,0 +1,12 @@ +db.collections.variants.name=variants +db.collections.files.name=files +db.collections.features.name=features +db.collections.stats.name=populationStatistics + +#spring.data.mongodb.database +spring.data.mongodb.host=localhost:27017 +#spring.data.mongodb.authentication-database +#spring.data.mongodb.username +#spring.data.mongodb.password + +config.db.read-preference=primary \ No newline at end of file diff --git a/src/test/resources/variant-aggregated.properties b/src/test/resources/variant-aggregated.properties index 2593e6da2..76e43a659 100644 --- a/src/test/resources/variant-aggregated.properties +++ b/src/test/resources/variant-aggregated.properties @@ -1,36 +1,2 @@ -spring.profiles.active=variant-writer-mongo, variant-annotation-mongo -input.vcf=/aggregated.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=BASIC -input.study.type=COLLECTION -input.study.name=studyName -input.study.id=aggregated-job -input.pedigree= -input.gtf= 
-input.fasta=/path/to/file.fa - -output.dir=/tmp -output.dir.annotation=/tmp -output.dir.statistics=/tmp - app.opencga.path= -app.vep.path= - -# VEP -app.vep.cache.path=/path/to/cache -app.vep.cache.version=79 -app.vep.cache.species=homo_sapiens -app.vep.num-forks=4 - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -db.collections.features.name=features -db.collections.stats.name=populationStatistics -spring.data.mongodb.database=AggregatedJobTest - annotation.skip=true diff --git a/src/test/resources/vep-input-generator-step.properties b/src/test/resources/vep-input-generator-step.properties deleted file mode 100644 index 548ec5b77..000000000 --- a/src/test/resources/vep-input-generator-step.properties +++ /dev/null @@ -1,34 +0,0 @@ -spring.profiles.active=variant-annotation-mongo -input.vcf=/small20.vcf.gz -input.vcf.id=1 -input.vcf.aggregation=NONE -input.study.type=COLLECTION -input.study.name=ELOAD-58 -input.study.id=vep-input-generator-step -input.pedigree= -input.gtf= -input.fasta= - -output.dir= -output.dir.annotation=/tmp/ -output.dir.statistics=/tmp/ - -app.opencga.path= -app.vep.path=/tmp/mockvep.pl - -# VEP -app.vep.cache.path= -app.vep.cache.version= -app.vep.cache.species= -app.vep.num-forks= - - -# Repeat steps -# true: The already COMPLETEd steps will be rerun. This is restarting the job from the beginning -# false(default): if the job was aborted and is relaunched, COMPLETEd steps will NOT be done again -config.restartability.allow=false - -config.db.read-preference=primary -spring.data.mongodb.database=VepInputGeneratorStepTest -db.collections.features.name=features -db.collections.stats.name=populationStatistics