diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb410c9cb..d339f65b9 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -1,15 +1,40 @@ name: Pull request approve workflow +run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}' on: pull_request_review: types: [ submitted ] jobs: - build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + calculate-xetabase-branch: + name: Calculate Xetabase branch + runs-on: ubuntu-22.04 + outputs: + xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }} + steps: + - name: Clone java-common-libs + uses: actions/checkout@v4 + with: + fetch-depth: '10' + ## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found" + persist-credentials: false + - id: get_xetabase_branch + name: "Get current branch for Xetabase from target branch" + run: | + chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh + echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" + echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.head.ref }}) + echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} + echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT + env: + ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} test: - name: "Test analysis" - uses: ./.github/workflows/test-analysis.yml - needs: build - secrets: inherit + name: "Run all tests before merging" + needs: calculate-xetabase-branch + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop + with: + branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} + task: ${{ github.event.pull_request.head.ref }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh new file mode 100644 index 000000000..a1eb7e529 --- /dev/null +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Function to calculate the corresponding branch of Xetabase project +get_xetabase_branch() { + # Input parameter (branch name) + input_branch="$1" + + # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it + if [[ $input_branch == TASK* ]]; then + if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then + echo $input_branch; + return 0; + fi + fi + + # Check if the branch name is "develop" in that case return the same branch name + if [[ "$input_branch" == "develop" ]]; then + echo "develop" + return 0 + fi + + # Check if the branch name starts with "release-" and follows the patterns "release-a.b.x" or "release-a.b.c.x" + if [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.([0-9]+)\.x$ ]]; then + # Extract the MAJOR part of the branch name + MAJOR=${BASH_REMATCH[1]} + # Calculate the XETABASE_MAJOR by subtracting 3 from MAJOR + XETABASE_MAJOR=$((MAJOR - 1)) + # Check if the XETABASE_MAJOR is negative + if (( XETABASE_MAJOR < 0 )); then + echo "Error: 'MAJOR' digit after subtraction results in a negative number." + return 1 + fi + # Construct and echo the new branch name + echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}" + return 0 + fi + + # If the branch name does not match any of the expected patterns + echo "Error: The branch name is not correct." + return 1 +} + +# Check if the script receives exactly one argument +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Call the function with the input branch name +get_xetabase_branch "$1" diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 12a6b4f5b..dafe64c2c 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.2.0 + 3.2.1 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index bfef3117f..e071844ff 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.2.1 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 887ce92d9..f2311443d 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.2.1 ../pom.xml diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java index 99fbff5d8..36f9c7e02 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java @@ -28,29 +28,29 @@ public class Interpretation { - private String id; - private String uuid; - private String description; - private String clinicalAnalysisId; + protected String id; + protected String uuid; + protected String name; + protected String description; + protected String clinicalAnalysisId; /** * Interpretation algorithm tool used to generate this interpretation. */ - private ClinicalAnalyst analyst; - private InterpretationMethod method; + protected ClinicalAnalyst analyst; + protected InterpretationMethod method; - private List primaryFindings; - private List secondaryFindings; + protected List primaryFindings; + protected List secondaryFindings; - private List comments; + protected List comments; - private InterpretationStats stats; + protected InterpretationStats stats; - private boolean locked; - private Status status; - private String creationDate; - private String modificationDate; - private int version; + protected boolean locked; + protected String creationDate; + protected String modificationDate; + protected int version; /** * Users can add custom information in this field. @@ -61,12 +61,23 @@ public class Interpretation { public Interpretation() { } + @Deprecated public Interpretation(String id, String uuid, String description, String clinicalAnalysisId, ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, List secondaryFindings, List comments, InterpretationStats stats, Status status, String creationDate, String modificationDate, boolean locked, int version, Map attributes) { + this(id, uuid, id, description, clinicalAnalysisId, analyst, method, primaryFindings, secondaryFindings, + comments, stats, locked, creationDate, modificationDate, version, attributes); + } + + public Interpretation(String id, String uuid, String name, String description, String clinicalAnalysisId, + ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, + List secondaryFindings, List comments, + InterpretationStats stats, boolean locked, String creationDate, String modificationDate, + int version, Map attributes) { this.id = id; this.uuid = uuid; + this.name = name; this.description = description; this.clinicalAnalysisId = clinicalAnalysisId; this.analyst = analyst; @@ -75,10 +86,9 @@ public Interpretation(String id, String uuid, String description, String clinica this.secondaryFindings = secondaryFindings; this.comments = comments; this.stats = stats; - this.status = status; + this.locked = locked; this.creationDate = creationDate; this.modificationDate = modificationDate; - this.locked = locked; this.version = version; this.attributes = attributes; } @@ -88,6 +98,7 @@ public String toString() { final StringBuilder sb = new StringBuilder("Interpretation{"); sb.append("id='").append(id).append('\''); sb.append(", uuid='").append(uuid).append('\''); + sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", clinicalAnalysisId='").append(clinicalAnalysisId).append('\''); sb.append(", analyst=").append(analyst); @@ -96,7 +107,6 @@ public String toString() { sb.append(", secondaryFindings=").append(secondaryFindings); sb.append(", comments=").append(comments); sb.append(", stats=").append(stats); - sb.append(", status=").append(status); sb.append(", creationDate='").append(creationDate).append('\''); sb.append(", modificationDate='").append(modificationDate).append('\''); sb.append(", locked='").append(locked).append('\''); @@ -124,6 +134,15 @@ public Interpretation setUuid(String uuid) { return this; } + public String getName() { + return name; + } + + public Interpretation setName(String name) { + this.name = name; + return this; + } + public String getDescription() { return description; } @@ -196,15 +215,6 @@ public Interpretation setStats(InterpretationStats stats) { return this; } - public Status getStatus() { - return status; - } - - public Interpretation setStatus(Status status) { - this.status = status; - return this; - } - public String getCreationDate() { return creationDate; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java index 7563ce9e1..65ffd8895 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java @@ -19,17 +19,22 @@ public class Status { protected String id; - protected String name; protected String description; protected String date; public Status() { - this("", "", "", ""); + this("", "", ""); } + public Status(String id, String description, String date) { + this.id = id; + this.description = description; + this.date = date; + } + + @Deprecated public Status(String id, String name, String description, String date) { this.id = id; - this.name = name; this.description = description; this.date = date; } @@ -38,7 +43,6 @@ public Status(String id, String name, String description, String date) { public String toString() { final StringBuilder sb = new StringBuilder("Status{"); sb.append("id='").append(id).append('\''); - sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", date='").append(date).append('\''); sb.append('}'); @@ -53,7 +57,6 @@ public boolean equals(Object o) { Status status = (Status) o; if (!id.equals(status.id)) return false; - if (name != null ? !name.equals(status.name) : status.name != null) return false; if (description != null ? !description.equals(status.description) : status.description != null) return false; return date != null ? date.equals(status.date) : status.date == null; } @@ -61,7 +64,6 @@ public boolean equals(Object o) { @Override public int hashCode() { int result = id.hashCode(); - result = 31 * result + (name != null ? name.hashCode() : 0); result = 31 * result + (description != null ? description.hashCode() : 0); result = 31 * result + (date != null ? date.hashCode() : 0); return result; @@ -76,12 +78,13 @@ public Status setId(String id) { return this; } + @Deprecated public String getName() { - return name; + return id; } + @Deprecated public Status setName(String name) { - this.name = name; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java new file mode 100644 index 000000000..8f8cc7126 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java @@ -0,0 +1,147 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import java.util.List; + +public class Snp { + private String id; + private String chromosome; + private int position; + private String reference; + private List alternates; + private String type; + private String source; + private String version; + private SnpAnnotation annotation; + + public Snp() { + } + + public Snp(String id, String chromosome, int position, String reference, List alternates, String type, + String source, String version, SnpAnnotation annotation) { + this.id = id; + this.chromosome = chromosome; + this.position = position; + this.reference = reference; + this.alternates = alternates; + this.type = type; + this.source = source; + this.version = version; + this.annotation = annotation; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Snp{"); + sb.append("id='").append(id).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", reference='").append(reference).append('\''); + sb.append(", alternates=").append(alternates); + sb.append(", type='").append(type).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append(", annotation=").append(annotation); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public Snp setId(String id) { + this.id = id; + return this; + } + + public String getChromosome() { + return chromosome; + } + + public Snp setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public Snp setPosition(int position) { + this.position = position; + return this; + } + + public String getReference() { + return reference; + } + + public Snp setReference(String reference) { + this.reference = reference; + return this; + } + + public List getAlternates() { + return alternates; + } + + public Snp setAlternates(List alternates) { + this.alternates = alternates; + return this; + } + + public String getType() { + return type; + } + + public Snp setType(String type) { + this.type = type; + return this; + } + + public String getSource() { + return source; + } + + public Snp setSource(String source) { + this.source = source; + return this; + } + + public String getVersion() { + return version; + } + + public Snp setVersion(String version) { + this.version = version; + return this; + } + + public SnpAnnotation getAnnotation() { + return annotation; + } + + public Snp setAnnotation(SnpAnnotation annotation) { + this.annotation = annotation; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java new file mode 100644 index 000000000..16fab718e --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java @@ -0,0 +1,79 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import org.opencb.biodata.models.variant.avro.PopulationFrequency; + +import java.util.List; +import java.util.Map; + +public class SnpAnnotation { + + private List flags; + private String gene; + private List populationFrequencies; + private Map additionalAttributes; + + public SnpAnnotation() { + } + + public SnpAnnotation(List flags, String gene, List populationFrequencies, Map additionalAttributes) { + this.flags = flags; + this.gene = gene; + this.populationFrequencies = populationFrequencies; + this.additionalAttributes = additionalAttributes; + } + + public List getFlags() { + return flags; + } + + public SnpAnnotation setFlags(List flags) { + this.flags = flags; + return this; + } + + public String getGene() { + return gene; + } + + public SnpAnnotation setGene(String gene) { + this.gene = gene; + return this; + } + + public List getPopulationFrequencies() { + return populationFrequencies; + } + + public SnpAnnotation setPopulationFrequencies(List populationFrequencies) { + this.populationFrequencies = populationFrequencies; + return this; + } + + public Map getAdditionalAttributes() { + return additionalAttributes; + } + + public SnpAnnotation setAdditionalAttributes(Map additionalAttributes) { + this.additionalAttributes = additionalAttributes; + return this; + } +} diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 03bfa0e25..a9a269a70 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.2.1 ../pom.xml diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java index 895767e99..54b84cc07 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java @@ -49,6 +49,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -62,7 +63,7 @@ public class BamManager implements AutoCloseable { public static final int DEFAULT_WINDOW_SIZE = 1; public static final int MAX_NUM_RECORDS = 50000; - public static final int MAX_REGION_COVERAGE = 100000; + public static final int MAX_REGION_COVERAGE = 500000; public static final String COVERAGE_BIGWIG_EXTENSION = ".bw"; private Logger logger; @@ -191,7 +192,10 @@ public Path calculateBigWigCoverage(Path bigWigPath, int windowSize) throws IOEx return bigWigPath; } - + /** + * @deprecated (since getFileHeader().getTextHeader() is deprecated !) + */ + @Deprecated public String header() { return samReader.getFileHeader().getTextHeader(); } @@ -338,7 +342,7 @@ public List getChunks(Region region) { BAMIndex index = samReader.indexing().getIndex(); return index.getSpanOverlapping(sequenceIndex, start, end).getChunks(); } - return null; + return Collections.emptyList(); } public List getBreakpoints(Region region) throws IOException { @@ -378,7 +382,7 @@ public List getBreakpoints(Region region) throws IOException { } } } - return null; + return Collections.emptyList(); } /** @@ -445,7 +449,7 @@ public AlignmentGlobalStats stats(Region region, AlignmentFilters fil return calculateGlobalStats(iterator(region, filters, options)); } - private AlignmentGlobalStats calculateGlobalStats(BamIterator iterator) throws IOException { + private AlignmentGlobalStats calculateGlobalStats(BamIterator iterator) { AlignmentGlobalStats alignmentGlobalStats = new AlignmentGlobalStats(); SamRecordAlignmentGlobalStatsCalculator calculator = new SamRecordAlignmentGlobalStatsCalculator(); while (iterator.hasNext()) { diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index e3bb3d6e2..3e16977f5 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -301,24 +301,20 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(variant); continue; } - String reference = variant.getReference(); //Save original values, as they can be changed + //Save original values, as they can be changed + String reference = variant.getReference(); String alternate = variant.getAlternate(); Integer start = variant.getStart(); Integer end = variant.getEnd(); String chromosome = variant.getChromosome(); - StructuralVariation sv = variant.getSv(); if (variant.getStudies() == null || variant.getStudies().isEmpty()) { - List keyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternate, sv); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternate); - } + List keyFieldsList = normalizeAlleles(variant); + // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order! for (VariantKeyFields keyFields : sortByPosition(keyFieldsList)) { OriginalCall call = new OriginalCall(variant.toString(), keyFields.getNumAllele()); - Variant normalizedVariant = newVariant(variant, keyFields, sv); + Variant normalizedVariant = newVariant(variant, keyFields); if (keyFields.getPhaseSet() != null) { StudyEntry studyEntry = new StudyEntry(); studyEntry.setSamples( @@ -332,25 +328,16 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(normalizedVariant); } } else { - for (StudyEntry entry : variant.getStudies()) { - List originalAlternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - List alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - alternates.add(alternate); - originalAlternates.add(alternate); - for (String secondaryAlternatesAllele : entry.getSecondaryAlternatesAlleles()) { - alternates.add(secondaryAlternatesAllele); - originalAlternates.add(secondaryAlternatesAllele); - } + if (variant.getStudies().size() != 1) { + throw new IllegalStateException("Only one study per variant is supported when normalizing variants. Found " + + variant.getStudies().size() + " studies. Variant: " + variant); + } else { + StudyEntry entry = variant.getStudies().get(0); + List alternates = getAllAlternates(variant); // FIXME: assumes there wont be multinucleotide positions with CNVs and short variants mixed - List keyFieldsList; - List originalKeyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternates, sv); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternates); - } - originalKeyFieldsList = keyFieldsList + List keyFieldsList = normalizeAlleles(variant); + List originalKeyFieldsList = keyFieldsList .stream() .filter(k -> !k.isReferenceBlock()) .map(k -> k.originalKeyFields) @@ -373,8 +360,8 @@ public List normalize(List batch, boolean reuse) throws NonSta originalCall = entry.getFiles().get(0).getCall().getVariantId(); } else { StringBuilder sb = new StringBuilder(variant.toString()); - for (int i = 1; i < originalAlternates.size(); i++) { - sb.append(",").append(originalAlternates.get(i)); + for (int i = 1; i < alternates.size(); i++) { + sb.append(",").append(alternates.get(i)); } originalCall = sb.toString(); } @@ -400,6 +387,9 @@ public List normalize(List batch, boolean reuse) throws NonSta variant.setEnd(keyFields.getEnd()); variant.setReference(keyFields.getReference()); variant.setAlternate(keyFields.getAlternate()); + if (keyFields.getSv() != null) { + variant.setSv(keyFields.getSv()); + } variant.reset(); // Variant is being reused, must ensure the SV field si appropriately created // if (isSymbolic(variant)) { @@ -415,7 +405,7 @@ public List normalize(List batch, boolean reuse) throws NonSta } samples = entry.getSamples(); } else { - normalizedVariant = newVariant(variant, keyFields, sv); + normalizedVariant = newVariant(variant, keyFields); normalizedEntry = new StudyEntry(); normalizedEntry.setStudyId(entry.getStudyId()); @@ -598,17 +588,54 @@ private Collection sortByPosition(List keyFi // } // } + protected List normalizeAlleles(Variant variant) { + List alternates = getAllAlternates(variant); + + List keyFieldsList; + if (isSymbolic(variant)) { + keyFieldsList = normalizeSymbolic(variant.getStart(), variant.getEnd(), variant.getReference(), alternates, variant.getSv()); + } else { + keyFieldsList = normalize(variant.getChromosome(), variant.getStart(), variant.getReference(), alternates, variant.getSv()); + } + return keyFieldsList; + } + + private static List getAllAlternates(Variant variant) { + List alternates; + if (variant.getStudies() != null && !variant.getStudies().isEmpty()) { + StudyEntry entry = variant.getStudies().get(0); + String alternate = variant.getAlternate(); + alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); + alternates.add(alternate); + for (AlternateCoordinate secondaryAlternate : entry.getSecondaryAlternates()) { + if (secondaryAlternate.getStart() != null && !secondaryAlternate.getStart().equals(variant.getStart())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not start at the same position. " + + "Variant: " + variant + " , secondaryAlternate: " + secondaryAlternate); + } + if (secondaryAlternate.getEnd() != null && !secondaryAlternate.getEnd().equals(variant.getEnd())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not end at the same position. " + + "Variant: " + variant + " (end=" + variant.getEnd() + ") , secondaryAlternate: " + secondaryAlternate); + } + alternates.add(secondaryAlternate.getAlternate()); + } + } else { + alternates = Collections.singletonList(variant.getAlternate()); + } + return Collections.unmodifiableList(alternates); + } + + @Deprecated // Test purposes only public List normalizeSymbolic(Integer start, Integer end, String reference, String alternate, StructuralVariation sv) { return normalizeSymbolic(start, end, reference, Collections.singletonList(alternate), sv); } - @Deprecated + @Deprecated // Test purposes only public List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates) { return normalizeSymbolic(start, end, reference, alternates, null); } - public List normalizeSymbolic(final Integer start, final Integer end, final String reference, + protected List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); @@ -624,12 +651,56 @@ public List normalizeSymbolic(final Integer start, final Integ Integer copyNumber = sv == null ? null : sv.getCopyNumber(); keyFields = normalizeSymbolic(start, end, reference, alternate, alternates, copyNumber, numAllelesIdx); } + + if (alternate.equals(VariantBuilder.DUP_TANDEM_ALT)) { + if (keyFields.getSv() == null) { + keyFields.setSv(new StructuralVariation()); + } + keyFields.getSv().setType(StructuralVariantType.TANDEM_DUPLICATION); + } + + normalizeSvField(sv, keyFields); + list.add(keyFields); } return list; } + private static void normalizeSvField(StructuralVariation sv, VariantKeyFields keyFields) { + if (sv != null) { + StructuralVariation normalizedSv = keyFields.getSv(); + if (normalizedSv == null) { + normalizedSv = new StructuralVariation(); + } + // CI positions may change during the normalization. Update them. + normalizedSv.setCiStartLeft(sv.getCiStartLeft()); + normalizedSv.setCiStartRight(sv.getCiStartRight()); + + // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. + // At this point, we're removing the CIEND from the normalized variant. + // Do not remove the value from the INFO field (if any). + // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") + if (keyFields.getEnd() < keyFields.getStart()) { + normalizedSv.setCiEndLeft(null); + normalizedSv.setCiEndRight(null); + } else { + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + } + normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); + + if (keyFields.getSv() == null) { + if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null + || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null + || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { + keyFields.setSv(normalizedSv); + } + } + } + } + private boolean isNonRef(String alternate) { return alternate.equals(Allele.NO_CALL_STRING) || alternate.equals(VariantBuilder.NON_REF_ALT) @@ -695,7 +766,7 @@ private static VariantKeyFields normalizeMateBreakend( } VariantKeyFields keyFields = new VariantKeyFields(newStart, newStart - 1, numAllelesIdx, newReference, newAlternate); - keyFields.getSv().setBreakend(breakend); + keyFields.setBreakend(breakend); return keyFields; } @@ -718,29 +789,37 @@ private VariantKeyFields normalizeSymbolic( + "contain 0 or 1 nt, but no more. Please, check."); } - Integer cn = VariantBuilder.getCopyNumberFromAlternate(alternate); // if (cn != null) { // // Alternate with the form , being xxx the number of copies, must be normalized into "" // newAlternate = ""; // } String newAlternate; + Integer newCn; if (alternate.equals("") && copyNumber != null) { // Alternate must be of the form , being xxx the number of copies newAlternate = ""; + newCn = copyNumber; } else { newAlternate = alternate; + newCn = VariantBuilder.getCopyNumberFromAlternate(alternate); } + return new VariantKeyFields(newStart, end, numAllelesIdx, newReference, newAlternate, - null, cn, false); + null, newCn, false); } + @Deprecated // Test purposes only public List normalize(String chromosome, int position, String reference, String alternate) { - return normalize(chromosome, position, reference, Collections.singletonList(alternate)); + return normalize(chromosome, position, reference, Collections.singletonList(alternate), null); + } + + @Deprecated // Test purposes only + public List normalize(String chromosome, int position, String reference, List alternates) { + return normalize(chromosome, position, reference, alternates, null); } - public List normalize(String chromosome, int position, String reference, List alternates) - { + protected List normalize(String chromosome, int position, String reference, List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); int numAllelesIdx = 0; // This index is necessary for getting the samples where the mutated allele is present @@ -784,6 +863,8 @@ public List normalize(String chromosome, int position, String } } + normalizeSvField(sv, keyFields); + if (keyFields != null) { // To deal with cases such as A>GT @@ -1380,34 +1461,24 @@ private int[] getGenotypesReorderingMap(int numAllele, int[] alleleMap) { } } - - private Variant newVariant(Variant variant, VariantKeyFields keyFields, StructuralVariation sv) { + private Variant newVariant(Variant variant, VariantKeyFields keyFields) { Variant normalizedVariant = new Variant(variant.getChromosome(), keyFields.getStart(), keyFields.getEnd(), keyFields.getReference(), keyFields.getAlternate()) .setId(variant.getId()) .setNames(variant.getNames()) .setStrand(variant.getStrand()); - if (sv != null) { - if (normalizedVariant.getSv() != null) { - // CI positions may change during the normalization. Update them. - normalizedVariant.getSv().setCiStartLeft(sv.getCiStartLeft()); - normalizedVariant.getSv().setCiStartRight(sv.getCiStartRight()); - normalizedVariant.getSv().setCiEndLeft(sv.getCiEndLeft()); - normalizedVariant.getSv().setCiEndRight(sv.getCiEndRight()); - normalizedVariant.getSv().setLeftSvInsSeq(sv.getLeftSvInsSeq()); - normalizedVariant.getSv().setRightSvInsSeq(sv.getRightSvInsSeq()); - - // Variant will never have CopyNumber, because the Alternate is normalized from to - normalizedVariant.getSv().setCopyNumber(keyFields.getCopyNumber()); - VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); - if (cnvSubtype != null) { - normalizedVariant.setType(cnvSubtype); - } - } + if (keyFields.getSv() != null) { + normalizedVariant.setSv(keyFields.getSv()); } - normalizedVariant.setAnnotation(variant.getAnnotation()); + if (keyFields.getCopyNumber() != null) { + VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); + if (cnvSubtype != null) { + normalizedVariant.setType(cnvSubtype); + } + } + return normalizedVariant; // normalizedVariant.setAnnotation(variant.getAnnotation()); // if (isSymbolic(variant)) { @@ -1527,8 +1598,10 @@ public VariantKeyFields(int start, int end, int numAllele, String reference, Str this.alternate = alternate; this.originalKeyFields = originalKeyFields == null ? this : originalKeyFields; this.referenceBlock = referenceBlock; - this.sv = new StructuralVariation(); - setCopyNumber(copyNumber); + this.sv = null; + if (copyNumber != null) { + setCopyNumber(copyNumber); + } } @@ -1604,7 +1677,28 @@ public Integer getCopyNumber() { } public VariantKeyFields setCopyNumber(Integer copyNumber) { - sv.setCopyNumber(copyNumber); + if (sv == null) { + if (copyNumber != null) { + sv = new StructuralVariation(); + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + } else { + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + return this; + } + + public VariantKeyFields setBreakend(Breakend breakend) { + if (sv == null) { + if (breakend != null) { + sv = new StructuralVariation(); + sv.setBreakend(breakend); + } + } else { + sv.setBreakend(breakend); + } return this; } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java index f097d1e1a..e59ad530f 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java @@ -230,7 +230,7 @@ protected Variant newVariant(int position, String ref, String altsCsv) { return newVariant(position, position, ref, Arrays.asList(altsCsv.split(",")), "2"); } - protected Variant newVariant(int start, int end, String ref, String altsCsv) { + protected Variant newVariant(int start, Integer end, String ref, String altsCsv) { return newVariant(start, end, ref, Arrays.asList(altsCsv.split(",")), "2"); } @@ -238,12 +238,16 @@ protected Variant newVariant(int position, String ref, List altsList, St return newVariant(position, position, ref, altsList, studyId); } - protected Variant newVariant(int start, int end, String ref, List altsList, String studyId) { + protected Variant newVariant(int start, Integer end, String ref, List altsList, String studyId) { return newVariantBuilder(start, end, ref, altsList, studyId).build(); } - protected VariantBuilder newVariantBuilder(int position, int end, String ref, List altsList, String studyId) { - return Variant.newBuilder("1", position, end, ref, String.join(",", altsList)) + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, List altsList, String studyId) { + return newVariantBuilder(position, end, ref, String.join(",", altsList), studyId); + } + + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, String alts, String studyId) { + return Variant.newBuilder("1", position, end, ref, alts) .setStudyId(studyId) .setSampleDataKeys("GT") .setSamples(new ArrayList<>()) diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index a4a62f061..a1faf4869 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -9,6 +9,7 @@ import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; import java.util.*; +import java.util.function.Consumer; import java.util.stream.Collectors; import static org.junit.Assert.*; @@ -582,9 +583,7 @@ public void testMultiSNP() throws NonStandardCompliantSampleField { public void testNormalizeMultiAllelicPL() throws NonStandardCompliantSampleField { Variant variant = generateVariantWithFormat("X:100:A:T", "GT:GL", "S01", "0/0", "1,2,3", "S02", "0", "1,2"); - List normalize1 = normalizer.normalize(Collections.singletonList(variant), false); - assertEquals("1,2,3", normalize1.get(0).getStudies().get(0).getSampleData("S01", "GL")); - assertEquals("1,2", normalize1.get(0).getStudies().get(0).getSampleData("S02", "GL")); + normalizeUnmodified(variant); Variant variant2 = generateVariantWithFormat("X:100:A:T,C", "GT:GL", "S01", "0/0", "1,2,3,4,5,6", "S02", "A", "1,2,3"); List normalize2 = normalizer.normalize(Collections.singletonList(variant2), false); @@ -614,14 +613,164 @@ public void testCNVsNormalization() throws Exception { .addSample("HG00096", "0|0") .build(); - List normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, - StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariantList.get(0).getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariantList.get(0).getAlternate()); - assertEquals("1:86<100<150-150<200<211:C:", normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, + StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumber() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumberNoCipos() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(null, null, null, null, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationUnmodified() throws Exception { + Variant variant = newVariantBuilder(101, 200, "-", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeUnmodified(variant); + } + + @Test + public void testINSsNormalizationWithCIEND() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addFileData("LEFT_SVINSSEQ", "AAAA") + .addFileData("RIGHT_SVINSSEQ", "CCCC") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, "AAAA", "CCCC", null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-50<100<111:C:AAAA...CCCC", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150:-:AAAA...CCCC", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testNormalizeNonSymbolicInsertion() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList("CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + + @Test + public void testNormalizeNonSymbolicDeletion() throws Exception { + Variant variant = newVariantBuilder(100, null, "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "C", "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-1,1") + .addSample("HG00096", "0|1") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 179, 181, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + + @Test + public void testDUPTANDEMNormalization() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, StructuralVariantType.TANDEM_DUPLICATION, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150-150<200<211:-:", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + public void normalizeUnmodified(Variant variant) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + Variant normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + } + + public void normalizeOne(Variant variant, Consumer consumer) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + consumer.accept(list.get(0)); + + int hashCode2 = variant.hashCode(); + + // Check that the original variant has not been modified, and check again, but reusing the input variant + assertEquals("Ensure input variant is not modified", hashCode, hashCode2); + + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + assertSame(variant, list.get(0)); + consumer.accept(variant); + consumer.accept(list.get(0)); + + int hashCode3 = variant.hashCode(); + assertNotEquals(hashCode3, hashCode); } @Test @@ -670,23 +819,40 @@ public void testVNCNormalizationMultiallelic() throws NonStandardCompliantSample @Test public void testCNVsNormalizationCopyNumber() throws NonStandardCompliantSampleField { Variant variant; - List normalizedVariantList; variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") .setSampleDataKeys("GT", "CN") .addSample("HG00096", "0|1","3") .build(); - normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - Variant normalizedVariant = normalizedVariantList.get(0); - assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, - StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariant.getAlternate()); - assertEquals(101, normalizedVariant.getStart().intValue()); - assertEquals("", normalizedVariant.getReference()); - assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + @Test + public void testCNVsNormalizationCopyNumberWithCipos() throws NonStandardCompliantSampleField { + Variant variant; + variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") + .addFileData("CIPOS", "-10,50") + .setSampleDataKeys("GT", "CN") + .addSample("HG00096", "0|1","3") + .build(); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(90, 150, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:90<100<150-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -725,38 +891,33 @@ public void testNormalizeSV() throws NonStandardCompliantSampleField { @Test public void testNormalizeDEL() throws NonStandardCompliantSampleField { - Variant variant = newVariant(100, 200, "N", Collections.singletonList(""), STUDY_ID); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - assertEquals(1, normalized.size()); - assertEquals(101, normalized.get(0).getStart().intValue()); - assertEquals(200, normalized.get(0).getEnd().intValue()); - assertEquals(new StructuralVariation(), normalized.get(0).getSv()); - System.out.println(normalized.get(0).toJson()); + normalizeOne(variant, normalized -> { + assertEquals(101, normalized.getStart().intValue()); + assertEquals(200, normalized.getEnd().intValue()); + assertEquals(new StructuralVariation(), normalized.getSv()); +// System.out.println(normalized.toJson()); + }); } @Test public void testNormalizeINS() throws NonStandardCompliantSampleField { - String seq = "ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG"; Variant variant = newVariantBuilder(100, 100, "N", Collections.singletonList(""), STUDY_ID) .addFileData("SVINSSEQ", seq) .build(); - List list = new VariantNormalizer().normalize(Collections.singletonList(variant), false); - - assertEquals(1, list.size()); - Variant normalized = list.get(0); - assertEquals(101, normalized.getStart().intValue()); - assertEquals(100, normalized.getEnd().intValue()); - assertEquals(seq.length(), normalized.getLength().intValue()); - assertEquals(seq.length(), normalized.getLengthAlternate().intValue()); - assertEquals(0, normalized.getLengthReference().intValue()); - assertEquals("", normalized.getReference()); - assertEquals(seq, normalized.getAlternate()); - assertEquals(new StructuralVariation(), normalized.getSv()); - assertEquals("1:100-100:N:", normalized.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalized.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals(100, normalizedVariant.getEnd().intValue()); + assertEquals(seq.length(), normalizedVariant.getLength().intValue()); + assertEquals(seq.length(), normalizedVariant.getLengthAlternate().intValue()); + assertEquals(0, normalizedVariant.getLengthReference().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals(seq, normalizedVariant.getAlternate()); + assertEquals(new StructuralVariation(), normalizedVariant.getSv()); + assertEquals("1:100-100:N:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -768,66 +929,60 @@ public void testNormalizeSvToIndel() throws NonStandardCompliantSampleField { assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); assertNotNull(variant.getSv()); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(VariantType.INDEL, normVar.getType()); - assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); - assertNull(normVar.getSv()); - - // Check that the original variant has not been modified, and check again, but reusing the input variant - assertEquals(VariantType.INSERTION, variant.getType()); - assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); - assertNotNull(variant.getSv()); - Variant normVarReuse = new VariantNormalizer().normalize(Collections.singletonList(variant), true).get(0); - assertEquals(VariantType.INDEL, normVarReuse.getType()); - assertEquals(Variant.SV_THRESHOLD, normVarReuse.getLengthAlternate().intValue()); - assertNull(normVarReuse.getSv()); - + normalizeOne(variant, normVar -> { + assertEquals(VariantType.INDEL, normVar.getType()); + assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); + assertNull(normVar.getSv()); + }); } @Test public void testNormalizeWithInsSeq() throws NonStandardCompliantSampleField { Variant variant = new Variant("1:799984<800001<800022:-:ACCACACCCACACAACACACA...TGTGGTGTGTGTGGTGTG"); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(variant, normVar); - assertEquals(variant.toString(), normVar.toString()); + normalizeUnmodified(variant); } @Test public void testNormalizeBND() throws NonStandardCompliantSampleField { - normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, 99, "A", "A[chr9:10[")); - normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, 99, "A", "[chr22:10[A")); - normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, 99, "A", "A]chr9:10]")); - normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, 99, "A", "]chr22:10]A")); - normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, 99, "A", "]chr22:10]NNNA")); + normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, null, "A", "A[chr9:10[")); + normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, null, "A", "[chr22:10[A")); + normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, null, "A", "A]chr9:10]")); + normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, null, "A", "]chr22:10]A")); + normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, null, "A", "]chr22:10]NNNA")); + + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "A", "[1:10[TA")); + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "AC", "[1:10[TAC")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "A", "[1:10[TA")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "AC", "[1:10[TAC")); + normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, null, "TAC", "[1:10[AC")); + normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, null, "TAC", "TA[1:10[")); - normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, 99, "TAC", "[1:10[AC")); - normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, 99, "TAC", "TA[1:10[")); + normalizeBnd(newVariantBuilder(101, 100, "", ".[9:10[", "s1").setCiStart(95, 105).build(), + newVariantBuilder(100, null, "A", "A[chr9:10[", "s1").setCiStart(95,105).setCiEnd(95,105).build()); } private void normalizeBnd(Variant expectedVariant, Variant variant) throws NonStandardCompliantSampleField { - System.out.println("---"); +// System.out.println("---"); boolean expectsNormalization = !expectedVariant.equals(variant); - System.out.println(" - Actual"); - System.out.println(" " + variant.toString()); - System.out.println(" " + variant.toJson()); - System.out.println(" - Expected"); - System.out.println(" " + expectedVariant.toString()); - System.out.println(" " + expectedVariant.toJson()); - System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - for (Variant v : normalized) { - System.out.println(" " + v.toString()); - System.out.println(" " + v.toJson()); - if (expectsNormalization) { - assertNotNull(v.getStudies().get(0).getFiles().get(0).getCall()); - v.getStudies().get(0).getFiles().get(0).setCall(null); - } - assertEquals(expectedVariant, v); +// System.out.println(" - Actual"); +// System.out.println(" " + variant.toString()); +// System.out.println(" " + variant.toJson()); +// System.out.println(" - Expected"); +// System.out.println(" " + expectedVariant.toString()); +// System.out.println(" " + expectedVariant.toJson()); +// System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); + if (expectsNormalization) { + normalizeOne(variant, normVar -> { + System.out.println(" " + normVar.toString()); + System.out.println(" " + normVar.toJson()); + OriginalCall call = normVar.getStudies().get(0).getFiles().get(0).getCall(); + assertNotNull(call); + normVar.getStudies().get(0).getFiles().get(0).setCall(null); + assertEquals(expectedVariant, normVar); + normVar.getStudies().get(0).getFiles().get(0).setCall(call); + }); + } else { + normalizeUnmodified(variant); } } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java index 46ab5800e..07533ab5b 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java @@ -498,7 +498,8 @@ public void testMergeIndelCase1() throws NonStandardCompliantSampleField { Variant v1 = VariantTestUtils.generateVariantWithFormat("1:328:CTT:C", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, "S1", "1/2","PASS"); - v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null,null,331,"CTT", "CTTTC", VariantType.INDEL)); + + v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null, null, 330, "CTT", "CTTTC", VariantType.INDEL)); Variant v2 = VariantTestUtils.generateVariantWithFormat("1:331:T:TCT", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, diff --git a/pom.xml b/pom.xml index c1c07dc3d..cd18672db 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.2.1 pom Biodata @@ -38,7 +38,7 @@ - 5.2.0 + 5.2.1 2.14.3 4.4 @@ -199,6 +199,12 @@ com.databricks SnpEff ${SnpEff.version} + + + distlib + distlib + + com.google.guava