diff --git a/tsinfer/formats.py b/tsinfer/formats.py index ee2125a9..0ab97ed5 100644 --- a/tsinfer/formats.py +++ b/tsinfer/formats.py @@ -2405,11 +2405,16 @@ def sites_ancestral_allele(self): except IndexError: unknown_alleles[allele] += 1 ret[i] = allele_index - if sum(unknown_alleles.values()) > 0: + tot = unknown_alleles.total() + if tot > 0: warnings.warn( - "The following alleles were not found in the variant_allele array " - "and will be treated as unknown:\n" - f"{unknown_alleles}" + "Ancestral alleles were not found in the variant_allele " + f"array for {tot} sites ({tot/len(string_allele)*100:.2f}%). " + "They are listed below, and will be treated as unknown:\n " + + "\n ".join( + f"'{k}': {v} ({v/len(string_allele)*100:.2f}% of sites)" + for k, v in unknown_alleles.items() + ) ) return ret